Commit 39da52e7 authored by Daniel Scheffler's avatar Daniel Scheffler
Browse files

Implement gdal.Sieve to remove small polygons (speedup)

parent 2e3d6255
......@@ -31,7 +31,7 @@ import json
from typing import Union
from shapely.geometry import shape, Polygon, MultiPolygon, GeometryCollection
from osgeo import gdal, osr, ogr
from osgeo import gdal, osr, ogr # noqa
import numpy as np
from ...io.raster.gdal import get_GDAL_ds_inmem
......@@ -60,7 +60,7 @@ def raster2polygon(array: np.ndarray,
:param exact: whether to compute the exact footprint polygon or a simplified one for speed
:param maxfeatCount: the maximum expected number of polygons. If more polygons are found, every further
processing is cancelled and a RunTimeError is raised.
:param min_npx: minmal polygon area to be included in the result (in numbers of pixels)
:param min_npx: minmal polygon area to be included in the result (in numbers of pixels; default: 1)
:param timeout: breaks the process after a given time in seconds
:param progress: show progress bars (default: True)
:param q: quiet mode (default: False)
......@@ -79,7 +79,7 @@ def raster2polygon(array: np.ndarray,
assert array.ndim == 2, "Only 2D arrays are supported. Got a %sD array." % array.ndim
gt_orig = gt
shape_orig = array.shape
rows, cols = shape_orig = array.shape
# downsample input array in case it has more than 1e8 pixels to prevent crash
if not exact and array.size > 1e8: # 10000 x 10000 px
......@@ -91,6 +91,25 @@ def raster2polygon(array: np.ndarray,
rspAlg='near',
q=True)
# remove raster polygons smaller than min_npx
if array.size > min_npx > 1:
drv = gdal.GetDriverByName('GTiff')
path_tmp = '/vsimem/raster2polygon_gdalsieve.tif'
ds = drv.Create(path_tmp, cols, rows, 1, gdal.GDT_Int16)
band = ds.GetRasterBand(1)
band.WriteArray(array)
callback = gdal.TermProgress_nocb if array.size > 1e8 else None
gdal.SieveFilter(srcBand=band, maskBand=None, dstBand=band, threshold=min_npx,
# connectedness=4 if exact else 8,
connectedness=4,
callback=callback)
array = ds.ReadAsArray()
del ds, band
gdal.Unlink(path_tmp)
src_ds = get_GDAL_ds_inmem(array, gt, prj)
src_band = src_ds.GetRasterBand(1)
......
......@@ -46,54 +46,72 @@ tests_path = os.path.abspath(os.path.join(py_tools_ds.__file__, "..", "..", "tes
class Test_raster2polygon(TestCase):
def setUp(self) -> None:
@classmethod
def setUpClass(cls) -> None:
try:
# load a large mask containing an irregular binary shape with a complex footprint (27408x17604)
ds_l = gdal.Open(os.path.join(tests_path, 'data', 'mask_large.tif'))
self.mask_l = ds_l.ReadAsArray().astype(bool)
self.gt_l = ds_l.GetGeoTransform()
self.prj_l = ds_l.GetProjection()
cls.mask_l = ds_l.ReadAsArray().astype(bool)
cls.gt_l = ds_l.GetGeoTransform()
cls.prj_l = ds_l.GetProjection()
finally:
del ds_l
try:
# load a large mask containing 239 features
# load a large mask containing 192 features
ds_mf = gdal.Open(os.path.join(tests_path, 'data', 'mask_many_features.tif'))
self.mask_mf = ds_mf.ReadAsArray().astype(bool)
self.gt_mf = ds_mf.GetGeoTransform()
self.prj_mf = ds_mf.GetProjection()
cls.mask_mf = ds_mf.ReadAsArray().astype(bool)
cls.gt_mf = ds_mf.GetGeoTransform()
cls.prj_mf = ds_mf.GetProjection()
finally:
del ds_mf
self.testarr = np.array([[2, 0, 2, 0, 0],
[0, 2, 0, 1, 2],
[1, 2, 0, 1, 0],
[1, 2, 2, 2, 1],
[1, 1, 2, 1, 1]])
try:
# load a large mask containing 550 features including many very small ones
ds_msf = gdal.Open(os.path.join(tests_path, 'data', 'mask_many_small_features.tif'))
cls.mask_msf = ds_msf.ReadAsArray().astype(bool)
cls.gt_msf = ds_msf.GetGeoTransform()
cls.prj_msf = ds_msf.GetProjection()
finally:
del ds_msf
cls.testarr = np.array([[2, 0, 2, 0, 0],
[0, 2, 0, 1, 2],
[1, 2, 0, 1, 0],
[1, 2, 2, 2, 1],
[1, 1, 2, 1, 1]])
def test_mask_large(self):
poly = raster2polygon(self.mask_l, self.gt_l, self.prj_l, exact=False)
poly = raster2polygon(self.mask_l, self.gt_l, self.prj_l, exact=False, min_npx=1)
self.assertIsInstance(poly, MultiPolygon)
def test_mask_many_features(self):
poly = raster2polygon(self.mask_mf, self.gt_mf, self.prj_mf, exact=False)
poly = raster2polygon(self.mask_mf, self.gt_mf, self.prj_mf, exact=True, min_npx=1)
self.assertIsInstance(poly, MultiPolygon)
self.assertEqual(len(poly.geoms), 239)
def test_min_npx(self):
poly = raster2polygon(self.mask_mf, self.gt_mf, self.prj_mf, exact=False, min_npx=5)
poly = raster2polygon(self.mask_mf, self.gt_mf, self.prj_mf, exact=False, min_npx=20)
self.assertIsInstance(poly, MultiPolygon)
self.assertLess(len(poly.geoms), 239)
def test_mask_many_small_features(self):
# the issue here is that due to the many small features, the largest polygon gets >100k vertices
poly = raster2polygon(self.mask_msf, self.gt_msf, self.prj_msf, exact=False, min_npx=5)
self.assertIsInstance(poly, MultiPolygon)
self.assertLess(len(poly.geoms), 550)
def test_handle_multipart_polygons(self):
# this causes gdal.Polygonize to produce some MultiPolygons instead pf Polygons which has to be handled
# this causes gdal.Polygonize to produce some MultiPolygons instead of Polygons which has to be handled
poly = raster2polygon(~self.mask_mf.astype(bool), self.gt_mf, self.prj_mf, exact=True)
self.assertIsInstance(poly, MultiPolygon)
def test_DN2extract(self):
for DN, nexp in zip(range(3), [5, 3, 4]):
with self.subTest(DN=DN):
poly = raster2polygon(self.testarr, self.gt_mf, self.prj_mf, exact=False, DN2extract=DN)
poly = raster2polygon(self.testarr, self.gt_mf, self.prj_mf, exact=True, DN2extract=DN, min_npx=1)
self.assertIsInstance(poly, MultiPolygon)
self.assertEqual(len(poly.geoms), nexp)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment