Commit a3d9eb3f authored by Daniel Scheffler's avatar Daniel Scheffler
Browse files

Merge branch 'enhancement/speedup_raster2polygon' into 'master'

Enhancement/speedup raster2polygon

Closes #19

See merge request !35
parents 20640da2 af3e0359
Pipeline #35656 passed with stages
in 6 minutes and 4 seconds
......@@ -2,6 +2,14 @@
History
=======
0.20.1 (2021-12-03)
-------------------
* `!35`_: Added gdal.Sieve to raster2polygon and polygon buffering only runs for invalid polygons.
It is now much faster for masks that contain many small features if min_npx>1.
.. _!35: https://git.gfz-potsdam.de/danschef/py_tools_ds/-/merge_requests/33]
0.20.0 (2021-11-24)
-------------------
......@@ -10,7 +18,6 @@ History
.coveragerc.
* `!34`_: Fixed AttributeError in ProgressBar in case sys.stderr is None. Added tests for processing module.
.. _!33: https://git.gfz-potsdam.de/danschef/py_tools_ds/-/merge_requests/33]
.. _!34: https://git.gfz-potsdam.de/danschef/py_tools_ds/-/merge_requests/34]
......
......@@ -31,7 +31,7 @@ import json
from typing import Union
from shapely.geometry import shape, Polygon, MultiPolygon, GeometryCollection
from osgeo import gdal, osr, ogr
from osgeo import gdal, osr, ogr # noqa
import numpy as np
from ...io.raster.gdal import get_GDAL_ds_inmem
......@@ -60,26 +60,24 @@ def raster2polygon(array: np.ndarray,
:param exact: whether to compute the exact footprint polygon or a simplified one for speed
:param maxfeatCount: the maximum expected number of polygons. If more polygons are found, every further
processing is cancelled and a RunTimeError is raised.
:param min_npx: minmal polygon area to be included in the result (in numbers of pixels)
:param min_npx: minmal polygon area to be included in the result (in numbers of pixels; default: 1)
:param timeout: breaks the process after a given time in seconds
:param progress: show progress bars (default: True)
:param q: quiet mode (default: False)
:return:
"""
array = array.astype(int)
# TODO
if maxfeatCount is not None:
warnings.warn("'maxfeatCount' is deprecated and will be removed soon.", DeprecationWarning) # pragma: no cover
# gdal.Polygonize ignores the 0
if DN2extract == 0:
array = array == 0
DN2extract = 1
if not isinstance(array.dtype, np.integer):
array = array.astype(int)
array = (array == DN2extract).astype(np.uint8)
assert array.ndim == 2, "Only 2D arrays are supported. Got a %sD array." % array.ndim
gt_orig = gt
shape_orig = array.shape
rows, cols = shape_orig = array.shape
# downsample input array in case it has more than 1e8 pixels to prevent crash
if not exact and array.size > 1e8: # 10000 x 10000 px
......@@ -89,8 +87,28 @@ def raster2polygon(array: np.ndarray,
out_gsd=(gt[1] / zoom_factor,
gt[5] / zoom_factor),
rspAlg='near',
CPUs=None, # all CPUs
q=True)
# remove raster polygons smaller than min_npx
if array.size > min_npx > 1:
drv = gdal.GetDriverByName('GTiff')
path_tmp = '/vsimem/raster2polygon_gdalsieve.tif'
ds = drv.Create(path_tmp, cols, rows, 1, gdal.GDT_Byte)
band = ds.GetRasterBand(1)
band.WriteArray(array)
callback = gdal.TermProgress_nocb if array.size > 1e8 and progress else None
gdal.SieveFilter(srcBand=band, maskBand=None, dstBand=band, threshold=min_npx,
# connectedness=4 if exact else 8,
connectedness=4, # 4-connectedness is 30% faster
callback=callback)
array = ds.ReadAsArray()
del ds, band
gdal.Unlink(path_tmp)
src_ds = get_GDAL_ds_inmem(array, gt, prj)
src_band = src_ds.GetRasterBand(1)
......@@ -118,7 +136,8 @@ def raster2polygon(array: np.ndarray,
src_band, # .GetMaskBand(),
dst_layer,
0,
["8CONNECTED=8"] if exact else [],
[] if exact else ["8CONNECTED=8"], # 4-connectedness for exact output
# callback=gdal.TermProgress_nocb)
callback=callback)
del dst_layer, dst_ds
......@@ -147,19 +166,28 @@ def raster2polygon(array: np.ndarray,
# convert JSON string to dict
gjs = json.loads(content_str)
gc = GeometryCollection([shape(f["geometry"]).buffer(0)
for f in gjs['features'] if f['properties']['DN'] == str(DN2extract)])
def get_valid_polys(val: Union[Polygon, MultiPolygon, GeometryCollection]):
if isinstance(val, Polygon):
val = val if val.is_valid else val.buffer(0)
if isinstance(val, Polygon):
return val
for g in val.geoms:
return get_valid_polys(g)
# convert GeometryCollection into MultiPolygon
# extract polygons from GeoJSON dict
polys = []
for i in gc.geoms:
if isinstance(i, Polygon):
polys.append(i)
else:
polys.extend(list(i.geoms))
for f in gjs['features']:
if f['properties']['DN'] == str(1):
geom = shape(f["geometry"])
geom = get_valid_polys(geom)
if isinstance(geom, list):
polys.extend(geom)
else:
polys.append(geom)
# drop polygons with an area below npx
if min_npx > 1:
if min_npx:
area_1px = gt[1] * abs(gt[5])
area_min = min_npx * area_1px
......
......@@ -46,54 +46,72 @@ tests_path = os.path.abspath(os.path.join(py_tools_ds.__file__, "..", "..", "tes
class Test_raster2polygon(TestCase):
def setUp(self) -> None:
@classmethod
def setUpClass(cls) -> None:
try:
# load a large mask containing an irregular binary shape with a complex footprint (27408x17604)
ds_l = gdal.Open(os.path.join(tests_path, 'data', 'mask_large.tif'))
self.mask_l = ds_l.ReadAsArray().astype(bool)
self.gt_l = ds_l.GetGeoTransform()
self.prj_l = ds_l.GetProjection()
cls.mask_l = ds_l.ReadAsArray().astype(bool)
cls.gt_l = ds_l.GetGeoTransform()
cls.prj_l = ds_l.GetProjection()
finally:
del ds_l
try:
# load a large mask containing 239 features
# load a large mask containing 192 features
ds_mf = gdal.Open(os.path.join(tests_path, 'data', 'mask_many_features.tif'))
self.mask_mf = ds_mf.ReadAsArray().astype(bool)
self.gt_mf = ds_mf.GetGeoTransform()
self.prj_mf = ds_mf.GetProjection()
cls.mask_mf = ds_mf.ReadAsArray().astype(bool)
cls.gt_mf = ds_mf.GetGeoTransform()
cls.prj_mf = ds_mf.GetProjection()
finally:
del ds_mf
self.testarr = np.array([[2, 0, 2, 0, 0],
[0, 2, 0, 1, 2],
[1, 2, 0, 1, 0],
[1, 2, 2, 2, 1],
[1, 1, 2, 1, 1]])
try:
# load a large mask containing 550 features including many very small ones
ds_msf = gdal.Open(os.path.join(tests_path, 'data', 'mask_many_small_features.tif'))
cls.mask_msf = ds_msf.ReadAsArray().astype(bool)
cls.gt_msf = ds_msf.GetGeoTransform()
cls.prj_msf = ds_msf.GetProjection()
finally:
del ds_msf
cls.testarr = np.array([[2, 0, 2, 0, 0],
[0, 2, 0, 1, 2],
[1, 2, 0, 1, 0],
[1, 2, 2, 2, 1],
[1, 1, 2, 1, 1]])
def test_mask_large(self):
poly = raster2polygon(self.mask_l, self.gt_l, self.prj_l, exact=False)
poly = raster2polygon(self.mask_l, self.gt_l, self.prj_l, exact=False, min_npx=1)
self.assertIsInstance(poly, MultiPolygon)
def test_mask_many_features(self):
poly = raster2polygon(self.mask_mf, self.gt_mf, self.prj_mf, exact=False)
poly = raster2polygon(self.mask_mf, self.gt_mf, self.prj_mf, exact=True, min_npx=1)
self.assertIsInstance(poly, MultiPolygon)
self.assertEqual(len(poly.geoms), 239)
def test_min_npx(self):
poly = raster2polygon(self.mask_mf, self.gt_mf, self.prj_mf, exact=False, min_npx=5)
poly = raster2polygon(self.mask_mf, self.gt_mf, self.prj_mf, exact=False, min_npx=20)
self.assertIsInstance(poly, MultiPolygon)
self.assertLess(len(poly.geoms), 239)
def test_mask_many_small_features(self):
# the issue here is that due to the many small features, the largest polygon gets >100k vertices
poly = raster2polygon(self.mask_msf, self.gt_msf, self.prj_msf, exact=False, min_npx=5)
self.assertIsInstance(poly, MultiPolygon)
self.assertLess(len(poly.geoms), 550)
def test_handle_multipart_polygons(self):
# this causes gdal.Polygonize to produce some MultiPolygons instead pf Polygons which has to be handled
# this causes gdal.Polygonize to produce some MultiPolygons instead of Polygons which has to be handled
poly = raster2polygon(~self.mask_mf.astype(bool), self.gt_mf, self.prj_mf, exact=True)
self.assertIsInstance(poly, MultiPolygon)
def test_DN2extract(self):
for DN, nexp in zip(range(3), [5, 3, 4]):
with self.subTest(DN=DN):
poly = raster2polygon(self.testarr, self.gt_mf, self.prj_mf, exact=False, DN2extract=DN)
poly = raster2polygon(self.testarr, self.gt_mf, self.prj_mf, exact=True, DN2extract=DN, min_npx=1)
self.assertIsInstance(poly, MultiPolygon)
self.assertEqual(len(poly.geoms), nexp)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment