Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Daniel Scheffler
py_tools_ds
Commits
a9a33146
Commit
a9a33146
authored
Nov 11, 2021
by
Daniel Scheffler
Browse files
Merge branch 'enhancement/revise_polygonize' into 'master'
Enhancement/revise polygonize See merge request
!32
parents
b7177478
d4108888
Pipeline
#30320
passed with stages
in 4 minutes and 34 seconds
Changes
9
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
HISTORY.rst
View file @
a9a33146
...
...
@@ -2,6 +2,14 @@
History
=======
0.19.0 (2021-11-11)
-------------------
* Revised raster2polygon() and fill_holes_within_poly() to be much faster and robust in case of MultiPolygons
with many features.
* Removed 'geopandas' as requirement.
0.18.1 (2021-10-20)
-------------------
...
...
docs/installation.rst
View file @
a9a33146
...
...
@@ -39,7 +39,6 @@ to resolve the following dependencies before the pip installer is run:
* gdal
* geopandas
* numpy
* pyproj >=2.1.0
* shapely
...
...
py_tools_ds/geo/raster/conversion.py
View file @
a9a33146
...
...
@@ -26,8 +26,15 @@
__author__
=
"Daniel Scheffler"
from
shapely.wkb
import
loads
import
os
import
warnings
from
tempfile
import
TemporaryDirectory
import
json
from
typing
import
Union
from
shapely.geometry
import
shape
,
Polygon
,
MultiPolygon
,
GeometryCollection
from
osgeo
import
gdal
,
osr
,
ogr
import
numpy
as
np
from
...io.raster.gdal
import
get_GDAL_ds_inmem
from
...processing.progress_mon
import
ProgressBar
,
Timer
...
...
@@ -35,8 +42,17 @@ from ..raster.reproject import warp_ndarray
from
..vector.topology
import
get_bounds_polygon
,
polyVertices_outside_poly
,
get_overlap_polygon
def
raster2polygon
(
array
,
gt
,
prj
,
DN2extract
=
1
,
exact
=
True
,
maxfeatCount
=
None
,
timeout
=
None
,
progress
=
True
,
q
=
False
):
def
raster2polygon
(
array
:
np
.
ndarray
,
gt
:
Union
[
list
,
tuple
],
prj
:
Union
[
str
,
int
],
DN2extract
:
int
=
1
,
exact
:
bool
=
True
,
maxfeatCount
:
int
=
None
,
min_npx
:
int
=
1
,
timeout
:
float
=
None
,
progress
:
bool
=
True
,
q
:
bool
=
False
)
->
Union
[
Polygon
,
MultiPolygon
]:
"""Calculate a footprint polygon for the given array.
:param array: 2D numpy array
...
...
@@ -44,13 +60,25 @@ def raster2polygon(array, gt, prj, DN2extract=1, exact=True, maxfeatCount=None,
:param prj: projection as WKT string, 'EPSG:1234' or <EPSG_int>
:param DN2extract: <int, float> pixel value to create polygons for
:param exact: whether to compute the exact footprint polygon or a simplified one for speed
:param maxfeatCount:
<int>
the maximum expected number of polygons. If more polygons are found, every further
:param maxfeatCount: the maximum expected number of polygons. If more polygons are found, every further
processing is cancelled and a RunTimeError is raised.
:param min_npx: minmal polygon area to be included in the result (in numbers of pixels)
:param timeout: breaks the process after a given time in seconds
:param progress: show progress bars (default: True)
:param q: quiet mode (default: False)
:return:
"""
array
=
array
.
astype
(
int
)
# TODO
if
maxfeatCount
is
not
None
:
warnings
.
warn
(
"'maxfeatCount' is deprecated and will be removed soon."
,
DeprecationWarning
)
# pragma: no cover
# gdal.Polygonize ignores the 0
if
DN2extract
==
0
:
array
=
array
==
0
DN2extract
=
1
assert
array
.
ndim
==
2
,
"Only 2D arrays are supported. Got a %sD array."
%
array
.
ndim
gt_orig
=
gt
shape_orig
=
array
.
shape
...
...
@@ -68,97 +96,76 @@ def raster2polygon(array, gt, prj, DN2extract=1, exact=True, maxfeatCount=None,
src_ds
=
get_GDAL_ds_inmem
(
array
,
gt
,
prj
)
src_band
=
src_ds
.
GetRasterBand
(
1
)
# Create a memory OGR datasource to put results in.
mem_drv
=
ogr
.
GetDriverByName
(
'Memory'
)
mem_ds
=
mem_drv
.
CreateDataSource
(
'out'
)
# Create a GeoJSON OGR datasource to put results in.
srs
=
osr
.
SpatialReference
()
srs
.
ImportFromWkt
(
prj
)
mem_layer
=
mem_ds
.
CreateLayer
(
'poly'
,
srs
,
ogr
.
wkbPolygon
)
fd
=
ogr
.
FieldDefn
(
'DN'
,
ogr
.
OFTInteger
)
mem_layer
.
CreateField
(
fd
)
# set callback
callback
=
\
ProgressBar
(
prefix
=
'Polygonize progress '
,
suffix
=
'Complete'
,
barLength
=
50
,
timeout
=
timeout
,
use_as_callback
=
True
)
\
if
progress
and
not
q
else
Timer
(
timeout
,
use_as_callback
=
True
)
if
timeout
else
None
# run the algorithm
status
=
gdal
.
Polygonize
(
src_band
,
src_band
.
GetMaskBand
(),
mem_layer
,
0
,
[
"8CONNECTED=8"
]
if
exact
else
[],
callback
=
callback
)
# handle exit status other than 0 (fail)
if
status
!=
0
:
errMsg
=
gdal
.
GetLastErrorMsg
()
# Catch the KeyboardInterrupt raised in case of a timeout within the callback. It seems like there is no other
# way to catch exceptions within callbacks.
if
errMsg
==
'User terminated'
:
raise
TimeoutError
(
'raster2polygon timed out!'
)
raise
Exception
(
errMsg
)
# extract polygon
mem_layer
.
SetAttributeFilter
(
'DN = %s'
%
DN2extract
)
from
geopandas
import
GeoDataFrame
featCount
=
mem_layer
.
GetFeatureCount
()
if
not
featCount
:
raise
RuntimeError
(
'No features with DN=%s found in the input image.'
%
DN2extract
)
if
maxfeatCount
and
featCount
>
maxfeatCount
:
raise
RuntimeError
(
'Found %s features with DN=%s but maximum feature count was set to %s.'
%
(
featCount
,
DN2extract
,
maxfeatCount
))
# tmp = np.full((featCount,2), DN, geoArr.dtype)
# tmp[:,0] = range(featCount)
# GDF = GeoDataFrame(tmp, columns=['idx','DN'])
# def get_shplyPoly(GDF_row):
# if not is_timed_out(3):
# element = mem_layer.GetNextFeature()
# shplyPoly = loads(element.GetGeometryRef().ExportToWkb()).buffer(0)
# element = None
# return shplyPoly
# else:
# raise TimeoutError
# GDF['geometry'] = GDF.apply(get_shplyPoly, axis=1)
GDF
=
GeoDataFrame
(
columns
=
[
'geometry'
,
'DN'
])
GDF
.
DN
=
GDF
.
DN
.
astype
(
float
)
timer
=
Timer
(
timeout
)
for
i
in
range
(
featCount
):
if
not
timer
.
timed_out
:
element
=
mem_layer
.
GetNextFeature
()
wkb
=
bytes
(
element
.
GetGeometryRef
().
ExportToWkb
())
GDF
.
loc
[
i
]
=
[
loads
(
wkb
).
buffer
(
0
),
DN2extract
]
del
element
with
TemporaryDirectory
()
as
td
:
drv
=
ogr
.
GetDriverByName
(
"GeoJSON"
)
path_geojson
=
os
.
path
.
join
(
td
,
"polygonize_result.geojson"
)
dst_ds
=
drv
.
CreateDataSource
(
path_geojson
)
dst_layer
=
dst_ds
.
CreateLayer
(
'polygonize_result'
,
srs
=
srs
)
dst_layer
.
CreateField
(
ogr
.
FieldDefn
(
"DN"
,
4
))
# set callback
callback
=
\
ProgressBar
(
prefix
=
'Polygonize progress '
,
suffix
=
'Complete'
,
barLength
=
50
,
timeout
=
timeout
,
use_as_callback
=
True
)
\
if
progress
and
not
q
else
Timer
(
timeout
,
use_as_callback
=
True
)
if
timeout
else
None
# run the algorithm
status
=
gdal
.
Polygonize
(
src_band
,
src_band
,
# .GetMaskBand(),
dst_layer
,
0
,
[
"8CONNECTED=8"
]
if
exact
else
[],
callback
=
callback
)
del
dst_layer
,
dst_ds
# handle exit status other than 0 (fail)
if
status
!=
0
:
errMsg
=
gdal
.
GetLastErrorMsg
()
# Catch the KeyboardInterrupt raised in case of a timeout within the callback.
# It seems like there is no other way to catch exceptions within callbacks.
if
errMsg
==
'User terminated'
:
raise
TimeoutError
(
'raster2polygon timed out!'
)
raise
Exception
(
errMsg
)
with
open
(
path_geojson
)
as
f
:
gjs
=
json
.
load
(
f
)
gc
=
GeometryCollection
([
shape
(
f
[
"geometry"
]).
buffer
(
0
)
for
f
in
gjs
[
'features'
]
if
f
[
'properties'
][
'DN'
]
==
str
(
DN2extract
)])
# convert GeometryCollection into MultiPolygon
polys
=
[]
for
i
in
gc
:
if
isinstance
(
i
,
Polygon
):
polys
.
append
(
i
)
else
:
raise
TimeoutError
(
'raster2polygon timed out!'
)
polys
.
extend
(
list
(
i
)
)
GDF
=
GDF
.
dissolve
(
by
=
'DN'
)
# drop polygons with an area below npx
if
min_npx
:
area_1px
=
gt
[
1
]
*
abs
(
gt
[
5
])
area_min
=
min_npx
*
area_1px
del
mem_ds
,
mem_layer
polys
=
[
p
for
p
in
polys
if
p
.
area
>=
area_min
]
shplyPoly
=
GDF
.
loc
[
1
,
'geometry'
]
poly
=
MultiPolygon
(
polys
)
# the downsampling in case exact=False may cause vertices of
shplyP
oly to be outside the input array bounds
# -> clip
shplyP
oly with bounds_poly in that case
# the downsampling in case exact=False may cause vertices of
p
oly to be outside the input array bounds
# -> clip
p
oly with bounds_poly in that case
if
not
exact
:
bounds_poly
=
get_bounds_polygon
(
gt_orig
,
*
shape_orig
)
if
polyVertices_outside_poly
(
shplyP
oly
,
bounds_poly
,
1e-5
):
shplyP
oly
=
get_overlap_polygon
(
shplyP
oly
,
bounds_poly
)[
'overlap poly'
]
if
polyVertices_outside_poly
(
p
oly
,
bounds_poly
,
1e-5
):
p
oly
=
get_overlap_polygon
(
p
oly
,
bounds_poly
)[
'overlap poly'
]
return
shplyP
oly
return
p
oly
py_tools_ds/geo/vector/topology.py
View file @
a9a33146
...
...
@@ -29,7 +29,6 @@ import warnings
import
numpy
as
np
from
typing
import
Union
# noqa F401 # flake8 issue
from
geopandas
import
GeoDataFrame
from
shapely.geometry
import
shape
,
Polygon
,
box
from
shapely.geometry
import
MultiPolygon
# noqa F401 # flake8 issue
from
..coord_trafo
import
mapYX2imYX
...
...
@@ -169,13 +168,19 @@ def polyVertices_outside_poly(inner_poly, outer_poly, tolerance=0):
return
not
outer_poly
.
buffer
(
tolerance
).
contains
(
inner_poly
)
def
fill_holes_within_poly
(
poly
)
:
# type: (Union[Polygon, MultiPolygon]
) -> Polygon
def
fill_holes_within_poly
(
poly
:
Union
[
Polygon
,
MultiPolygon
]
)
->
Polygon
:
"""Fill the holes within a shapely Polygon or MultiPolygon and return a Polygon with only the outer boundary.
:param poly: <shapely.geometry.Polygon, shapely.geometry.MultiPolygon>, shapely.geometry.GeometryCollection>
:return:
"""
def
close_holes
(
polygon
:
Polygon
)
->
Polygon
:
if
polygon
.
interiors
:
return
Polygon
(
list
(
polygon
.
exterior
.
coords
))
else
:
return
polygon
if
poly
.
geom_type
not
in
[
'Polygon'
,
'MultiPolygon'
]:
raise
ValueError
(
"Unexpected geometry type %s."
%
poly
.
geom_type
)
...
...
@@ -184,30 +189,17 @@ def fill_holes_within_poly(poly):
filled_poly
=
Polygon
(
poly
.
exterior
)
else
:
# 'MultiPolygon'
gdf
=
GeoDataFrame
(
columns
=
[
'geometry'
])
gdf
[
'geometry'
]
=
poly
.
geoms
# get the area of each polygon of the multipolygon EXCLUDING the gaps in it
# noinspection PyTypeChecker
gdf
[
'area_filled'
]
=
gdf
.
apply
(
lambda
GDF_row
:
Polygon
(
np
.
swapaxes
(
np
.
array
(
GDF_row
.
geometry
.
exterior
.
coords
.
xy
),
0
,
1
)).
area
,
axis
=
1
)
largest_poly_filled
=
gdf
.
loc
[
gdf
[
'area_filled'
].
astype
(
np
.
float64
).
idxmax
()][
'geometry'
]
multipoly_closed
=
MultiPolygon
([
close_holes
(
p
)
for
p
in
poly
])
polys_areasorted
=
list
(
sorted
(
multipoly_closed
,
key
=
lambda
a
:
a
.
area
,
reverse
=
True
))
poly_largest
=
polys_areasorted
[
0
]
# noinspection PyTypeChecker
gdf
[
'within_or_equal'
]
=
gdf
.
apply
(
lambda
GDF_row
:
GDF_row
.
geometry
.
within
(
largest_poly_filled
.
buffer
(
1e-5
))
or
GDF_row
.
geometry
.
equals
(
largest_poly_filled
),
axis
=
1
)
polys_disjunct
=
[
p
for
p
in
polys_areasorted
[
1
:]
if
p
.
disjoint
(
poly_largest
)]
if
False
in
gdf
.
within_or_equal
.
values
:
n_disjunct_polys
=
int
(
np
.
sum
(
~
gdf
.
within_or_equal
))
if
polys_disjunct
:
warnings
.
warn
(
RuntimeWarning
(
'The given MultiPolygon contains %d disjunct polygone(s) outside of the '
'largest polygone. fill_holes_within_poly() will only return the largest '
'polygone as a filled version.'
%
n
_disjunct
_polys
))
'polygone as a filled version.'
%
len
(
polys
_disjunct
)
))
# return the outer boundary of the largest polygon
filled_poly
=
Polygon
(
np
.
swapaxes
(
np
.
array
(
largest_poly_filled
.
exterior
.
coords
.
xy
),
0
,
1
))
filled_poly
=
poly_largest
return
filled_poly
py_tools_ds/version.py
View file @
a9a33146
...
...
@@ -22,5 +22,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
__version__
=
'0.1
8.1
'
__versionalias__
=
'20211
020
_01'
__version__
=
'0.1
9.0
'
__versionalias__
=
'20211
111
_01'
setup.py
View file @
a9a33146
...
...
@@ -41,7 +41,6 @@ with open("py_tools_ds/version.py") as version_file:
requirements
=
[
'gdal>=2.1.0'
,
'geopandas'
,
'numpy'
,
'pandas'
,
'pyproj>=2.5.0'
,
...
...
tests/CI_docker/context/environment_py_tools_ds.yml
View file @
a9a33146
...
...
@@ -7,7 +7,7 @@ dependencies:
-
python=3.*.*
-
pip
# avoids that conda uses the wrong pip
-
gdal>=2.1.0
-
geopandas
-
numpy
-
pyproj>=2.5.0
-
shapely
...
...
tests/data/mask_many_features.tif
0 → 100644
LFS
View file @
a9a33146
File added
tests/test_geo/test_raster/test_conversion.py
View file @
a9a33146
...
...
@@ -36,6 +36,8 @@ import os
from
unittest
import
TestCase
from
osgeo
import
gdal
from
shapely.geometry
import
MultiPolygon
import
numpy
as
np
from
py_tools_ds.geo.raster.conversion
import
raster2polygon
import
py_tools_ds
...
...
@@ -47,13 +49,51 @@ class Test_raster2polygon(TestCase):
def
setUp
(
self
)
->
None
:
try
:
# load a large mask containing an irregular binary shape with a complex footprint (27408x17604)
ds
=
gdal
.
Open
(
os
.
path
.
join
(
tests_path
,
'data'
,
'mask_large.tif'
))
self
.
mask
=
ds
.
ReadAsArray
().
astype
(
bool
)
self
.
gt
=
ds
.
GetGeoTransform
()
self
.
prj
=
ds
.
GetProjection
()
ds
_l
=
gdal
.
Open
(
os
.
path
.
join
(
tests_path
,
'data'
,
'mask_large.tif'
))
self
.
mask
_l
=
ds
_l
.
ReadAsArray
().
astype
(
bool
)
self
.
gt
_l
=
ds
_l
.
GetGeoTransform
()
self
.
prj
_l
=
ds
_l
.
GetProjection
()
finally
:
del
ds
del
ds
_l
def
test_raster2polygon
(
self
):
raster2polygon
(
self
.
mask
,
self
.
gt
,
self
.
prj
,
exact
=
False
)
try
:
# load a large mask containing 239 features
ds_mf
=
gdal
.
Open
(
os
.
path
.
join
(
tests_path
,
'data'
,
'mask_many_features.tif'
))
self
.
mask_mf
=
ds_mf
.
ReadAsArray
().
astype
(
bool
)
self
.
gt_mf
=
ds_mf
.
GetGeoTransform
()
self
.
prj_mf
=
ds_mf
.
GetProjection
()
finally
:
del
ds_mf
self
.
testarr
=
np
.
array
([[
2
,
0
,
2
,
0
,
0
],
[
0
,
2
,
0
,
1
,
2
],
[
1
,
2
,
0
,
1
,
0
],
[
1
,
2
,
2
,
2
,
1
],
[
1
,
1
,
2
,
1
,
1
]])
def
test_mask_large
(
self
):
poly
=
raster2polygon
(
self
.
mask_l
,
self
.
gt_l
,
self
.
prj_l
,
exact
=
False
)
self
.
assertIsInstance
(
poly
,
MultiPolygon
)
def
test_mask_many_features
(
self
):
poly
=
raster2polygon
(
self
.
mask_mf
,
self
.
gt_mf
,
self
.
prj_mf
,
exact
=
False
)
self
.
assertIsInstance
(
poly
,
MultiPolygon
)
self
.
assertEqual
(
len
(
poly
),
239
)
def
test_min_npx
(
self
):
poly
=
raster2polygon
(
self
.
mask_mf
,
self
.
gt_mf
,
self
.
prj_mf
,
exact
=
False
,
min_npx
=
5
)
self
.
assertIsInstance
(
poly
,
MultiPolygon
)
self
.
assertLess
(
len
(
poly
),
239
)
def
test_handle_multipart_polygons
(
self
):
# this causes gdal.Polygonize to produce some MultiPolygons instead pf Polygons which has to be handled
poly
=
raster2polygon
(
~
self
.
mask_mf
.
astype
(
bool
),
self
.
gt_mf
,
self
.
prj_mf
,
exact
=
True
)
self
.
assertIsInstance
(
poly
,
MultiPolygon
)
def
test_DN2extract
(
self
):
for
DN
,
nexp
in
zip
(
range
(
3
),
[
5
,
3
,
4
]):
with
self
.
subTest
(
DN
=
DN
):
poly
=
raster2polygon
(
self
.
testarr
,
self
.
gt_mf
,
self
.
prj_mf
,
exact
=
False
,
DN2extract
=
DN
)
self
.
assertEqual
(
len
(
poly
),
nexp
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment