Commit 155ea3ab authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added feature to identify tiles fully contained in a data unit

parent e4ca37b6
Pipeline #26665 passed with stage
in 1 minute and 58 seconds
......@@ -17,11 +17,13 @@
# along with this program. If not, see http://www.gnu.org/licenses/.
import logging
import numpy
import pandas
import geopandas
import pyproj
import mercantile
import shapely
from copy import deepcopy
logger = logging.getLogger()
......@@ -48,9 +50,12 @@ class DataUnitTilesHelper:
Quadkey of the associated zoom level 18 quadtile.
geometry (Shapely polygons):
Geometry of the data-unit tile.
contained (bool):
True if the zoom level 18 quadtile is fully contained in 'in_geometry',
False otherwise (i.e. the data-unit tile lies along the boundary)
"""
# Retrieve all quadtiles associated with in_geometry
# Retrieve all quadtiles associated with the bounding box of 'in_geometry'
quadtiles_bbox = DataUnitTilesHelper.get_quadtiles_in_bbox_of_polygon(in_geometry)
# Create a GeoPandas GeoDataFrame with the original geometry
......@@ -62,6 +67,26 @@ class DataUnitTilesHelper:
quadtiles_bbox, in_geometry_df, how="intersection", keep_geom_type=True
)
# Keep the quadtiles from the bounding box that are intersected by 'in_geometry'
filtered_quadtiles = quadtiles_bbox.loc[data_unit_tiles["quadkey"].values, :]
del quadtiles_bbox
# Check that the order of the quadkeys in 'filtered_quadtiles' and 'data_unit_tiles' is
# the same (they should be by definition, but this is critical for the function to work
# well, thus the need to check)
if not numpy.all(
filtered_quadtiles["quadkey"].values == data_unit_tiles["quadkey"].values
):
raise RuntimeError(
"define_data_unit_tiles cannot run: the order of the quadkeys in "
"'filtered_quadtiles' and 'data_unit_tiles' is not the same"
)
# Determine which tiles are fully contained in the original geometry 'in_geometry'
data_unit_tiles["contained"] = DataUnitTilesHelper.determine_if_areas_are_the_same(
data_unit_tiles, filtered_quadtiles
)
return data_unit_tiles
@staticmethod
......@@ -142,8 +167,8 @@ class DataUnitTilesHelper:
}
)
aux_grid = aux_grid.drop_duplicates(ignore_index=True) # Remove potential duplicates
aux_grid = aux_grid.reset_index()
grid = DataUnitTilesHelper.create_geometry_of_quadtiles(aux_grid)
grid = grid.set_index("quadkey", drop=False)
return grid
......@@ -203,3 +228,41 @@ class DataUnitTilesHelper:
return False
else:
return True
@staticmethod
def determine_if_areas_are_the_same(data_unit_tiles, tiles):
"""This function compares the areas of the geometries of 'data_unit_tiles' and 'tiles'
and determines whether they are the same value or not. Each element of .data_unit_tiles'
needs to be equal to or smaller than its corresponding element in 'tiles'. Both
GeoDataFrames neeed to have the same length and be defined in the same CRS.
The areas are determined using the built-in function of GeoPandas GeoDataFrames. Their
actual value is of no consequence (i.e. the CRS can be ESPG:4326) because it is the
ratio that is relevant.
Args:
data_unit_tiles, tiles (GeoPandas GeoDataFrames):
GeoDataFrames with at least a "geometry" field, both with the same CRS (any).
Returns:
are_the_same (array of bool):
Array with the same length as 'data_unit_tiles' and 'tiles', indicating whether
the areas are the same (True) or not (False).
"""
# Create deep copies and re-project to avoid GeoPandas warning "Geometry is in a
# geographic CRS". The actual values of the areas do not matter.
geom_data_unit_tiles = deepcopy(data_unit_tiles["geometry"])
geom_data_unit_tiles = geom_data_unit_tiles.to_crs(pyproj.CRS("epsg:3857"))
geom_tiles = deepcopy(tiles["geometry"])
geom_tiles = geom_tiles.to_crs(pyproj.CRS("epsg:3857"))
area_data_unit_tiles = geom_data_unit_tiles.area
area_tiles = geom_tiles.area
area_ratio = area_data_unit_tiles.values / area_tiles.values
are_the_same = numpy.array([False for i in range(data_unit_tiles.shape[0])])
are_the_same[numpy.where(area_ratio > 0.99999)[0]] = True
return are_the_same
ISO-8859-1
\ No newline at end of file
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
\ No newline at end of file
ISO-8859-1
\ No newline at end of file
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
\ No newline at end of file
quadkey,type,lon_w,lat_s,lon_e,lat_n
120222331000133202,MultiPolygon,4.9994,42.0026,5.000153,42.0032
120222331000133203,MultiPolygon,5.000153,42.0026,5.001526,42.0032
120222331000133212,Polygon,5.001526,42.002366,5.002899,42.0032
120222331000133213,Polygon,5.002899,42.002366,5.0037,42.0032
120222331000133220,Polygon,4.9994,42.0015,5.000153,42.0018
120222331000133221,Polygon,5.000153,42.0015,5.001526,42.0018
120222331000133230,Polygon,5.001526,42.0015,5.002899,42.002366
120222331000133231,Polygon,5.002899,42.0015,5.0037,42.002366
quadkey,type,lon_w,lat_s,lon_e,lat_n,contained
120222331000133202,MultiPolygon,4.9994,42.0026,5.000153,42.0032,False
120222331000133203,MultiPolygon,5.000153,42.0026,5.001526,42.0032,False
120222331000133212,Polygon,5.001526,42.002366,5.002899,42.0032,False
120222331000133213,Polygon,5.002899,42.002366,5.0037,42.0032,False
120222331000133220,Polygon,4.9994,42.0015,5.000153,42.0018,False
120222331000133221,Polygon,5.000153,42.0015,5.001526,42.0018,False
120222331000133230,Polygon,5.001526,42.0015,5.002899,42.002366,False
120222331000133231,Polygon,5.002899,42.0015,5.0037,42.002366,False
......@@ -18,8 +18,8 @@
import os
import logging
import geopandas
import numpy
import geopandas
import pandas
import shapely
import pytest
......@@ -215,7 +215,7 @@ def test_define_data_unit_tiles():
"data",
"expected_results_test_define_data_unit_tiles.csv",
),
dtype={"quadkey": str},
dtype={"quadkey": str, "contained": bool},
)
returned_data_unit_tiles = DataUnitTilesHelper.define_data_unit_tiles(in_polygon)
......@@ -235,3 +235,32 @@ def test_define_data_unit_tiles():
assert round(geom.bounds[3], 6) == round(expected_results["lat_n"].values[which[0]], 6)
assert geom.__class__.__name__ == expected_results["type"].values[which[0]]
assert (
returned_data_unit_tiles["contained"].values[i]
== expected_results["contained"].values[which[0]]
)
def test_determine_if_areas_are_the_same():
# Read quadtiles from file
bbox_tiles = geopandas.read_file(
os.path.join(os.path.dirname(__file__), "data", "bbox_tiles_case_1.shp")
)
# Read data-unit tiles from file
data_unit_tiles = geopandas.read_file(
os.path.join(os.path.dirname(__file__), "data", "data_unit_tiles_case_1.shp")
)
assert numpy.all(data_unit_tiles["quadkey"].values == bbox_tiles["quadkey"].values)
expected_result = numpy.array(
[False, False, False, False, True, False, False, True, False, False, False, False]
)
returned_result = DataUnitTilesHelper.determine_if_areas_are_the_same(
data_unit_tiles, bbox_tiles
)
assert numpy.all(expected_result == returned_result)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment