Commit 155ea3ab authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added feature to identify tiles fully contained in a data unit

parent e4ca37b6
Pipeline #26665 passed with stage
in 1 minute and 58 seconds
......@@ -17,11 +17,13 @@
# along with this program. If not, see
import logging
import numpy
import pandas
import geopandas
import pyproj
import mercantile
import shapely
from copy import deepcopy
logger = logging.getLogger()
......@@ -48,9 +50,12 @@ class DataUnitTilesHelper:
Quadkey of the associated zoom level 18 quadtile.
geometry (Shapely polygons):
Geometry of the data-unit tile.
contained (bool):
True if the zoom level 18 quadtile is fully contained in 'in_geometry',
False otherwise (i.e. the data-unit tile lies along the boundary)
# Retrieve all quadtiles associated with in_geometry
# Retrieve all quadtiles associated with the bounding box of 'in_geometry'
quadtiles_bbox = DataUnitTilesHelper.get_quadtiles_in_bbox_of_polygon(in_geometry)
# Create a GeoPandas GeoDataFrame with the original geometry
......@@ -62,6 +67,26 @@ class DataUnitTilesHelper:
quadtiles_bbox, in_geometry_df, how="intersection", keep_geom_type=True
# Keep the quadtiles from the bounding box that are intersected by 'in_geometry'
filtered_quadtiles = quadtiles_bbox.loc[data_unit_tiles["quadkey"].values, :]
del quadtiles_bbox
# Check that the order of the quadkeys in 'filtered_quadtiles' and 'data_unit_tiles' is
# the same (they should be by definition, but this is critical for the function to work
# well, thus the need to check)
if not numpy.all(
filtered_quadtiles["quadkey"].values == data_unit_tiles["quadkey"].values
raise RuntimeError(
"define_data_unit_tiles cannot run: the order of the quadkeys in "
"'filtered_quadtiles' and 'data_unit_tiles' is not the same"
# Determine which tiles are fully contained in the original geometry 'in_geometry'
data_unit_tiles["contained"] = DataUnitTilesHelper.determine_if_areas_are_the_same(
data_unit_tiles, filtered_quadtiles
return data_unit_tiles
......@@ -142,8 +167,8 @@ class DataUnitTilesHelper:
aux_grid = aux_grid.drop_duplicates(ignore_index=True) # Remove potential duplicates
aux_grid = aux_grid.reset_index()
grid = DataUnitTilesHelper.create_geometry_of_quadtiles(aux_grid)
grid = grid.set_index("quadkey", drop=False)
return grid
......@@ -203,3 +228,41 @@ class DataUnitTilesHelper:
return False
return True
def determine_if_areas_are_the_same(data_unit_tiles, tiles):
"""This function compares the areas of the geometries of 'data_unit_tiles' and 'tiles'
and determines whether they are the same value or not. Each element of .data_unit_tiles'
needs to be equal to or smaller than its corresponding element in 'tiles'. Both
GeoDataFrames neeed to have the same length and be defined in the same CRS.
The areas are determined using the built-in function of GeoPandas GeoDataFrames. Their
actual value is of no consequence (i.e. the CRS can be ESPG:4326) because it is the
ratio that is relevant.
data_unit_tiles, tiles (GeoPandas GeoDataFrames):
GeoDataFrames with at least a "geometry" field, both with the same CRS (any).
are_the_same (array of bool):
Array with the same length as 'data_unit_tiles' and 'tiles', indicating whether
the areas are the same (True) or not (False).
# Create deep copies and re-project to avoid GeoPandas warning "Geometry is in a
# geographic CRS". The actual values of the areas do not matter.
geom_data_unit_tiles = deepcopy(data_unit_tiles["geometry"])
geom_data_unit_tiles = geom_data_unit_tiles.to_crs(pyproj.CRS("epsg:3857"))
geom_tiles = deepcopy(tiles["geometry"])
geom_tiles = geom_tiles.to_crs(pyproj.CRS("epsg:3857"))
area_data_unit_tiles = geom_data_unit_tiles.area
area_tiles = geom_tiles.area
area_ratio = area_data_unit_tiles.values / area_tiles.values
are_the_same = numpy.array([False for i in range(data_unit_tiles.shape[0])])
are_the_same[numpy.where(area_ratio > 0.99999)[0]] = True
return are_the_same
\ No newline at end of file
\ No newline at end of file
\ No newline at end of file
\ No newline at end of file
......@@ -18,8 +18,8 @@
import os
import logging
import geopandas
import numpy
import geopandas
import pandas
import shapely
import pytest
......@@ -215,7 +215,7 @@ def test_define_data_unit_tiles():
dtype={"quadkey": str},
dtype={"quadkey": str, "contained": bool},
returned_data_unit_tiles = DataUnitTilesHelper.define_data_unit_tiles(in_polygon)
......@@ -235,3 +235,32 @@ def test_define_data_unit_tiles():
assert round(geom.bounds[3], 6) == round(expected_results["lat_n"].values[which[0]], 6)
assert geom.__class__.__name__ == expected_results["type"].values[which[0]]
assert (
== expected_results["contained"].values[which[0]]
def test_determine_if_areas_are_the_same():
# Read quadtiles from file
bbox_tiles = geopandas.read_file(
os.path.join(os.path.dirname(__file__), "data", "bbox_tiles_case_1.shp")
# Read data-unit tiles from file
data_unit_tiles = geopandas.read_file(
os.path.join(os.path.dirname(__file__), "data", "data_unit_tiles_case_1.shp")
assert numpy.all(data_unit_tiles["quadkey"].values == bbox_tiles["quadkey"].values)
expected_result = numpy.array(
[False, False, False, False, True, False, False, True, False, False, False, False]
returned_result = DataUnitTilesHelper.determine_if_areas_are_the_same(
data_unit_tiles, bbox_tiles
assert numpy.all(expected_result == returned_result)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment