Commit 6464af0f authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added feature to calculate areas and ratios of data-unit tiles

parent 155ea3ab
Pipeline #28749 failed with stage
in 5 minutes and 23 seconds
......@@ -20,6 +20,7 @@ import logging
from multiprocessing import Pool
from import DataUnitTilesHelper
logger = logging.getLogger()
......@@ -113,7 +114,7 @@ class ExposureEntity:
p = Pool(processes=number_cores)
all_data_unit_tiles =
DataUnitTilesHelper.define_data_unit_tiles, data_units_geoms
DataUnitTilesHelper.define_data_unit_tiles_and_attributes, data_units_geoms
......@@ -32,7 +32,7 @@ class DataUnitTilesHelper:
"""This class contains methods used to define data-unit tiles and their attributes."""
def define_data_unit_tiles(in_geometry):
def get_data_unit_tiles(in_geometry):
"""This function defines the data-unit tiles associated with 'in_geometry'. Data-unit
tiles are defined as the intersection between zoom level 18 quadtiles and 'in_geometry'.
......@@ -50,9 +50,18 @@ class DataUnitTilesHelper:
Quadkey of the associated zoom level 18 quadtile.
geometry (Shapely polygons):
Geometry of the data-unit tile.
contained (bool):
True if the zoom level 18 quadtile is fully contained in 'in_geometry',
False otherwise (i.e. the data-unit tile lies along the boundary)
contained (bool):
True if the zoom level 18 quadtile is fully contained in 'in_geometry',
False otherwise (i.e. the data-unit tile lies along the boundary)
filtered_quadtiles (GeoPandas GeoDataFrame):
GeoDataFrame with the zoom level 18 quadtiles that fully contain 'in_geometry',
with the following columns:
quadkey (str):
Quadkey of the level 18 quadtile.
lon_w, lat_s, lon_e, lat_n (float)
Coordinates of the corners of the quadtile.
geometry (Shapely Polygon):
Geometry of the quadtile.
# Retrieve all quadtiles associated with the bounding box of 'in_geometry'
......@@ -66,6 +75,7 @@ class DataUnitTilesHelper:
data_unit_tiles = geopandas.overlay(
quadtiles_bbox, in_geometry_df, how="intersection", keep_geom_type=True
data_unit_tiles = data_unit_tiles.set_index("quadkey", drop=False)
# Keep the quadtiles from the bounding box that are intersected by 'in_geometry'
filtered_quadtiles = quadtiles_bbox.loc[data_unit_tiles["quadkey"].values, :]
......@@ -78,7 +88,7 @@ class DataUnitTilesHelper:
filtered_quadtiles["quadkey"].values == data_unit_tiles["quadkey"].values
raise RuntimeError(
"define_data_unit_tiles cannot run: the order of the quadkeys in "
"get_data_unit_tiles cannot run: the order of the quadkeys in "
"'filtered_quadtiles' and 'data_unit_tiles' is not the same"
......@@ -87,6 +97,93 @@ class DataUnitTilesHelper:
data_unit_tiles, filtered_quadtiles
return data_unit_tiles, filtered_quadtiles
def define_data_unit_tiles_and_attributes(in_geometry):
"""This function defines the data-unit tiles associated with 'in_geometry' and their
respective attributes. Data-unit tiles are defined as the intersection between zoom
level 18 quadtiles and 'in_geometry'.
in_geometry (Shapely Polygon or MultiPolygon):
Geometry for which the associated data-unit tiles will be defined.
data_unit_tiles (GeoPandas GeoDataFrame without geometry):
GeoDataFrame containing the definition of the data-unit tiles in terms of:
quadkey (str):
Quadkey of the associated zoom level 18 quadtile.
size_data_unit_tile_area (float):
Surface area of the data-unit tile, in square metres.
size_data_unit_tile_built_up_area (float):
Built-up area of the data-unit tile, in square metres.
fraction_data_unit_area (float):
Fraction (0.0, 1.0] that the surface area of the data-unit tile
represents with respect to the surface area of 'in_geometry' (i.e. the
surface area of the summation of all defined data-unit tiles). It cannot
be zero.
fraction_data_unit_built_up_area (float):
Fraction [0.0, 1.0] that the built-up area of the data-unit tile
represents with respect to the built-up area contained by 'in_geometry'
(i.e. the summation of built-up areas of all defined data-unit tiles).
It can be zero.
# Get data-unit tiles geometries, quadkeys and full associated quadtiles
data_unit_tiles, filtered_quadtiles = DataUnitTilesHelper.get_data_unit_tiles(
# Calculate data-unit tile areas and fractions of tiles (initialise)
all_areas = numpy.zeros([data_unit_tiles.shape[0]])
all_fractions = numpy.zeros([data_unit_tiles.shape[0]])
# Tiles that are fully contained in the original geometry 'in_geometry'
all_areas[data_unit_tiles.contained] = DataUnitTilesHelper.get_area_on_sphere(
all_fractions[data_unit_tiles.contained] = numpy.ones(
# Tiles that are partially contained in the original geometry 'in_geometry'
results = DataUnitTilesHelper.get_areas_and_ratios_arbitrary_shapes(
) = results
# Write values to 'data_unit_tiles'
data_unit_tiles["size_data_unit_tile_area"] = all_areas
data_unit_tiles["size_data_unit_tile_built_up_area"] = (
* all_fractions
data_unit_tiles["fraction_data_unit_area"] = all_areas / all_areas.sum()
if data_unit_tiles["size_data_unit_tile_built_up_area"].values.sum() > 1e-15:
data_unit_tiles["fraction_data_unit_built_up_area"] = (
/ data_unit_tiles["size_data_unit_tile_built_up_area"].values.sum()
data_unit_tiles["fraction_data_unit_built_up_area"] = numpy.nan * numpy.ones_like(
# Writing of NaNs to the database and posterior decision-making handled separately
# Discard unnecessary columns of 'data_unit_tiles' (free up memory)
data_unit_tiles = data_unit_tiles.drop(
columns=["geometry", "contained", "lon_w", "lat_s", "lon_e", "lat_n"]
return data_unit_tiles
......@@ -266,3 +363,114 @@ class DataUnitTilesHelper:
are_the_same[numpy.where(area_ratio > 0.99999)[0]] = True
return are_the_same
def get_area_on_sphere(lon_w, lat_s, lon_e, lat_n, earth_sphere_radius=6371000.0):
"""This function returns the area (m2) of a rectangle on Earth calculated assuming the
Earth is a sphere with radius 'earth_sphere_radius'. The rectangle is defined by its
four bounding coordinates 'lon_w', 'lat_s', 'lon_e' and 'lat_n', all given in degrees.
When used to calculate the area of zoom level 18 quadtiles, this approximate function
yields area values that differ in less than 1% with respect to those calculated using
the Albers equal area projection.
Kelly, K., Šavrič, B. (2021). Area and volume computation of longitude–latitude
grids and three-dimensional meshes. Transactions in GIS, 25(1):6–24.
lon_w (float or array of floats):
West bound of the tile(s), in degrees.
lat_s (float or array of floats):
South bound of the tile(s), in degrees.
lon_e (float or array of floats):
East bound of the tile(s), in degrees.
lat_n (float or array of floats):
North bound of the tile(s), in degrees.
earth_sphere_radius (float):
Radius of the Earth assuming a sphere, in meters. Default: 6,371,000 m.
area (float or array of floats):
Area of the rectangles defined by the input coordinates, in square metres.
area = (
(numpy.pi / 180.0)
* numpy.power(earth_sphere_radius, 2)
* numpy.abs(numpy.sin(numpy.deg2rad(lat_n)) - numpy.sin(numpy.deg2rad(lat_s)))
* numpy.abs(lon_e - lon_w)
return area
def get_areas_and_ratios_arbitrary_shapes(geometries_1, geometries_2):
"""This function uses get_area_albers_equal_area() to calculate the area of each polygon
of 'geometries_1' and 'geometries_2' and calculates the ratio of the former with respect
to the latter. Both 'geometries_1' and 'geometries_2' need to have the same length.
geometries_1, geometries_2 (arrays of Shapely Polygons):
Array of polygons of arbitrary shapes, defined in ESPG:4326.
areas_1, areas_2 (arrays of floats):
Areas in meters of the rectangles defined in 'geometries_1' and 'geometries_2',
ratios (array of floats):
Ratios of areas_1 to areas_2.
areas_1 = numpy.zeros([len(geometries_1)])
areas_2 = numpy.zeros([len(geometries_1)])
for i in range(len(geometries_1)):
areas_1[i] = DataUnitTilesHelper.get_area_albers_equal_area(geometries_1[i])
areas_2[i] = DataUnitTilesHelper.get_area_albers_equal_area(geometries_2[i])
ratios = areas_1 / areas_2
return areas_1, areas_2, ratios
def get_area_albers_equal_area(input_polygon):
"""This function returns the area (m2) of a polygon on Earth calculated using the Albers
equal area projection. The polygon's geometry is assumed to be defined in ESPG:4326.
in_geometry (Shapely polygon):
Polygon defined in ESPG:4326.
projected_area: Area of the polygon in m2 (in Albers equal area projection).
# Get the bounding box of the polygon
lon_w = input_polygon.bounds[0]
lat_s = input_polygon.bounds[1]
lon_e = input_polygon.bounds[2]
lat_n = input_polygon.bounds[3]
# Use the resulting coordinates to carry out the transformation
project_albers = pyproj.Proj(
"+proj=aea +lat_1={} +lat_2={} +lat_0={} +lon_0={}".format(
lat_s, lat_n, (lat_s + lat_n) / 2.0, (lon_w + lon_e) / 2.0
geometry = shapely.ops.transform(project_albers, input_polygon)
return geometry.area
def retrieve_built_up_area(quadkeys):
"""This function retrieves the built-up area associated with the input 'quadkeys' from the
OBM Tiles database.
================== TO BE IMPLEMENTED ==================
Array of floats with the built-up area values associated with the 'quadkeys'.
return numpy.ones([len(quadkeys)])
......@@ -199,7 +199,7 @@ def test_get_quadtiles_in_bbox_of_polygon():
assert "OSError" in str(excinfo.type)
def test_define_data_unit_tiles():
def test_get_data_unit_tiles():
# Read shapefile with input polygon to test
input_polygons = geopandas.read_file(
......@@ -213,12 +213,12 @@ def test_define_data_unit_tiles():
dtype={"quadkey": str, "contained": bool},
returned_data_unit_tiles = DataUnitTilesHelper.define_data_unit_tiles(in_polygon)
returned_data_unit_tiles, _ = DataUnitTilesHelper.get_data_unit_tiles(in_polygon)
assert returned_data_unit_tiles.shape[0] == expected_results.shape[0]
......@@ -242,6 +242,43 @@ def test_define_data_unit_tiles():
def test_define_data_unit_tiles_and_attributes():
# Read shapefile with input polygon to test
input_polygons = geopandas.read_file(
os.path.dirname(__file__), "data", "polygons_for_testing_data_unit_tiles.shp"
in_polygon = input_polygons[ == 10]["geometry"].values[0]
# Read expected results (manually determined from QGIS)
expected_results = pandas.read_csv(
os.path.dirname(__file__), "data", "expected_results_test_get_data_unit_tiles.csv"
dtype={"quadkey": str, "contained": bool},
returned_data_unit_tiles = DataUnitTilesHelper.define_data_unit_tiles_and_attributes(
assert returned_data_unit_tiles.shape[0] == expected_results.shape[0]
for i, quadkey in enumerate(returned_data_unit_tiles["quadkey"].values):
which = numpy.where(expected_results["quadkey"].values == quadkey)[0]
assert len(which) == 1 # quadkey found in `expected_results`
assert round(
returned_data_unit_tiles["size_data_unit_tile_area"].values[i], 1
) == round(expected_results["size_data_unit_tile_area"].values[which[0]], 1)
assert round(returned_data_unit_tiles["fraction_data_unit_area"].values[i], 6) == round(
expected_results["fraction_data_unit_area"].values[which[0]], 6
def test_determine_if_areas_are_the_same():
# Read quadtiles from file
bbox_tiles = geopandas.read_file(
......@@ -264,3 +301,52 @@ def test_determine_if_areas_are_the_same():
assert numpy.all(expected_result == returned_result)
def test_get_area_on_sphere():
# Test when input is arrays
lons_w = numpy.array([5.005, -110.009, 5.005, -110.009, -0.001, -0.001, 77.1351])
lons_e = numpy.array([5.006, -110.007, 5.006, -110.007, 0.001, 0.001, 77.1352])
lats_s = numpy.array([42.3004, 42.3004, -20.0006, -20.0006, -0.0002, 55.0002, -0.0002])
lats_n = numpy.array([42.3008, 42.3008, -20.0001, -20.0001, 0.0002, 55.0004, 0.0002])
# Expected areas calculated on a spreadsheet
expected_areas = numpy.array(
returned_areas = DataUnitTilesHelper.get_area_on_sphere(lons_w, lats_s, lons_e, lats_n)
assert numpy.all(numpy.round(expected_areas, 6) == numpy.round(returned_areas, 6))
# Test when input is floats
returned_area = DataUnitTilesHelper.get_area_on_sphere(
lons_w[0], lats_s[0], lons_e[0], lats_n[0]
assert round(returned_area, 6) == round(expected_areas[0], 6)
def test_get_area_albers_equal_area():
# Read shapefile with input polygon to test
input_polygons = geopandas.read_file(
os.path.dirname(__file__), "data", "polygons_for_testing_data_unit_tiles.shp"
in_polygon = input_polygons[ == 1]["geometry"].values[0]
returned_area = DataUnitTilesHelper.get_area_albers_equal_area(in_polygon)
# Expected area retrieved from QGIS:
expected_area = 60403.843 # square metres
assert round(returned_area, 0) == round(expected_area, 0)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment