Commit 6464af0f authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added feature to calculate areas and ratios of data-unit tiles

parent 155ea3ab
Pipeline #28749 failed with stage
in 5 minutes and 23 seconds
......@@ -20,6 +20,7 @@ import logging
from multiprocessing import Pool
from gdeimporter.tools.data_unit_tiles import DataUnitTilesHelper
logger = logging.getLogger()
......@@ -113,7 +114,7 @@ class ExposureEntity:
p = Pool(processes=number_cores)
all_data_unit_tiles = p.map(
DataUnitTilesHelper.define_data_unit_tiles, data_units_geoms
DataUnitTilesHelper.define_data_unit_tiles_and_attributes, data_units_geoms
)
p.close()
p.join()
......
......@@ -32,7 +32,7 @@ class DataUnitTilesHelper:
"""This class contains methods used to define data-unit tiles and their attributes."""
@staticmethod
def define_data_unit_tiles(in_geometry):
def get_data_unit_tiles(in_geometry):
"""This function defines the data-unit tiles associated with 'in_geometry'. Data-unit
tiles are defined as the intersection between zoom level 18 quadtiles and 'in_geometry'.
......@@ -50,9 +50,18 @@ class DataUnitTilesHelper:
Quadkey of the associated zoom level 18 quadtile.
geometry (Shapely polygons):
Geometry of the data-unit tile.
contained (bool):
True if the zoom level 18 quadtile is fully contained in 'in_geometry',
False otherwise (i.e. the data-unit tile lies along the boundary)
contained (bool):
True if the zoom level 18 quadtile is fully contained in 'in_geometry',
False otherwise (i.e. the data-unit tile lies along the boundary)
filtered_quadtiles (GeoPandas GeoDataFrame):
GeoDataFrame with the zoom level 18 quadtiles that fully contain 'in_geometry',
with the following columns:
quadkey (str):
Quadkey of the level 18 quadtile.
lon_w, lat_s, lon_e, lat_n (float)
Coordinates of the corners of the quadtile.
geometry (Shapely Polygon):
Geometry of the quadtile.
"""
# Retrieve all quadtiles associated with the bounding box of 'in_geometry'
......@@ -66,6 +75,7 @@ class DataUnitTilesHelper:
data_unit_tiles = geopandas.overlay(
quadtiles_bbox, in_geometry_df, how="intersection", keep_geom_type=True
)
data_unit_tiles = data_unit_tiles.set_index("quadkey", drop=False)
# Keep the quadtiles from the bounding box that are intersected by 'in_geometry'
filtered_quadtiles = quadtiles_bbox.loc[data_unit_tiles["quadkey"].values, :]
......@@ -78,7 +88,7 @@ class DataUnitTilesHelper:
filtered_quadtiles["quadkey"].values == data_unit_tiles["quadkey"].values
):
raise RuntimeError(
"define_data_unit_tiles cannot run: the order of the quadkeys in "
"get_data_unit_tiles cannot run: the order of the quadkeys in "
"'filtered_quadtiles' and 'data_unit_tiles' is not the same"
)
......@@ -87,6 +97,93 @@ class DataUnitTilesHelper:
data_unit_tiles, filtered_quadtiles
)
return data_unit_tiles, filtered_quadtiles
@staticmethod
def define_data_unit_tiles_and_attributes(in_geometry):
"""This function defines the data-unit tiles associated with 'in_geometry' and their
respective attributes. Data-unit tiles are defined as the intersection between zoom
level 18 quadtiles and 'in_geometry'.
Args:
in_geometry (Shapely Polygon or MultiPolygon):
Geometry for which the associated data-unit tiles will be defined.
Returns:
data_unit_tiles (GeoPandas GeoDataFrame without geometry):
GeoDataFrame containing the definition of the data-unit tiles in terms of:
quadkey (str):
Quadkey of the associated zoom level 18 quadtile.
size_data_unit_tile_area (float):
Surface area of the data-unit tile, in square metres.
size_data_unit_tile_built_up_area (float):
Built-up area of the data-unit tile, in square metres.
fraction_data_unit_area (float):
Fraction (0.0, 1.0] that the surface area of the data-unit tile
represents with respect to the surface area of 'in_geometry' (i.e. the
surface area of the summation of all defined data-unit tiles). It cannot
be zero.
fraction_data_unit_built_up_area (float):
Fraction [0.0, 1.0] that the built-up area of the data-unit tile
represents with respect to the built-up area contained by 'in_geometry'
(i.e. the summation of built-up areas of all defined data-unit tiles).
It can be zero.
"""
# Get data-unit tiles geometries, quadkeys and full associated quadtiles
data_unit_tiles, filtered_quadtiles = DataUnitTilesHelper.get_data_unit_tiles(
in_geometry
)
# Calculate data-unit tile areas and fractions of tiles (initialise)
all_areas = numpy.zeros([data_unit_tiles.shape[0]])
all_fractions = numpy.zeros([data_unit_tiles.shape[0]])
# Tiles that are fully contained in the original geometry 'in_geometry'
all_areas[data_unit_tiles.contained] = DataUnitTilesHelper.get_area_on_sphere(
data_unit_tiles[data_unit_tiles.contained]["lon_w"].values,
data_unit_tiles[data_unit_tiles.contained]["lat_s"].values,
data_unit_tiles[data_unit_tiles.contained]["lon_e"].values,
data_unit_tiles[data_unit_tiles.contained]["lat_n"].values,
)
all_fractions[data_unit_tiles.contained] = numpy.ones(
[len(all_areas[data_unit_tiles.contained])]
)
# Tiles that are partially contained in the original geometry 'in_geometry'
results = DataUnitTilesHelper.get_areas_and_ratios_arbitrary_shapes(
data_unit_tiles[numpy.logical_not(data_unit_tiles.contained)]["geometry"].values,
filtered_quadtiles[numpy.logical_not(data_unit_tiles.contained)]["geometry"].values,
)
(
all_areas[numpy.logical_not(data_unit_tiles.contained)],
_,
all_fractions[numpy.logical_not(data_unit_tiles.contained)],
) = results
# Write values to 'data_unit_tiles'
data_unit_tiles["size_data_unit_tile_area"] = all_areas
data_unit_tiles["size_data_unit_tile_built_up_area"] = (
DataUnitTilesHelper.retrieve_built_up_area(data_unit_tiles["quadkey"].values)
* all_fractions
)
data_unit_tiles["fraction_data_unit_area"] = all_areas / all_areas.sum()
if data_unit_tiles["size_data_unit_tile_built_up_area"].values.sum() > 1e-15:
data_unit_tiles["fraction_data_unit_built_up_area"] = (
data_unit_tiles["size_data_unit_tile_built_up_area"].values
/ data_unit_tiles["size_data_unit_tile_built_up_area"].values.sum()
)
else:
data_unit_tiles["fraction_data_unit_built_up_area"] = numpy.nan * numpy.ones_like(
all_areas
)
# Writing of NaNs to the database and posterior decision-making handled separately
# Discard unnecessary columns of 'data_unit_tiles' (free up memory)
data_unit_tiles = data_unit_tiles.drop(
columns=["geometry", "contained", "lon_w", "lat_s", "lon_e", "lat_n"]
)
return data_unit_tiles
@staticmethod
......@@ -266,3 +363,114 @@ class DataUnitTilesHelper:
are_the_same[numpy.where(area_ratio > 0.99999)[0]] = True
return are_the_same
@staticmethod
def get_area_on_sphere(lon_w, lat_s, lon_e, lat_n, earth_sphere_radius=6371000.0):
"""This function returns the area (m2) of a rectangle on Earth calculated assuming the
Earth is a sphere with radius 'earth_sphere_radius'. The rectangle is defined by its
four bounding coordinates 'lon_w', 'lat_s', 'lon_e' and 'lat_n', all given in degrees.
When used to calculate the area of zoom level 18 quadtiles, this approximate function
yields area values that differ in less than 1% with respect to those calculated using
the Albers equal area projection.
References:
Kelly, K., Šavrič, B. (2021). Area and volume computation of longitude–latitude
grids and three-dimensional meshes. Transactions in GIS, 25(1):6–24.
Args:
lon_w (float or array of floats):
West bound of the tile(s), in degrees.
lat_s (float or array of floats):
South bound of the tile(s), in degrees.
lon_e (float or array of floats):
East bound of the tile(s), in degrees.
lat_n (float or array of floats):
North bound of the tile(s), in degrees.
earth_sphere_radius (float):
Radius of the Earth assuming a sphere, in meters. Default: 6,371,000 m.
Returns:
area (float or array of floats):
Area of the rectangles defined by the input coordinates, in square metres.
"""
area = (
(numpy.pi / 180.0)
* numpy.power(earth_sphere_radius, 2)
* numpy.abs(numpy.sin(numpy.deg2rad(lat_n)) - numpy.sin(numpy.deg2rad(lat_s)))
* numpy.abs(lon_e - lon_w)
)
return area
@staticmethod
def get_areas_and_ratios_arbitrary_shapes(geometries_1, geometries_2):
"""This function uses get_area_albers_equal_area() to calculate the area of each polygon
of 'geometries_1' and 'geometries_2' and calculates the ratio of the former with respect
to the latter. Both 'geometries_1' and 'geometries_2' need to have the same length.
Args:
geometries_1, geometries_2 (arrays of Shapely Polygons):
Array of polygons of arbitrary shapes, defined in ESPG:4326.
Returns:
areas_1, areas_2 (arrays of floats):
Areas in meters of the rectangles defined in 'geometries_1' and 'geometries_2',
respectively.
ratios (array of floats):
Ratios of areas_1 to areas_2.
"""
areas_1 = numpy.zeros([len(geometries_1)])
areas_2 = numpy.zeros([len(geometries_1)])
for i in range(len(geometries_1)):
areas_1[i] = DataUnitTilesHelper.get_area_albers_equal_area(geometries_1[i])
areas_2[i] = DataUnitTilesHelper.get_area_albers_equal_area(geometries_2[i])
ratios = areas_1 / areas_2
return areas_1, areas_2, ratios
@staticmethod
def get_area_albers_equal_area(input_polygon):
"""This function returns the area (m2) of a polygon on Earth calculated using the Albers
equal area projection. The polygon's geometry is assumed to be defined in ESPG:4326.
Args:
in_geometry (Shapely polygon):
Polygon defined in ESPG:4326.
Returns:
projected_area: Area of the polygon in m2 (in Albers equal area projection).
"""
# Get the bounding box of the polygon
lon_w = input_polygon.bounds[0]
lat_s = input_polygon.bounds[1]
lon_e = input_polygon.bounds[2]
lat_n = input_polygon.bounds[3]
# Use the resulting coordinates to carry out the transformation
project_albers = pyproj.Proj(
"+proj=aea +lat_1={} +lat_2={} +lat_0={} +lon_0={}".format(
lat_s, lat_n, (lat_s + lat_n) / 2.0, (lon_w + lon_e) / 2.0
)
)
geometry = shapely.ops.transform(project_albers, input_polygon)
return geometry.area
@staticmethod
def retrieve_built_up_area(quadkeys):
"""This function retrieves the built-up area associated with the input 'quadkeys' from the
OBM Tiles database.
================== TO BE IMPLEMENTED ==================
Returns:
Array of floats with the built-up area values associated with the 'quadkeys'.
"""
return numpy.ones([len(quadkeys)])
quadkey,type,lon_w,lat_s,lon_e,lat_n,contained
120222331000133202,MultiPolygon,4.9994,42.0026,5.000153,42.0032,False
120222331000133203,MultiPolygon,5.000153,42.0026,5.001526,42.0032,False
120222331000133212,Polygon,5.001526,42.002366,5.002899,42.0032,False
120222331000133213,Polygon,5.002899,42.002366,5.0037,42.0032,False
120222331000133220,Polygon,4.9994,42.0015,5.000153,42.0018,False
120222331000133221,Polygon,5.000153,42.0015,5.001526,42.0018,False
120222331000133230,Polygon,5.001526,42.0015,5.002899,42.002366,False
120222331000133231,Polygon,5.002899,42.0015,5.0037,42.002366,False
quadkey,type,lon_w,lat_s,lon_e,lat_n,contained,size_data_unit_tile_area,fraction_data_unit_area
120222331000133202,MultiPolygon,4.9994,42.0026,5.000153,42.0032,False,2784.421,0.068256627491064
120222331000133203,MultiPolygon,5.000153,42.0026,5.001526,42.0032,False,5154.434,0.126354556823582
120222331000133212,Polygon,5.001526,42.002366,5.002899,42.0032,False,7544.606,0.184946659039292
120222331000133213,Polygon,5.002899,42.002366,5.0037,42.0032,False,6144.446,0.150623473160473
120222331000133220,Polygon,4.9994,42.0015,5.000153,42.0018,False,2077.655,0.050931135553836
120222331000133221,Polygon,5.000153,42.0015,5.001526,42.0018,False,3791.219,0.092937031799445
120222331000133230,Polygon,5.001526,42.0015,5.002899,42.002366,False,6913.142,0.169467102240224
120222331000133231,Polygon,5.002899,42.0015,5.0037,42.002366,False,6383.493,0.156483413892085
......@@ -199,7 +199,7 @@ def test_get_quadtiles_in_bbox_of_polygon():
assert "OSError" in str(excinfo.type)
def test_define_data_unit_tiles():
def test_get_data_unit_tiles():
# Read shapefile with input polygon to test
input_polygons = geopandas.read_file(
os.path.join(
......@@ -213,12 +213,12 @@ def test_define_data_unit_tiles():
os.path.join(
os.path.dirname(__file__),
"data",
"expected_results_test_define_data_unit_tiles.csv",
"expected_results_test_get_data_unit_tiles.csv",
),
dtype={"quadkey": str, "contained": bool},
)
returned_data_unit_tiles = DataUnitTilesHelper.define_data_unit_tiles(in_polygon)
returned_data_unit_tiles, _ = DataUnitTilesHelper.get_data_unit_tiles(in_polygon)
assert returned_data_unit_tiles.shape[0] == expected_results.shape[0]
......@@ -242,6 +242,43 @@ def test_define_data_unit_tiles():
)
def test_define_data_unit_tiles_and_attributes():
# Read shapefile with input polygon to test
input_polygons = geopandas.read_file(
os.path.join(
os.path.dirname(__file__), "data", "polygons_for_testing_data_unit_tiles.shp"
)
)
in_polygon = input_polygons[input_polygons.id == 10]["geometry"].values[0]
# Read expected results (manually determined from QGIS)
expected_results = pandas.read_csv(
os.path.join(
os.path.dirname(__file__), "data", "expected_results_test_get_data_unit_tiles.csv"
),
dtype={"quadkey": str, "contained": bool},
)
returned_data_unit_tiles = DataUnitTilesHelper.define_data_unit_tiles_and_attributes(
in_polygon
)
assert returned_data_unit_tiles.shape[0] == expected_results.shape[0]
for i, quadkey in enumerate(returned_data_unit_tiles["quadkey"].values):
which = numpy.where(expected_results["quadkey"].values == quadkey)[0]
assert len(which) == 1 # quadkey found in `expected_results`
assert round(
returned_data_unit_tiles["size_data_unit_tile_area"].values[i], 1
) == round(expected_results["size_data_unit_tile_area"].values[which[0]], 1)
assert round(returned_data_unit_tiles["fraction_data_unit_area"].values[i], 6) == round(
expected_results["fraction_data_unit_area"].values[which[0]], 6
)
def test_determine_if_areas_are_the_same():
# Read quadtiles from file
bbox_tiles = geopandas.read_file(
......@@ -264,3 +301,52 @@ def test_determine_if_areas_are_the_same():
)
assert numpy.all(expected_result == returned_result)
def test_get_area_on_sphere():
# Test when input is arrays
lons_w = numpy.array([5.005, -110.009, 5.005, -110.009, -0.001, -0.001, 77.1351])
lons_e = numpy.array([5.006, -110.007, 5.006, -110.007, 0.001, 0.001, 77.1352])
lats_s = numpy.array([42.3004, 42.3004, -20.0006, -20.0006, -0.0002, 55.0002, -0.0002])
lats_n = numpy.array([42.3008, 42.3008, -20.0001, -20.0001, 0.0002, 55.0004, 0.0002])
# Expected areas calculated on a spreadsheet
expected_areas = numpy.array(
[
3657.976907,
7315.953815,
5809.313322,
11618.626644,
9891.449369,
2836.729927,
494.572468,
]
)
returned_areas = DataUnitTilesHelper.get_area_on_sphere(lons_w, lats_s, lons_e, lats_n)
assert numpy.all(numpy.round(expected_areas, 6) == numpy.round(returned_areas, 6))
# Test when input is floats
returned_area = DataUnitTilesHelper.get_area_on_sphere(
lons_w[0], lats_s[0], lons_e[0], lats_n[0]
)
assert round(returned_area, 6) == round(expected_areas[0], 6)
def test_get_area_albers_equal_area():
# Read shapefile with input polygon to test
input_polygons = geopandas.read_file(
os.path.join(
os.path.dirname(__file__), "data", "polygons_for_testing_data_unit_tiles.shp"
)
)
in_polygon = input_polygons[input_polygons.id == 1]["geometry"].values[0]
returned_area = DataUnitTilesHelper.get_area_albers_equal_area(in_polygon)
# Expected area retrieved from QGIS:
expected_area = 60403.843 # square metres
assert round(returned_area, 0) == round(expected_area, 0)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment