Commit 2b789197 authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added feature to calculate areas and ratios of district tiles

parent beafa805
Pipeline #26312 passed with stage
in 1 minute and 59 seconds
......@@ -18,7 +18,7 @@
import logging
from multiprocessing import Pool
from gdeimporter.tools_tiles import define_district_tiles
from gdeimporter.tools_tiles import define_district_tiles_and_attributes
logger = logging.getLogger()
......@@ -112,7 +112,7 @@ class ExposureEntity:
]
p = Pool(processes=number_cores)
all_district_tiles = p.map(define_district_tiles, data_units_geoms)
all_district_tiles = p.map(define_district_tiles_and_attributes, data_units_geoms)
p.close()
p.join()
......
......@@ -28,7 +28,7 @@ from copy import deepcopy
logger = logging.getLogger()
def define_district_tiles(in_geometry):
def get_district_tiles(in_geometry):
"""This function defines the district tiles associated with 'in_geometry'. District tiles
are defined as the intersection between zoom level 18 quadtiles and 'in_geometry'.
......@@ -62,6 +62,7 @@ def define_district_tiles(in_geometry):
district_tiles = geopandas.overlay(
quadtiles_bbox, in_geometry_df, how="intersection", keep_geom_type=True
)
district_tiles = district_tiles.set_index("quadkey", drop=False)
# Keep the quadtiles from the bounding box that are intersected by 'in_geometry'
filtered_quadtiles = quadtiles_bbox.loc[district_tiles["quadkey"].values, :]
......@@ -81,6 +82,88 @@ def define_district_tiles(in_geometry):
district_tiles, filtered_quadtiles
)
return district_tiles, filtered_quadtiles
def define_district_tiles_and_attributes(in_geometry):
"""This function defines the district tiles associated with 'in_geometry' and their
respective attributes. District tiles are defined as the intersection between zoom level 18
quadtiles and 'in_geometry'.
Args:
in_geometry (Shapely Polygon or MultiPolygon):
Geometry for which the associated district tiles will be defined.
Returns:
district_tiles (GeoPandas GeoDataFrame without geometry):
GeoDataFrame containing the definition of the district tiles in terms of:
quadkey (str):
Quadkey of the associated zoom level 18 quadtile.
size_district_tile_area (float):
Surface area of the district tile, in square metres.
size_district_tile_built_up_area (float):
Built-up area of the district tile, in square metres.
fraction_district_area (float):
Fraction (0.0, 1.0] that the surface area of the district tile represents
with respect to the surface area of 'in_geometry' (i.e. the surface area of
the summation of all defined district tiles). It cannot be zero.
fraction_district_built_up_area (float):
Fraction [0.0, 1.0] that the built-up area of the district tile represents
with respect to the built-up area contained by 'in_geometry' (i.e. the
summation of built-up areas of all defined district tiles). It can be zero.
"""
# Get district tiles geometries, quadkeys and full associated quadtiles
district_tiles, filtered_quadtiles = get_district_tiles(in_geometry)
# Calculate district tile areas and fractions of tiles (initialise)
all_areas = numpy.zeros([district_tiles.shape[0]])
all_fractions = numpy.zeros([district_tiles.shape[0]])
# Tiles that are fully contained in the original geometry 'in_geometry'
all_areas[district_tiles.contained] = get_area_on_sphere(
district_tiles[district_tiles.contained]["lon_w"].values,
district_tiles[district_tiles.contained]["lat_s"].values,
district_tiles[district_tiles.contained]["lon_e"].values,
district_tiles[district_tiles.contained]["lat_n"].values,
)
all_fractions[district_tiles.contained] = numpy.ones(
[len(all_areas[district_tiles.contained])]
)
# Tiles that are partially contained in the original geometry 'in_geometry'
results = get_areas_and_ratios_arbitrary_shapes(
district_tiles[numpy.logical_not(district_tiles.contained)]["geometry"].values,
filtered_quadtiles[numpy.logical_not(district_tiles.contained)]["geometry"].values,
)
(
all_areas[numpy.logical_not(district_tiles.contained)],
_,
all_fractions[numpy.logical_not(district_tiles.contained)],
) = results
# Write values to 'district_tiles'
district_tiles["size_district_tile_area"] = all_areas
district_tiles["size_district_tile_built_up_area"] = (
retrieve_built_up_area(district_tiles["quadkey"].values) * all_fractions
)
district_tiles["fraction_district_area"] = all_areas / all_areas.sum()
if district_tiles["size_district_tile_built_up_area"].values.sum() > 1e-15:
district_tiles["fraction_district_built_up_area"] = (
district_tiles["size_district_tile_built_up_area"].values
/ district_tiles["size_district_tile_built_up_area"].values.sum()
)
else:
district_tiles["fraction_district_built_up_area"] = numpy.nan * numpy.ones_like(
all_areas
)
# Writing of NaNs to the database and posterior decision-making handled separately
# Discard unnecessary columns of 'district_tiles' (free up memory)
district_tiles = district_tiles.drop(
columns=["geometry", "contained", "lon_w", "lat_s", "lon_e", "lat_n"]
)
return district_tiles
......@@ -257,3 +340,114 @@ def determine_if_areas_are_the_same(district_tiles, tiles):
are_the_same[numpy.where(area_ratio > 0.99999)[0]] = True
return are_the_same
def get_area_on_sphere(lon_w, lat_s, lon_e, lat_n, earth_sphere_radius=6371000.0):
"""This function returns the area (m2) of a rectangle on Earth calculated assuming the Earth
is a sphere with radius 'earth_sphere_radius'. The rectangle is defined by its four bounding
coordinates 'lon_w', 'lat_s', 'lon_e' and 'lat_n', all given in degrees.
When used to calculate the area of zoom level 18 quadtiles, this approximate function yields
area values that differ in less than 1% with respect to those calculated using the Albers
equal area projection.
References:
Kelly, K., Šavrič, B. (2021). Area and volume computation of longitude–latitude grids
and three-dimensional meshes. Transactions in GIS, 25(1):6–24.
Args:
lon_w (float or array of floats):
West bound of the tile(s), in degrees.
lat_s (float or array of floats):
South bound of the tile(s), in degrees.
lon_e (float or array of floats):
East bound of the tile(s), in degrees.
lat_n (float or array of floats):
North bound of the tile(s), in degrees.
earth_sphere_radius (float):
Radius of the Earth assuming a sphere, in meters. Default: 6,371,000 m.
Returns:
area (float or array of floats):
Area of the rectangles defined by the input coordinates, in square metres.
"""
area = (
(numpy.pi / 180.0)
* numpy.power(earth_sphere_radius, 2)
* numpy.abs(numpy.sin(numpy.deg2rad(lat_n)) - numpy.sin(numpy.deg2rad(lat_s)))
* numpy.abs(lon_e - lon_w)
)
return area
def get_areas_and_ratios_arbitrary_shapes(geometries_1, geometries_2):
"""This function uses get_area_Albers_equal_area() to calculate the area of each polygon of
'geometries_1' and 'geometries_2' and calculates the ratio of the former with respect to the
latter. Both 'geometries_1' and 'geometries_2' need to have the same length.
Args:
geometries_1, geometries_2 (arrays of Shapely Polygons):
Array of polygons of arbitrary shapes, defined in ESPG:4326.
Returns:
areas_1, areas_2 (arrays of floats):
Areas in meters of the rectangles defined in 'geometries_1' and 'geometries_2',
respectively.
ratios (array of floats):
Ratios of areas_1 to areas_2.
"""
areas_1 = numpy.zeros([len(geometries_1)])
areas_2 = numpy.zeros([len(geometries_1)])
for i in range(len(geometries_1)):
areas_1[i] = get_area_Albers_equal_area(geometries_1[i])
areas_2[i] = get_area_Albers_equal_area(geometries_2[i])
ratios = areas_1 / areas_2
return areas_1, areas_2, ratios
def get_area_Albers_equal_area(input_polygon):
"""This function returns the area (m2) of a polygon on Earth calculated using the Albers
equal area projection. The polygon's geometry is assumed to be defined in ESPG:4326.
Args:
in_geometry (Shapely polygon):
Polygon defined in ESPG:4326.
Returns:
projected_area: Area of the polygon in m2 (in Albers equal area projection).
"""
# Get the bounding box of the polygon
lon_w = input_polygon.bounds[0]
lat_s = input_polygon.bounds[1]
lon_e = input_polygon.bounds[2]
lat_n = input_polygon.bounds[3]
# Use the resulting coordinates to carry out the transformation
project_albers = pyproj.Proj(
"+proj=aea +lat_1={} +lat_2={} +lat_0={} +lon_0={}".format(
lat_s, lat_n, (lat_s + lat_n) / 2.0, (lon_w + lon_e) / 2.0
)
)
geometry = shapely.ops.transform(project_albers, input_polygon)
return geometry.area
def retrieve_built_up_area(quadkeys):
"""This function retrieves the built-up area associated with the input 'quadkeys' from the
OBM Tiles database.
================== TO BE IMPLEMENTED ==================
Returns:
Array of floats with the built-up area values associated with the 'quadkeys'.
"""
return numpy.ones([len(quadkeys)])
quadkey,type,lon_w,lat_s,lon_e,lat_n,contained
120222331000133202,MultiPolygon,4.9994,42.0026,5.000153,42.0032,False
120222331000133203,MultiPolygon,5.000153,42.0026,5.001526,42.0032,False
120222331000133212,Polygon,5.001526,42.002366,5.002899,42.0032,False
120222331000133213,Polygon,5.002899,42.002366,5.0037,42.0032,False
120222331000133220,Polygon,4.9994,42.0015,5.000153,42.0018,False
120222331000133221,Polygon,5.000153,42.0015,5.001526,42.0018,False
120222331000133230,Polygon,5.001526,42.0015,5.002899,42.002366,False
120222331000133231,Polygon,5.002899,42.0015,5.0037,42.002366,False
quadkey,type,lon_w,lat_s,lon_e,lat_n,contained,size_district_tile_area,fraction_district_area
120222331000133202,MultiPolygon,4.9994,42.0026,5.000153,42.0032,False,2784.421,0.068256627491064
120222331000133203,MultiPolygon,5.000153,42.0026,5.001526,42.0032,False,5154.434,0.126354556823582
120222331000133212,Polygon,5.001526,42.002366,5.002899,42.0032,False,7544.606,0.184946659039292
120222331000133213,Polygon,5.002899,42.002366,5.0037,42.0032,False,6144.446,0.150623473160473
120222331000133220,Polygon,4.9994,42.0015,5.000153,42.0018,False,2077.655,0.050931135553836
120222331000133221,Polygon,5.000153,42.0015,5.001526,42.0018,False,3791.219,0.092937031799445
120222331000133230,Polygon,5.001526,42.0015,5.002899,42.002366,False,6913.142,0.169467102240224
120222331000133231,Polygon,5.002899,42.0015,5.0037,42.002366,False,6383.493,0.156483413892085
......@@ -191,7 +191,7 @@ def test_get_quadtiles_in_bbox_of_polygon():
assert "OSError" in str(excinfo.type)
def test_define_district_tiles():
def test_get_district_tiles():
# Read shapefile with input polygon to test
input_polygons = geopandas.read_file(
os.path.join(
......@@ -203,12 +203,12 @@ def test_define_district_tiles():
# Read expected results (manually determined from QGIS)
expected_results = pandas.read_csv(
os.path.join(
os.path.dirname(__file__), "data", "expected_results_test_define_district_tiles.csv"
os.path.dirname(__file__), "data", "expected_results_test_get_district_tiles.csv"
),
dtype={"quadkey": str, "contained": bool},
)
returned_district_tiles = tools_tiles.define_district_tiles(in_polygon)
returned_district_tiles, _ = tools_tiles.get_district_tiles(in_polygon)
assert returned_district_tiles.shape[0] == expected_results.shape[0]
......@@ -237,7 +237,46 @@ def test_define_district_tiles():
assert (
returned_district_tiles["contained"].values[i]
== expected_results["contained"].values[i]
== expected_results[expected_results.quadkey == quadkey]["contained"].values[0]
)
def test_define_district_tiles_and_attributes():
# Read shapefile with input polygon to test
input_polygons = geopandas.read_file(
os.path.join(
os.path.dirname(__file__), "data", "polygons_for_testing_district_tiles.shp"
)
)
in_polygon = input_polygons[input_polygons.id == 10]["geometry"].values[0]
# Read expected results (manually determined from QGIS)
expected_results = pandas.read_csv(
os.path.join(
os.path.dirname(__file__), "data", "expected_results_test_get_district_tiles.csv"
),
dtype={"quadkey": str, "contained": bool},
)
returned_district_tiles = tools_tiles.define_district_tiles_and_attributes(in_polygon)
assert returned_district_tiles.shape[0] == expected_results.shape[0]
for i, quadkey in enumerate(returned_district_tiles["quadkey"].values):
assert quadkey in expected_results["quadkey"].values
assert round(returned_district_tiles["size_district_tile_area"].values[i], 1) == round(
expected_results[expected_results.quadkey == quadkey][
"size_district_tile_area"
].values[0],
1,
)
assert round(returned_district_tiles["fraction_district_area"].values[i], 6) == round(
expected_results[expected_results.quadkey == quadkey][
"fraction_district_area"
].values[0],
6,
)
......@@ -261,3 +300,50 @@ def test_determine_if_areas_are_the_same():
returned_result = tools_tiles.determine_if_areas_are_the_same(district_tiles, bbox_tiles)
assert numpy.all(expected_result == returned_result)
def test_get_area_on_sphere():
# Test when input is arrays
lons_w = numpy.array([5.005, -110.009, 5.005, -110.009, -0.001, -0.001, 77.1351])
lons_e = numpy.array([5.006, -110.007, 5.006, -110.007, 0.001, 0.001, 77.1352])
lats_s = numpy.array([42.3004, 42.3004, -20.0006, -20.0006, -0.0002, 55.0002, -0.0002])
lats_n = numpy.array([42.3008, 42.3008, -20.0001, -20.0001, 0.0002, 55.0004, 0.0002])
# Expected areas calculated on a spreadsheet
expected_areas = numpy.array(
[
3657.976907,
7315.953815,
5809.313322,
11618.626644,
9891.449369,
2836.729927,
494.572468,
]
)
returned_areas = tools_tiles.get_area_on_sphere(lons_w, lats_s, lons_e, lats_n)
assert numpy.all(numpy.round(expected_areas, 6) == numpy.round(returned_areas, 6))
# Test when input is floats
returned_area = tools_tiles.get_area_on_sphere(lons_w[0], lats_s[0], lons_e[0], lats_n[0])
assert round(returned_area, 6) == round(expected_areas[0], 6)
def test_get_area_Albers_equal_area():
# Read shapefile with input polygon to test
input_polygons = geopandas.read_file(
os.path.join(
os.path.dirname(__file__), "data", "polygons_for_testing_district_tiles.shp"
)
)
in_polygon = input_polygons[input_polygons.id == 1]["geometry"].values[0]
returned_area = tools_tiles.get_area_Albers_equal_area(in_polygon)
# Expected area retrieved from QGIS:
expected_area = 60403.843 # square metres
assert round(returned_area, 0) == round(expected_area, 0)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment