Commit e4ca37b6 authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added feature to create geometry of data-unit tiles

parent a58b2bbf
Pipeline #26637 passed with stage
in 1 minute and 38 seconds
......@@ -4,3 +4,4 @@ data_pathname: path_to_directory_with_model_data
boundaries_pathname: path_to_directory_with_boundary_files
occupancies_to_run: residential, commercial # Need to exist for the indicated `exposure format`, industrial not supported
exposure_entities_to_run: all # Either "all", a comma-space-separated list of entity names, or a name of a .txt or .csv file
number_cores: 1 # Number of cores used for parallelisation
......@@ -40,6 +40,8 @@ class Configuration:
data will be retrieved.
self.exposure_entities_to_run (list of str):
List of names of the exposure entities for which the data units will be retrieved.
self.number_cores (int):
Number of cores that will be used to run the code.
"""
REQUIRES = [
......@@ -47,6 +49,7 @@ class Configuration:
"boundaries_pathname",
"occupancies_to_run",
"exposure_entities_to_run",
"number_cores",
]
def __init__(self, filepath):
......@@ -67,6 +70,7 @@ class Configuration:
self.exposure_entities_to_run = self._assign_listed_parameters(
config, "exposure_entities_to_run"
)
self.number_cores = self._assign_integer_parameter(config, "number_cores")
# Terminate if critical parameters are missing (not all parameters are critical)
for key_parameter in self.REQUIRES:
......@@ -205,3 +209,36 @@ class Configuration:
assigned_parameter = assigned_parameter.split(", ")
return assigned_parameter
def _assign_integer_parameter(self, config, input_parameter):
"""This function searches for the key input_parameter in the dictionary config, and
converts it into an integer.
If input_parameter is not a key of config, the output is None.
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
input_parameter (str):
Name of the desired parameter, to be searched for as a primary key of config.
Returns:
assigned_parameter (int):
The content of config[input_parameter] converted into a string.
"""
assigned_parameter = self._assign_parameter(config, input_parameter)
if assigned_parameter is None:
return None
try:
assigned_parameter = int(assigned_parameter)
except ValueError:
logger.critical(
"Error reading %s from configuration file: not an integer" % (input_parameter)
)
assigned_parameter = None
return assigned_parameter
......@@ -32,11 +32,21 @@ class DataUnit:
ID of the DataUnit (e.g. ID of the administrative unit it represents).
self.geometry (Shapely polygon):
Geometry of the data unit.
self.data_unit_tiles (GeoPandas GeoDataFrame):
Definition of the data-unit tiles associated with the data unit. It is filled in by
ExposureEntity.create_data_unit_tiles(), processing all the data units associated
with an ExposureEntity and an occupancy case at the same time. It contains the
following columns:
quadkey (str):
Quadkey of the associated zoom level 18 quadtile.
geometry (Shapely polygons):
Geometry of the data-unit tile.
"""
def __init__(self, dataunit_id, geometries_table, target_column_name):
self.id = dataunit_id
self.geometry = self.get_data_unit_geometry(geometries_table, target_column_name)
self.data_unit_tiles = None
def get_data_unit_geometry(self, geometries_table, target_column_name):
"""This function retrieves the geometry of the data unit, by reading it from
......
......@@ -17,6 +17,8 @@
# along with this program. If not, see http://www.gnu.org/licenses/.
import logging
from multiprocessing import Pool
from gdeimporter.tools.data_unit_tiles import DataUnitTilesHelper
logger = logging.getLogger()
......@@ -76,3 +78,49 @@ class ExposureEntity:
def __init__(self, name):
self.name = name
self.occupancy_cases = {}
def create_data_unit_tiles(self, occupancy_case, number_cores):
"""This function creates the data-unit tiles associated with all data units of the
ExposureEntity for a specified 'occupancy_case'. The latter needs to be a key of
self.occupancy_cases. The data units will be paralellised into as many cores as
specified by 'number_cores'. The data units need to exist in
self.occupancy_cases[occupancy_case][data_units].
Args:
occupancy_case (str):
Name of the occupancy case (e.g. "residential", "commercial", "industrial") for
which the data-unit tiles will be created. It needs to exist as a key of
self.occupancy_cases.
number_cores (int):
Number of CPU cores to be used to run this function.
Returns:
This function writes the 'data_unit_tiles' attribute of the data units of the
ExposureEntity (i.e. to
self.occupancy_cases[occupancy_case]["data_units"][data_unit_id].data_unit_tiles).
"""
logger.info(
"Creating data-unit tiles for data units of %s, %s occupancy"
% (self.name, occupancy_case)
)
data_units_ids = list(self.occupancy_cases[occupancy_case]["data_units"].keys())
data_units_geoms = [
dataunit.geometry
for dataunit in self.occupancy_cases[occupancy_case]["data_units"].values()
]
p = Pool(processes=number_cores)
all_data_unit_tiles = p.map(
DataUnitTilesHelper.define_data_unit_tiles, data_units_geoms
)
p.close()
p.join()
for i, data_unit_id in enumerate(data_units_ids):
self.occupancy_cases[occupancy_case]["data_units"][
data_unit_id
].data_unit_tiles = all_data_unit_tiles[i]
return
......@@ -54,6 +54,9 @@ def main():
for exposure_entity_name in config.exposure_entities_to_run:
for occupancy_case in config.occupancies_to_run:
aem.get_data_units(config, exposure_entity_name, occupancy_case)
aem.exposure_entities[exposure_entity_name].create_data_unit_tiles(
occupancy_case, config.number_cores
)
print("Name of the model: %s" % (aem.model_name))
print("Format: %s" % (aem.exposure_format))
......
#!/usr/bin/env python3
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import logging
import pandas
import geopandas
import pyproj
import mercantile
import shapely
logger = logging.getLogger()
class DataUnitTilesHelper:
"""This class contains methods used to define data-unit tiles and their attributes."""
@staticmethod
def define_data_unit_tiles(in_geometry):
"""This function defines the data-unit tiles associated with 'in_geometry'. Data-unit
tiles are defined as the intersection between zoom level 18 quadtiles and 'in_geometry'.
If the intersection results in a point or a line geometry, these are discarded as they
are not really data-unit tiles.
Args:
in_geometry (Shapely Polygon or MultiPolygon):
Geometry for which the associated data-unit tiles will be defined.
Returns:
data_unit_tiles (GeoPandas GeoDataFrame):
GeoDataFrame containing the definition of the data-unit tiles in terms of:
quadkey (str):
Quadkey of the associated zoom level 18 quadtile.
geometry (Shapely polygons):
Geometry of the data-unit tile.
"""
# Retrieve all quadtiles associated with in_geometry
quadtiles_bbox = DataUnitTilesHelper.get_quadtiles_in_bbox_of_polygon(in_geometry)
# Create a GeoPandas GeoDataFrame with the original geometry
in_geometry_df = geopandas.GeoDataFrame({"geometry": [in_geometry]})
in_geometry_df.crs = pyproj.CRS("epsg:4326")
# Intersect quadtiles with the original geometry
data_unit_tiles = geopandas.overlay(
quadtiles_bbox, in_geometry_df, how="intersection", keep_geom_type=True
)
return data_unit_tiles
@staticmethod
def get_quadtiles_in_bbox_of_polygon(in_geometry, zoomlevel=18):
"""This function retrieves the quadtiles of zoom level 'zoomlevel' that fully contain
the bounding box(es) of 'in_geometry'. If 'in_geometry' is a MultiPolygon, the bounding
boxes are defined for each Polygon in the MultiPolygon, not the MultiPolygon as a whole.
The quadtiles are defined in EPSG:3857 projection, but their geometry is given in the
WGS84 coordinate system (ESPG:4326).
Args:
in_geometry (Shapely Polygon or MultiPolygon):
Geometry for which the associated quadtiles will be retrieved, defined in
ESPG:4326.
zoomlevel (int):
Zoom level of the quadtiles to be retrieved.
Returns:
grid (GeoPandas GeoDataFrame):
GeoDataFrame containing the definition of the quadtiles in terms of:
quadkey (str):
Quadkey of the level 18 quadtile.
lon_w, lat_s, lon_e, lat_n (float)
Coordinates of the corners of the quadtile.
geometry (Shapely Polygon):
Geometry of the quadtile.
"""
if in_geometry.geom_type == "Polygon":
geometries = [in_geometry]
elif in_geometry.geom_type == "MultiPolygon":
geometries = list(in_geometry)
else:
raise OSError("get_quadtiles_in_bbox_of_polygon: argument is not a valid geometry")
tiles_quadkeys = []
tiles_lons_w = []
tiles_lats_s = []
tiles_lons_e = []
tiles_lats_n = []
for geometry in geometries:
if DataUnitTilesHelper.crosses_antimeridian(geometry):
raise OSError(
"get_quadtiles_in_bbox_of_polygon: geometry crosses the antimeridian"
)
lon_w, lat_s, lon_e, lat_n = geometry.bounds
if lat_s < -85.0511 or lat_n > 85.0511:
raise OSError(
"get_quadtiles_in_bbox_of_polygon: "
"quadtiles cannot be defined for latitudes outside [-85.0511, +85.0511]"
)
tiles = list(mercantile.tiles(lon_w, lat_s, lon_e, lat_n, zoomlevel))
quadkeys = list([mercantile.quadkey(tile) for tile in tiles])
lons_w = list([mercantile.bounds(tile).west for tile in tiles])
lats_s = list([mercantile.bounds(tile).south for tile in tiles])
lons_e = list([mercantile.bounds(tile).east for tile in tiles])
lats_n = list([mercantile.bounds(tile).north for tile in tiles])
tiles_quadkeys.extend(quadkeys)
tiles_lons_w.extend(lons_w)
tiles_lats_s.extend(lats_s)
tiles_lons_e.extend(lons_e)
tiles_lats_n.extend(lats_n)
# Create a Pandas DataFrame
aux_grid = pandas.DataFrame(
{
"quadkey": tiles_quadkeys,
"lon_w": tiles_lons_w,
"lat_s": tiles_lats_s,
"lon_e": tiles_lons_e,
"lat_n": tiles_lats_n,
}
)
aux_grid = aux_grid.drop_duplicates(ignore_index=True) # Remove potential duplicates
aux_grid = aux_grid.reset_index()
grid = DataUnitTilesHelper.create_geometry_of_quadtiles(aux_grid)
return grid
@staticmethod
def create_geometry_of_quadtiles(in_grid):
"""This function generates the geometry of the tiles defined by their corners in
'in_grid' and outputs the latter (in ESPG:4326) together with all the contents of
'in_grid' as a GeoPandas GeoDataFrame.
Args:
in_grid (Pandas DataFrame):
DataFrame with at least the following columns:
lon_w, lat_s, lon_e, lat_n (float):
Coordinates of the corners of the quadtile (in degrees).
Returns:
out_grid (GeoPandas GeoDataFrame):
GeoDataFrame with the same contents as in_grid plus the geometry of the
quadtiles in ESPG:4326.
"""
geoms = []
for i in range(in_grid.shape[0]):
lon_w = in_grid["lon_w"].values[i]
lat_s = in_grid["lat_s"].values[i]
lon_e = in_grid["lon_e"].values[i]
lat_n = in_grid["lat_n"].values[i]
geoms.append(
shapely.geometry.Polygon(
[(lon_w, lat_s), (lon_e, lat_s), (lon_e, lat_n), (lon_w, lat_n)]
)
)
geometries = geopandas.GeoSeries(geoms)
out_grid = geopandas.GeoDataFrame(in_grid, geometry=geometries)
out_grid.crs = pyproj.CRS("epsg:4326")
return out_grid
@staticmethod
def crosses_antimeridian(in_geometry):
"""This function checks whether 'in_geometry' crosses the antimeridian (i.e. longitude
equal to -180 or +180.
Args:
in_geometry (Shapely Polygon or MultiPolygon):
Geometry for which the function will assess if the antimeridian is crossed,
defined in ESPG:4326.
Returns (bool):
True if the antimeridian is crossed, False if the antimeridian is not crossed.
"""
longitude_width = abs(in_geometry.bounds[2] - in_geometry.bounds[0])
if longitude_width < 180.0:
return False
else:
return True
......@@ -28,7 +28,17 @@ setup(
keywords="Global Dynamic Exposure, GDE, buildings, exposure model",
author="Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ",
license="AGPLv3+",
install_requires=["numpy", "pyyaml", "pandas", "openpyxl", "geopandas"],
install_requires=[
"numpy",
"pyyaml",
"pandas",
"openpyxl",
"geopandas",
"rtree",
"mercantile",
"pyproj",
"shapely",
],
extras_require={
"tests": tests_require,
"linters": linters_require,
......
model_name: esrm20
exposure_format: esrm20
data_pathname: /some/path/to/directory
boundaries_pathname: /some/path/to/directory
occupancies_to_run: residential, commercial, industrial
exposure_entities_to_run: all
number_cores: some
......@@ -4,3 +4,4 @@ data_pathname: /some/path/to/directory
boundaries_pathname: /some/path/to/directory
occupancies_to_run: residential, commercial, industrial
exposure_entities_to_run: all
number_cores: 4
......@@ -4,3 +4,4 @@ data_pathname: /some/path/to/directory
boundaries_pathname: /some/path/to/directory
occupancies_to_run: residential
exposure_entities_to_run: Name1
number_cores: 4
......@@ -4,3 +4,4 @@ data_pathname: /some/path/to/directory
boundaries_pathname: /some/path/to/directory
occupancies_to_run: residential, commercial, industrial
exposure_entities_to_run: Name1, Name2, Name3
number_cores: 4
quadkey,type,lon_w,lat_s,lon_e,lat_n
120222331000133202,MultiPolygon,4.9994,42.0026,5.000153,42.0032
120222331000133203,MultiPolygon,5.000153,42.0026,5.001526,42.0032
120222331000133212,Polygon,5.001526,42.002366,5.002899,42.0032
120222331000133213,Polygon,5.002899,42.002366,5.0037,42.0032
120222331000133220,Polygon,4.9994,42.0015,5.000153,42.0018
120222331000133221,Polygon,5.000153,42.0015,5.001526,42.0018
120222331000133230,Polygon,5.001526,42.0015,5.002899,42.002366
120222331000133231,Polygon,5.002899,42.0015,5.0037,42.002366
ISO-8859-1
\ No newline at end of file
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
\ No newline at end of file
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]
......@@ -33,6 +33,7 @@ def test_Configuration():
assert returned_config.boundaries_pathname == "/some/path/to/directory"
assert returned_config.occupancies_to_run == ["residential", "commercial", "industrial"]
assert returned_config.exposure_entities_to_run == ["all"]
assert returned_config.number_cores == 4
# Test case in which the file is not found
with pytest.raises(OSError) as excinfo:
......@@ -133,3 +134,12 @@ def test_Configuration():
returned_config.interpret_exposure_entities_to_run(dummy_aem)
assert returned_config.exposure_entities_to_run == ["Name1", "Name2", "Name3"]
# Test case in which the number of cores is not an integer
with pytest.raises(OSError) as excinfo:
returned_config = Configuration(
os.path.join(
os.path.dirname(__file__), "data", "config_for_testing_cores_not_integer.yml"
)
)
assert "OSError" in str(excinfo.type)
#!/usr/bin/env python3
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import os
import logging
import geopandas
import numpy
import pandas
import shapely
import pytest
from gdeimporter.tools.data_unit_tiles import DataUnitTilesHelper
logger = logging.getLogger()
def test_get_quadtiles_in_bbox_of_polygon():
# Read shapefile with input polygons to test
input_polygons = geopandas.read_file(
os.path.join(
os.path.dirname(__file__), "data", "polygons_for_testing_data_unit_tiles.shp"
)
)
# Define expected output:
expected_quadtiles = {}
# Test case 1: Normal polygon in Luxembourg
expected_quadtiles[1] = [
"120203202122231123",
"120203202122231301",
"120203202122231303",
"120203202122231132",
"120203202122231310",
"120203202122231312",
"120203202122231133",
"120203202122231311",
"120203202122231313",
"120203202122320022",
"120203202122320200",
"120203202122320202",
]
# Test case 2: polygon smaller than a zoom level 18 tile in Luxembourg
expected_quadtiles[2] = ["120203202122320210"]
# Test case 3: polygon with west bound = -180.0, northern hemisphere
expected_quadtiles[3] = [
"020202202022220200",
"020202202022220202",
"020202202022220220",
"020202202022220201",
"020202202022220203",
"020202202022220221",
]
# Test case 4: polygon with west bound = -180.0, southern hemisphere
expected_quadtiles[4] = [
"202020020200002002",
"202020020200002020",
"202020020200002022",
"202020020200002003",
"202020020200002021",
"202020020200002023",
]
# Test case 5: polygon with east bound = +180.0, southern hemisphere
expected_quadtiles[5] = [
"313131131311113112",
"313131131311113130",
"313131131311113132",
"313131131311113113",
"313131131311113131",
"313131131311113133",
]
# Test case 6: polygon with east bound = +180.0, northern hemisphere
expected_quadtiles[6] = [
"131313313133331310",
"131313313133331312",
"131313313133331330",
"131313313133331311",
"131313313133331313",
"131313313133331331",
]
# Test case 7: polygon crossing Greenwich, northern hemisphere
expected_quadtiles[7] = [
"031313131131313313",
"031313131131313331",
"031313131131313333",
"120202020020202202",
"120202020020202220",
"120202020020202222",
"120202020020202203",
"120202020020202221",
"120202020020202223",
]
# Test case 8: polygon crossing Greenwich and the Equator
expected_quadtiles[8] = [
"033333333333333333",
"211111111111111111",
"122222222222222222", #
"300000000000000000",
"122222222222222223", #
"300000000000000001",
]
# Test case 9: multipolygon made of two triangles with overlapping bounding boxes
expected_quadtiles[9] = [
"120203230202332010",
"120203230202332012",
"120203230202332030",
"120203230202332032",
"120203230202332210",
"120203230202332011",
"120203230202332013",
"120203230202332031",
"120203230202332033",
"120203230202332211",
"120203230202332100",
"120203230202332102",
"120203230202332120",
"120203230202332122",
"120203230202332300",
"120203230202332101",
"120203230202332103",
"120203230202332121",
"120203230202332123",
"120203230202332301",
"120203230202330303",
"120203230202330321",
"120203230202330323",
"120203230202330312",
"120203230202330330",
"120203230202330332",
"120203230202332110",
"120203230202332112",
"120203230202330313",
"120203230202330331",
"120203230202330333",
"120203230202332111",
"120203230202332113",
"120203230202331202",
"120203230202331220",
"120203230202331222",
"120203230202333000",
"120203230202333002",
"120203230202331203",
"120203230202331221",
"120203230202331223",