Commit 412506c0 authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added feature to ensure full geographic coverage of exposure entities

parent 91140475
Pipeline #37902 passed with stage
in 2 minutes and 9 seconds
......@@ -8,7 +8,7 @@ model.
### Software dependencies
- Python >= 3.7
- Python 3.7, 3.8 or 3.9
### Python libraries
......@@ -16,6 +16,14 @@ model.
- `pandas`
- `pyyaml`
- `openpyxl`
- `geopandas`
- `rtree`
- `mercantile`
- `pyproj`
- `shapely`
- `psycopg2-binary`
- `python-dotenv`
- `iso3166`
### Install
......@@ -64,6 +72,7 @@ The following configuration options are available in the `config.yml`:
- `exposure_format`: Format of the input aggregated exposure model. Currently supported values: esrm20.
- `data_pathname`: Path to directory that contains the model data.
- `boundaries_pathname`: Path to directory that contains the boundary geodata files.
- `domain_boundary_filepath`: Path to the geodata file that contains the boundaries within which the input aggregated model is defined.
- `occupancies_to_run`: List of occupancies for which the code will be run, separated by ", " (comma and space). They need to exist for the indicated `exposure format`. Currently supported values: residential, commercial.
- `exposure_entities_to_run`: List of names of exposure entities for which the code will be run. Currently supported options:
- "all": The list of names will be retrieved from the metadata of the input aggregated exposure model.
......@@ -73,6 +82,9 @@ The following configuration options are available in the `config.yml`:
- `number_cores`: Number of cores used for parallelisation.
- `database_built_up`: Credentials for the [database](https://git.gfz-potsdam.de/dynamicexposure/openbuildingmap/database-obmtiles#obm_built_area_assessments-completeness-assessments-information) where the built-up areas per quadtile are stored.
- `database_gde_tiles`: Credentials for the [database](https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/database-gdetiles) where information on the GDE tiles is stored.
- `data_units_surface_threshold`: Percentage difference (float between 0.0 and 100.0) of geographic areas to define the need to create data units to fill an exposure entity, if the data units defined in an aggregated exposure model do not fully cover the geographic extents of the exposure entity. Default: 1.0%.
- `force_creation_data_units`: Create data units to fill an exposure entity irrespective of other conditions (e.g. irrespective of `data_units_surface_threshold`). Default: False.
## Running gde-importer
......
......@@ -20,3 +20,5 @@ database_gde_tiles: # Database where info on the GDE tiles is stored
port: port_number # Leave empty if a port number is not needed
username: username
password: password_of_username
data_units_surface_threshold: 1.0 # Percentage difference of areas to define the need to create data units to fill an exposure entity
force_creation_data_units: False # Create data units to fill an exposure entity irrespective of other conditions; optional (default: False)
This diff is collapsed.
......@@ -19,6 +19,7 @@
import os
import sys
import logging
import numpy
import yaml
from dotenv import load_dotenv
......@@ -89,6 +90,13 @@ class Configuration:
User name to connect to the SQL database.
password (str):
Password associated with self.username.
self.data_units_surface_threshold (float between 0.0 and 100.0):
Percentage difference of geographic areas to define the need to create data units to
fill an exposure entity, if the data units defined in an aggregated exposure model
do not fully cover the geographic extents of the exposure entity. Default: 1.0%.
self.force_creation_data_units (bool):
Create data units to fill an exposure entity irrespective of other conditions (e.g.
irrespective of self.surface_threshold). Default: False.
"""
REQUIRES = [
......@@ -100,6 +108,7 @@ class Configuration:
"number_cores",
"database_built_up",
"database_gde_tiles",
"data_units_surface_threshold",
]
def __init__(self, filepath, force_config_over_hierarchies=False):
......@@ -154,10 +163,25 @@ class Configuration:
self.database_gde_tiles = self._retrieve_database_credentials(
config, "database_gde_tiles", "test_db_gde_tiles.env", force_config_over_hierarchies
)
self.data_units_surface_threshold = self._assign_float_parameter(
config, "data_units_surface_threshold", True, -1e-15, 100.0
)
if self.data_units_surface_threshold is not None:
if numpy.sign(self.data_units_surface_threshold) < -0.5:
self.data_units_surface_threshold = abs(self.data_units_surface_threshold)
self.force_creation_data_units = self._assign_boolean_parameter(
config, "force_creation_data_units"
)
if self.force_creation_data_units is None:
logger.info(
"The parameter 'force_creation_data_units' could not be retrieved from the "
"configuration file and was automatically set to 'False'."
)
self.force_creation_data_units = False
# Terminate if critical parameters are missing (not all parameters are critical)
for key_parameter in self.REQUIRES:
if not getattr(self, key_parameter):
if getattr(self, key_parameter) is None:
error_message = (
"Error: parameter '%s' could not be retrieved from "
"configuration file. The program cannot run." % (key_parameter)
......@@ -166,8 +190,8 @@ class Configuration:
raise OSError(error_message)
def read_config_file(self, filepath):
"""This function attempts to open the configuration file. If not found, it returns an
empty dictionary and logs a critical error.
"""This function attempts to open the configuration file. If not found, it logs a
critical error and raises an OSError.
Args:
filepath (str):
......@@ -183,8 +207,10 @@ class Configuration:
with open(filepath, "r") as ymlfile:
config = yaml.load(ymlfile, Loader=yaml.FullLoader)
except FileNotFoundError:
logger.critical("Error instantiating Configuration: configuration file not found")
config = {}
error_message = "Error instantiating Configuration: configuration file not found"
logger.critical(error_message)
raise OSError(error_message)
return config
......@@ -307,7 +333,7 @@ class Configuration:
Returns:
assigned_parameter (int):
The content of config[input_parameter] converted into a string.
The content of config[input_parameter] converted into an integer.
"""
......@@ -326,6 +352,102 @@ class Configuration:
return assigned_parameter
def _assign_float_parameter(
self, config, input_parameter, check_range, lower_bound, upper_bound
):
"""This function searches for the key input_parameter in the dictionary config, and
converts it into a float.
If input_parameter is not a key of config, the output is None.
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
input_parameter (str):
Name of the desired parameter, to be searched for as a primary key of config.
check_range (bool):
If True, it will be verified that the desired float parameter belongs to the
interval 'bounding_range'.
lower_bound (float):
Lower possible value of the desired float parameter, inclusive. Only verified
if 'check_range' is True.
upper_bound (float):
Upper possible value of the desired float parameter, inclusive. Only verified
if 'check_range' is True.
Returns:
assigned_parameter (float):
The content of config[input_parameter] converted into a float.
"""
assigned_parameter = self._assign_parameter(config, input_parameter)
if assigned_parameter is None:
return None
try:
assigned_parameter = float(assigned_parameter)
if check_range:
if assigned_parameter < lower_bound or assigned_parameter > upper_bound:
error_message = (
"Error reading %s from configuration file: float out of range. "
"Valid range: [%s, %s]"
% (
input_parameter,
"{:.2f}".format(lower_bound),
"{:.2f}".format(upper_bound),
)
)
logger.critical(error_message)
raise ValueError(error_message)
except ValueError:
logger.critical(
"Error reading %s from configuration file: not a float" % (input_parameter)
)
assigned_parameter = None
return assigned_parameter
def _assign_boolean_parameter(self, config, input_parameter):
"""This function searches for the key input_parameter in the dictionary config, and
converts it into a boolean.
If input_parameter is not a key of config, the output is None.
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
input_parameter (str):
Name of the desired parameter, to be searched for as a primary key of config.
Returns:
assigned_parameter (bool):
The content of config[input_parameter] converted into a boolean.
"""
assigned_parameter = self._assign_parameter(config, input_parameter)
if assigned_parameter is None:
return None
if not isinstance(assigned_parameter, bool): # yaml tries to interpret data types
if assigned_parameter.lower() in ["true", "yes"]:
assigned_parameter = True
elif assigned_parameter.lower() in ["false", "no"]:
assigned_parameter = False
else:
logger.critical(
"Error reading %s from configuration file: not a boolean"
% (input_parameter)
)
assigned_parameter = None
return assigned_parameter
def _assign_hierarchical_parameters(self, config, input_parameter, requested_nested=[]):
"""This function searches for the key input_parameter in the dictionary config, and for
each of the elements of requested_nested as keys of config[input_parameter].
......
......@@ -17,11 +17,13 @@
# along with this program. If not, see http://www.gnu.org/licenses/.
import logging
import numpy
import iso3166
from multiprocessing import Pool
from functools import partial
from gdeimporter.data_unit_tiles import DataUnitTilesHelper
from gdeimporter.tools.database import Database
from gdeimporter.tools.spatial import SpatialTools
logger = logging.getLogger()
......@@ -532,6 +534,68 @@ class ExposureEntity:
return
def sum_surface_area_of_data_units(self, occupancy_case, number_cores):
"""This function calculates and adds up the surface area of all the data units contained
in this Exposure Entity for 'occupancy_case' using the Albers equal area projection.
Args:
occupancy_case (str):
Name of the occupancy case (e.g. "residential", "commercial", "industrial") for
which the surface areas of the data units will be calculated and added.
number_cores (int):
Number of CPU cores to be used to run this function.
Returns:
total_surface (float):
Sum of the surface area of all the data units contained in this Exposure Entity
for 'occupancy_case', in m2.
"""
data_units_geometries = [
dataunit.geometry
for dataunit in self.occupancy_cases[occupancy_case]["data_units"].values()
]
p = Pool(processes=number_cores)
data_units_surface_areas = p.map(
SpatialTools.get_area_albers_equal_area, data_units_geometries
)
p.close()
p.join()
total_surface = numpy.array(data_units_surface_areas).sum()
return total_surface
def join_data_units(self, occupancy_case):
"""This function merges and returns the geometries of all the data units contained
in this Exposure Entity for 'occupancy_case' as one Shapely Polygon/MultiPolygon.
Args:
occupancy_case (str):
Name of the occupancy case (e.g. "residential", "commercial", "industrial") for
which the surface areas of the data units will be calculated and added.
Returns:
joined_data_units (Shapely Polygon or MultiPolygon):
Polygon or MultiPolygon representing the geometries of all the data units
contained in this Exposure Entity for 'occupancy_case' merged together.
"""
data_units_geometries = [
dataunit.geometry
for dataunit in self.occupancy_cases[occupancy_case]["data_units"].values()
]
data_units_gdf = SpatialTools.geodataframe_from_shapely(data_units_geometries)
data_units_gdf = data_units_gdf.dissolve()
if data_units_gdf.shape[0] > 0:
joined_data_units = data_units_gdf["geometry"].to_numpy()[0]
else:
joined_data_units = None
return joined_data_units
def _interpret_exposure_entities_code(self, config_code):
"""This function interprets the value of exposure_entities_code given as configuration.
......
......@@ -295,8 +295,8 @@ class SpatialTools:
return fully_contained
@staticmethod
def geometries_intersection(in_geometry_1, in_geometry_2, in_crs="epsg:4326"):
"""This function returns the intersection of 'in_geometry_1' and 'in_geometry_2',
def single_geometries_difference(in_geometry_1, in_geometry_2, in_crs="epsg:4326"):
"""This function returns the difference of 'in_geometry_1' minus 'in_geometry_2',
carried out using the Albers Equal Area projection.
Args:
......@@ -308,9 +308,12 @@ class SpatialTools:
Returns:
out_geometry (Shapely Polygon or MultiPolygon):
Intersection of 'in_geometry_1' and 'in_geometry_2', defined in 'in_crs'.
Difference of 'in_geometry_1' minus 'in_geometry_2', defined in 'in_crs'.
"""
if in_geometry_2 is None:
return in_geometry_1
# Create GeoPandas GeoDataFrames
in_geometry_1_gdf = SpatialTools.geodataframe_from_shapely([in_geometry_1], in_crs)
in_geometry_2_gdf = SpatialTools.geodataframe_from_shapely([in_geometry_2], in_crs)
......@@ -324,15 +327,92 @@ class SpatialTools:
)
out_geometry_gdf = geopandas.overlay(
in_geometry_1_gdf, in_geometry_2_gdf, how="intersection"
in_geometry_1_gdf, in_geometry_2_gdf, how="difference"
)
out_geometry_gdf = out_geometry_gdf.to_crs(in_crs)
if out_geometry_gdf.shape[0] > 1:
out_geometry_gdf = out_geometry_gdf.dissolve()
out_geometry_gdf = out_geometry_gdf.to_crs(in_crs)
out_geometry = out_geometry_gdf["geometry"].to_numpy()[0]
return out_geometry
@staticmethod
def single_geometries_intersection(in_geometry_1, in_geometry_2, in_crs="epsg:4326"):
"""This function returns the intersection of 'in_geometry_1' and 'in_geometry_2',
carried out using the Albers Equal Area projection.
Args:
in_geometry_1, in_geometry_2 (Shapely Polygons or MultiPolygons):
Polygons or multipolygons defined in 'in_crs'.
in_crs (str):
Coordinate reference system (CRS) in which 'in_geometry_1' and 'in_geometry_2'
are defined. Default: "epsg:4326".
Returns:
out_geometry (Shapely Polygon or MultiPolygon):
Intersection of 'in_geometry_1' and 'in_geometry_2', defined in 'in_crs'.
"""
out_geometries = SpatialTools.multiple_geometries_intersection(
[in_geometry_1], [in_geometry_2], True, in_crs="epsg:4326"
)
return out_geometries[0]
@staticmethod
def multiple_geometries_intersection(
in_geometries_1, in_geometries_2, join_output, in_crs="epsg:4326"
):
"""This function returns the intersection of 'in_geometry_1' and 'in_geometry_2',
carried out using the Albers Equal Area projection.
Args:
in_geometries_1, in_geometries_2 (lists of Shapely Polygons or MultiPolygons):
Lists of polygons or multipolygons defined in 'in_crs'.
join_output (bool):
If True, the result of the intersection will be merged to form one Shapely
Polygon or MultiPolygon. If False, the result of the intersection will be as
many Shapely Polygons or MultiPolygons as it yields.
in_crs (str):
Coordinate reference system (CRS) in which 'in_geometry_1' and 'in_geometry_2'
are defined. Default: "epsg:4326".
Returns:
out_geometries (list of Shapely Polygons or MultiPolygons):
Intersection of 'in_geometries_1' and 'in_geometries_2', defined in 'in_crs'.
If 'join_output' is True, 'out_geometries' contains only one element.
"""
# Create GeoPandas GeoDataFrames
in_geometry_1_gdf = SpatialTools.geodataframe_from_shapely(in_geometries_1, in_crs)
in_geometry_2_gdf = SpatialTools.geodataframe_from_shapely(in_geometries_2, in_crs)
# Project both geometries using Albers equal area
(
in_geometry_1_gdf,
in_geometry_2_gdf,
) = SpatialTools.project_two_geodataframes_to_albers_equal_area(
in_geometry_1_gdf, in_geometry_2_gdf
)
out_geometry_gdf = geopandas.overlay(
in_geometry_1_gdf, in_geometry_2_gdf, how="intersection"
)
out_geometry_gdf = out_geometry_gdf.to_crs(in_crs)
if join_output:
if out_geometry_gdf.shape[0] > 1:
out_geometry_gdf = out_geometry_gdf.dissolve()
# Trick to avoid splitting MultiPolygons
aux_geometry = out_geometry_gdf["geometry"].to_numpy()[0]
out_geometries = [aux_geometry]
else:
out_geometries = list(out_geometry_gdf["geometry"].to_numpy())
return out_geometries
@staticmethod
def geodataframe_from_shapely(in_geometries, in_crs="epsg:4326"):
"""This function returns a GeoPandas GeoDataFrame containing the geometry/geometries
......
......@@ -48,5 +48,5 @@ setup(
},
packages=find_packages(),
entry_points={"console_scripts": ["gdeimporter = gdeimporter.gdeimporter:main"]},
python_requires=">=3.7",
python_requires=">=3.7, <3.10",
)
......@@ -8,3 +8,6 @@ Entity_1,Unit_3,16.00,17.00,40.00,39.00
Entity_2,Unit_A,21.00,22.00,45.00,44.00
Entity_2,Unit_B,22.00,23.00,46.00,45.00
Entity_2,Unit_C,23.00,24.00,47.00,46.00
Entity_1,residential_FILLER,12.00,17.00,40.00,35.02
Entity_1,commercial_FILLER,14.00,17.00,40.00,37.00
Entity_2,commercial_FILLER,22.50,23.00,46.00,45.00
......@@ -11,3 +11,4 @@ database_built_up:
dbname: some_database_name
username: some_username
password: some_password
data_units_surface_threshold: 1.5
model_name: esrm20
exposure_format: esrm20
data_pathname: /some/path/to/directory
boundaries_pathname: /some/path/to/directory
occupancies_to_run: residential, commercial, industrial
exposure_entities_to_run: all
exposure_entities_code:
Entity_1: EN1
Entity_2: EN2
Entity_3: EN3
Entity_4: EN4
Entity_5: EN5
Entity_6: EN6
Entity_7: EN7
Entity_8: EN8
number_cores: 4
database_built_up:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
database_gde_tiles:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
data_units_surface_threshold: -1E-16
force_creation_data_units: False
model_name: esrm20
exposure_format: esrm20
data_pathname: /some/path/to/directory
boundaries_pathname: /some/path/to/directory
occupancies_to_run: residential, commercial, industrial
exposure_entities_to_run: all
exposure_entities_code:
Entity_1: EN1
Entity_2: EN2
Entity_3: EN3
Entity_4: EN4
Entity_5: EN5
Entity_6: EN6
Entity_7: EN7
Entity_8: EN8
number_cores: 4
database_built_up:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
database_gde_tiles:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
data_units_surface_threshold: 123.5
force_creation_data_units: False
......@@ -7,3 +7,4 @@ exposure_entities_to_run: all
exposure_entities_code: ISO3
number_cores: 4
database_built_up: some_database_name
data_units_surface_threshold: 1.5
......@@ -19,3 +19,4 @@ database_gde_tiles:
port: 1111
username: xxx
password: xxx
data_units_surface_threshold: 1.5
......@@ -20,3 +20,4 @@ database_gde_tiles:
dbname: some_database_name
username: some_username
password: some_password
data_units_surface_threshold: 1.5
......@@ -16,3 +16,4 @@ database_gde_tiles:
dbname: some_database_name
username: some_username
password: some_password
data_units_surface_threshold: 1.5
......@@ -18,3 +18,4 @@ database_gde_tiles:
dbname: some_database_name
username: some_username
password: some_password
data_units_surface_threshold: 1.5
......@@ -16,3 +16,4 @@ database_gde_tiles:
dbname: some_database_name
username: some_username
password: some_password
data_units_surface_threshold: 1.5
......@@ -24,3 +24,5 @@ database_gde_tiles:
dbname: some_database_name
username: some_username
password: some_password
data_units_surface_threshold: 1.5
force_creation_data_units: False
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment