Commit 7d337956 authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added feature to retrieve OBM buildings of data unit

parent 5c50ec2e
Pipeline #40774 passed with stage
in 2 minutes and 30 seconds
......@@ -23,6 +23,7 @@ of [gde-importer](https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposu
- `numpy`
- `shapely`
- `geopandas`
- `gde-importer`
### Install
......@@ -67,9 +68,6 @@ necessary parameters. Required parameters are:
been imported by
[gde-importer](https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/gde-importer)
already.
- `database_gde_tiles`: Credentials for the
[GDE Tiles](https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/database-gdetiles)
database where information on the GDE tiles is stored.
- `exposure_entities_to_run`: List of names of exposure entities for which the code will be run.
Currently supported options:
- "all": The list of names associated with `model_name` will be retrieved from the
......@@ -83,6 +81,12 @@ and 3-character codes. When running `model_name=esrm20`, "ISO3" is the preferred
- `occupancies_to_run`: List of occupancies for which the code will be run, separated by ", "
(comma and space). They need to exist for the indicated `exposure format`. Currently supported
values: residential, commercial, industrial.
- `database_gde_tiles`: Credentials for the
[GDE Tiles](https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/database-gdetiles)
database where information on the GDE tiles is stored.
- `database_obm_buildings`: Credentials for the
[OBM Buildings](https://git.gfz-potsdam.de/dynamicexposure/openbuildingmap/database-obmbuildings)
database where information on the OBM buildings is stored.
## Running gde-core
......
model_name: esrm20 # Needs to exist in 'aggregated_sources' database table
exposure_entities_to_run: all # Either "all", a comma-space-separated list of entity names, or a name of a .txt or .csv file
exposure_entities_code: ISO3 # Either "ISO3" or a nested structure with exposure entity names and 3-character codes
occupancies_to_run: residential, commercial, industrial # Need to exist for the indicated `model_name`
database_gde_tiles: # Database where info on the GDE tiles is stored
host: host_name
dbname: database_name
port: port_number # Leave empty if a port number is not needed
username: username
password: password_of_username
exposure_entities_to_run: all # Either "all", a comma-space-separated list of entity names, or a name of a .txt or .csv file
exposure_entities_code: ISO3 # Either "ISO3" or a nested structure with exposure entity names and 3-character codes
occupancies_to_run: residential, commercial, industrial # Need to exist for the indicated `model_name`
database_obm_buildings: # Database where info on the OBM buildings is stored
host: host_name
dbname: database_name
port: port_number # Leave empty if a port number is not needed
username: username
password: password_of_username
......@@ -32,20 +32,6 @@ class Configuration:
Attributes:
self.model_name (str):
Name of the input aggregated model.
self.database_gde_tiles (dict):
Dictionary containing the credentials needed to connect to the SQL database in which
information on the GDE tiles is stored. The exact parameters needed depend on the
database. They can be:
host (str):
SQL database host address.
dbname (str):
Name of the SQL database.
port (int):
Port where the SQL database can be found.
username (str):
User name to connect to the SQL database.
password (str):
Password associated with self.username.
self.exposure_entities_to_run (list of str):
List of names of the exposure entities for which the code will be run.
self.exposure_entities_code (str or dict):
......@@ -61,14 +47,43 @@ class Configuration:
self.occupancies_to_run (list of str):
List of occupancy cases of the input aggregated exposure model for which the code
will be run.
self.database_gde_tiles (dict):
Dictionary containing the credentials needed to connect to the SQL database in which
information on the GDE tiles is stored. The exact parameters needed depend on the
database. They can be:
host (str):
SQL database host address.
dbname (str):
Name of the SQL database.
port (int):
Port where the SQL database can be found.
username (str):
User name to connect to the SQL database.
password (str):
Password associated with self.username.
self.database_obm_buildings (dict):
Dictionary containing the credentials needed to connect to the SQL database in which
information on the OBM buildings is stored. The exact parameters needed depend on
the database. They can be:
host (str):
SQL database host address.
dbname (str):
Name of the SQL database.
port (int):
Port where the SQL database can be found.
username (str):
User name to connect to the SQL database.
password (str):
Password associated with self.username.
"""
REQUIRES = [
"model_name",
"database_gde_tiles",
"exposure_entities_to_run",
"exposure_entities_code",
"occupancies_to_run",
"database_gde_tiles",
"database_obm_buildings",
]
def __init__(self, filepath, force_config_over_hierarchies=False):
......@@ -92,6 +107,13 @@ class Configuration:
config, "database_gde_tiles", "test_db_gde_tiles.env", force_config_over_hierarchies
)
self.database_obm_buildings = ConfigurationMethods.retrieve_database_credentials(
config,
"database_obm_buildings",
"test_db_obm_buildings.env",
force_config_over_hierarchies,
)
self.exposure_entities_to_run = ConfigurationMethods.assign_listed_parameters(
config, "exposure_entities_to_run"
)
......
......@@ -19,6 +19,7 @@
import logging
import numpy
import shapely
import geopandas
from gdeimporter.tools.database import Database
......@@ -284,3 +285,108 @@ class DatabaseQueries:
ids_no_geometry = ids[which_none]
return ids_processed, geometries_processed, ids_no_geometry
@staticmethod
def get_OBM_buildings_in_data_unit_by_occupancy_case(
    occupancy_case,
    geographic_area,
    db_obm_buildings_config,
    db_table,
):
    """This function retrieves information on the OBM buildings of an 'occupancy_case'
    located in a target region (represented by its 'geographic_area'), retrieved from the
    table with name 'db_table' in the database whose credentials are indicated in
    'db_obm_buildings_config'. An OBM building is considered to be located in a target
    region if its centroid falls within that region.

    Args:
        occupancy_case (str):
            Name of the target occupancy case (e.g. "residential", "commercial",
            "industrial").
        geographic_area (Shapely Polygon or MultiPolygon):
            Geometry of the target region for which the OBM buildings will be retrieved,
            defined in EPSG:4326.
        db_obm_buildings_config (dict):
            Dictionary containing the credentials needed to connect to the SQL database in
            which information on the OBM buildings is stored. The keys of the dictionary
            need to be:
                host (str):
                    SQL database host address.
                dbname (str):
                    Name of the SQL database.
                port (int):
                    Port where the SQL database can be found.
                username (str):
                    User name to connect to the SQL database.
                password (str):
                    Password associated with username.
        db_table (str):
            Name of the table of the SQL database where the OBM buildings are stored. The
            name is inserted verbatim into the query (SQL identifiers cannot be passed as
            bound parameters), so it must come from a trusted source. It is assumed that
            this table contains, at least, the following fields:
                osm_id (int):
                    OpenStreetMap (OSM) ID of the OBM building.
                geometry (PSQL geometry):
                    Geometry (footprint) of the building, defined in EPSG:4326.
                relation_id (int):
                    OpenStreetMap (OSM) ID of the relation to which an osm_id belongs, if
                    any.
                quadkey (str):
                    String indicating the quadkey of the tile to which the centroid of the
                    building belongs.
                storeys (int):
                    Number of storeys of the building.
                occupancy (str):
                    Occupancy of the building as per the GEM Building Taxonomy v3.0.
                occupancy_case (enum):
                    SQL enumerated type describing the building occupancy cases (e.g.
                    "residential", "commercial", "industrial", "other").

    Returns:
        obm_buildings (GeoPandas GeoDataFrame):
            GeoDataFrame with data on the OBM buildings of 'occupancy_case' whose centroids
            fall within 'geographic_area'. It comprises the following columns:
                osm_id (int):
                    OpenStreetMap (OSM) ID of the building.
                relation_id (int):
                    OpenStreetMap (OSM) ID of the relation to which an osm_id belongs, if
                    any.
                quadkey (str):
                    String indicating the quadkey of the tile to which the centroid of the
                    building belongs.
                storeys (int):
                    Number of storeys of the building.
                occupancy (str):
                    Occupancy of the building as per the GEM Building Taxonomy v3.0.
                geometry (Shapely Polygon or MultiPolygon):
                    Geometry (footprint) of the building, defined in EPSG:4326.
            Any missing attributes of a building are returned as numpy.nan.

    Raises:
        TypeError:
            If 'geographic_area' is neither a Polygon nor a MultiPolygon.
    """

    if geographic_area.geom_type not in ("Polygon", "MultiPolygon"):
        error_message = (
            "DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case: "
            "'geographic_area' must be an instance of a Shapely Polygon or MultiPolygon"
        )
        raise TypeError(error_message)

    # Only the table name is interpolated into the text of the query (identifiers cannot be
    # bound as parameters). The doubled '%%' survive this formatting step as the named
    # placeholders that the database driver fills in with properly quoted values.
    sql_query = (
        "SELECT b.osm_id, b.relation_id, b.quadkey, b.storeys, b.occupancy, "
        "b.geometry FROM %s AS b "
        "WHERE ST_Contains(ST_GeomFromEWKT(%%(area)s), ST_Centroid(b.geometry)) "
        "AND b.occupancy_case = %%(occupancy_case)s"
    ) % (db_table,)

    query_parameters = {
        "area": "SRID=4326;%s" % geographic_area.wkt,
        "occupancy_case": occupancy_case,
    }

    db_obm_buildings = Database(**db_obm_buildings_config)
    db_obm_buildings.create_connection_and_cursor()

    try:
        obm_buildings = geopandas.GeoDataFrame.from_postgis(
            sql_query,
            db_obm_buildings.connection,
            geom_col="geometry",
            crs="epsg:4326",
            params=query_parameters,
        )
    finally:
        # Release the connection even if the query fails
        db_obm_buildings.close_connection()

    return obm_buildings
......@@ -100,6 +100,23 @@ def main():
)
)
# Retrieve OBM buildings and assign building classes and probabilities to them
for i, data_unit_id in enumerate(data_units_ids):
# Going by data unit so as to minimise intersection operations and because
# building classes are associated with specific data units
obm_buildings_raw = (
DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case(
occupancy_case,
data_units_geometries[i],
config.database_obm_buildings,
"obm_buildings",
)
)
logger.info(
"Data unit '%s': %s OBM buildings retrieved"
% (data_unit_id, str(obm_buildings_raw.shape[0]))
)
# Leave the program
logger.info("gde-core has finished")
sys.exit()
......
......@@ -31,6 +31,7 @@ setup(
install_requires=[
"numpy",
"shapely",
"geopandas",
# pylint: disable=line-too-long
"gdeimporter@git+https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/gde-importer.git", # noqa: E501
],
......
......@@ -29,6 +29,8 @@ load_dotenv(Path(".env").resolve())
def test_db():
"""A test database simulating to contain the following tables:
- aggregated_sources (of the GDE Tiles database)
- data_units (of the GDE Tiles database)
- obm_buildings (of the OBM Buildings database)
"""
init_test_db()
......
model_name: esrm20
exposure_entities_to_run: Italy
exposure_entities_code: ISO3
occupancies_to_run: residential, commercial
database_gde_tiles:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
exposure_entities_to_run: Italy
exposure_entities_code: ISO3
occupancies_to_run: residential, commercial
database_obm_buildings:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
model_name: esrm20
exposure_entities_to_run: Italy
exposure_entities_code: ISO3
database_gde_tiles:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
exposure_entities_to_run: Italy
exposure_entities_code: ISO3
database_obm_buildings:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
DROP TABLE IF EXISTS aggregated_sources;
DROP TABLE IF EXISTS data_units;
DROP TABLE IF EXISTS obm_buildings;
DROP TYPE IF EXISTS occupancycase;
DROP EXTENSION IF EXISTS postgis;
......@@ -62,3 +63,38 @@ INSERT INTO data_units(data_unit_id,
people_census,
cost_total)
VALUES ('DEF_00000', 'residential', 2, 'DEF', 0.0, 0.0, 0.0, 0.0);
-- Test fixture: table of OBM buildings queried by the tests of
-- DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case.
CREATE TABLE obm_buildings
(
osm_id INTEGER,
storeys INTEGER,
relation_id INTEGER,
occupancy VARCHAR,
occupancy_case occupancycase,
quadkey CHAR(18),
geometry GEOMETRY (GEOMETRY, 4326),
PRIMARY KEY (osm_id)
);
-- Building with a number of storeys but no relation_id (left as NULL).
INSERT INTO obm_buildings(osm_id, storeys, occupancy, occupancy_case, quadkey, geometry)
VALUES (
11223344, 4, 'RES2', 'residential', '122010321033023130',
ST_GeomFromText('POLYGON((15.0487 37.4812,15.0489 37.4810,15.0486 37.4808,15.0484 37.4810,15.0487 37.4812))'));
-- Buildings with neither storeys nor relation_id (both left as NULL).
-- NOTE(review): 55667788 pairs occupancy 'RES3' with occupancy_case 'commercial' -- confirm
-- this is intentional.
-- The tests expect 88990011 to be excluded from the data unit they query because its
-- centroid falls outside that data unit even though its footprint intersects it.
INSERT INTO obm_buildings(osm_id, occupancy, occupancy_case, quadkey, geometry)
VALUES (
22334455, 'RES1', 'residential', '122010321033023130',
ST_GeomFromText('POLYGON((15.0492 37.4808,15.0492 37.4806,15.0490 37.4806,15.0490 37.4808,15.0492 37.4808))')),
(
55667788, 'RES3', 'commercial', '122010321033023130',
ST_GeomFromText('POLYGON((15.0495 37.4810,15.0498 37.4810,15.0498 37.4808,15.0495 37.4808,15.0495 37.4810))')),
(
88990011, 'RES', 'residential', '122010321033023130',
ST_GeomFromText('POLYGON((15.0463 37.4809,15.0463 37.4808,15.0461 37.4808,15.0461 37.4809,15.0463 37.4809))'));
-- Two commercial buildings that share the same relation_id and have all attributes set.
INSERT INTO obm_buildings(osm_id, storeys, relation_id, occupancy, occupancy_case, quadkey, geometry)
VALUES (
33445566, 2, -101010, 'COM3', 'commercial', '122010321033023130',
ST_GeomFromText('POLYGON((15.0491 37.4811,15.0494 37.4814,15.0495 37.4813,15.0492 37.4810,15.0491 37.4811))')),
(
44556677, 3, -101010, 'COM3', 'commercial', '122010321033023130',
ST_GeomFromText('POLYGON((15.0495 37.4813,15.0497 37.4812,15.0495 37.4811,15.0494 37.4812,15.0495 37.4813))'));
......@@ -18,6 +18,8 @@
import os
import numpy
import pytest
import shapely
from gdecore.configuration import Configuration
from gdecore.database_queries import DatabaseQueries
......@@ -137,3 +139,84 @@ def test_get_data_unit_ids_geometries_of_entity_and_occupancy_case(test_db):
assert len(returned_data_units_geometries) == 0
assert len(returned_data_units_ids_no_geometry) == 1
assert returned_data_units_ids_no_geometry[0] == expected_data_unit_id_no_geometry
def test_get_OBM_buildings_in_data_unit_by_occupancy_case(test_db):
    """Check that OBM buildings are retrieved by occupancy case for the centroids that fall
    within a data unit, that missing attributes come back as NaN, that an empty result is
    returned when no building matches, and that non-polygon geometries are rejected.
    """

    # Database connection (the Configuration class will define the credentials based on whether
    # the code is running in the CI or locally)
    config = Configuration(
        os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml")
    )

    expected_columns = ["osm_id", "relation_id", "quadkey", "storeys", "occupancy", "geometry"]

    # Auxiliary, to retrieve the geometry of the data unit
    (
        returned_data_units_ids,
        returned_data_units_geometries,
        _,
    ) = DatabaseQueries.get_data_unit_ids_geometries_of_entity_and_occupancy_case(
        "ABC", "residential", 2, config.database_gde_tiles, "data_units"
    )

    # Test cases in which buildings will be retrieved
    geometry = returned_data_units_geometries[
        numpy.where(returned_data_units_ids == "ABC_10269")[0][0]
    ]

    returned_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case(
        "residential",
        geometry,
        config.database_obm_buildings,
        "obm_buildings",
    )

    assert returned_obm_buildings.shape[0] == 2
    for col_name in expected_columns:
        assert col_name in returned_obm_buildings.columns
    residential_osm_ids = returned_obm_buildings["osm_id"].to_numpy()
    assert 11223344 in residential_osm_ids
    assert 22334455 in residential_osm_ids
    # Check building whose footprint intersects the data unit but whose centroid is outside
    assert 88990011 not in residential_osm_ids

    returned_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case(
        "commercial",
        geometry,
        config.database_obm_buildings,
        "obm_buildings",
    )

    assert returned_obm_buildings.shape[0] == 3
    for col_name in expected_columns:
        assert col_name in returned_obm_buildings.columns
    commercial_osm_ids = returned_obm_buildings["osm_id"].to_numpy()
    assert 33445566 in commercial_osm_ids
    assert 44556677 in commercial_osm_ids
    assert 55667788 in commercial_osm_ids
    # The query has no ORDER BY, so the building without storeys and relation_id is selected
    # by its OSM ID instead of by its position in the returned GeoDataFrame
    incomplete_building = returned_obm_buildings[returned_obm_buildings["osm_id"] == 55667788]
    assert incomplete_building.shape[0] == 1
    assert numpy.isnan(incomplete_building["storeys"].to_numpy()[0])
    assert numpy.isnan(incomplete_building["relation_id"].to_numpy()[0])

    # Test case in which no buildings will be retrieved
    geometry = returned_data_units_geometries[
        numpy.where(returned_data_units_ids == "ABC_10277")[0][0]
    ]

    returned_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case(
        "residential",
        geometry,
        config.database_obm_buildings,
        "obm_buildings",
    )

    assert returned_obm_buildings.shape[0] == 0

    # Test case in which the geometry passed is not a Polygon or MultiPolygon
    with pytest.raises(TypeError) as excinfo:
        DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case(
            "commercial",
            shapely.geometry.Point(0.0, 0.0),
            config.database_obm_buildings,
            "obm_buildings",
        )
    assert "TypeError" in str(excinfo.type)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment