diff --git a/gdecore/database_storage.py b/gdecore/database_storage.py index d2fe7a8b87ea8e815a1ccac9f9914e84610432f9..96909c44d1a28209d97f6dc8e1efcb92704c86c0 100644 --- a/gdecore/database_storage.py +++ b/gdecore/database_storage.py @@ -154,6 +154,7 @@ class DatabaseStorage: occupancy_case, aggregated_source_id, obm_buildings_building_classes, + obm_buildings_quadkeys_geometry, db_gde_tiles_config, db_table, ): @@ -174,6 +175,18 @@ class DatabaseStorage: aggregated_source_id (int): ID of the source of the aggregated exposure model associated with the building classes of the OBM buildings in 'obm_buildings_building_classes'. + obm_buildings_quadkeys_geometry (Pandas DataFrame): + DataFrame indicating the footprints and quadkeys associated with the centroids + of the OBM buildings in 'obm_buildings_building_classes'. It is assumed to have + at least the following columns: + osm_id (int): + OpenStreetMap (OSM) ID of the building. If the building is represented + by a relation, this is the ID of the relation. + quadkey (str): + String indicating the quadkey of the tile to which the centroid of the + building belongs. + geometry (str): + Footprint of the building in Well-Known Text format and EPSG:4326. obm_buildings_building_classes (dict): Dictionary containing the building classes and their probabilities for each OBM building. Dictionary keys correspond to the OSM ID of the building. Each key @@ -214,6 +227,9 @@ class DatabaseStorage: SQL enumerated type describing the building occupancy cases. data_unit_id (str): ID of the data unit the OBM building belongs to. + quadkey (str): + Quadkey of the zoom-level 18 tile to which the centroid of the building + belongs. building_class_names (array of str): Building class as per the GEM Building Taxonomy. settlement_types (array of enum): @@ -224,6 +240,8 @@ class DatabaseStorage: class. probabilities (array of float): Probabilities of the OBM building belonging to each building class. + geometry (PSQL geometry): + Footprint of the OBM building. """ sql_commands = {} @@ -231,15 +249,17 @@ class DatabaseStorage: sql_commands["query"] = "SELECT COUNT(*) FROM %s " sql_commands["query"] += "WHERE (osm_id=%s AND aggregated_source_id=%s);" - sql_commands["update"] = "UPDATE %s SET (occupancy_case, data_unit_id, " + sql_commands["update"] = "UPDATE %s SET (occupancy_case, data_unit_id, quadkey, " sql_commands["update"] += "building_class_names, settlement_types, occupancy_subtypes, " - sql_commands["update"] += "probabilities) = ('%s','%s','%s','%s','%s','%s')" - sql_commands["update"] += " WHERE (osm_id=%s AND aggregated_source_id=%s);" + sql_commands["update"] += "probabilities, geometry) = " + sql_commands["update"] += "('%s','%s','%s','%s','%s','%s','%s','%s') " + sql_commands["update"] += "WHERE (osm_id=%s AND aggregated_source_id=%s);" sql_commands["insert"] = "INSERT INTO %s(osm_id, aggregated_source_id, occupancy_case, " - sql_commands["insert"] += "data_unit_id, building_class_names, settlement_types, " - sql_commands["insert"] += "occupancy_subtypes, probabilities) " - sql_commands["insert"] += "VALUES (%s, %s, '%s', '%s', '%s', '%s', '%s', '%s');" + sql_commands["insert"] += "data_unit_id, quadkey, building_class_names, " + sql_commands["insert"] += "settlement_types, occupancy_subtypes, probabilities, " + sql_commands["insert"] += "geometry) VALUES ( " + sql_commands["insert"] += "%s, %s, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');" db_gde_tiles = Database(**db_gde_tiles_config) db_gde_tiles.create_connection_and_cursor() @@ -259,6 +279,9 @@ class DatabaseStorage: db_table, occupancy_case, data_unit_id, + obm_buildings_quadkeys_geometry.loc[ + obm_buildings_quadkeys_geometry["osm_id"] == osm_id, "quadkey" + ].to_numpy()[0], '{"%s"}' % ( '", "'.join( @@ -275,6 +298,9 @@ class DatabaseStorage: list(building_classes["probabilities"].to_numpy().astype(str)) ) ), + obm_buildings_quadkeys_geometry.loc[ + obm_buildings_quadkeys_geometry["osm_id"] == osm_id, "geometry" + ].to_numpy()[0], osm_id, aggregated_source_id, ) @@ -288,6 +314,9 @@ class DatabaseStorage: aggregated_source_id, occupancy_case, data_unit_id, + obm_buildings_quadkeys_geometry.loc[ + obm_buildings_quadkeys_geometry["osm_id"] == osm_id, "quadkey" + ].to_numpy()[0], '{"%s"}' % ( '", "'.join( @@ -304,6 +333,9 @@ class DatabaseStorage: list(building_classes["probabilities"].to_numpy().astype(str)) ) ), + obm_buildings_quadkeys_geometry.loc[ + obm_buildings_quadkeys_geometry["osm_id"] == osm_id, "geometry" + ].to_numpy()[0], ) ) else: # this should not occur diff --git a/gdecore/processor.py b/gdecore/processor.py index 9e8971e3e384aba4eeb629795e8f97d3a1221df9..24a96fd506c8fc323c621ddab337373159899641 100644 --- a/gdecore/processor.py +++ b/gdecore/processor.py @@ -84,6 +84,8 @@ class GDEProcessor: occupancy (str): Occupancy of the building as per the GEM Building Taxonomy v3.0. Missing values can be "nan" or "None". + geometry (Polygon or MultiPolygon in Well-Known Text format): + Geometry (footprint) of the building, defined in EPSG:4326. """ # Identify unique relation IDs in 'obm_buildings' @@ -105,15 +107,14 @@ class GDEProcessor: if len(which_no_relation) > 0: # Start output DataFrame with buildings that are not part of relations obm_buildings_adjusted = obm_buildings.iloc[which_no_relation, :] - obm_buildings_adjusted = obm_buildings_adjusted.drop( - columns=["relation_id", "geometry"] - ) + obm_buildings_adjusted = obm_buildings_adjusted.drop(columns=["relation_id"]) # Process buildings that are part of relations add_osm_ids = [] add_quadkey = [] add_storeys = [] add_occupancy = [] + add_geometry = [] for relation_id in unique_relations: # Identify components of this relation_id @@ -136,12 +137,17 @@ class GDEProcessor: GDEProcessor._ensure_unique_occupancy(obm_buildings_of_relation) ) + add_geometry.append( + GDEProcessor._define_geometry_of_relation(obm_buildings_of_relation) + ) + relation_buildings = pandas.DataFrame( { "osm_id": pandas.Series(numpy.array(add_osm_ids).astype(int), dtype="int"), "quadkey": pandas.Series(numpy.array(add_quadkey).astype(str), dtype="str"), "storeys": pandas.Series(numpy.array(add_storeys).astype(float), dtype="float"), "occupancy": pandas.Series(numpy.array(add_occupancy).astype(str), dtype="str"), + "geometry": pandas.Series(numpy.array(add_geometry).astype(str), dtype="str"), } ) @@ -310,6 +316,25 @@ class GDEProcessor: return unique_occupancy + @staticmethod + def _define_geometry_of_relation(obm_buildings): + """ + This function merges all the geometries contained in 'obm_buildings' and returns it in + Well-Known Text (WKT) format (string). + + Args: + obm_buildings (GeoPandas GeoDataFrame): + GeoDataFrame with at least a geometry column. + + Returns: + merged_geometry (str): + Merged geometries of 'obm_buildings' in Well-Known Text (WKT) format. + """ + + merged_geometry = obm_buildings.dissolve()["geometry"].to_numpy()[0].wkt + + return merged_geometry + @staticmethod def assign_building_classes_to_obm_buildings( obm_buildings, data_unit_building_classes, occupancy_case @@ -901,6 +926,7 @@ class GDEProcessor: occupancy_case, aggregated_source_id, obm_buildings_building_classes, + obm_buildings, configuration.database_gde_tiles, "gde_buildings", ) diff --git a/tests/data/test_database_set_up.sql b/tests/data/test_database_set_up.sql index 8055ed8afe29f457af565b3545dd3e7d25db0fa7..b86fac63df8b84aa45d1952dc6af79a1c9bb24fd 100644 --- a/tests/data/test_database_set_up.sql +++ b/tests/data/test_database_set_up.sql @@ -224,23 +224,26 @@ CREATE TABLE gde_buildings osm_id integer, aggregated_source_id SMALLINT, occupancy_case occupancycase, - data_unit_id VARCHAR, + data_unit_id VARCHAR, + quadkey CHAR(18), building_class_names VARCHAR[], settlement_types settlement[], occupancy_subtypes VARCHAR[], probabilities FLOAT[], + geometry GEOMETRY, PRIMARY KEY (osm_id, aggregated_source_id) ); INSERT INTO gde_buildings(osm_id, aggregated_source_id, occupancy_case, - data_unit_id, + data_unit_id, + quadkey, building_class_names, settlement_types, occupancy_subtypes, probabilities) -VALUES (-101010, 2, 'industrial', 'ABC_10269', +VALUES (-101010, 2, 'industrial', 'ABC_10269', '333333333333333333', '{"CLASS/X/params/H:1", "CLASS/Y/params/H:2"}', '{"rural", "rural"}', '{"all", "all"}', diff --git a/tests/test_database_storage.py b/tests/test_database_storage.py index 6282377e5c8da721df23731d5b14788125ed8b80..9b96b7cc44dbd77a626069467f746b8cc88aca0f 100644 --- a/tests/test_database_storage.py +++ b/tests/test_database_storage.py @@ -19,6 +19,7 @@ import os import numpy import pandas +import shapely from gdeimporter.tools.database import Database from gdecore.configuration import Configuration from gdecore.database_storage import DatabaseStorage @@ -141,11 +142,23 @@ def test_store_OBM_building_classes(test_db): } ) + obm_bdg_quadkey_geom = pandas.DataFrame( + { + "osm_id": [11223344], + "quadkey": ["122010321033023130"], + "geometry": [ + "POLYGON((15.0487 37.4812,15.0489 37.4810,15.0486 37.4808,15.0484 37.4810," + "15.0487 37.4812))" + ], + } + ) + DatabaseStorage.store_OBM_building_classes( "ABC_10269", "residential", 2, obm_bdg_classes, + obm_bdg_quadkey_geom, config.database_gde_tiles, "gde_buildings", ) @@ -153,14 +166,28 @@ def test_store_OBM_building_classes(test_db): ( returned_occupancy_case, returned_data_unit_id, + returned_quadkey, returned_building_class_names, returned_settlement_types, returned_occupancy_subtypes, returned_probabilities, + returned_geometry, ) = query_OBM_building_classes(config.database_gde_tiles, 11223344, 2) assert returned_occupancy_case == "residential" assert returned_data_unit_id == "ABC_10269" + assert returned_quadkey == "122010321033023130" + + expected_geometry = shapely.wkt.loads( + obm_bdg_quadkey_geom.loc[ + obm_bdg_quadkey_geom["osm_id"] == 11223344, "geometry" + ].to_numpy()[0] + ) + assert round(returned_geometry.bounds[0], 2) == round(expected_geometry.bounds[0], 2) + assert round(returned_geometry.bounds[1], 2) == round(expected_geometry.bounds[1], 2) + assert round(returned_geometry.bounds[2], 2) == round(expected_geometry.bounds[2], 2) + assert round(returned_geometry.bounds[3], 2) == round(expected_geometry.bounds[3], 2) + assert len(returned_building_class_names) == len( obm_bdg_classes[11223344]["building_class_name"].to_numpy() ) @@ -205,11 +232,25 @@ def test_store_OBM_building_classes(test_db): } ) + obm_bdg_quadkey_geom = pandas.DataFrame( + { + "osm_id": [-101010, -202020], + "quadkey": ["122010321033023130", "122010321033023132"], + "geometry": [ + "POLYGON((15.0491 37.4811,15.0494 37.4814,15.0495 37.4813,15.0497 37.4812," + "15.0495 37.4811,15.0494 37.4812,15.0492 37.4810,15.0491 37.4811))", + "POLYGON((15.0490 37.4802,15.04924 37.48036,15.0492004 37.4804,15.0494 37.4805," + "15.0496 37.4803,15.0494 37.4802,15.0491 37.4800,15.0490 37.4802))", + ], + } + ) + DatabaseStorage.store_OBM_building_classes( "ABC_10269", "commercial", 2, obm_bdg_classes, + obm_bdg_quadkey_geom, config.database_gde_tiles, "gde_buildings", ) @@ -218,14 +259,34 @@ def test_store_OBM_building_classes(test_db): ( returned_occupancy_case, returned_data_unit_id, + returned_quadkey, returned_building_class_names, returned_settlement_types, returned_occupancy_subtypes, returned_probabilities, + returned_geometry, ) = query_OBM_building_classes(config.database_gde_tiles, osm_id, 2) assert returned_occupancy_case == "commercial" assert returned_data_unit_id == "ABC_10269" + + assert ( + returned_quadkey + == obm_bdg_quadkey_geom.loc[ + obm_bdg_quadkey_geom["osm_id"] == osm_id, "quadkey" + ].to_numpy()[0] + ) + + expected_geometry = shapely.wkt.loads( + obm_bdg_quadkey_geom.loc[ + obm_bdg_quadkey_geom["osm_id"] == osm_id, "geometry" + ].to_numpy()[0] + ) + assert round(returned_geometry.bounds[0], 2) == round(expected_geometry.bounds[0], 2) + assert round(returned_geometry.bounds[1], 2) == round(expected_geometry.bounds[1], 2) + assert round(returned_geometry.bounds[2], 2) == round(expected_geometry.bounds[2], 2) + assert round(returned_geometry.bounds[3], 2) == round(expected_geometry.bounds[3], 2) + assert len(returned_building_class_names) == len( obm_bdg_classes[osm_id]["building_class_name"].to_numpy() ) @@ -277,6 +338,8 @@ def query_OBM_building_classes(credentials, osm_id, aggregated_source_id): data_unit_id (str): ID of the data unit of the 'aggregated_source_id' and 'occupancy_case' associated with the OBM building. + quadkey (str): + Quadkey of the zoom-level 18 tile to which the centroid of the OBM building belongs. building_class_names (list of str): Building classes as per the GEM Building Taxonomy. settlement_types (list of str): @@ -286,11 +349,14 @@ def query_OBM_building_classes(credentials, osm_id, aggregated_source_id): Details on the occupancy, if relevant to characterise the building classes. probabilities (list of float): Probabilities of the building belonging to the building classes. + geometry (Shapely Polygon or Multipolygon): + Geometry of the OBM building. + """ sql_command = ( - "SELECT occupancy_case, data_unit_id, building_class_names, settlement_types, " - "occupancy_subtypes, probabilities FROM gde_buildings " + "SELECT occupancy_case, data_unit_id, quadkey, building_class_names, settlement_types, " + "occupancy_subtypes, probabilities, geometry FROM gde_buildings " "WHERE (osm_id=%s AND aggregated_source_id=%s);" % (osm_id, aggregated_source_id) ) @@ -301,18 +367,22 @@ def query_OBM_building_classes(credentials, osm_id, aggregated_source_id): occupancy_case = result[0][0] data_unit_id = result[0][1] - building_class_names = result[0][2] - settlement_types = result[0][3].replace("{", "").replace("}", "").split(",") - occupancy_subtypes = result[0][4] - probabilities = result[0][5] + quadkey = result[0][2] + building_class_names = result[0][3] + settlement_types = result[0][4].replace("{", "").replace("}", "").split(",") + occupancy_subtypes = result[0][5] + probabilities = result[0][6] + geometry = shapely.wkb.loads(result[0][7], hex=True) db_test.close_connection() return ( occupancy_case, data_unit_id, + quadkey, building_class_names, settlement_types, occupancy_subtypes, probabilities, + geometry, )