From e75002f6a59db2a7af623b3c4f9ba3e9d9a33a10 Mon Sep 17 00:00:00 2001 From: Cecilia Nievas Date: Fri, 29 Apr 2022 16:35:11 +0200 Subject: [PATCH] Added handling of occupancy cases as lists of occupancy types --- gdecore/database_queries.py | 42 ++++++++------- gdecore/gdecore.py | 15 ++++-- gdecore/occupancy_cases.py | 81 +++++++++++++++++++++++++++++ tests/data/test_database_set_up.sql | 27 +++++----- tests/test_database_queries.py | 26 +++++---- tests/test_processor.py | 16 +++--- 6 files changed, 153 insertions(+), 54 deletions(-) create mode 100644 gdecore/occupancy_cases.py diff --git a/gdecore/database_queries.py b/gdecore/database_queries.py index 3ef8045..e45fbb1 100644 --- a/gdecore/database_queries.py +++ b/gdecore/database_queries.py @@ -33,7 +33,7 @@ class DatabaseQueries: """ @staticmethod - def retrieve_aggregated_source_id(model_name, db_gde_tiles_config, db_table): + def retrieve_aggregated_source_id_and_format(model_name, db_gde_tiles_config, db_table): """This function retrieves the ID of the aggregated exposure model source whose name is 'model_name'. @@ -66,9 +66,12 @@ class DatabaseQueries: aggregated_source_id (int): ID of the source of the aggregated exposure model with name 'model_name'. If 'model_name' is not found, 'aggregated_source_id' is -999. + aggregated_source_format (str): + Format of the aggregated exposure model with name 'model_name'. If 'model_name' + is not found, 'aggregated_source_format' is "UNKNOWN". """ - sql_query = "SELECT aggregated_source_id FROM %s WHERE name='%s';" + sql_query = "SELECT aggregated_source_id, format FROM %s WHERE name='%s';" db_gde_tiles = Database(**db_gde_tiles_config) db_gde_tiles.create_connection_and_cursor() @@ -80,14 +83,16 @@ class DatabaseQueries: if len(exec_result) == 1: # Entry exists --> retrieve aggregated_source_id = exec_result[0][0] + aggregated_source_format = exec_result[0][1] else: # More than one entries found, this is an error logger.error( - "Error in retrieve_aggregated_source_id: " + "Error in retrieve_aggregated_source_id_and_format: " "more than one or no entry found for name = %s" % (model_name) ) aggregated_source_id = -999 + aggregated_source_format = "UNKNOWN" - return aggregated_source_id + return aggregated_source_id, aggregated_source_format @staticmethod def retrieve_all_exposure_entities_of_aggregated_source_id( @@ -288,22 +293,21 @@ class DatabaseQueries: return ids_processed, geometries_processed, ids_no_geometry @staticmethod - def get_OBM_buildings_in_data_unit_by_occupancy_case( - occupancy_case, + def get_OBM_buildings_in_data_unit_by_occupancy_types( + occupancy_types, geographic_area, db_obm_buildings_config, db_table, ): - """This function retrieves information on the OBM buildings of an 'occupancy_case' + """This function retrieves information on the OBM buildings of certain 'occupancy_types' located in a target region (represented by its 'geographic_area'), retrieved from the table with name 'db_table' in the database whose credentials are indicated in 'db_obm_buildings_config'. An OBM building is considered to be located in a target region if its centroid falls within that region. Args: - occupancy_case (str): - Name of the target occupancy case (e.g. "residential", "commercial", - "industrial"). + occupancy_types (list of str): + List of names of target occupancy types (e.g. "RES", "RES1", "COM5", etc). geographic_area (Shapely Polygon or MultiPolygon): Geometry of the target region for which the OBM buildings will be retrieved, defined in EPSG:4326. @@ -338,13 +342,10 @@ class DatabaseQueries: Number of storeys of the building. occupancy (str): Occupancy of the building as per the GEM Building Taxonomy v3.0. - occupancy_case (enum): - SQL enumerated type describing the building occupancy cases (e.g. - "residential", "commercial", "industrial", "other"). Returns: obm_buildings (GeoPandas GeoDataFrame): - GeoDataFrame with data on the OBM buildings of 'occupancy_case' whose centroids + GeoDataFrame with data on the OBM buildings of 'occupancy_types' whose centroids fall within 'geographic_area'. It comprises the following columns: osm_id (int): OpenStreetMap (OSM) ID of the building. It cannot contain missing values @@ -371,21 +372,26 @@ class DatabaseQueries: and geographic_area.geom_type != "MultiPolygon" ): error_message = ( - "DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case: " + "DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_types: " "'geographic_area' must be an instance of a Shapely Polygon or MultiPolygon" ) raise TypeError(error_message) + # Convert occupancy_types into a string to feed in to the query + occupancy_types_aux = [ + "b.occupancy='%s'" % (occupancy_types[i]) for i in range(len(occupancy_types)) + ] + occupancy_types_condition = " OR ".join(occupancy_types_aux) + sql_query = "SELECT b.osm_id, b.relation_id, b.quadkey, b.storeys, b.occupancy, " sql_query += "b.geometry FROM %s AS b " - sql_query += "WHERE ST_Contains('SRID=%s;%s', ST_Centroid(b.geometry)) " - sql_query += "AND b.occupancy_case='%s'" + sql_query += "WHERE (ST_Contains('SRID=%s;%s', ST_Centroid(b.geometry)) AND (%s));" db_obm_buildings = Database(**db_obm_buildings_config) db_obm_buildings.create_connection_and_cursor() obm_buildings = geopandas.GeoDataFrame.from_postgis( - sql_query % (db_table, "4326", geographic_area, occupancy_case), + sql_query % (db_table, "4326", geographic_area, occupancy_types_condition), db_obm_buildings.connection, geom_col="geometry", crs="epsg:4326", diff --git a/gdecore/gdecore.py b/gdecore/gdecore.py index e9f257b..a9cbcb5 100644 --- a/gdecore/gdecore.py +++ b/gdecore/gdecore.py @@ -23,12 +23,15 @@ from gdecore.configuration import Configuration from gdecore.database_queries import DatabaseQueries from gdecore.database_storage import DatabaseStorage from gdecore.processor import GDEProcessor +from gdecore.occupancy_cases import OccupancyCasesESRM20 # Add a logger printing error, warning, info and debug messages to the screen logger = logging.getLogger() logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler(sys.stdout)) +AGGREGATED_MODELS_OCCUPANCY = {"esrm20": OccupancyCasesESRM20} + def main(): """Run the gde-core.""" @@ -39,7 +42,10 @@ def main(): # Read configuration parameters config = Configuration("config.yml") - aggregated_source_id = DatabaseQueries.retrieve_aggregated_source_id( + ( + aggregated_source_id, + aggregated_source_format, + ) = DatabaseQueries.retrieve_aggregated_source_id_and_format( config.model_name, config.database_gde_tiles, "aggregated_sources", @@ -56,6 +62,9 @@ def main(): % (config.model_name, aggregated_source_id) ) + # Initialise occupancy cases class + occupancy_cases = AGGREGATED_MODELS_OCCUPANCY[aggregated_source_format]() + # Interpret and update config.exposure_entities_to_run config.interpret_exposure_entities_to_run(aggregated_source_id) @@ -115,8 +124,8 @@ def main(): # Retrieve OBM buildings and assign building classes and probabilities to them # Retrieve OBM buildings obm_buildings_raw = ( - DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case( - occupancy_case, + DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_types( + occupancy_cases.mapping[occupancy_case], data_units_geometries[i], config.database_obm_buildings, "obm_buildings", diff --git a/gdecore/occupancy_cases.py b/gdecore/occupancy_cases.py new file mode 100644 index 0000000..fe30e75 --- /dev/null +++ b/gdecore/occupancy_cases.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2022: +# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero +# General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. + +import abc + + +class OccupancyCases(abc.ABC): + """This class represents the grouping of occupancy types (as per the GEM Building Taxonomy + v3.0) into occupancy cases. Occupancy cases are broader groups of occupancy types and are + relevant to connect individual OBM buildings with aggregated exposure models, as the latter + are usually defined for particular occupancy cases. + + Attributes: + self.mapping (dict): + Each key is an occupancy case and contains a list of occupancy types (strings) + associated with it. + """ + + def __init__(self): + self.mapping = None + + +class OccupancyCasesESRM20(OccupancyCases): + """This class represents the grouping of occupancy types that is compatible with the + aggregated exposure model of the European Seismic Risk Model 2020 (ESRM20). ESRM20 covers + three occupancy cases: residential, commercial and industrial. + """ + + def __init__(self): + self.mapping = { + "residential": [ + "RES", + "RES99", + "RES1", + "RES2", + "RES2A", + "RES2B", + "RES2C", + "RES2D", + "RES2E", + "RES2F", + "RES4", + "RES6", + "MIX1", + "MIX2", + "MIX4", + ], + "commercial": [ + "COM", + "COM99", + "COM1", + "COM2", + "COM3", + "COM5", + "RES3", + ], + "industrial": [ + "IND", + "IND99", + "IND1", + "IND2", + "MIX3", + "MIX5", + "MIX6", + ], + } diff --git a/tests/data/test_database_set_up.sql b/tests/data/test_database_set_up.sql index 99c078c..8055ed8 100644 --- a/tests/data/test_database_set_up.sql +++ b/tests/data/test_database_set_up.sql @@ -76,7 +76,6 @@ CREATE TABLE obm_buildings storeys INTEGER, relation_id INTEGER, occupancy VARCHAR, - occupancy_case occupancycase, quadkey CHAR(18), geometry GEOMETRY (GEOMETRY, 4326), @@ -84,39 +83,39 @@ CREATE TABLE obm_buildings ); -- Residential building with number of storeys, not part of relation -INSERT INTO obm_buildings(osm_id, storeys, occupancy, occupancy_case, quadkey, geometry) +INSERT INTO obm_buildings(osm_id, storeys, occupancy, quadkey, geometry) VALUES ( - 11223344, 4, 'RES2', 'residential', '122010321033023130', + 11223344, 4, 'RES2', '122010321033023130', ST_GeomFromText('POLYGON((15.0487 37.4812,15.0489 37.4810,15.0486 37.4808,15.0484 37.4810,15.0487 37.4812))')), ( - 99001122, 15, 'RES2', 'residential', '122010321033023130', + 99001122, 15, 'RES2', '122010321033023130', ST_GeomFromText('POLYGON((15.0490 37.4813,15.0490 37.4812,15.0488 37.4812,15.0488 37.4813,15.0490 37.4813))')); -- Buildings that are not part of a relation and do not have number of storeys -INSERT INTO obm_buildings(osm_id, occupancy, occupancy_case, quadkey, geometry) +INSERT INTO obm_buildings(osm_id, occupancy, quadkey, geometry) VALUES ( - 22334455, 'RES1', 'residential', '122010321033023130', + 22334455, 'RES1', '122010321033023130', ST_GeomFromText('POLYGON((15.0492 37.4808,15.0492 37.4806,15.0490 37.4806,15.0490 37.4808,15.0492 37.4808))')), ( - 55667788, 'RES3', 'commercial', '122010321033023130', + 55667788, 'RES3', '122010321033023130', ST_GeomFromText('POLYGON((15.0495 37.4810,15.0498 37.4810,15.0498 37.4808,15.0495 37.4808,15.0495 37.4810))')), ( - 88990011, 'RES', 'residential', '122010321033023120', + 88990011, 'RES', '122010321033023120', ST_GeomFromText('POLYGON((15.0463 37.4809,15.0463 37.4808,15.0461 37.4808,15.0461 37.4809,15.0463 37.4809))')); -- Commercial buildings that are part of a relation, with number of storeys -INSERT INTO obm_buildings(osm_id, storeys, relation_id, occupancy, occupancy_case, quadkey, geometry) +INSERT INTO obm_buildings(osm_id, storeys, relation_id, occupancy, quadkey, geometry) VALUES ( - 33445566, 2, -101010, 'COM3', 'commercial', '122010321033023130', + 33445566, 2, -101010, 'COM3', '122010321033023130', ST_GeomFromText('POLYGON((15.0491 37.4811,15.0494 37.4814,15.0495 37.4813,15.0492 37.4810,15.0491 37.4811))')), ( - 44556677, 3, -101010, 'COM3', 'commercial', '122010321033023130', + 44556677, 3, -101010, 'COM3', '122010321033023130', ST_GeomFromText('POLYGON((15.0495 37.4813,15.0497 37.4812,15.0495 37.4811,15.0494 37.4812,15.0495 37.4813))')), ( - 66778899, 4, -202020, 'COM2', 'commercial', '122010321033023132', + 66778899, 4, -202020, 'COM2', '122010321033023132', ST_GeomFromText('POLYGON((15.0490 37.4802,15.0493 37.4804,15.0494 37.4802,15.0491 37.4800,15.0490 37.4802))')); -- Commercial buildings that are part of a relation, without number of storeys -INSERT INTO obm_buildings(osm_id, relation_id, occupancy, occupancy_case, quadkey, geometry) +INSERT INTO obm_buildings(osm_id, relation_id, occupancy, quadkey, geometry) VALUES ( - 77889900, -202020, 'COM2', 'commercial', '122010321033023130', + 77889900, -202020, 'COM2', '122010321033023130', ST_GeomFromText('POLYGON((15.0494 37.4805,15.0496 37.4803,15.0494 37.4802,15.0492 37.4804,15.0494 37.4805))')); CREATE TABLE data_units_buildings diff --git a/tests/test_database_queries.py b/tests/test_database_queries.py index 864963d..90f2435 100644 --- a/tests/test_database_queries.py +++ b/tests/test_database_queries.py @@ -24,18 +24,22 @@ from gdecore.configuration import Configuration from gdecore.database_queries import DatabaseQueries -def test_retrieve_aggregated_source_id(test_db): +def test_retrieve_aggregated_source_id_and_format(test_db): # Database connection (the Configuration class will define the credentials based on whether # the code is running in the CI or locally) config = Configuration( os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml") ) - returned_aggregated_source_id = DatabaseQueries.retrieve_aggregated_source_id( + ( + returned_aggregated_source_id, + returned_aggregated_format, + ) = DatabaseQueries.retrieve_aggregated_source_id_and_format( "second_source", config.database_gde_tiles, "aggregated_sources" ) assert returned_aggregated_source_id == 2 + assert returned_aggregated_format == "bbb" def test_retrieve_all_exposure_entities_of_aggregated_source_id(test_db): @@ -141,7 +145,7 @@ def test_get_data_unit_ids_geometries_of_entity_and_occupancy_case(test_db): assert returned_data_units_ids_no_geometry[0] == expected_data_unit_id_no_geometry -def test_get_OBM_buildings_in_data_unit_by_occupancy_case(test_db): +def test_get_OBM_buildings_in_data_unit_by_occupancy_types(test_db): # Database connection (the Configuration class will define the credentials based on whether # the code is running in the CI or locally) config = Configuration( @@ -164,8 +168,8 @@ def test_get_OBM_buildings_in_data_unit_by_occupancy_case(test_db): numpy.where(returned_data_units_ids == "ABC_10269")[0][0] ] - returned_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case( - "residential", + returned_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_types( + ["RES", "RES99", "RES1", "RES2", "RES4", "RES6", "MIX1", "MIX2", "MIX4"], geometry, config.database_obm_buildings, "obm_buildings", @@ -184,8 +188,8 @@ def test_get_OBM_buildings_in_data_unit_by_occupancy_case(test_db): for i in range(returned_obm_buildings.shape[0]): assert isinstance(returned_obm_buildings[column_name].to_numpy()[i], str) - returned_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case( - "commercial", + returned_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_types( + ["COM", "COM99", "COM1", "COM2", "COM3", "COM5", "RES3"], geometry, config.database_obm_buildings, "obm_buildings", @@ -209,8 +213,8 @@ def test_get_OBM_buildings_in_data_unit_by_occupancy_case(test_db): numpy.where(returned_data_units_ids == "ABC_10277")[0][0] ] - returned_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case( - "residential", + returned_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_types( + ["RES", "RES99", "RES1", "RES2", "RES4", "RES6", "MIX1", "MIX2", "MIX4"], geometry, config.database_obm_buildings, "obm_buildings", @@ -221,8 +225,8 @@ def test_get_OBM_buildings_in_data_unit_by_occupancy_case(test_db): # Test case in which the geometry passed is not a Polygon or MultiPolygon with pytest.raises(TypeError) as excinfo: returned_obm_buildings = ( - DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case( - "commercial", + DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_types( + ["COM", "COM99", "COM1", "COM2", "COM3", "COM5", "RES3"], shapely.geometry.Point(0.0, 0.0), config.database_obm_buildings, "obm_buildings", diff --git a/tests/test_processor.py b/tests/test_processor.py index 224d1c6..aa8857c 100644 --- a/tests/test_processor.py +++ b/tests/test_processor.py @@ -48,8 +48,8 @@ def test_post_process_obm_relations(test_db): ] # Group of residential buildings that do not belong to relations - raw_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case( - "residential", + raw_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_types( + ["RES", "RES99", "RES1", "RES2", "RES4", "RES6", "MIX1", "MIX2", "MIX4"], geometry, config.database_obm_buildings, "obm_buildings", @@ -63,8 +63,8 @@ def test_post_process_obm_relations(test_db): assert 99001122 in returned_obm_buildings["osm_id"].to_numpy() # Group of commercial buildings, some in relations, some not - raw_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case( - "commercial", + raw_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_types( + ["COM", "COM99", "COM1", "COM2", "COM3", "COM5", "RES3"], geometry, config.database_obm_buildings, "obm_buildings", @@ -172,8 +172,8 @@ def test_assign_building_classes_to_obm_buildings(test_db): ] # Group of residential buildings that do not belong to relations - raw_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case( - "residential", + raw_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_types( + ["RES", "RES99", "RES1", "RES2", "RES4", "RES6", "MIX1", "MIX2", "MIX4"], geometry, config.database_obm_buildings, "obm_buildings", @@ -223,8 +223,8 @@ def test_assign_building_classes_to_obm_buildings(test_db): assert bdg_class_name in case_99001122["building_class_name"].to_numpy() # Group of commercial buildings, some in relations, some not - raw_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case( - "commercial", + raw_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_types( + ["COM", "COM99", "COM1", "COM2", "COM3", "COM5", "RES3"], geometry, config.database_obm_buildings, "obm_buildings", -- GitLab