@staticmethod
def store_OBM_building_classes(
    data_unit_id,
    occupancy_case,
    aggregated_source_id,
    obm_buildings_building_classes,
    db_gde_tiles_config,
    db_table,
):
    """This function writes to the table with name 'db_table' in the database whose
    credentials are indicated in 'db_gde_tiles_config' the building classes and associated
    probabilities for each of the OBM buildings in 'obm_buildings_building_classes'.
    Reference to the corresponding 'data_unit_id', 'occupancy_case' and
    'aggregated_source_id' is needed to be able (at a later stage) to retrieve attributes of
    the building classes.

    For each OBM building, the table is first queried for existing entries with the same
    'osm_id' and 'aggregated_source_id'. If exactly one entry exists, it is updated; if none
    exists, a new entry is inserted; if more than one exists, an error is logged and nothing
    is stored for that building. The database connection is closed when the function
    finishes, also if one of the SQL commands raises an exception.

    Args:
        data_unit_id (str):
            ID of the data unit associated with the OBM buildings in
            'obm_buildings_building_classes'.
        occupancy_case (str):
            Name of the occupancy case (e.g. "residential", "commercial", "industrial")
            of the OBM buildings in 'obm_buildings_building_classes'.
        aggregated_source_id (int):
            ID of the source of the aggregated exposure model associated with the building
            classes of the OBM buildings in 'obm_buildings_building_classes'.
        obm_buildings_building_classes (dict):
            Dictionary containing the building classes and their probabilities for each OBM
            building. Dictionary keys correspond to the OSM ID of the building. Each value
            is a Pandas DataFrame with the following columns:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (str):
                    Type of settlement within the data unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                probabilities (float):
                    Probabilities of the building belonging to the building class (defined
                    by 'building_class_name', 'settlement_type' and 'occupancy_subtype').
        db_gde_tiles_config (dict):
            Dictionary containing the credentials needed to connect to the SQL database in
            which information on the GDE buildings is stored. The keys of the dictionary
            need to be:
                host (str):
                    SQL database host address.
                dbname (str):
                    Name of the SQL database.
                port (int):
                    Port where the SQL database can be found.
                username (str):
                    User name to connect to the SQL database.
                password (str):
                    Password associated with 'username'.
        db_table (str):
            Name of the table of the SQL database where the GDE buildings are stored. It is
            assumed that this table contains, at least, the following fields:
                osm_id (int):
                    ID of the OBM building.
                aggregated_source_id (int):
                    ID of the source of the aggregated exposure model.
                occupancy_case (enum):
                    SQL enumerated type describing the building occupancy cases.
                data_unit_id (str):
                    ID of the data unit the OBM building belongs to.
                building_class_names (array of str):
                    Building classes as per the GEM Building Taxonomy.
                settlement_types (array of enum):
                    Type of settlement within the Data Unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtypes (array of str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                probabilities (array of float):
                    Probabilities of the OBM building belonging to each building class.
    """

    def as_sql_array(values):
        """Returns 'values' (iterable of str) as an SQL array literal: {"a", "b", ...}."""
        return '{"%s"}' % '", "'.join(values)

    # NOTE(review): all values are interpolated directly into the SQL text (no escaping),
    # so they are assumed not to contain single or double quotes.
    sql_query = "SELECT COUNT(*) FROM %s WHERE (osm_id=%s AND aggregated_source_id=%s);"

    sql_update = (
        "UPDATE %s SET (occupancy_case, data_unit_id, "
        "building_class_names, settlement_types, occupancy_subtypes, "
        "probabilities) = ('%s','%s','%s','%s','%s','%s')"
        " WHERE (osm_id=%s AND aggregated_source_id=%s);"
    )

    sql_insert = (
        "INSERT INTO %s(osm_id, aggregated_source_id, occupancy_case, "
        "data_unit_id, building_class_names, settlement_types, "
        "occupancy_subtypes, probabilities) "
        "VALUES (%s, %s, '%s', '%s', '%s', '%s', '%s', '%s');"
    )

    db_gde_tiles = Database(**db_gde_tiles_config)
    db_gde_tiles.create_connection_and_cursor()

    try:
        for osm_id, building_classes in obm_buildings_building_classes.items():
            db_gde_tiles.cursor.execute(sql_query % (db_table, osm_id, aggregated_source_id))
            number_entries = db_gde_tiles.cursor.fetchall()[0][0]

            if number_entries not in (0, 1):  # this should not occur
                logger.error(
                    "DatabaseStorage.store_OBM_building_classes() has found more than one "
                    "entry for osm_id=%s and aggregated_source_id=%s. "
                    "Building classes for this building were not stored.",
                    osm_id,
                    aggregated_source_id,
                )
                continue

            # The same four array literals are needed for both updating and inserting
            building_class_names = as_sql_array(
                building_classes["building_class_name"].to_numpy()
            )
            settlement_types = as_sql_array(building_classes["settlement_type"].to_numpy())
            occupancy_subtypes = as_sql_array(building_classes["occupancy_subtype"].to_numpy())
            probabilities = as_sql_array(
                building_classes["probabilities"].to_numpy().astype(str)
            )

            if number_entries == 1:  # One entry exists for this OSM ID --> update
                sql_command = sql_update % (
                    db_table,
                    occupancy_case,
                    data_unit_id,
                    building_class_names,
                    settlement_types,
                    occupancy_subtypes,
                    probabilities,
                    osm_id,
                    aggregated_source_id,
                )
            else:  # Entry does not exist yet for this OSM ID --> insert
                sql_command = sql_insert % (
                    db_table,
                    osm_id,
                    aggregated_source_id,
                    occupancy_case,
                    data_unit_id,
                    building_class_names,
                    settlement_types,
                    occupancy_subtypes,
                    probabilities,
                )

            db_gde_tiles.cursor.execute(sql_command)
    finally:
        # Release the connection even if one of the SQL commands above has failed
        db_gde_tiles.close_connection()

    return
Args: @@ -364,7 +364,7 @@ class GDEProcessor: Returns: obm_buildings_building_classes (dict): - Dictionary containing the building classes and their proportions for each OBM + Dictionary containing the building classes and their probabilities for each OBM building. Dictionary keys correspond to the OSM ID of the building. Each key contains a Pandas DataFrame with the following columns: building_class_name (str): @@ -375,10 +375,9 @@ class GDEProcessor: occupancy_subtype (str): Details on the occupancy, if relevant to characterise the building class. - proportions (float): - Proportions in which the building class (defined by - 'building_class_name', 'settlement_type' and 'occupancy_subtype') is - present in the data unit. + probabilities (float): + Probabilities of the building belonging to the building class (defined + by 'building_class_name', 'settlement_type' and 'occupancy_subtype'). """ obm_buildings_building_classes = {} @@ -400,7 +399,7 @@ class GDEProcessor: def assign_building_classes_to_obm_building( osm_id, storeys, occupancy, occupancy_case, data_unit_building_classes ): - """This function assigns building classes and proportions from + """This function assigns building classes and probabilities from 'data_unit_building_classes' to the OBM building with ID 'osm_id'. When building attributes such as 'storeys' or 'occupancy' do not allow for a narrowing down of all the potential building classes, the output consists of all building classes in the input @@ -459,7 +458,7 @@ class GDEProcessor: Returns: obm_building_building_classes (Pandas DataFrame): - DataFrame with the building classes and proportions assigned to the input OBM + DataFrame with the building classes and probabilities assigned to the input OBM building. It contains the following columns: building_class_name (str): Building class as per the GEM Building Taxonomy. 
@@ -469,10 +468,9 @@ class GDEProcessor: occupancy_subtype (str): Details on the occupancy, if relevant to characterise the building class. - proportions (float): - Proportions in which the building class (defined by - 'building_class_name', 'settlement_type' and 'occupancy_subtype') is - present in the data unit. + probabilities (float): + Probabilities of the building belonging to the building class (defined + by 'building_class_name', 'settlement_type' and 'occupancy_subtype'). """ obm_building_building_classes = deepcopy(data_unit_building_classes) @@ -509,6 +507,10 @@ class GDEProcessor: obm_building_building_classes = deepcopy(data_unit_building_classes) + obm_building_building_classes = obm_building_building_classes.rename( + columns={"proportions": "probabilities"} + ) + return obm_building_building_classes @staticmethod diff --git a/tests/data/test_database_set_up.sql b/tests/data/test_database_set_up.sql index 04135b2cb71d39dfba5801ed962016bf439b1b57..99c078c8ad36940f4473dc56c378339972a63b67 100644 --- a/tests/data/test_database_set_up.sql +++ b/tests/data/test_database_set_up.sql @@ -4,6 +4,7 @@ DROP TABLE IF EXISTS obm_buildings; DROP TABLE IF EXISTS data_units_buildings; DROP TABLE IF EXISTS data_unit_tiles; DROP TABLE IF EXISTS obm_built_area_assessments; +DROP TABLE IF EXISTS gde_buildings; DROP TYPE IF EXISTS occupancycase; DROP TYPE IF EXISTS settlement; DROP EXTENSION IF EXISTS postgis; @@ -218,3 +219,31 @@ VALUES ('122010321033023130', 1, 0), ('122010321033023132', 1, 1), ('122010321033023121', 1, 0), ('122010321033023123', 1, 1); + +CREATE TABLE gde_buildings +( + osm_id integer, + aggregated_source_id SMALLINT, + occupancy_case occupancycase, + data_unit_id VARCHAR, + building_class_names VARCHAR[], + settlement_types settlement[], + occupancy_subtypes VARCHAR[], + probabilities FLOAT[], + + PRIMARY KEY (osm_id, aggregated_source_id) +); +INSERT INTO gde_buildings(osm_id, + aggregated_source_id, + occupancy_case, + data_unit_id, + 
def test_store_OBM_building_classes(test_db):
    """Tests DatabaseStorage.store_OBM_building_classes() by storing building classes in the
    'gde_buildings' table of the test database and reading them back.
    """

    # Database connection (the Configuration class will define the credentials based on whether
    # the code is running in the CI or locally)
    config = Configuration(
        os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml")
    )

    # Case 1: one residential building
    obm_bdg_classes = {
        11223344: pandas.DataFrame(
            {
                "building_class_name": ["A2/HBET:4-6", "B2/H:4"],
                "settlement_type": ["urban", "rural"],
                "occupancy_subtype": ["all", "all"],
                "probabilities": [0.666667, 0.333333],
            }
        )
    }

    DatabaseStorage.store_OBM_building_classes(
        "ABC_10269",
        "residential",
        2,
        obm_bdg_classes,
        config.database_gde_tiles,
        "gde_buildings",
    )

    _assert_building_classes_were_stored(
        config.database_gde_tiles, obm_bdg_classes, 2, "residential", "ABC_10269"
    )

    # Case 2: two commercial buildings
    # NOTE(review): osm_id=-101010 is inserted by test_database_set_up.sql, so it presumably
    # exercises the "update" path, while -202020 exercises the "insert" path -- confirm that
    # the 'test_db' fixture runs that SQL file.
    obm_bdg_classes = {
        -101010: pandas.DataFrame(
            {
                "building_class_name": ["C6/HBET:3-5"],
                "settlement_type": ["urban"],
                "occupancy_subtype": ["Offices"],
                "probabilities": [1.0],
            }
        ),
        -202020: pandas.DataFrame(
            {
                "building_class_name": [
                    "C1/HBET:1-2",
                    "C2/HBET:3-",
                    "C3/H:1",
                    "C4/HBET:2-3",
                    "C5/HBET:1-2",
                    "C6/HBET:3-5",
                ],
                "settlement_type": ["urban", "urban", "urban", "urban", "urban", "urban"],
                "occupancy_subtype": ["Hotels", "Hotels", "Trade", "Trade", "Offices", "Offices"],
                "probabilities": [0.10, 0.25, 0.05, 0.10, 0.20, 0.30],
            }
        ),
    }

    DatabaseStorage.store_OBM_building_classes(
        "ABC_10269",
        "commercial",
        2,
        obm_bdg_classes,
        config.database_gde_tiles,
        "gde_buildings",
    )

    _assert_building_classes_were_stored(
        config.database_gde_tiles, obm_bdg_classes, 2, "commercial", "ABC_10269"
    )


def _assert_building_classes_were_stored(
    credentials,
    expected_building_classes,
    aggregated_source_id,
    expected_occupancy_case,
    expected_data_unit_id,
):
    """This auxiliary function asserts that the contents of the 'gde_buildings' table of the
    test database match 'expected_building_classes' for each OSM ID in it.

    Args:
        credentials (dict):
            Credentials to connect to the test SQL database (see
            query_OBM_building_classes()).
        expected_building_classes (dict):
            Dictionary whose keys are OSM IDs and whose values are Pandas DataFrames with
            columns 'building_class_name', 'settlement_type', 'occupancy_subtype' and
            'probabilities', as passed to DatabaseStorage.store_OBM_building_classes().
        aggregated_source_id (int):
            ID of the source of the aggregated exposure model used for the query.
        expected_occupancy_case (str):
            Occupancy case expected for all OSM IDs in 'expected_building_classes'.
        expected_data_unit_id (str):
            Data unit ID expected for all OSM IDs in 'expected_building_classes'.
    """

    for osm_id, expected in expected_building_classes.items():
        (
            returned_occupancy_case,
            returned_data_unit_id,
            returned_building_class_names,
            returned_settlement_types,
            returned_occupancy_subtypes,
            returned_probabilities,
        ) = query_OBM_building_classes(credentials, osm_id, aggregated_source_id)

        assert returned_occupancy_case == expected_occupancy_case
        assert returned_data_unit_id == expected_data_unit_id

        expected_names = expected["building_class_name"].to_numpy()
        expected_settlements = expected["settlement_type"].to_numpy()
        expected_subtypes = expected["occupancy_subtype"].to_numpy()
        expected_probabilities = expected["probabilities"].to_numpy()

        assert len(returned_building_class_names) == len(expected_names)

        # Rows are matched by building class name, so the comparison does not depend on the
        # order of the returned arrays relative to the input DataFrame
        for i, bdg_class_name in enumerate(returned_building_class_names):
            which = numpy.where(expected_names == bdg_class_name)[0][0]
            assert returned_settlement_types[i] == expected_settlements[which]
            assert returned_occupancy_subtypes[i] == expected_subtypes[which]
            assert round(returned_probabilities[i], 4) == round(
                expected_probabilities[which], 4
            )


def query_OBM_building_classes(credentials, osm_id, aggregated_source_id):
    """This auxiliary function queries the 'gde_buildings' table of the test database to
    retrieve the building classes (and related attributes) associated with an OBM building with
    ID 'osm_id' as per an aggregated model with 'aggregated_source_id'. Only the first row
    returned by the query is used. The database connection is closed also if the query fails.

    Args:
        credentials (dict):
            Dictionary containing the credentials needed to connect to the test SQL database.
            The keys of the dictionary need to be:
                host (str):
                    SQL database host address.
                dbname (str):
                    Name of the SQL database.
                port (int):
                    Port where the SQL database can be found.
                username (str):
                    User name to connect to the SQL database.
                password (str):
                    Password associated with 'username'.
        osm_id (int):
            OSM ID of the building to query.
        aggregated_source_id (int):
            ID of the source of the aggregated exposure model associated with the building
            classes.

    Returns:
        occupancy_case (str):
            Occupancy case associated with the OBM building.
        data_unit_id (str):
            ID of the data unit of the 'aggregated_source_id' and 'occupancy_case' associated
            with the OBM building.
        building_class_names (list of str):
            Building classes as per the GEM Building Taxonomy.
        settlement_types (list of str):
            Type of settlements within the data unit. Possible values: "urban", "rural",
            "big_city", "all".
        occupancy_subtypes (list of str):
            Details on the occupancy, if relevant to characterise the building classes.
        probabilities (list of float):
            Probabilities of the building belonging to the building classes.
    """

    sql_command = (
        "SELECT occupancy_case, data_unit_id, building_class_names, settlement_types, "
        "occupancy_subtypes, probabilities FROM gde_buildings "
        "WHERE (osm_id=%s AND aggregated_source_id=%s);" % (osm_id, aggregated_source_id)
    )

    db_test = Database(**credentials)
    db_test.create_connection_and_cursor()
    try:
        db_test.cursor.execute(sql_command)
        result = db_test.cursor.fetchall()
    finally:
        # Release the connection even if the query has failed
        db_test.close_connection()

    (
        occupancy_case,
        data_unit_id,
        building_class_names,
        raw_settlement_types,
        occupancy_subtypes,
        probabilities,
    ) = result[0][:6]

    # 'settlement_types' is handled as a string of the form "{a,b}" and split by hand
    # (presumably because the array of enumerated type is not converted into a list by the
    # database driver -- TODO confirm)
    settlement_types = raw_settlement_types.replace("{", "").replace("}", "").split(",")

    return (
        occupancy_case,
        data_unit_id,
        building_class_names,
        settlement_types,
        occupancy_subtypes,
        probabilities,
    )
range(len(expected_class_names)): assert ( round( case_55667788[case_55667788.building_class_name == expected_class_names[i]][ - "proportions" + "probabilities" ].to_numpy()[0], 3, ) - == expected_proportions[i] + == expected_probabilities[i] ) # Test Case 5: Commercial building (from relation) with 3 storeys, "COM3" @@ -267,7 +267,7 @@ def test_assign_building_classes_to_obm_buildings(test_db): assert ( round( case_101010[case_101010.building_class_name == "C6/HBET:3-5"][ - "proportions" + "probabilities" ].to_numpy()[0], 3, )