From f1fe00cbef3bf9a4cb68871dc3a60a060fefd5e8 Mon Sep 17 00:00:00 2001 From: Cecilia Nievas <cnievas@gfz-potsdam.de> Date: Tue, 28 Jun 2022 15:28:18 +0200 Subject: [PATCH] Added TileExposure and ExportHandler classes --- config_example.yml | 2 + gdeexporter/configuration.py | 123 ++- gdeexporter/database_queries.py | 790 ++++++++++++++++++ gdeexporter/gdeexporter.py | 25 +- gdeexporter/handler.py | 211 +++++ gdeexporter/tileexposure.py | 373 +++++++++ .../config_for_testing_cost_case_invalid.yml | 20 + .../config_for_testing_geographic_bbox.yml | 6 + ...ig_for_testing_geographic_bbox_missing.yml | 6 + ...nfig_for_testing_geographic_data_units.yml | 6 + ..._testing_geographic_data_units_missing.yml | 6 + ...config_for_testing_geographic_quadkeys.yml | 6 + ...or_testing_geographic_quadkeys_missing.yml | 6 + tests/data/config_for_testing_good.yml | 8 + ...g_for_testing_good_people_case_invalid.yml | 19 + tests/data/config_for_testing_missing.yml | 6 + .../expected_results_append_OBM_buildings.csv | 5 + ...pected_results_append_lumped_buildings.csv | 11 + tests/data/test_database_set_up.sql | 174 +++- tests/test_configuration.py | 28 + tests/test_database_queries.py | 382 +++++++++ tests/test_tileexposure.py | 247 ++++++ 22 files changed, 2434 insertions(+), 26 deletions(-) create mode 100644 gdeexporter/handler.py create mode 100644 gdeexporter/tileexposure.py create mode 100644 tests/data/config_for_testing_cost_case_invalid.yml create mode 100644 tests/data/config_for_testing_good_people_case_invalid.yml create mode 100644 tests/data/expected_results_append_OBM_buildings.csv create mode 100644 tests/data/expected_results_append_lumped_buildings.csv create mode 100644 tests/test_tileexposure.py diff --git a/config_example.yml b/config_example.yml index 1a3d1c1..119bf96 100644 --- a/config_example.yml +++ b/config_example.yml @@ -14,9 +14,11 @@ geographic_selection: # Selection of the geographic area for which GDE will be lon_e: 23.713597 lat_s: 37.965450 lat_n: 
37.972561 +export_OBM_footprints: True # If True, geometries of OBM buildings will be exported database_gde_tiles: # Database where info on the GDE tiles is stored host: localhost dbname: gde_tiles_attica_2022_04_12_0900 port: 5432 username: tester password: somepass +number_cores: 1 # Number of cores used for parallelisation diff --git a/gdeexporter/configuration.py b/gdeexporter/configuration.py index 6602b41..13af281 100644 --- a/gdeexporter/configuration.py +++ b/gdeexporter/configuration.py @@ -88,6 +88,21 @@ class Configuration: "lon_e" (float): East-most longitude. "lat_s" (float): South-most latitude. "lat_n" (float): North-most latitude. + self.cost_cases (dict): + Dictionary containing indications on the sort of costs to retrieve. The minimum + number of keys is one. The sort of costs that are available are: structural, + non_structural, contents and total. The keys are the names as they will appear in + the output, the values refer to the intrinsic naming in the model (i.e. the way + values are stored in the database). + self.people_cases (dict): + Dictionary containing indications on the time of the day for which the number of + people in the buildings is to be output. The minimum number of keys is one. The + available times of the day are: day, night, transit and census. The keys are the + names as they will appear in the output, the values refer to the intrinsic naming in + the model (i.e. the way values are stored in the database). + self.export_OBM_footprints (bool): + If True, the geometries of OpenBuildingMap buildings will be retrieved and exported, + if False, they will not. self.database_gde_tiles (dict): Dictionary containing the credentials needed to connect to the SQL database in which information on the GDE tiles is stored. The exact parameters needed depend on the @@ -111,6 +126,8 @@ class Configuration: - "bounding_box" self.number_quadkeys_to_process (int): Total number of quadkeys to process (from all keys of self.quadkeys_to_process). 
+ self.number_cores (int): + Number of cores that will be used to run the code. """ REQUIRES = [ @@ -119,7 +136,11 @@ class Configuration: "exposure_entities_to_run", "exposure_entities_code", "geographic_selection", + "cost_cases", + "people_cases", + "export_OBM_footprints", "database_gde_tiles", + "number_cores", ] def __init__(self, filepath, force_config_over_hierarchies=False): @@ -171,6 +192,20 @@ class Configuration: ) self.interpret_geographic_selection() + self.cost_cases = ConfigurationMethods.assign_hierarchical_parameters( + config, "cost_cases" + ) + self.validate_cost_cases() + + self.people_cases = ConfigurationMethods.assign_hierarchical_parameters( + config, "people_cases" + ) + self.validate_people_cases() + + self.export_OBM_footprints = ConfigurationMethods.assign_boolean_parameter( + config, "export_OBM_footprints" + ) + self.database_gde_tiles = ConfigurationMethods.retrieve_database_credentials( config, "database_gde_tiles", @@ -179,6 +214,10 @@ class Configuration: force_config_over_hierarchies, ) + self.number_cores = ConfigurationMethods.assign_integer_parameter( + config, "number_cores" + ) + self.quadkeys_to_process = None self.number_quadkeys_to_process = None @@ -403,8 +442,15 @@ class Configuration: "data_unit_tiles", ) ) - quadkeys_to_process[exposure_entity_code] = quadkeys_list - number_quadkeys += len(quadkeys_list) + + if len(quadkeys_list) > 0: + quadkeys_to_process[exposure_entity_code] = quadkeys_list + number_quadkeys += len(quadkeys_list) + else: + logger.info( + "No quadkeys found for exposure entity '%s', skipping" + % (exposure_entity_code) + ) if self.geographic_selection["selection_mode"].lower() == "data_unit_id": quadkeys_to_process = {} @@ -418,8 +464,14 @@ class Configuration: "data_unit_tiles", ) ) - quadkeys_to_process[data_unit_id] = quadkeys_list - number_quadkeys += len(quadkeys_list) + + if len(quadkeys_list) > 0: + quadkeys_to_process[data_unit_id] = quadkeys_list + number_quadkeys += len(quadkeys_list) + 
else: + logger.info( + "No quadkeys found for data unit '%s', skipping" % (data_unit_id) + ) if self.geographic_selection["selection_mode"].lower() == "quadkeys": # Retrieve quadkeys from the indicated file @@ -432,9 +484,16 @@ class Configuration: quadkeys_list.append(element) f.close() quadkeys_list = list(dict.fromkeys(quadkeys_list)) - quadkeys_to_process = {"quadkeys_list": quadkeys_list} number_quadkeys = len(quadkeys_list) + if len(quadkeys_list) > 0: + quadkeys_to_process = {"quadkeys_list": quadkeys_list} + else: + logger.info( + "No quadkeys found in '%s'" % (self.geographic_selection["quadkeys_file"]) + ) + quadkeys_to_process = {} + if self.geographic_selection["selection_mode"].lower() == "bounding_box": tiles = list( mercantile.tiles( @@ -445,11 +504,63 @@ class Configuration: 18, ) ) + quadkeys_list = list([mercantile.quadkey(tile) for tile in tiles]) - quadkeys_to_process = {"bounding_box": quadkeys_list} number_quadkeys = len(quadkeys_list) + if len(quadkeys_list) > 0: + quadkeys_to_process = {"bounding_box": quadkeys_list} + else: + logger.info("No quadkeys found in bounding box") + quadkeys_to_process = {} + self.quadkeys_to_process = quadkeys_to_process self.number_quadkeys_to_process = number_quadkeys return + + def validate_cost_cases(self): + """ + This function guarantees that the cost cases indicated as values of the self.cost_cases + dictionary are only those supported by this software. Currently supported values are: + "structural", "non_structural", "contents" and "total". If any other value is found, the + item is removed from self.cost_cases and a warning is logged. 
+ """ + + valid_cost_cases = ["structural", "non_structural", "contents", "total"] + + to_delete = [] + for cost_case_key in self.cost_cases.keys(): + if self.cost_cases[cost_case_key] not in valid_cost_cases: + logger.warning( + "Invalid cost case found in configuration file: " + "cost case '%s':'%s' will be ignored" + % (cost_case_key, self.cost_cases[cost_case_key]) + ) + to_delete.append(cost_case_key) + + for case_to_delete in to_delete: + del self.cost_cases[case_to_delete] + + def validate_people_cases(self): + """ + This function guarantees that the people cases indicated as values of the + self.people_cases dictionary are only those supported by this software. Currently + supported values are: "day", "night", "transit" and "census". If any other value is + found, the item is removedfrom self.people_cases and a warning is logged. + """ + + valid_people_cases = ["day", "night", "transit", "census", "average"] + + to_delete = [] + for people_case_key in self.people_cases.keys(): + if self.people_cases[people_case_key] not in valid_people_cases: + logger.warning( + "Invalid people case found in configuration file: " + "people case '%s':'%s' will be ignored" + % (people_case_key, self.people_cases[people_case_key]) + ) + to_delete.append(people_case_key) + + for case_to_delete in to_delete: + del self.people_cases[case_to_delete] diff --git a/gdeexporter/database_queries.py b/gdeexporter/database_queries.py index b324131..7953199 100644 --- a/gdeexporter/database_queries.py +++ b/gdeexporter/database_queries.py @@ -17,6 +17,8 @@ # along with this program. If not, see http://www.gnu.org/licenses/. 
import logging +import numpy +import pandas from gdeimporter.tools.database import Database @@ -274,3 +276,791 @@ class DatabaseQueries: quadkeys = [] return quadkeys + + @staticmethod + def retrieve_data_unit_ids( + quadkey, + aggregated_source_id, + exposure_entities, + occupancy_case, + db_gde_tiles_config, + db_table, + ): + """ + This function retrieves the data unit IDs associated with 'quadkey', + 'aggregated_source_id', 'occupancy_case' and any of the exposure entities listed in + 'exposure_entities' in the table 'db_table' of the database whose credentials are given + by 'db_gde_tiles_config'. + + Args: + quadkey (str): + Quadkey of the zoom level 18 data-unit tile for which the data unit IDs will be + retrieved. + aggregated_source_id (int): + ID of the source of the aggregated exposure model for which the data unit IDs + will be retrieved. + exposure_entities (list of str): + List of names of the exposure entities for which the data unit IDs will be + retrieved. + occupancy_case (str): + Name of the occupancy case (e.g. "residential", "commercial", "industrial") + for which the data unit IDs will be retrieved. + db_gde_tiles_config (dict): + Dictionary containing the credentials needed to connect to the SQL database in + which information on the data-unit tiles is stored. The keys of the dictionary + need to be: + host (str): + SQL database host address. + dbname (str): + Name of the SQL database. + port (int): + Port where the SQL database can be found. + username (str): + User name to connect to the SQL database. + password (str): + Password associated with self.username. + db_table (str): + Name of the table of the SQL database where the data-unit tiles are stored. It + is assumed that this table contains, at least, the following fields: + quadkey (str): + String indicating the quadkey of a tile. + aggregated_source_id (int): + ID of the source of the aggregated exposure model. 
+ occupancy_case (enum): + SQL enumerated type describing the building occupancy cases. + exposure_entity (str): + 3-char identifier of the exposure entity. If a country, ISO3 code. + data_unit_id (str): + ID of the data unit. + + Returns: + data_unit_ids (list of str): + List of data unit IDs associated with the query. + """ + + if not isinstance(exposure_entities, list): + logger.warning( + "'exposure_entities' passed to retrieve_data_unit_ids is not a list: " + "results of the query are likely invalid" + ) + + # Convert exposure entities into a string to feed in to the query + exposure_entities_aux = [ + "exposure_entity='%s'" % (exposure_entities[i]) + for i in range(len(exposure_entities)) + ] + exposure_entities_condition = " OR ".join(exposure_entities_aux) + + sql_query = "SELECT data_unit_id FROM %s " + sql_query += "WHERE (quadkey='%s' AND aggregated_source_id=%s AND occupancy_case='%s' " + sql_query += "AND (%s));" + + db_gde_tiles = Database(**db_gde_tiles_config) + db_gde_tiles.create_connection_and_cursor() + + db_gde_tiles.cursor.execute( + sql_query + % ( + db_table, + quadkey, + aggregated_source_id, + occupancy_case, + exposure_entities_condition, + ) + ) + exec_result = db_gde_tiles.cursor.fetchall() + + db_gde_tiles.close_connection() + + if len(exec_result) > 0: # Entries exist --> retrieve + data_unit_ids = [exec_result[i][0] for i in range(len(exec_result))] + else: + data_unit_ids = [] + + return data_unit_ids + + @staticmethod + def get_numbers_buildings_for_data_unit_tile( + quadkey, + aggregated_source_id, + occupancy_case, + data_unit_id, + db_gde_tiles_config, + db_table, + ): + """This function retrieves the number of remainder, aggregated and total buildings of + the data-unit tile defined by the combination of 'quadkey', 'aggregated_source_id', + 'occupancy_case' and 'data_unit_id' from the table 'db_table' of the database whose + credentials are given by 'db_gde_tiles_config'. 
+ + Args: + quadkey (str): + Quadkey of the zoom level 18 data-unit tile for which the number of buildings + will be retrieved. + aggregated_source_id (int): + ID of the source of the aggregated exposure model for which the number of + buildings will be retrieved. + occupancy_case (str): + Name of the occupancy case (e.g. "residential", "commercial", "industrial") + for which the number of buildings will be retrieved. + data_unit_id (str): + ID of the data unit for which the number of buildings will be retrieved. + db_gde_tiles_config (dict): + Dictionary containing the credentials needed to connect to the SQL database in + which information on the data-unit tiles is stored. The keys of the dictionary + need to be: + host (str): + SQL database host address. + dbname (str): + Name of the SQL database. + port (int): + Port where the SQL database can be found. + username (str): + User name to connect to the SQL database. + password (str): + Password associated with self.username. + db_table (str): + Name of the table of the SQL database where the data-unit tiles are stored. It + is assumed that this table contains, at least, the following fields: + quadkey (str): + String indicating the quadkey of a tile. + aggregated_source_id (int): + ID of the source of the aggregated exposure model. + occupancy_case (enum): + SQL enumerated type describing the building occupancy cases. + exposure_entity (str): + 3-char identifier of the exposure entity. If a country, ISO3 code. + data_unit_id (str): + ID of the data unit. + aggregated_buildings (float): + Number of buildings in the data-unit tile as per the aggregated exposure + model with ID 'aggregated_source_id'. + obm_buildings (int): + Number of OBM buildings in the data-unit tile as per the aggregated + exposure model with ID 'aggregated_source_id'. + remainder_buildings (float): + Number of remainder buildings in the data-unit tile as per the + aggregated exposure model with ID 'aggregated_source_id'. 
+ + Returns: + number_aggregated (float): + Number of aggregated buildings in the data-unit tile. + number_obm (float): + Number of OBM buildings in the data-unit tile. + number_remainder (float): + Number of remainder buildings in the data-unit tile. + """ + + sql_query = "SELECT aggregated_buildings, obm_buildings, remainder_buildings " + sql_query += "FROM %s WHERE (quadkey='%s' AND aggregated_source_id=%s " + sql_query += "AND occupancy_case ='%s' AND data_unit_id='%s');" + + db_gde_tiles = Database(**db_gde_tiles_config) + db_gde_tiles.create_connection_and_cursor() + + db_gde_tiles.cursor.execute( + sql_query % (db_table, quadkey, aggregated_source_id, occupancy_case, data_unit_id) + ) + exec_result = db_gde_tiles.cursor.fetchall() + + db_gde_tiles.close_connection() + + if len(exec_result) == 1: # Entry found + number_aggregated = exec_result[0][0] + number_obm = exec_result[0][1] + number_remainder = exec_result[0][2] + elif len(exec_result) == 0: # No entry found + number_aggregated = -999.9 + number_obm = -999 + number_remainder = -999.9 + else: # More than one entries found, this is an error + # This should not happen, as the database should not allow two entries with the + # same primary key + number_aggregated = -999.9 + number_obm = -999 + number_remainder = -999.9 + logger.error( + "ERROR in get_numbers_buildings_for_data_unit_tile: " + "more than one entry found for quadkey='%s' AND aggregated_source_id=%s " + "AND occupancy_case ='%s' AND data_unit_id='%s' " + % (quadkey, aggregated_source_id, occupancy_case, data_unit_id) + ) + + return number_aggregated, number_obm, number_remainder + + @staticmethod + def get_building_classes_of_data_unit( + data_unit_id, occupancy_case, aggregated_source_id, db_gde_tiles_config, db_table + ): + """This function retrieves the building classes and proportions as per + 'aggregated_source_id' associated with a data unit with 'data_unit_id' and + 'occupancy_case', from 'db_table' of the database whose credentials 
are given in + 'db_gde_tiles_config'. The building classes are defined in terms of three parameters: + the building_class_name, the settlement_type and the occupancy_subtype. + + Args: + data_unit_id (str): + ID of the data unit for which the building classes and their proportions will be + retrieved. + occupancy_case (str): + Name of the occupancy case (e.g. "residential", "commercial", "industrial") + for which the building classes and their proportions will be retrieved. + aggregated_source_id (int): + ID of the source of the aggregated exposure model for which the building classes + and their proportions will be retrieved. + db_gde_tiles_config (dict): + Dictionary containing the credentials needed to connect to the SQL database in + which information on the data unit buildings is stored. The keys of the + dictionary need to be: + host (str): + SQL database host address. + dbname (str): + Name of the SQL database. + port (int): + Port where the SQL database can be found. + username (str): + User name to connect to the SQL database. + password (str): + Password associated with self.username. + db_table (str): + Name of the table of the SQL database where the data-unit buildings are stored. + It is assumed that this table contains, at least, the following fields: + building_class_name (str): + Building class as per the GEM Building Taxonomy. + settlement_type (enum): + Type of settlement within the data unit. Possible values: "urban", + "rural", "big_city", "all". + occupancy_subtype (str): + Details on the occupancy, if relevant to characterise the building + class. + aggregated_source_id (int): + ID of the source of the aggregated exposure model. + occupancy_case (enum): + SQL enumerated type describing the building occupancy cases. + data_unit_id (str): + ID of the data unit. + proportions (float): + Proportions in which the building class (defined by + 'building_class_name', 'settlement_type' and 'occupancy_subtype') is + present in the data unit. 
+ census_people_per_building (float): + Number of census-derived people per building (i.e. not accounting for + time of the day). + total_cost_per_building (float): + Total replacement cost per building, including costs of structural and + non-structural components as well as contents. + + Returns: + building_classes (Pandas DataFrame): + DataFrame containing the building classes and their proportions. It comprises + the following columns: + building_class_name (str): + Building class as per the GEM Building Taxonomy. + settlement_type (str): + Type of settlement within the data unit. Possible values: "urban", + "rural", "big_city", "all". + occupancy_subtype (str): + Details on the occupancy, if relevant to characterise the building + class. + proportions (float): + Proportions in which the building class (defined by + 'building_class_name', 'settlement_type' and 'occupancy_subtype') is + present in the data unit. + census_people_per_building (float): + Number of census-derived people per building (i.e. not accounting for + time of the day). + total_cost_per_building (float): + Total replacement cost per building, including costs of structural and + non-structural components as well as contents. 
+ """ + + sql_query = "SELECT building_class_name, settlement_type, occupancy_subtype, " + sql_query += "proportions, census_people_per_building, total_cost_per_building FROM %s " + sql_query += "WHERE (data_unit_id='%s' AND occupancy_case='%s' AND " + sql_query += "aggregated_source_id=%s);" + + db_gde_tiles = Database(**db_gde_tiles_config) + db_gde_tiles.create_connection_and_cursor() + + db_gde_tiles.cursor.execute( + sql_query % (db_table, data_unit_id, occupancy_case, aggregated_source_id) + ) + exec_result = db_gde_tiles.cursor.fetchall() + + db_gde_tiles.close_connection() + + if len(exec_result) > 0: # Entries exist --> retrieve + building_class_names = numpy.array( + [exec_result[i][0] for i in range(len(exec_result))], dtype="str" + ) + settlement_types = numpy.array( + [exec_result[i][1] for i in range(len(exec_result))], dtype="str" + ) + occupancy_subtypes = numpy.array( + [exec_result[i][2] for i in range(len(exec_result))], dtype="str" + ) + proportions = numpy.array( + [exec_result[i][3] for i in range(len(exec_result))], dtype="float" + ) + census_people_per_building = numpy.array( + [exec_result[i][4] for i in range(len(exec_result))], dtype="float" + ) + total_cost_per_building = numpy.array( + [exec_result[i][5] for i in range(len(exec_result))], dtype="float" + ) + + if abs(proportions.sum() - 1.0) > 1e-5: + warning_message = ( + "DatabaseQueries.get_building_classes_of_data_unit: the sum of proportions " + "of building classes found for 'data_unit_id'=%s, 'occupancy_case'=%s and " + "'aggregated_source_id'=%s is different from 1.0; actual value is %s." 
+ % ( + data_unit_id, + occupancy_case, + aggregated_source_id, + "{:.6f}".format(proportions.sum()), + ) + ) + logger.warning(warning_message) + + else: # No entries found + building_class_names = numpy.array([], dtype="str") + settlement_types = numpy.array([], dtype="str") + occupancy_subtypes = numpy.array([], dtype="str") + proportions = numpy.array([], dtype="float") + census_people_per_building = numpy.array([], dtype="float") + total_cost_per_building = numpy.array([], dtype="float") + + building_classes = pandas.DataFrame( + { + "building_class_name": building_class_names, + "settlement_type": settlement_types, + "occupancy_subtype": occupancy_subtypes, + "proportions": proportions, + "census_people_per_building": census_people_per_building, + "total_cost_per_building": total_cost_per_building, + } + ) + + return building_classes + + @staticmethod + def get_exposure_entities_costs_assumptions( + cost_cases, + exposure_entity, + occupancy_case, + aggregated_source_id, + db_gde_tiles_config, + db_table, + ): + """ + This function retrieves the factors by which the total replacement cost of a building + can be multiplied to disaggregate it into the cost of structural and non-structural + components, as well as contents. The factors retrieved are those indicated in the + 'cost_cases' dictionary, whose keys are names of relevance for the output (user-defined) + and whose values can be any of the three cases existing in the + 'exposure_entities_costs_assumptions' table of the GDE Tiles database ("structural", + "non-structural", "contents"), as well as "total". The factor for "total" is 1. + + Args: + cost_cases (dict): + Dictionary containing indications on the sort of costs to retrieve. The names of + the keys can be arbitrary, but the values can only be "structural", + "non-structural", "contents" or "total". + exposure_entity (str): + 3-char identifier of the exposure entity for which the cost assumptions will be + retrieved. If a country, ISO3 code. 
+ occupancy_case (str): + Name of the occupancy case (e.g. "residential", "commercial", "industrial") + for which the cost assumptions will be retrieved. + aggregated_source_id (int): + ID of the source of the aggregated exposure model for which the building classes + and their proportions will be retrieved. + db_gde_tiles_config (dict): + Dictionary containing the credentials needed to connect to the SQL database in + which information on the cost assumptions is stored. The keys of the dictionary + need to be: + host (str): + SQL database host address. + dbname (str): + Name of the SQL database. + port (int): + Port where the SQL database can be found. + username (str): + User name to connect to the SQL database. + password (str): + Password associated with self.username. + db_table (str): + Name of the table of the SQL database where information on the cost assumptions + is stored. It is assumed that this table contains, at least, the following + fields: + exposure_entity (str): + 3-character code of the exposure entity. + occupancy_case (enum): + SQL enumerated type describing the building occupancy cases. + aggregated_source_id (int): + ID of the source of the aggregated exposure model. + structural (float): + Factor to obtain the cost of the structural components. + non_structural (float): + Factor to obtain the cost of the non-structural components. + contents (float): + Factor to obtain the cost of the building contents. + + Returns: + cost_assumptions (dict): + Dictionary with the same keys as the input 'cost_cases' and whose values are the + retrieved factors. 
+ """ + + # Retrieving all fields and then sorting out as per 'cost_cases' as it is simpler + sql_query = "SELECT structural, non_structural, contents FROM %s " + sql_query += "WHERE (exposure_entity='%s' AND occupancy_case='%s' AND " + sql_query += "aggregated_source_id='%s');" + + db_gde_tiles = Database(**db_gde_tiles_config) + db_gde_tiles.create_connection_and_cursor() + + db_gde_tiles.cursor.execute( + sql_query % (db_table, exposure_entity, occupancy_case, aggregated_source_id) + ) + exec_result = db_gde_tiles.cursor.fetchall() + + db_gde_tiles.close_connection() + + retrieved = {} + if len(exec_result) == 1: # Entries exist --> retrieve + retrieved["structural"] = exec_result[0][0] + retrieved["non_structural"] = exec_result[0][1] + retrieved["contents"] = exec_result[0][2] + retrieved["total"] = 1.0 + else: + logger.error( + "ERROR in get_exposure_entities_costs_assumptions: " + "more than one entry or no entry found for exposure_entity='%s' " + "AND occupancy_case ='%s' AND aggregated_source_id='%s' " + % (exposure_entity, occupancy_case, aggregated_source_id) + ) + retrieved["structural"] = 0.0 + retrieved["non_structural"] = 0.0 + retrieved["contents"] = 0.0 + retrieved["total"] = 0.0 + + cost_assumptions = {} + for cost_case_key in cost_cases.keys(): + cost_assumptions[cost_case_key] = retrieved[cost_cases[cost_case_key]] + + return cost_assumptions + + @staticmethod + def get_exposure_entities_population_time_distribution( + people_cases, + exposure_entity, + occupancy_case, + aggregated_source_id, + db_gde_tiles_config, + db_table, + ): + """ + This function retrieves the factors by which the census population per building can be + multiplied to obtain an estimate of the people in the buildings at a certain time of the + day. 
The factors retrieved are those indicated in the 'people_cases' dictionary, whose + keys are names of relevance for the output (user-defined) and whose values can be any of + the three cases existing in the 'exposure_entities_population_time_distribution' table + of the GDE Tiles database ("day", "night", "transit"), as well as "census" and + "average". The factor for "census" is 1, while that for "average" is the average of + "day", "night", "transit". + + Args: + people_cases (dict): + Dictionary containing indications on the times of the day to retrieve. The names + of the keys can be arbitrary, but the values can only be "day", "night", + "transit", "census" or "average". + exposure_entity (str): + 3-char identifier of the exposure entity for which the factors for the + distribution of the population in time will be retrieved. If a country, ISO3 + code. + occupancy_case (str): + Name of the occupancy case (e.g. "residential", "commercial", "industrial") + for which the factors for the distribution of the population in time will be + retrieved. + aggregated_source_id (int): + ID of the source of the aggregated exposure model for which the factors for the + distribution of the population in time will be retrieved. + db_gde_tiles_config (dict): + Dictionary containing the credentials needed to connect to the SQL database in + which information on the distribution of the population at different times of + the day is stored. The keys of the dictionary need to be: + host (str): + SQL database host address. + dbname (str): + Name of the SQL database. + port (int): + Port where the SQL database can be found. + username (str): + User name to connect to the SQL database. + password (str): + Password associated with self.username. + db_table (str): + Name of the table of the SQL database where information on the population time + distribution is stored.
It is assumed that this table contains, at least, the following + fields: + exposure_entity (str): + 3-character code of the exposure entity. + occupancy_case (enum): + SQL enumerated type describing the building occupancy cases. + aggregated_source_id (int): + ID of the source of the aggregated exposure model. + day (float): + Factor to obtain the number of people expected to be inside the + buildings during the day (approx. 10 am to 6 pm). + night (float): + Factor to obtain the number of people expected to be inside the + buildings during the night (approx. 10 pm to 6 am). + transit (float): + Factor to obtain the number of people expected to be inside the + buildings during transit times (approx. 6 am to 10 am and 6 pm to 10 + pm). + + Returns: + people_distribution (dict): + Dictionary with the same keys as the input 'people_cases' and whose values are + the retrieved factors. + """ + + # Retrieving all fields and then sorting out as per 'cost_cases' as it is simpler + sql_query = "SELECT day, night, transit FROM %s " + sql_query += "WHERE (exposure_entity='%s' AND occupancy_case='%s' AND " + sql_query += "aggregated_source_id='%s');" + + db_gde_tiles = Database(**db_gde_tiles_config) + db_gde_tiles.create_connection_and_cursor() + + db_gde_tiles.cursor.execute( + sql_query % (db_table, exposure_entity, occupancy_case, aggregated_source_id) + ) + exec_result = db_gde_tiles.cursor.fetchall() + + db_gde_tiles.close_connection() + + retrieved = {} + if len(exec_result) == 1: # Entries exist --> retrieve + retrieved["day"] = exec_result[0][0] + retrieved["night"] = exec_result[0][1] + retrieved["transit"] = exec_result[0][2] + retrieved["census"] = 1.0 + retrieved["average"] = ( + retrieved["day"] + retrieved["night"] + retrieved["transit"] + ) / 3.0 + else: + logger.error( + "ERROR in get_exposure_entities_population_time_distribution: " + "more than one entry or no entry found for exposure_entity='%s' " + "AND occupancy_case ='%s' AND aggregated_source_id='%s' " 
+ % (exposure_entity, occupancy_case, aggregated_source_id) + ) + retrieved["day"] = 0.0 + retrieved["night"] = 0.0 + retrieved["transit"] = 0.0 + retrieved["census"] = 0.0 + retrieved["average"] = 0.0 + + people_distribution = {} + for people_case_key in people_cases.keys(): + people_distribution[people_case_key] = retrieved[people_cases[people_case_key]] + + return people_distribution + + @staticmethod + def get_GDE_buildings( + quadkey, + data_unit_id, + occupancy_case, + aggregated_source_id, + get_footprints, + db_gde_tiles_config, + db_table, + ): + """ + This function retrieves and returns all the GDE-processed OBM buildings from the table + 'db_table' of the database whose credentials are given by 'db_gde_tiles_config' that are + associated with 'quadkey', 'data_unit_id', 'occupancy_case' and 'aggregated_source_id'. + If 'get_footprints' is True, it also returns the centroids and footprints of + these buildings. If no buildings are found for the input selection criteria, the output + 'obm_buildings' is a Pandas DataFrame with column structure but no rows and + 'obm_geometries' is an empty dictionary. + + Args: + quadkey (str): + Quadkey of the zoom-level 18 tile for which the GDE buildings will be retrieved. + data_unit_id (str): + ID of the data unit for which the GDE buildings will be retrieved. + occupancy_case (str): + Name of the occupancy case (e.g. "residential", "commercial", "industrial") + for which the GDE buildings will be retrieved. + aggregated_source_id (int): + ID of the source of the aggregated exposure model for which the GDE buildings + will be retrieved. + get_footprints (bool): + If True, the geometries and centroids of the GDE buildings will be retrieved and + returned, if False, they will not. + db_gde_tiles_config (dict): + Dictionary containing the credentials needed to connect to the SQL database in + which information on the GDE buildings is stored. 
The keys of the + dictionary need to be: + host (str): + SQL database host address. + dbname (str): + Name of the SQL database. + port (int): + Port where the SQL database can be found. + username (str): + User name to connect to the SQL database. + password (str): + Password associated with self.username. + db_table (str): + Name of the table of the SQL database where the GDE buildings are stored. It is + assumed that this table contains, at least, the following fields: + osm_id (int): + ID of the OBM building. + aggregated_source_id (int): + ID of the source of the aggregated exposure model. + occupancy_case (enum): + SQL enumerated type describing the building occupancy cases. + data_unit_id (str): + ID of the data unit the OBM building belongs to. + quadkey (str): + Quadkey of the zoom-level 18 tile to which the centroid of the building + belongs. + building_class_names (array of str): + Building class as per the GEM Building Taxonomy. + settlement_types (list of str): + Type of settlements within the data unit. Possible values: "urban", + "rural", "big_city", "all". + occupancy_subtypes (list of str): + Details on the occupancy, if relevant to characterise the building + classes. + probabilities (array of float): + Probabilities of the OBM building belonging to each building class. + geometry (PSQL geometry): + Footprint of the OBM building. + + Returns: + obm_buildings (Pandas DataFrame): + DataFrame containing the GDE building classes and their probabilities. It + comprises the following columns: + osm_id (int): + ID of the OBM building (several rows of the DataFrame can correspond to + the same OpenStreetMap ID). + building_class_name (str): + Building class as per the GEM Building Taxonomy. + settlement_type (str): + Type of settlement within the data unit. Possible values: "urban", + "rural", "big_city", "all". + occupancy_subtype (str): + Details on the occupancy, if relevant to characterise the building + class. 
+ probabilities (float): + Probabilities of the building class (defined by 'building_class_name', + 'settlement_type' and 'occupancy_subtype') being the correct class of + the OBM building with 'osm_id'. + + obm_geometries (dict): + Dictionary in which each key is a unique 'osm_id' from 'obm_buildings', with the + following subkeys (only if 'get_footprints' is set to True): + centroid (str): + Centroid of the OBM building in Well-Known Text format. + footprint (str): + Footprint of the OBM building in Well-Known Text format. + If 'get_footprints' is False, 'obm_geometries' is an empty dictionary. + """ + + sql_query = "SELECT osm_id, building_class_names, settlement_types, occupancy_subtypes," + sql_query += " probabilities, ST_AsText(ST_Centroid(geometry)), ST_AsText(geometry)" + sql_query += " FROM %s WHERE(quadkey='%s' AND data_unit_id='%s' AND occupancy_case='%s'" + sql_query += " AND aggregated_source_id=%s);" + + db_gde_tiles = Database(**db_gde_tiles_config) + db_gde_tiles.create_connection_and_cursor() + + db_gde_tiles.cursor.execute( + sql_query % (db_table, quadkey, data_unit_id, occupancy_case, aggregated_source_id) + ) + exec_result = db_gde_tiles.cursor.fetchall() + + db_gde_tiles.close_connection() + + if len(exec_result) > 0: # Entries exist --> retrieve + raw_osm_ids = numpy.array( + [exec_result[i][0] for i in range(len(exec_result))], dtype="int" + ) + raw_building_class_names = numpy.array( + [exec_result[i][1] for i in range(len(exec_result))], dtype="object" + ) + raw_settlement_types = numpy.array( + [ + exec_result[i][2].replace("{", "").replace("}", "").split(",") + for i in range(len(exec_result)) + ], + dtype="object", + ) # settlement type is a personalised enumerated type and works differently + raw_occupancy_subtypes = numpy.array( + [exec_result[i][3] for i in range(len(exec_result))], dtype="object" + ) + raw_probabilities = numpy.array( + [exec_result[i][4] for i in range(len(exec_result))], dtype="object" + ) + if get_footprints: + 
raw_centroids = numpy.array( + [exec_result[i][5] for i in range(len(exec_result))], dtype="str" + ) + raw_footprints = numpy.array( + [exec_result[i][6] for i in range(len(exec_result))], dtype="str" + ) + else: + raw_centroids = numpy.array(["" for i in range(len(exec_result))], dtype="str") + raw_footprints = numpy.array(["" for i in range(len(exec_result))], dtype="str") + + else: # No entries found + raw_osm_ids = numpy.array([], dtype="int") + raw_building_class_names = numpy.array([], dtype="object") + raw_settlement_types = numpy.array([], dtype="object") + raw_occupancy_subtypes = numpy.array([], dtype="object") + raw_probabilities = numpy.array([], dtype="object") + raw_centroids = numpy.array([], dtype="str") + raw_footprints = numpy.array([], dtype="str") + + obm_geometries = {} + osm_ids = numpy.array([], dtype="int") + building_class_names = numpy.array([], dtype="str") + settlement_types = numpy.array([], dtype="str") + occupancy_subtypes = numpy.array([], dtype="str") + probabilities = numpy.array([], dtype="float") + + for i, osm_id in enumerate(raw_osm_ids): + if get_footprints: + obm_geometries[osm_id] = {} + obm_geometries[osm_id]["centroid"] = raw_centroids[i] + obm_geometries[osm_id]["footprint"] = raw_footprints[i] + number_building_classes = len(raw_building_class_names[i]) + osm_ids = numpy.hstack( + (osm_ids, numpy.array([osm_id for j in range(number_building_classes)])) + ) + building_class_names = numpy.hstack( + (building_class_names, raw_building_class_names[i]) + ) + settlement_types = numpy.hstack((settlement_types, raw_settlement_types[i])) + occupancy_subtypes = numpy.hstack((occupancy_subtypes, raw_occupancy_subtypes[i])) + probabilities = numpy.hstack((probabilities, raw_probabilities[i])) + + obm_buildings = pandas.DataFrame( + { + "osm_id": osm_ids, + "building_class_name": building_class_names, + "settlement_type": settlement_types, + "occupancy_subtype": occupancy_subtypes, + "probabilities": probabilities, + } + ) + + return 
obm_buildings, obm_geometries diff --git a/gdeexporter/gdeexporter.py b/gdeexporter/gdeexporter.py index 11c32b1..00ecfb5 100644 --- a/gdeexporter/gdeexporter.py +++ b/gdeexporter/gdeexporter.py @@ -18,8 +18,11 @@ import logging import sys +from multiprocessing import Pool +from functools import partial from gdeexporter.configuration import Configuration from gdeexporter.database_queries import DatabaseQueries +from gdeexporter.handler import ExportHandler # Add a logger printing error, warning, info and debug messages to the screen logger = logging.getLogger() @@ -78,11 +81,25 @@ def main(): logger.info("%s quadkeys will be processed" % (config.number_quadkeys_to_process)) - for quadkeys_group in config.quadkeys_to_process.keys(): - logger.info( - "Processing of %s quadkeys from quadkey group '%s' has started" - % (len(config.quadkeys_to_process[quadkeys_group]), quadkeys_group) + # Create groups of quadkey groups and occupancies, so as to parallelise + if config.number_quadkeys_to_process > 0: + quadkeys_occupancy_groups = [ + (quadkeys_group, occupancy_case) + for quadkeys_group in config.quadkeys_to_process.keys() + for occupancy_case in config.occupancies_to_run + ] + + p = Pool(processes=config.number_cores) + func = partial( + ExportHandler.process_quadkey_occupancy_group, + config, + aggregated_source_id, ) + summary_values = p.map(func, quadkeys_occupancy_groups) + p.close() + p.join() + + print(summary_values) # Leave the program logger.info("gde-exporter has finished") diff --git a/gdeexporter/handler.py b/gdeexporter/handler.py new file mode 100644 index 0000000..d01547b --- /dev/null +++ b/gdeexporter/handler.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2022: +# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the 
License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero +# General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. + +import logging +from gdeexporter.tileexposure import TileExposure +from gdeexporter.database_queries import DatabaseQueries + + +logger = logging.getLogger() + + +class ExportHandler: + """This class handles the main processing activities of the gde-exporter.""" + + @staticmethod + def process_quadkey_occupancy_group(config, aggregated_source_id, group_attributes): + """ + This function processes a particular quadkey group and occupancy case, both of which are + passed as arguments under 'group_attributes', to enable parallelisation. + + Args: + config (Configuration): + Instance of the gdeexporter.configuration.Configuration class. + aggregated_source_id (int): + ID of the source of the aggregated exposure model for which the processing will + take place. + group_attributes (tuple of (quadkeys_group, occupancy_case)): + Tuple with two elements: + quadkeys_group: + Name of the quadkey group for which the processing will take place. It + needs to be a key of the config.quadkeys_to_process dictionary. The + content of config.quadkeys_to_process[quadkeys_group] is a list of + quadkeys. + occupancy_case (str): + Occupancy case for which the processing will take place. + + Returns: + summary_values (dict): + Dictionary summarising the number of buildings processed for the input quadkey + group and occupancy case, with the following keys: + processed_quadkeys (int): + Number of quadkeys processed (includes quadkeys with no buildings). + OBM_buildings (int): + Number of GDE-processed OBM buildings. 
+ aggregated_buildings (float): + Number of aggregated buildings. + remainder_buildings (float): + Number of remainder buildings. + total_buildings (float): + Number of total buildings (remainder plus OBM). + """ + + quadkeys_group = group_attributes[0] + occupancy_case = group_attributes[1] + + logger.info( + "Processing of %s quadkeys from group '%s' and occupancy case '%s' has started" + % (len(config.quadkeys_to_process[quadkeys_group]), quadkeys_group, occupancy_case) + ) + + summary_values = {} + summary_values["processed_quadkeys"] = 0 + summary_values["OBM_buildings"] = 0 + summary_values["aggregated_buildings"] = 0.0 + summary_values["remainder_buildings"] = 0.0 + summary_values["total_buildings"] = 0.0 + + for quadkey in config.quadkeys_to_process[quadkeys_group]: + quadtile = TileExposure(quadkey, config.cost_cases, config.people_cases) + + if config.geographic_selection["selection_mode"].lower() == "data_unit_id": + data_unit_ids = [quadkeys_group] + else: + data_unit_ids = DatabaseQueries.retrieve_data_unit_ids( + quadkey, + aggregated_source_id, + config.exposure_entities_to_run, + occupancy_case, + config.database_gde_tiles, + "data_unit_tiles", + ) + + for data_unit_id in data_unit_ids: + # Retrieve building classes associated with this data unit, occupancy case and + # aggregated source ID + building_classes = DatabaseQueries.get_building_classes_of_data_unit( + data_unit_id, + occupancy_case, + aggregated_source_id, + config.database_gde_tiles, + "data_units_buildings", + ) + + exposure_entity_code = data_unit_id[:3] + # Retrieve cost assumptions + cost_assumptions = DatabaseQueries.get_exposure_entities_costs_assumptions( + config.cost_cases, + exposure_entity_code, + occupancy_case, + aggregated_source_id, + config.database_gde_tiles, + "exposure_entities_costs_assumptions", + ) + # Retrieve distribution of people at different times of the day + people_distribution = ( + DatabaseQueries.get_exposure_entities_population_time_distribution( + 
config.people_cases, + exposure_entity_code, + occupancy_case, + aggregated_source_id, + config.database_gde_tiles, + "exposure_entities_population_time_distribution", + ) + ) + + # Retrieve number of aggregated, OBM and remainder buildings in the tile + ( + number_aggregated, + number_obm, + number_remainder, + ) = DatabaseQueries.get_numbers_buildings_for_data_unit_tile( + quadkey, + aggregated_source_id, + occupancy_case, + data_unit_id, + config.database_gde_tiles, + "data_unit_tiles", + ) + if number_aggregated < -1.0 or number_obm < -1.0 or number_remainder < -1.0: + logger.error( + "get_numbers_buildings_for_data_unit_tile could not retrieve number " + "of aggregated, remainder and OBM buildings for quadkey = '%s' and " + "data unit ID = '%s'" % (quadkey, data_unit_id) + ) + + # Append aggregated buildings to quadtile.aggregated_buildings + if number_aggregated > 1e-6: # If smaller, consider equal to zero + quadtile.append_lumped_buildings( + "aggregated_buildings", + building_classes, + number_aggregated, + cost_assumptions, + people_distribution, + data_unit_id, + ) + + # Append remainder buildings to quadtile.remainder_buildings + if number_remainder > 1e-6: # If smaller, consider equal to zero + quadtile.append_lumped_buildings( + "remainder_buildings", + building_classes, + number_remainder, + cost_assumptions, + people_distribution, + data_unit_id, + ) + + # Retrieve OBM buildings + obm_buildings, obm_geometries = DatabaseQueries.get_GDE_buildings( + quadkey, + data_unit_id, + occupancy_case, + aggregated_source_id, + config.export_OBM_footprints, + config.database_gde_tiles, + "gde_buildings", + ) + + if obm_buildings.shape[0] > 0: + # Append OBM buildings to quadtile.obm_buildings + quadtile.append_OBM_buildings( + obm_buildings, + building_classes, + cost_assumptions, + people_distribution, + data_unit_id, + ) + + # Append obm_geometries to quadtile.obm_buildings_geometries (dictionary) + quadtile.obm_buildings_geometries.update(obm_geometries) + + 
# Add to summary values + summary_values["aggregated_buildings"] += ( + quadtile.aggregated_buildings["number"].to_numpy().sum() + ) + summary_values["remainder_buildings"] += ( + quadtile.remainder_buildings["number"].to_numpy().sum() + ) + summary_values["total_buildings"] += ( + quadtile.total_buildings["number"].to_numpy().sum() + ) + summary_values["OBM_buildings"] += quadtile.obm_buildings["number"].to_numpy().sum() + + summary_values["processed_quadkeys"] += len(config.quadkeys_to_process[quadkeys_group]) + + return summary_values diff --git a/gdeexporter/tileexposure.py b/gdeexporter/tileexposure.py new file mode 100644 index 0000000..82e120a --- /dev/null +++ b/gdeexporter/tileexposure.py @@ -0,0 +1,373 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2022: +# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero +# General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. + +import logging +import pandas +from copy import deepcopy + + +logger = logging.getLogger() + +# Empty DataFrame +BUILDINGS = pandas.DataFrame( + { + "building_class_name": pandas.Series(dtype="str"), + "number": pandas.Series(dtype="float"), + "data_unit_id": pandas.Series(dtype="str"), + } +) + + +class TileExposure: + """This class represents the exposure of a tile of zoom level 18. + + Attributes: + self.quadkey (str): + Quadkey of the zoom level 18 tile. 
+ self.obm_buildings (Pandas DataFrame): + DataFrame with the OBM buildings that belong to the tile, in terms of: + osm_id (int): + OpenStreetMap ID of the building. + building_class_name (str): + Name of the building class as per the GEM Building Taxonomy v3.0. + number (float): + Probability of the building (identified by its OSM ID) belonging to the + building class. + Columns associated with building replacement costs (float): + Names and contents are user-defined. Values correspond to values per + building multiplied by the probability of the building class corresponding + to the particular building (identified by its OSM ID). + Columns associated with the number of people in the building at different times + of the day (float): + Names and contents are user-defined. Values correspond to values per + building multiplied by the probability of the building class corresponding + to the particular building (identified by its OSM ID). + data_unit_id (str): + ID of the data unit the building belongs to. + self.obm_buildings_geometries (dict): + Dictionary in which each key is a unique 'osm_id' from self.obm_buildings, with the + following subkeys: + centroid (str): + Centroid of the OBM building in Well-Known Text format. + footprint (str) (only if instructed to retrieve footprints by the user): + Footprint of the OBM building in Well-Known Text format. + self.remainder_buildings (Pandas DataFrame): + DataFrame with the remainder buildings that belong to the tile, in terms of: + building_class_name (str): + Name of the building class as per the GEM Building Taxonomy v3.0. + number (float): + Number of buildings of this building class. + Columns associated with building replacement costs (float): + Names and contents are user-defined. Values correspond to values per + building multiplied by the number of buildings of the class. 
+ Columns associated with the number of people in the building at different times + of the day (float): + Names and contents are user-defined. Values correspond to values per + building multiplied by the number of buildings of the class. + data_unit_id (str): + ID of the data unit the buildings belong to. + self.aggregated_buildings (Pandas DataFrame): + DataFrame with the remainder buildings that belong to the tile, in terms of the same + fields described for self.remainder_buildings. + self.total_buildings (Pandas DataFrame): + DataFrame with the total buildings that belong to the tile (aggregation of remainder + and OBM buildings), in terms of the same fields described for + self.remainder_buildings. + """ + + def __init__(self, quadkey, cost_cases, people_cases): + """ + Args: + quadkey (str): + Quadkey of the zoom level 18 tile. + cost_cases (dict): + Dictionary containing indications on the sort of costs to output. + people_cases (dict): + Dictionary containing indications on the time of the day for which the number of + people in the buildings is to be output. + """ + + self.quadkey = quadkey + self.obm_buildings = self._create_empty_building_dataframes( + cost_cases, people_cases, additional_cols={"osm_id": "str"} + ) + self.obm_buildings_geometries = {} + self.remainder_buildings = self._create_empty_building_dataframes( + cost_cases, people_cases + ) + self.aggregated_buildings = self._create_empty_building_dataframes( + cost_cases, people_cases + ) + self.total_buildings = self._create_empty_building_dataframes(cost_cases, people_cases) + + def _create_empty_building_dataframes(self, cost_cases, people_cases, additional_cols={}): + """ + Args: + cost_cases (dict): + Dictionary containing indications on the sort of costs to output. + people_cases (dict): + Dictionary containing indications on the time of the day for which the number of + people in the buildings is to be output. 
+ additional_cols (dict): + Dictionary containing names (keys) and data types (values) of any other column + that the output is required to have. + """ + + empty_buildings = deepcopy(BUILDINGS) + for cost_case in cost_cases: + empty_buildings[cost_case] = pandas.Series(dtype="float") + for people_case in people_cases: + empty_buildings[people_case] = pandas.Series(dtype="float") + for col in additional_cols: + empty_buildings[col] = pandas.Series(dtype=additional_cols[col]) + + return empty_buildings + + def append_lumped_buildings( + self, + lumped_building_case, + building_classes, + number_buildings, + cost_assumptions, + people_distribution, + data_unit_id, + ): + """ + This function appends buildings to the case of lumped buildings indicated by + 'lumped_building_case', which can be either "aggregated_buildings" or + "remainder_buildings". The building classes and their proportions are as indicated in + 'building_classes' and the total number of aggregated or remainder buildings is + indicated by 'number_buildings'. The dictionaries 'cost_assumptions' and + 'people_distribution' indicate the desired disaggregation of replacement costs and + distribution of people at different times of the day. The output costs and number of + people correspond to the total number of buildings. + + Args: + lumped_building_case (str): + Case of lumped buildings to which buildings will be updated. These can be: + "aggregated_buildings" or "remainder_buildings". + building_classes (Pandas DataFrame): + DataFrame containing the building classes and their proportions. It comprises + the following columns: + building_class_name (str): + Building class as per the GEM Building Taxonomy. + settlement_type (str): + Type of settlement within the data unit. Possible values: "urban", + "rural", "big_city", "all". + occupancy_subtype (str): + Details on the occupancy, if relevant to characterise the building + class. 
+ proportions (float): + Proportions in which the building class (defined by + 'building_class_name', 'settlement_type' and 'occupancy_subtype') is + present in the data unit. + census_people_per_building (float): + Number of census-derived people per building (i.e. not accounting for + time of the day). + total_cost_per_building (float): + Total replacement cost per building, including costs of structural and + non-structural components as well as contents. + number_buildings (float): + Number of aggregated or remainder buildings. + cost_assumptions (dict): + Dictionary containing the factors by which the total replacement cost of a + building can be multiplied to disaggregate it into the cost of structural and + non-structural components, as well as contents. + people_distribution (dict): + Dictionary containing the factors by which the census population per building + can be multiplied to obtain an estimate of the people in the buildings at a + certain time of the day. + data_unit_id (str): + ID of the data unit that the buildings belong to. 
+ """ + + lumped_buildings = pandas.DataFrame( + { + "building_class_name": building_classes["building_class_name"], + "number": number_buildings * building_classes["proportions"], + "data_unit_id": [data_unit_id for i in range(building_classes.shape[0])], + } + ) + for cost_case in cost_assumptions: + lumped_buildings[cost_case] = pandas.Series( + cost_assumptions[cost_case] + * building_classes["total_cost_per_building"] + * lumped_buildings["number"], + dtype="float", + ) + for people_case in people_distribution: + lumped_buildings[people_case] = pandas.Series( + people_distribution[people_case] + * building_classes["census_people_per_building"] + * lumped_buildings["number"], + dtype="float", + ) + + updated_lumped_buildings = pandas.concat( + [getattr(self, lumped_building_case), lumped_buildings], + ignore_index=True, + ) + + setattr(self, lumped_building_case, updated_lumped_buildings) + + def append_OBM_buildings( + self, + obm_buildings, + building_classes, + cost_assumptions, + people_distribution, + data_unit_id, + ): + """ + This function appends the buildings from 'obm_buildings' to self.obm_buildings. The + total replacement cost and number of census people per building are retrieved from + 'building_classes', as a function of the building class, which is defined by the columns + building_class_name', 'settlement_type' and 'occupancy_subtype' present in both input + DataFrames. It is assumed that the buildings in 'obm_buildings' and the building classes + in 'building_classes' are consistent with each other in terms of belonging to the same + occupancy case, data unit ID and aggregated source ID. This assumption implies that all + building classes from 'obm_buildings' will be found in 'building_classes' (apart from + overall consistency). The dictionaries 'cost_assumptions' and 'people_distribution' + indicate the desired disaggregation of replacement costs and distribution of people at + different times of the day. 
The output costs and number of people correspond to the + probability of the building belonging to each building class (as per the probabilities + input via 'obm_buildings'). + + Args: + obm_buildings (Pandas DataFrame): + DataFrame containing the OBM building classes and their probabilities. It + comprises the following columns: + osm_id (int): + ID of the OBM building (several rows of the DataFrame can correspond to + the same OpenStreetMap ID). + building_class_name (str): + Building class as per the GEM Building Taxonomy. + settlement_type (str): + Type of settlement within the data unit. Possible values: "urban", + "rural", "big_city", "all". + occupancy_subtype (str): + Details on the occupancy, if relevant to characterise the building + class. + probabilities (float): + Probabilities of the building class (defined by 'building_class_name', + 'settlement_type' and 'occupancy_subtype') being the correct class of + the OBM building with 'osm_id'. + building_classes (Pandas DataFrame): + DataFrame containing the building classes associated with 'obm_buildings' and + their total replacement cost per building and census number of people per + building. It comprises at least the following columns: + building_class_name (str): + Building class as per the GEM Building Taxonomy. + settlement_type (str): + Type of settlement within the data unit. Possible values: "urban", + "rural", "big_city", "all". + occupancy_subtype (str): + Details on the occupancy, if relevant to characterise the building + class. + census_people_per_building (float): + Number of census-derived people per building (i.e. not accounting for + time of the day). + total_cost_per_building (float): + Total replacement cost per building, including costs of structural and + non-structural components as well as contents. 
+ cost_assumptions (dict): + Dictionary containing the factors by which the total replacement cost of a + building can be multiplied to disaggregate it into the cost of structural and + non-structural components, as well as contents. + people_distribution (dict): + Dictionary containing the factors by which the census population per building + can be multiplied to obtain an estimate of the people in the buildings at a + certain time of the day. + data_unit_id (str): + ID of the data unit that the buildings belong to. + """ + + # Set multiindex for building_classes, drop "proportions" columns if exists + building_classes_copy = deepcopy(building_classes) + if "proportions" in building_classes_copy.columns: + building_classes_copy = building_classes_copy.drop(columns=["proportions"]) + building_classes_copy = building_classes_copy.set_index( + ["building_class_name", "settlement_type", "occupancy_subtype"] + ) + + # Join 'obm_buildings' and 'building_classes_copy' to assign + # 'census_people_per_building' and 'total_cost_per_building' to each row in + # 'obm_buildings' + obm_buildings_expanded = obm_buildings.join( + building_classes_copy, + on=["building_class_name", "settlement_type", "occupancy_subtype"], + ) + + # Identify any cases in which the building class of 'obm_buildings' was not found in + # 'building_classes' + which_nans = ( + obm_buildings_expanded["census_people_per_building"].isnull().values + ) # boolean array + osm_ids_without_costs_or_people = list( + obm_buildings_expanded.loc[which_nans, "osm_id"].to_numpy().astype(str) + ) + + if len(osm_ids_without_costs_or_people) > 0: + # Need to log an error if a building class is not found + # (and costs/people cannot be assigned) + logger.error( + "TileExposure.append_OBM_buildings: input 'building_classes' does not cover " + "all building classes contained in input 'obm_buildings'; the following OSM " + "IDs have NULL values of costs and number of people: %s" + % (", 
".join(osm_ids_without_costs_or_people)) + ) + + # Transform 'obm_buildings_expanded' to the output format (self.obm_buildings) + obm_buildings_expanded = obm_buildings_expanded.rename( + columns={"probabilities": "number"} + ) + obm_buildings_expanded["data_unit_id"] = [ + data_unit_id for i in range(obm_buildings_expanded.shape[0]) + ] + + for cost_case in cost_assumptions: + obm_buildings_expanded[cost_case] = pandas.Series( + cost_assumptions[cost_case] + * obm_buildings_expanded["total_cost_per_building"] + * obm_buildings_expanded["number"], + dtype="float", + ) + for people_case in people_distribution: + obm_buildings_expanded[people_case] = pandas.Series( + people_distribution[people_case] + * obm_buildings_expanded["census_people_per_building"] + * obm_buildings_expanded["number"], + dtype="float", + ) + + # Drop unnecessary columns + obm_buildings_expanded = obm_buildings_expanded.drop( + columns=[ + "total_cost_per_building", + "census_people_per_building", + "settlement_type", + "occupancy_subtype", + ] + ) + + updated_obm_buildings = pandas.concat( + [getattr(self, "obm_buildings"), obm_buildings_expanded], + ignore_index=True, + ) + + setattr(self, "obm_buildings", updated_obm_buildings) diff --git a/tests/data/config_for_testing_cost_case_invalid.yml b/tests/data/config_for_testing_cost_case_invalid.yml new file mode 100644 index 0000000..89a14ac --- /dev/null +++ b/tests/data/config_for_testing_cost_case_invalid.yml @@ -0,0 +1,20 @@ +model_name: esrm20 +occupancies_to_run: residential, commercial +exposure_entities_to_run: Italy +exposure_entities_code: ISO3 +geographic_selection: + selection_mode: exposure_entity +cost_cases: + structural: total + contents: euros +people_cases: + day: day + night: night + transit: transit +export_OBM_footprints: True +database_gde_tiles: + host: host.somewhere.xx + dbname: some_database_name + username: some_username + password: some_password +number_cores: 1 diff --git 
a/tests/data/config_for_testing_geographic_bbox.yml b/tests/data/config_for_testing_geographic_bbox.yml index 1dae02e..0b94cea 100644 --- a/tests/data/config_for_testing_geographic_bbox.yml +++ b/tests/data/config_for_testing_geographic_bbox.yml @@ -10,8 +10,14 @@ geographic_selection: lat_s: 37.965450 lat_n: 37.972561 quadkeys_file: /path/to/quadkeys.txt +cost_cases: + structural: total +people_cases: + average: average +export_OBM_footprints: True database_gde_tiles: host: host.somewhere.xx dbname: some_database_name username: some_username password: some_password +number_cores: 1 diff --git a/tests/data/config_for_testing_geographic_bbox_missing.yml b/tests/data/config_for_testing_geographic_bbox_missing.yml index a76462b..de513c1 100644 --- a/tests/data/config_for_testing_geographic_bbox_missing.yml +++ b/tests/data/config_for_testing_geographic_bbox_missing.yml @@ -5,8 +5,14 @@ exposure_entities_code: ISO3 geographic_selection: selection_mode: bounding_box quadkeys_file: /path/to/quadkeys.txt +cost_cases: + structural: total +people_cases: + average: average +export_OBM_footprints: True database_gde_tiles: host: host.somewhere.xx dbname: some_database_name username: some_username password: some_password +number_cores: 1 diff --git a/tests/data/config_for_testing_geographic_data_units.yml b/tests/data/config_for_testing_geographic_data_units.yml index 56f2091..adee216 100644 --- a/tests/data/config_for_testing_geographic_data_units.yml +++ b/tests/data/config_for_testing_geographic_data_units.yml @@ -6,8 +6,14 @@ geographic_selection: selection_mode: data_unit_id data_unit_ids: ABC_10278, DEF_00000 quadkeys_file: /path/to/quadkeys.txt +cost_cases: + structural: total +people_cases: + average: average +export_OBM_footprints: True database_gde_tiles: host: host.somewhere.xx dbname: some_database_name username: some_username password: some_password +number_cores: 1 diff --git a/tests/data/config_for_testing_geographic_data_units_missing.yml 
b/tests/data/config_for_testing_geographic_data_units_missing.yml index fc0efbe..188fc03 100644 --- a/tests/data/config_for_testing_geographic_data_units_missing.yml +++ b/tests/data/config_for_testing_geographic_data_units_missing.yml @@ -5,8 +5,14 @@ exposure_entities_code: ISO3 geographic_selection: selection_mode: data_unit_id quadkeys_file: /path/to/quadkeys.txt +cost_cases: + structural: total +people_cases: + average: average +export_OBM_footprints: True database_gde_tiles: host: host.somewhere.xx dbname: some_database_name username: some_username password: some_password +number_cores: 1 diff --git a/tests/data/config_for_testing_geographic_quadkeys.yml b/tests/data/config_for_testing_geographic_quadkeys.yml index 3d66fda..7277434 100644 --- a/tests/data/config_for_testing_geographic_quadkeys.yml +++ b/tests/data/config_for_testing_geographic_quadkeys.yml @@ -5,8 +5,14 @@ exposure_entities_code: ISO3 geographic_selection: selection_mode: quadkeys quadkeys_file: /path/to/quadkeys.txt +cost_cases: + structural: total +people_cases: + average: average +export_OBM_footprints: True database_gde_tiles: host: host.somewhere.xx dbname: some_database_name username: some_username password: some_password +number_cores: 1 diff --git a/tests/data/config_for_testing_geographic_quadkeys_missing.yml b/tests/data/config_for_testing_geographic_quadkeys_missing.yml index 3fb4e3a..35efe8e 100644 --- a/tests/data/config_for_testing_geographic_quadkeys_missing.yml +++ b/tests/data/config_for_testing_geographic_quadkeys_missing.yml @@ -4,8 +4,14 @@ exposure_entities_to_run: Greece exposure_entities_code: ISO3 geographic_selection: selection_mode: quadkeys +cost_cases: + structural: total +people_cases: + average: average +export_OBM_footprints: True database_gde_tiles: host: host.somewhere.xx dbname: some_database_name username: some_username password: some_password +number_cores: 1 diff --git a/tests/data/config_for_testing_good.yml b/tests/data/config_for_testing_good.yml index 
caa6f8b..eae7027 100644 --- a/tests/data/config_for_testing_good.yml +++ b/tests/data/config_for_testing_good.yml @@ -4,8 +4,16 @@ exposure_entities_to_run: Italy exposure_entities_code: ISO3 geographic_selection: selection_mode: exposure_entity +cost_cases: + structural: total +people_cases: + day: day + night: night + transit: transit +export_OBM_footprints: True database_gde_tiles: host: host.somewhere.xx dbname: some_database_name username: some_username password: some_password +number_cores: 1 diff --git a/tests/data/config_for_testing_good_people_case_invalid.yml b/tests/data/config_for_testing_good_people_case_invalid.yml new file mode 100644 index 0000000..f7cf29e --- /dev/null +++ b/tests/data/config_for_testing_good_people_case_invalid.yml @@ -0,0 +1,19 @@ +model_name: esrm20 +occupancies_to_run: residential, commercial +exposure_entities_to_run: Italy +exposure_entities_code: ISO3 +geographic_selection: + selection_mode: exposure_entity +cost_cases: + structural: total +people_cases: + day: day + mid-day: midday + transit: transit +export_OBM_footprints: True +database_gde_tiles: + host: host.somewhere.xx + dbname: some_database_name + username: some_username + password: some_password +number_cores: 1 diff --git a/tests/data/config_for_testing_missing.yml b/tests/data/config_for_testing_missing.yml index 190e203..5f0a2b9 100644 --- a/tests/data/config_for_testing_missing.yml +++ b/tests/data/config_for_testing_missing.yml @@ -3,8 +3,14 @@ occupancies_to_run: residential, commercial exposure_entities_code: ISO3 geographic_selection: selection_mode: exposure_entity +cost_cases: + structural: total +people_cases: + average: average +export_OBM_footprints: True database_gde_tiles: host: host.somewhere.xx dbname: some_database_name username: some_username password: some_password +number_cores: 1 diff --git a/tests/data/expected_results_append_OBM_buildings.csv b/tests/data/expected_results_append_OBM_buildings.csv new file mode 100644 index 0000000..e986c00 
--- /dev/null +++ b/tests/data/expected_results_append_OBM_buildings.csv @@ -0,0 +1,5 @@ +osm_id,building_class_name,number,structural,random_name_1,census,random_name_2,data_unit_id +123456789,A1/HBET:1-3,0.444444444,755555.5556,226666.6667,3.955555556,3.851128889,ABC_10269 +123456789,B1/HBET:1-3,0.555555556,1166666.6667,350000,6.888888889,6.707022222,ABC_10269 +987654321,A2/HBET:4-6,0.666666667,1666666.6667,500000,7,6.8152,ABC_10269 +987654321,B2/H:4,0.333333333,966666.6667,290000,8.9,8.66504,ABC_10269 diff --git a/tests/data/expected_results_append_lumped_buildings.csv b/tests/data/expected_results_append_lumped_buildings.csv new file mode 100644 index 0000000..79741c3 --- /dev/null +++ b/tests/data/expected_results_append_lumped_buildings.csv @@ -0,0 +1,11 @@ +building_class_name,number,structural,random_name_1,census,random_name_2,data_unit_id +A1/HBET:1-3,25.06,42602000,12780600,223.034,217.1459024,ABC_10269 +A2/HBET:4-6,37.59,93975000,28192500,394.695,384.275052,ABC_10269 +A3/HBET:7-12,12.53,225540000,67662000,730.499,711.2138264,ABC_10269 +B1/HBET:1-3,31.325,65782500,19734750,388.43,378.175448,ABC_10269 +B2/H:4,18.795,54505500,16351650,501.8265,488.5782804,ABC_10269 +A1/HBET:1-3,14.56,24752000,7425600,129.584,126.1629824,DEF_00000 +A2/HBET:4-6,21.84,54600000,16380000,229.32,223.265952,DEF_00000 +A3/HBET:7-12,7.28,131040000,39312000,424.424,413.2192064,DEF_00000 +B1/HBET:1-3,18.2,38220000,11466000,225.68,219.722048,DEF_00000 +B2/H:4,10.92,31668000,9500400,291.564,283.8667104,DEF_00000 diff --git a/tests/data/test_database_set_up.sql b/tests/data/test_database_set_up.sql index d35b6ca..fdf9637 100644 --- a/tests/data/test_database_set_up.sql +++ b/tests/data/test_database_set_up.sql @@ -1,12 +1,18 @@ DROP TABLE IF EXISTS aggregated_sources; DROP TABLE IF EXISTS data_units; DROP TABLE IF EXISTS data_unit_tiles; +DROP TABLE IF EXISTS data_units_buildings; +DROP TABLE IF EXISTS exposure_entities_costs_assumptions; +DROP TABLE IF EXISTS 
exposure_entities_population_time_distribution; +DROP TABLE IF EXISTS gde_buildings; DROP TYPE IF EXISTS occupancycase; +DROP TYPE IF EXISTS settlement; DROP EXTENSION IF EXISTS postgis; CREATE EXTENSION postgis; CREATE TYPE occupancycase AS ENUM ('residential', 'commercial', 'industrial'); +CREATE TYPE settlement AS ENUM ('urban', 'rural', 'big_city', 'all'); CREATE TABLE aggregated_sources ( @@ -90,19 +96,155 @@ INSERT INTO data_unit_tiles(quadkey, size_data_unit_tile_built_up_area, fraction_data_unit_area, fraction_data_unit_built_up_area, - aggregated_buildings) -VALUES ('122010321033023130', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 15.7), -('122010321033023130', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 23.4), -('122010321033023120', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 39.1), -('122010321033023120', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 17.6), -('122010321033023132', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 34.4), -('122010321033023132', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 11.5), -('122010321033023121', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 26.2), -('122010321033023121', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 0.0), -('122010321033023123', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 16.5), -('122010321033023123', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 0.0), -('122010321033032123', 2, 'residential', 'ABC', 'ABC_10278', 0.0, 0.0, 0.0, 0.0, 0.0), -('122010321033032123', 2, 'commercial', 'ABC', 'ABC_10278', 0.0, 0.0, 0.0, 0.0, 0.0), -('122010321033032301', 2, 'commercial', 'ABC', 'ABC_10278', 0.0, 0.0, 0.0, 0.0, 0.0), -('122010321033211220', 2, 'residential', 'DEF', 'DEF_00000', 0.0, 0.0, 0.0, 0.0, 0.0), -('122010321033211220', 2, 'commercial', 'DEF', 'DEF_00000', 0.0, 0.0, 0.0, 0.0, 0.0); + aggregated_buildings, + obm_buildings, + remainder_buildings) +VALUES ('122010321033023130', 2, 
'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 15.7, 3, 12.7), +('122010321033023130', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 23.4, 0.0, 0.0), +('122010321033023120', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 39.1, 41, 0.0), +('122010321033023120', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 17.6, 0.0, 0.0), +('122010321033023132', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 34.4, 12, 0.0), +('122010321033023132', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 11.5, 0.0, 0.0), +('122010321033023121', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 26.2, 0, 26.2), +('122010321033023121', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0), +('122010321033023123', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 16.5, 0, 16.5), +('122010321033023123', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0), +('122010321033032123', 2, 'residential', 'ABC', 'ABC_10278', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0), +('122010321033032123', 2, 'commercial', 'ABC', 'ABC_10278', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0), +('122010321033032301', 2, 'commercial', 'ABC', 'ABC_10278', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0), +('122010321033211220', 2, 'residential', 'DEF', 'DEF_00000', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0), +('122010321033211220', 2, 'commercial', 'DEF', 'DEF_00000', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0), +('122010321033023120', 2, 'residential', 'GHI', 'GHI_22222', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + +CREATE TABLE data_units_buildings +( + building_class_name VARCHAR, + settlement_type settlement, + occupancy_subtype VARCHAR, + aggregated_source_id SMALLINT, + exposure_entity CHAR(3), + occupancy_case occupancycase, + data_unit_id VARCHAR, + proportions FLOAT, + census_people_per_building FLOAT, + total_cost_per_building FLOAT, + storeys_min SMALLINT, + storeys_max SMALLINT, + + PRIMARY KEY ( + data_unit_id, + occupancy_case, + aggregated_source_id, + 
building_class_name, + settlement_type, + occupancy_subtype + ) + +); +INSERT INTO data_units_buildings(building_class_name, + settlement_type, + occupancy_subtype, + aggregated_source_id, + exposure_entity, + occupancy_case, + data_unit_id, + proportions, + census_people_per_building, + total_cost_per_building, + storeys_min, + storeys_max) +VALUES ('A1/HBET:1-3', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.20, 0.0, 0.0, 1, 3), +('A2/HBET:4-6', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.30, 10.5, 2500000.0, 4, 6), +('A3/HBET:7-12', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.10, 0.0, 0.0, 7, 12), +('B1/HBET:1-3', 'rural', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.25, 0.0, 0.0, 1, 3), +('B2/H:4', 'rural', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.15, 0.0, 0.0, 4, 4), +('C1/HBET:1-2', 'urban', 'Hotels', 2, 'ABC', 'commercial', 'ABC_10269', 0.10, 0.0, 0.0, 1, 2), +('C2/HBET:3-', 'urban', 'Hotels', 2, 'ABC', 'commercial', 'ABC_10269', 0.25, 0.0, 0.0, 3, 9999), +('C3/H:1', 'urban', 'Trade', 2, 'ABC', 'commercial', 'ABC_10269', 0.05, 0.0, 0.0, 1, 1), +('C4/HBET:2-3', 'urban', 'Trade', 2, 'ABC', 'commercial', 'ABC_10269', 0.10, 0.0, 0.0, 2, 3), +('C5/HBET:1-2', 'urban', 'Offices', 2, 'ABC', 'commercial', 'ABC_10269', 0.20, 0.0, 0.0, 1, 2), +('C6/HBET:3-5', 'urban', 'Offices', 2, 'ABC', 'commercial', 'ABC_10269', 0.30, 0.0, 0.0, 3, 5); + +CREATE TABLE exposure_entities_costs_assumptions +( + aggregated_source_id SMALLINT, + exposure_entity CHAR(3), + occupancy_case OCCUPANCYCASE, + structural FLOAT, + non_structural FLOAT, + contents FLOAT, + currency VARCHAR, + + PRIMARY KEY (exposure_entity, occupancy_case, aggregated_source_id) + +); +INSERT INTO exposure_entities_costs_assumptions(aggregated_source_id, + exposure_entity, + occupancy_case, + structural, + non_structural, + contents) +VALUES (2, 'ABC', 'residential', 0.30, 0.50, 0.20), +(2, 'ABC', 'commercial', 0.20, 0.30, 0.50); + +CREATE TABLE 
exposure_entities_population_time_distribution +( + aggregated_source_id SMALLINT, + exposure_entity CHAR(3), + occupancy_case OCCUPANCYCASE, + day FLOAT, + night FLOAT, + transit FLOAT, + + PRIMARY KEY (exposure_entity, occupancy_case, aggregated_source_id) + +); +INSERT INTO exposure_entities_population_time_distribution(aggregated_source_id, + exposure_entity, + occupancy_case, + day, + night, + transit) +VALUES (2, 'ABC', 'residential', 0.2457, 0.9621, 0.6028), +(2, 'ABC', 'commercial', 0.2863, 0.9736, 0.5662); + +CREATE TABLE gde_buildings +( + osm_id integer, + aggregated_source_id smallint, + occupancy_case occupancycase, + data_unit_id varchar, + quadkey CHAR(18), + building_class_names varchar[], + settlement_types settlement[], + occupancy_subtypes varchar[], + probabilities float[], + geometry GEOMETRY, + + PRIMARY KEY (osm_id, aggregated_source_id) +); + +INSERT INTO gde_buildings(osm_id, + aggregated_source_id, + occupancy_case, + data_unit_id, + quadkey, + building_class_names, + settlement_types, + occupancy_subtypes, + probabilities, + geometry) +VALUES (-101010, 2, 'industrial', 'ABC_10269', '333333333333333333', + '{"CLASS/X/params/H:1", "CLASS/Y/params/H:2"}', + '{"rural", "rural"}', + '{"all", "all"}', + '{0.723, 0.277}', + ST_GeomFromText('POLYGON((15.0491 37.4811,15.0494 37.4814,15.0495 37.4813,15.0497 37.4812,15.0495 37.4811,15.0494 37.4812,15.0492 37.4810,15.0491 37.4811))') + ), +(-707070, 2, 'industrial', 'ABC_10269', '333333333333333333', + '{"CLASS/Z/params/H:1", "CLASS/V/params/H:2", "CLASS/W/params/HBET:1-3"}', + '{"rural", "rural", "urban"}', + '{"all", "all", "all"}', + '{0.5, 0.2, 0.3}', + ST_GeomFromText('POLYGON((0.0 0.0,0.002 0.0,0.002 0.003,0.0 0.003,0.0 0.0))') +); diff --git a/tests/test_configuration.py b/tests/test_configuration.py index 69e9ab2..ab8ca3d 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -43,6 +43,14 @@ def test_Configuration(): assert 
returned_config.geographic_selection["quadkeys_file"] is None assert returned_config.geographic_selection["data_unit_ids"] is None assert returned_config.geographic_selection["bounding_box"] is None + assert len(returned_config.cost_cases.keys()) == 1 + assert returned_config.cost_cases["structural"] == "total" + assert len(returned_config.people_cases.keys()) == 3 + assert returned_config.people_cases["day"] == "day" + assert returned_config.people_cases["night"] == "night" + assert returned_config.people_cases["transit"] == "transit" + assert returned_config.export_OBM_footprints is True + assert returned_config.number_cores == 1 # Test case in which the file is not found with pytest.raises(OSError) as excinfo: @@ -145,6 +153,26 @@ def test_Configuration(): ) assert "OSError" in str(excinfo.type) + # Test case: invalid cost case + returned_config = Configuration( + os.path.join( + os.path.dirname(__file__), "data", "config_for_testing_cost_case_invalid.yml" + ), + force_config_over_hierarchies=True, + ) + + assert len(returned_config.cost_cases.keys()) == 1 + + # Test case: invalid cost case + returned_config = Configuration( + os.path.join( + os.path.dirname(__file__), "data", "config_for_testing_good_people_case_invalid.yml" + ), + force_config_over_hierarchies=True, + ) + + assert len(returned_config.people_cases.keys()) == 2 + def test_Configuration_interpret_exposure_entities_to_run(test_db): returned_config = Configuration( diff --git a/tests/test_database_queries.py b/tests/test_database_queries.py index 423cde3..5f34827 100644 --- a/tests/test_database_queries.py +++ b/tests/test_database_queries.py @@ -17,9 +17,13 @@ # along with this program. If not, see http://www.gnu.org/licenses/. 
import os +import numpy +import shapely from gdeexporter.configuration import Configuration from gdeexporter.database_queries import DatabaseQueries +# pylint: disable=no-member + def test_retrieve_aggregated_source_id_and_format(test_db): # Database connection (the Configuration class will define the credentials based on whether @@ -136,3 +140,381 @@ def test_retrieve_quadkeys_by_data_unit_id_aggregated_source_id(test_db): ) assert len(returned_quadkeys) == 0 + + +def test_retrieve_data_unit_ids(test_db): + # Database connection (the Configuration class will define the credentials based on whether + # the code is running in the CI or locally) + config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml") + ) + + quadkeys = [ + "122010321033023130", + "122010321033023130", + "122010321033023120", + "122010321033023120", + ] + exposure_entities = [["ABC"], ["DEF"], ["ABC", "GHI"], ["GHI"]] + occupancies = ["residential", "residential", "residential", "residential"] + expected = [["ABC_10269"], [], ["ABC_10269", "GHI_22222"], ["GHI_22222"]] + + for i, quadkey in enumerate(quadkeys): + returned_data_unit_ids = DatabaseQueries.retrieve_data_unit_ids( + quadkey, + 2, + exposure_entities[i], + occupancies[i], + config.database_gde_tiles, + "data_unit_tiles", + ) + + assert len(returned_data_unit_ids) == len(expected[i]) + for data_unit_id in expected[i]: + assert data_unit_id in returned_data_unit_ids + + +def test_get_numbers_buildings_for_data_unit_tile(test_db): + # Database connection (the Configuration class will define the credentials based on whether + # the code is running in the CI or locally) + config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml") + ) + + returned_vals = DatabaseQueries.get_numbers_buildings_for_data_unit_tile( + "122010321033023130", + 2, + "residential", + "ABC_10269", + config.database_gde_tiles, + "data_unit_tiles", + ) + (number_aggregated, 
number_obm, number_remainder) = returned_vals + + assert round(number_aggregated, 1) == 15.7 + assert number_obm == 3 + assert round(number_remainder, 1) == 12.7 + + returned_vals = DatabaseQueries.get_numbers_buildings_for_data_unit_tile( + "999999999999999999", + 2, + "residential", + "ABC_10269", + config.database_gde_tiles, + "data_unit_tiles", + ) + (number_aggregated, number_obm, number_remainder) = returned_vals + + assert round(number_aggregated, 1) == -999.9 + assert number_obm == -999 + assert round(number_remainder, 1) == -999.9 + + +def test_get_building_classes_of_data_unit(test_db): + # Database connection (the Configuration class will define the credentials based on whether + # the code is running in the CI or locally) + config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml") + ) + + expected_columns = [ + "building_class_name", + "settlement_type", + "occupancy_subtype", + "census_people_per_building", + "total_cost_per_building", + "proportions", + ] + + # Residential buildings + returned_building_classes = DatabaseQueries.get_building_classes_of_data_unit( + "ABC_10269", "residential", 2, config.database_gde_tiles, "data_units_buildings" + ) + + expected_bdg_class_names = [ + "A1/HBET:1-3", + "A2/HBET:4-6", + "A3/HBET:7-12", + "B1/HBET:1-3", + "B2/H:4", + ] + + assert returned_building_classes.shape[0] == len(expected_bdg_class_names) + for col_name in expected_columns: + assert col_name in returned_building_classes.columns + assert round(returned_building_classes["proportions"].sum(), 5) == 1.0 + for name in expected_bdg_class_names: + assert name in returned_building_classes["building_class_name"].to_numpy() + assert ( + round( + returned_building_classes[ + returned_building_classes.building_class_name == "A2/HBET:4-6" + ]["census_people_per_building"].to_numpy()[0], + 1, + ) + == 10.5 + ) + assert ( + round( + returned_building_classes[ + returned_building_classes.building_class_name == 
"A2/HBET:4-6" + ]["total_cost_per_building"].to_numpy()[0], + 1, + ) + == 2500000.0 + ) + assert ( + round( + returned_building_classes[ + returned_building_classes.building_class_name == "B1/HBET:1-3" + ]["proportions"].to_numpy()[0], + 2, + ) + == 0.25 + ) + + # Industrial buildings (no entries to be found) + returned_building_classes = DatabaseQueries.get_building_classes_of_data_unit( + "ABC_10269", "industrial", 2, config.database_gde_tiles, "data_units_buildings" + ) + + assert returned_building_classes.shape[0] == 0 + for col_name in expected_columns: + assert col_name in returned_building_classes.columns + assert round(returned_building_classes["proportions"].sum(), 5) == 0.0 + + +def test_get_exposure_entities_costs_assumptions(test_db): + # Database connection (the Configuration class will define the credentials based on whether + # the code is running in the CI or locally) + config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml") + ) + + cost_cases = { + "structural": "structural", + "non-structural": "non_structural", + "contents": "contents", + "total": "total", + "random_name": "structural", + } + + # Test case in which values are retrieved + returned_cost_assumptions = DatabaseQueries.get_exposure_entities_costs_assumptions( + cost_cases, + "ABC", + "residential", + 2, + config.database_gde_tiles, + "exposure_entities_costs_assumptions", + ) + + expected_cost_assumptions = { + "structural": 0.30, + "non-structural": 0.50, + "contents": 0.20, + "total": 1.0, + "random_name": 0.30, + } + + for cost_case in expected_cost_assumptions.keys(): + assert cost_case in returned_cost_assumptions.keys() + assert round(expected_cost_assumptions[cost_case], 1) == round( + returned_cost_assumptions[cost_case], 1 + ) + + # Test case in which no matching entry is found in the database + returned_cost_assumptions = DatabaseQueries.get_exposure_entities_costs_assumptions( + cost_cases, + "DEF", + "residential", + 2, + 
config.database_gde_tiles, + "exposure_entities_costs_assumptions", + ) + + for cost_case in expected_cost_assumptions.keys(): + assert cost_case in returned_cost_assumptions.keys() + assert round(returned_cost_assumptions[cost_case], 1) == 0.0 + + +def test_get_exposure_entities_population_time_distribution(test_db): + # Database connection (the Configuration class will define the credentials based on whether + # the code is running in the CI or locally) + config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml") + ) + + people_cases = { + "day": "day", + "night": "night", + "transit": "transit", + "census": "census", + "average": "average", + "random_name": "night", + } + + # Test case in which values are retrieved + returned_people_distribution = ( + DatabaseQueries.get_exposure_entities_population_time_distribution( + people_cases, + "ABC", + "commercial", + 2, + config.database_gde_tiles, + "exposure_entities_population_time_distribution", + ) + ) + + expected_people_distribution = { + "day": 0.2863, + "night": 0.9736, + "transit": 0.5662, + "census": 1.0, + "average": 0.6087, # (0.2863 + 0.9736 + 0.5662) / 3.0 + "random_name": 0.9736, + } + + for cost_case in expected_people_distribution.keys(): + assert cost_case in returned_people_distribution.keys() + assert round(expected_people_distribution[cost_case], 1) == round( + returned_people_distribution[cost_case], 1 + ) + + # Test case in which no matching entry is found in the database + returned_people_distribution = ( + DatabaseQueries.get_exposure_entities_population_time_distribution( + people_cases, + "DEF", + "commercial", + 2, + config.database_gde_tiles, + "exposure_entities_population_time_distribution", + ) + ) + + for cost_case in expected_people_distribution.keys(): + assert cost_case in returned_people_distribution.keys() + assert round(returned_people_distribution[cost_case], 1) == 0.0 + + +def test_get_GDE_buildings(test_db): + # Database connection 
(the Configuration class will define the credentials based on whether + # the code is running in the CI or locally) + config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml") + ) + + # Test case in which the building footprints are requested as output + returned_obm_buildings, returned_obm_geometries = DatabaseQueries.get_GDE_buildings( + "333333333333333333", + "ABC_10269", + "industrial", + 2, + True, + config.database_gde_tiles, + "gde_buildings", + ) + + expected_obm_geometries = { + -101010: { + "centroid": "POINT (15.0494 37.4812)", + "footprint": ( + "POLYGON((15.0491 37.4811,15.0494 37.4814,15.0495 37.4813,15.0497 37.4812," + "15.0495 37.4811,15.0494 37.4812,15.0492 37.4810,15.0491 37.4811))" + ), + }, + -707070: { + "centroid": "POINT (0.001 0.0015)", + "footprint": "POLYGON((0.0 0.0,0.002 0.0,0.002 0.003,0.0 0.003,0.0 0.0))", + }, + } + + expected_obm_buildings = { + -101010: { + "rows": 2, + "building_class_name": ["CLASS/X/params/H:1", "CLASS/Y/params/H:2"], + "settlement_type": ["rural", "rural"], + "occupancy_subtype": ["all", "all"], + "probabilities": [0.723, 0.277], + }, + -707070: { + "rows": 3, + "building_class_name": [ + "CLASS/Z/params/H:1", + "CLASS/V/params/H:2", + "CLASS/W/params/HBET:1-3", + ], + "settlement_type": ["rural", "rural", "urban"], + "occupancy_subtype": ["all", "all", "all"], + "probabilities": [0.5, 0.2, 0.3], + }, + } + + assert returned_obm_buildings.shape[0] == 5 + + for osm_id in expected_obm_geometries.keys(): + assert osm_id in returned_obm_geometries.keys() + assert round( + shapely.wkt.loads(expected_obm_geometries[osm_id]["centroid"]).x, 4 + ) == round(shapely.wkt.loads(returned_obm_geometries[osm_id]["centroid"]).x, 4) + assert round( + shapely.wkt.loads(expected_obm_geometries[osm_id]["centroid"]).y, 4 + ) == round(shapely.wkt.loads(returned_obm_geometries[osm_id]["centroid"]).y, 4) + for i in range(4): + assert round( + 
shapely.wkt.loads(expected_obm_geometries[osm_id]["footprint"]).bounds[0], 4 + ) == round( + shapely.wkt.loads(returned_obm_geometries[osm_id]["footprint"]).bounds[0], 4 + ) + + which_in_frame = numpy.where(returned_obm_buildings["osm_id"].to_numpy() == osm_id)[0] + assert expected_obm_buildings[osm_id]["rows"] == len(which_in_frame) + + for row in which_in_frame: + assert ( + returned_obm_buildings["building_class_name"].to_numpy()[row] + in expected_obm_buildings[osm_id]["building_class_name"] + ) + which_within_expected = numpy.where( + numpy.array(expected_obm_buildings[osm_id]["building_class_name"]) + == returned_obm_buildings["building_class_name"].to_numpy()[row] + )[0][0] + assert ( + returned_obm_buildings["settlement_type"].to_numpy()[row] + == expected_obm_buildings[osm_id]["settlement_type"][which_within_expected] + ) + assert ( + returned_obm_buildings["occupancy_subtype"].to_numpy()[row] + == expected_obm_buildings[osm_id]["occupancy_subtype"][which_within_expected] + ) + assert round(returned_obm_buildings["probabilities"].to_numpy()[row], 3) == round( + expected_obm_buildings[osm_id]["probabilities"][which_within_expected], 3 + ) + + # Same as above, but the building footprints are NOT requested as output + returned_obm_buildings, returned_obm_geometries = DatabaseQueries.get_GDE_buildings( + "333333333333333333", + "ABC_10269", + "industrial", + 2, + False, + config.database_gde_tiles, + "gde_buildings", + ) + + assert len(returned_obm_geometries.keys()) == 0 + + # Test case in which no entries will be found + returned_obm_buildings, returned_obm_geometries = DatabaseQueries.get_GDE_buildings( + "222222222222222222", + "ABC_10269", + "industrial", + 2, + True, + config.database_gde_tiles, + "gde_buildings", + ) + + assert returned_obm_buildings.shape[0] == 0 + assert len(returned_obm_geometries.keys()) == 0 diff --git a/tests/test_tileexposure.py b/tests/test_tileexposure.py new file mode 100644 index 0000000..de4f9d6 --- /dev/null +++ 
b/tests/test_tileexposure.py @@ -0,0 +1,247 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2022: +# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero +# General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. + +import os +import pandas +from gdeexporter.tileexposure import TileExposure + + +def test_init(): + + cost_cases = { + "structural": "total", + "random_name_1": "structural", + } + + people_cases = { + "census": "census", + "random_name_2": "night", + } + + returned_tile = TileExposure("123456", cost_cases, people_cases) + + assert returned_tile.quadkey == "123456" + + expected_empty_dataframes = pandas.DataFrame( + { + "building_class_name": pandas.Series(dtype="str"), + "number": pandas.Series(dtype="float"), + "data_unit_id": pandas.Series(dtype="str"), + } + ) + for cost_case in cost_cases: + expected_empty_dataframes[cost_case] = pandas.Series(dtype="float") + for people_case in people_cases: + expected_empty_dataframes[people_case] = pandas.Series(dtype="float") + + expected_attributes = [ + "obm_buildings", + "remainder_buildings", + "aggregated_buildings", + "total_buildings", + ] + + for attribute in expected_attributes: + returned_attribute = getattr(returned_tile, attribute) + + assert returned_attribute.shape[0] == 0 + + for expected_column in expected_empty_dataframes: + assert expected_column in returned_attribute.columns + + if attribute 
== "obm_buildings": + assert "osm_id" in returned_attribute.columns + else: + assert "osm_id" not in returned_attribute.columns + + +def test_append_lumped_buildings(): + + cost_cases = { + "structural": "total", + "random_name_1": "structural", + } + + people_cases = { + "census": "census", + "random_name_2": "night", + } + + returned_tile = TileExposure("123456", cost_cases, people_cases) + + cost_assumptions = { + "structural": 1.0, + "random_name_1": 0.30, + } + + people_distribution = { + "census": 1.0, + "random_name_2": 0.9736, + } + + building_classes = pandas.DataFrame( + { + "building_class_name": [ + "A1/HBET:1-3", + "A2/HBET:4-6", + "A3/HBET:7-12", + "B1/HBET:1-3", + "B2/H:4", + ], + "settlement_type": ["urban", "urban", "urban", "rural", "rural"], + "occupancy_subtype": ["all", "all", "all", "all", "all"], + "proportions": [0.20, 0.30, 0.10, 0.25, 0.15], + "census_people_per_building": [8.9, 10.5, 58.3, 12.4, 26.7], + "total_cost_per_building": [1700000.0, 2500000.0, 18000000.0, 2100000.0, 2900000.0], + } + ) + + returned_tile.append_lumped_buildings( + "aggregated_buildings", + building_classes, + 125.3, + cost_assumptions, + people_distribution, + "ABC_10269", + ) + + returned_tile.append_lumped_buildings( + "aggregated_buildings", + building_classes, + 72.8, + cost_assumptions, + people_distribution, + "DEF_00000", + ) + + expected_results = pandas.read_csv( + os.path.join( + os.path.dirname(__file__), + "data", + "expected_results_append_lumped_buildings.csv", + ), + sep=",", + ) + + assert returned_tile.aggregated_buildings.shape[0] == expected_results.shape[0] + + for i in expected_results.index: + assert ( + returned_tile.aggregated_buildings.loc[i, "building_class_name"] + == expected_results.loc[i, "building_class_name"] + ) + assert ( + returned_tile.aggregated_buildings.loc[i, "data_unit_id"] + == expected_results.loc[i, "data_unit_id"] + ) + for column in expected_results.columns: + if column != "building_class_name" and column != 
"data_unit_id": + assert round(returned_tile.aggregated_buildings.loc[i, column], 4) == round( + expected_results.loc[i, column], 4 + ) + + +def test_append_OBM_buildings(): + + cost_cases = { + "structural": "total", + "random_name_1": "structural", + } + + people_cases = { + "census": "census", + "random_name_2": "night", + } + + returned_tile = TileExposure("123456", cost_cases, people_cases) + + cost_assumptions = { + "structural": 1.0, + "random_name_1": 0.30, + } + + people_distribution = { + "census": 1.0, + "random_name_2": 0.9736, + } + + data_unit_building_classes = pandas.DataFrame( + { + "building_class_name": [ + "A1/HBET:1-3", + "A2/HBET:4-6", + "A3/HBET:7-12", + "B1/HBET:1-3", + "B2/H:4", + ], + "settlement_type": ["urban", "urban", "urban", "rural", "rural"], + "occupancy_subtype": ["all", "all", "all", "all", "all"], + "proportions": [0.20, 0.30, 0.10, 0.25, 0.15], + "census_people_per_building": [8.9, 10.5, 58.3, 12.4, 26.7], + "total_cost_per_building": [1700000.0, 2500000.0, 18000000.0, 2100000.0, 2900000.0], + } + ) + + obm_buildings = pandas.DataFrame( + { + "osm_id": [123456789, 123456789, 987654321, 987654321], + "building_class_name": ["A1/HBET:1-3", "B1/HBET:1-3", "A2/HBET:4-6", "B2/H:4"], + "settlement_type": ["urban", "rural", "urban", "rural"], + "occupancy_subtype": ["all", "all", "all", "all"], + "probabilities": [4.0 / 9.0, 5.0 / 9.0, 2.0 / 3.0, 1.0 / 3.0], + } + ) + + returned_tile.append_OBM_buildings( + obm_buildings, + data_unit_building_classes, + cost_assumptions, + people_distribution, + "ABC_10269", + ) + + expected_results = pandas.read_csv( + os.path.join( + os.path.dirname(__file__), + "data", + "expected_results_append_OBM_buildings.csv", + ), + sep=",", + ) + + assert returned_tile.obm_buildings.shape[0] == expected_results.shape[0] + + for i in expected_results.index: + assert returned_tile.obm_buildings.loc[i, "osm_id"] == expected_results.loc[i, "osm_id"] + assert ( + returned_tile.obm_buildings.loc[i, 
"building_class_name"] + == expected_results.loc[i, "building_class_name"] + ) + assert ( + returned_tile.obm_buildings.loc[i, "data_unit_id"] + == expected_results.loc[i, "data_unit_id"] + ) + for column in expected_results.columns: + if ( + column != "building_class_name" + and column != "data_unit_id" + and column != "osm_id" + ): + assert round(returned_tile.obm_buildings.loc[i, column], 4) == round( + expected_results.loc[i, column], 4 + ) -- GitLab