diff --git a/gdecore/database_queries.py b/gdecore/database_queries.py
index 874f81db70399d18b23da195fc5c7a115e8b804c..d49e6399a2c27a9461113d4763e48fa27095f225 100644
--- a/gdecore/database_queries.py
+++ b/gdecore/database_queries.py
@@ -631,3 +631,157 @@ class DatabaseQueries:
         )
 
         return building_classes_proportions
+
+    @staticmethod
+    def get_data_unit_tiles_of_data_unit(
+        data_unit_id, occupancy_case, aggregated_source_id, db_gde_tiles_config, db_table
+    ):
+        """This function retrieves all the data-unit tiles associated with 'data_unit_id',
+        'occupancy_case' and 'aggregated_source_id' in 'db_table', in terms of their quadkeys
+        and number of buildings in the data-unit tile as per the aggregated exposure model.
+        If no matching data-unit tiles are found, both returned arrays are empty.
+
+        Args:
+            data_unit_id (str):
+                ID of the data unit for which the data-unit tiles will be retrieved.
+            occupancy_case (str):
+                Name of the occupancy case (e.g. "residential", "commercial", "industrial")
+                for which the data-unit tiles will be retrieved.
+            aggregated_source_id (int):
+                ID of the source of the aggregated exposure model for which the data-unit tiles
+                will be retrieved.
+            db_gde_tiles_config (dict):
+                Dictionary containing the credentials needed to connect to the SQL database in
+                which information on the data-unit tiles is stored. The keys of the dictionary
+                need to be:
+                    host (str):
+                        SQL database host address.
+                    dbname (str):
+                        Name of the SQL database.
+                    port (int):
+                        Port where the SQL database can be found.
+                    username (str):
+                        User name to connect to the SQL database.
+                    password (str):
+                        Password associated with 'username'.
+            db_table (str):
+                Name of the table of the SQL database where the data-unit tiles are stored. It
+                is assumed that this table contains, at least, the following fields:
+                    quadkey (str):
+                        String indicating the quadkey of a tile.
+                    aggregated_source_id (int):
+                        ID of the source of the aggregated exposure model.
+                    occupancy_case (enum):
+                        SQL enumerated type describing the building occupancy cases.
+                    data_unit_id (str):
+                        ID of the data unit.
+                    aggregated_buildings (float):
+                        Number of buildings in the data-unit tile as per the aggregated exposure
+                        model with ID 'aggregated_source_id'.
+
+        Returns:
+            quadkeys (array of str):
+                Strings indicating the quadkeys of all data-unit tiles associated with
+                'data_unit_id', 'occupancy_case' and 'aggregated_source_id'.
+            aggregated_buildings (array of float):
+                Number of buildings in the data-unit tiles as per the aggregated exposure
+                model with ID 'aggregated_source_id'.
+        """
+
+        # NOTE(review): the arguments are interpolated directly into the SQL string, so they
+        # must come from trusted sources (driver-side parameter binding would be safer).
+        sql_query = "SELECT quadkey, aggregated_buildings FROM %s"
+        sql_query += " WHERE (data_unit_id='%s' AND occupancy_case='%s'"
+        sql_query += " AND aggregated_source_id='%s');"
+
+        db_gde_tiles = Database(**db_gde_tiles_config)
+        db_gde_tiles.create_connection_and_cursor()
+
+        try:
+            db_gde_tiles.cursor.execute(
+                sql_query % (db_table, data_unit_id, occupancy_case, aggregated_source_id)
+            )
+            exec_result = db_gde_tiles.cursor.fetchall()
+        finally:
+            # Release the connection even if the query fails
+            db_gde_tiles.close_connection()
+
+        # An empty result set yields empty arrays; any non-empty one (including a single row)
+        # is converted column by column.
+        quadkeys = numpy.array([row[0] for row in exec_result], dtype="str")
+        aggregated_buildings = numpy.array([row[1] for row in exec_result], dtype="float")
+
+        return quadkeys, aggregated_buildings
+
+    @staticmethod
+    def get_data_unit_tiles_of_data_unit_as_DataFrame(
+        data_unit_id, occupancy_case, aggregated_source_id, db_gde_tiles_config, db_table
+    ):
+        """This function retrieves all the data-unit tiles associated with 'data_unit_id',
+        'occupancy_case' and 'aggregated_source_id' in 'db_table', in terms of their quadkeys
+        and number of buildings in the data-unit tile as per the aggregated exposure model. The
+        output is gathered together as a Pandas DataFrame.
+        If no matching data-unit tiles are found, the DataFrame has both columns but no rows.
+
+        Args:
+            data_unit_id (str):
+                ID of the data unit for which the data-unit tiles will be retrieved.
+            occupancy_case (str):
+                Name of the occupancy case (e.g. "residential", "commercial", "industrial")
+                for which the data-unit tiles will be retrieved.
+            aggregated_source_id (int):
+                ID of the source of the aggregated exposure model for which the data-unit tiles
+                will be retrieved.
+            db_gde_tiles_config (dict):
+                Dictionary containing the credentials needed to connect to the SQL database in
+                which information on the data-unit tiles is stored. The keys of the dictionary
+                need to be:
+                    host (str):
+                        SQL database host address.
+                    dbname (str):
+                        Name of the SQL database.
+                    port (int):
+                        Port where the SQL database can be found.
+                    username (str):
+                        User name to connect to the SQL database.
+                    password (str):
+                        Password associated with 'username'.
+            db_table (str):
+                Name of the table of the SQL database where the data-unit tiles are stored. It
+                is assumed that this table contains, at least, the following fields:
+                    quadkey (str):
+                        String indicating the quadkey of a tile.
+                    aggregated_source_id (int):
+                        ID of the source of the aggregated exposure model.
+                    occupancy_case (enum):
+                        SQL enumerated type describing the building occupancy cases.
+                    data_unit_id (str):
+                        ID of the data unit.
+                    aggregated_buildings (float):
+                        Number of buildings in the data-unit tile as per the aggregated exposure
+                        model with ID 'aggregated_source_id'.
+
+        Returns:
+            data_unit_tiles (Pandas DataFrame):
+                Pandas DataFrame with all data-unit tiles associated with 'data_unit_id',
+                'occupancy_case' and 'aggregated_source_id' in 'db_table'. It contains the
+                following columns:
+                    quadkey (str):
+                        String indicating the quadkey of a tile.
+                    aggregated_buildings (float):
+                        Number of buildings in the data-unit tile as per the aggregated exposure
+                        model with ID 'aggregated_source_id'.
+        """
+
+        quadkeys, aggregated_buildings = DatabaseQueries.get_data_unit_tiles_of_data_unit(
+            data_unit_id, occupancy_case, aggregated_source_id, db_gde_tiles_config, db_table
+        )
+
+        data_unit_tiles = pandas.DataFrame(
+            {
+                "quadkey": quadkeys,
+                "aggregated_buildings": aggregated_buildings,
+            }
+        )
+
+        return data_unit_tiles
diff --git a/gdecore/gdecore.py b/gdecore/gdecore.py
index 579207ffdce2204da0aeecbe61c5d714106e7442..b8a4a6f5fac5ad55b591dd2c8a1101bc8ea120b6 100644
--- a/gdecore/gdecore.py
+++ b/gdecore/gdecore.py
@@ -159,6 +159,19 @@ def main():
                     % (aux_log_string, str(len(obm_buildings_building_classes.keys())))
                 )
 
+                # Retrieve data-unit tiles (quadkey, aggregated_buildings) as a Pandas DataFrame
+                data_unit_tiles = DatabaseQueries.get_data_unit_tiles_of_data_unit_as_DataFrame(
+                    data_unit_id,
+                    occupancy_case,
+                    aggregated_source_id,
+                    config.database_gde_tiles,
+                    "data_unit_tiles",
+                )
+                logger.info(
+                    "%s: %s data-unit tiles retrieved"
+                    % (aux_log_string, str(data_unit_tiles.shape[0]))
+                )
+
     # Leave the program
     logger.info("gde-core has finished")
     sys.exit()
diff --git a/tests/data/test_database_set_up.sql b/tests/data/test_database_set_up.sql
index ac93a2f8c77ca70295dcc91e0d9db0a4a46ce42e..be3a9dadb93166177897cf2dff68d0afcd641f65 100644
--- a/tests/data/test_database_set_up.sql
+++ b/tests/data/test_database_set_up.sql
@@ -2,6 +2,7 @@ DROP TABLE IF EXISTS aggregated_sources;
 DROP TABLE IF EXISTS data_units;
 DROP TABLE IF EXISTS obm_buildings;
 DROP TABLE IF EXISTS data_units_buildings;
+DROP TABLE IF EXISTS data_unit_tiles;
 DROP TYPE IF EXISTS occupancycase;
 DROP TYPE IF EXISTS settlement;
 DROP EXTENSION IF EXISTS postgis;
@@ -164,3 +165,36 @@ VALUES ('A1/HBET:1-3', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.2
 ('C4/HBET:2-3', 'urban', 'Trade', 2, 'ABC', 'commercial', 'ABC_10269', 0.10, 0.0, 0.0, 2, 3),
 ('C5/HBET:1-2', 'urban', 'Offices', 2, 'ABC', 'commercial', 'ABC_10269', 0.20, 0.0, 0.0, 1, 2),
 ('C6/HBET:3-5', 'urban', 'Offices', 2, 'ABC', 'commercial', 'ABC_10269', 0.30, 0.0, 0.0, 3, 5);
+
+CREATE TABLE data_unit_tiles
+(
+    quadkey char(18),
+    aggregated_source_id SMALLINT,
+    occupancy_case occupancycase,
+    exposure_entity char(3),
+    data_unit_id varchar,
+    size_data_unit_tile_area FLOAT,
+    size_data_unit_tile_built_up_area FLOAT,
+    fraction_data_unit_area FLOAT,
+    fraction_data_unit_built_up_area FLOAT,
+    aggregated_buildings FLOAT,
+
+    PRIMARY KEY (quadkey, aggregated_source_id, occupancy_case, data_unit_id)
+);
+INSERT INTO data_unit_tiles(quadkey,
+                            aggregated_source_id,
+                            occupancy_case,
+                            exposure_entity,
+                            data_unit_id,
+                            size_data_unit_tile_area,
+                            size_data_unit_tile_built_up_area,
+                            fraction_data_unit_area,
+                            fraction_data_unit_built_up_area,
+                            aggregated_buildings)
+VALUES ('122010321033023130', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 15.7),
+('122010321033023130', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 23.4),
+('122010321033023120', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 39.1),
+('122010321033023120', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 17.6),
+('122010321033023132', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 34.4),
+('122010321033023132', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 11.5),
+('122010321033023132', 2, 'industrial', 'ABC', 'ABC_10270', 0.0, 0.0, 0.0, 0.0, 8.2);
diff --git a/tests/test_database_queries.py b/tests/test_database_queries.py
index 4daa33a5e5a1a5913b8c22da959969e42eeae31a..e53406497f5e35e4fdaf38fc8c93128c3ed3b7ec 100644
--- a/tests/test_database_queries.py
+++ b/tests/test_database_queries.py
@@ -339,3 +339,53 @@ def test_get_building_classes_of_data_unit(test_db):
     for col_name in expected_columns:
         assert col_name in returned_building_classes.columns
     assert round(returned_building_classes["proportions"].sum(), 5) == 0.0
+
+
+def test_get_data_unit_tiles_of_data_unit_as_DataFrame(test_db):
+    # Database connection (the Configuration class will define the credentials based on whether
+    # the code is running in the CI or locally)
+    config = Configuration(
+        os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml")
+    )
+
+    expected_quadkeys = ["122010321033023130", "122010321033023120", "122010321033023132"]
+
+    expected_aggregated_buildings = {}
+    expected_aggregated_buildings["residential"] = [15.7, 39.1, 34.4]
+    expected_aggregated_buildings["commercial"] = [23.4, 17.6, 11.5]
+
+    for occupancy in expected_aggregated_buildings:
+        returned_data_unit_tiles = (
+            DatabaseQueries.get_data_unit_tiles_of_data_unit_as_DataFrame(
+                "ABC_10269", occupancy, 2, config.database_gde_tiles, "data_unit_tiles"
+            )
+        )
+
+        assert returned_data_unit_tiles.shape[0] == len(expected_quadkeys)
+
+        for i, quadkey in enumerate(expected_quadkeys):
+            assert (
+                returned_data_unit_tiles[returned_data_unit_tiles.quadkey == quadkey][
+                    "aggregated_buildings"
+                ].to_numpy()[0]
+                == expected_aggregated_buildings[occupancy][i]
+            )
+
+    # Test case in which there are no entries to retrieve
+    returned_data_unit_tiles = DatabaseQueries.get_data_unit_tiles_of_data_unit_as_DataFrame(
+        "ABC_10269", "industrial", 2, config.database_gde_tiles, "data_unit_tiles"
+    )
+
+    assert returned_data_unit_tiles.shape[0] == 0
+    assert "quadkey" in returned_data_unit_tiles.columns
+    assert "aggregated_buildings" in returned_data_unit_tiles.columns
+
+    # Test case in which there is exactly one entry to retrieve (a single row must not be
+    # mistaken for an empty result)
+    returned_data_unit_tiles = DatabaseQueries.get_data_unit_tiles_of_data_unit_as_DataFrame(
+        "ABC_10270", "industrial", 2, config.database_gde_tiles, "data_unit_tiles"
+    )
+
+    assert returned_data_unit_tiles.shape[0] == 1
+    assert returned_data_unit_tiles["quadkey"].to_numpy()[0] == "122010321033023132"
+    assert returned_data_unit_tiles["aggregated_buildings"].to_numpy()[0] == 8.2