diff --git a/gdecore/configuration.py b/gdecore/configuration.py index cd678c31770af47277f3e35bb362afebe976fc2a..a75ae7589f7dde81a6646595829f751234661cd5 100644 --- a/gdecore/configuration.py +++ b/gdecore/configuration.py @@ -58,6 +58,9 @@ class Configuration: "Exposure Entity 1": "EE1", "Exposure Entity 2": "XXX" } + self.occupancies_to_run (list of str): + List of occupancy cases of the input aggregated exposure model for which the code + will be run. """ REQUIRES = [ diff --git a/gdecore/database_queries.py b/gdecore/database_queries.py index 15099f724b580321bfc8749937373f3baf04ffd6..ff7a27bbbb9c9c0493a8f9a8fe2491dd1ee34a2c 100644 --- a/gdecore/database_queries.py +++ b/gdecore/database_queries.py @@ -18,6 +18,7 @@ import logging import numpy +import shapely from gdeimporter.tools.database import Database @@ -144,3 +145,142 @@ class DatabaseQueries: exposure_entities = list(numpy.unique(numpy.array(exposure_entities_all))) return exposure_entities + + @staticmethod + def get_data_unit_ids_geometries_of_entity_and_occupancy_case( + exposure_entity, occupancy_case, aggregated_source_id, db_gde_tiles_config, db_table + ): + """This function retrieves the IDs and geometries of all data units associated with + 'exposure_entity', 'occupancy_case' and 'aggregated_source_id' in 'db_table' of the + database whose credentials are given in 'db_gde_tiles_config'. IDs of data units for + which no geometry is defined in 'db_table' are returned separately. + + Args: + exposure_entity (str): + 3-character code of the exposure entity for which the data unit IDs and + geometries will be retrieved. + occupancy_case (str): + Name of the occupancy case (e.g. "residential", "commercial", "industrial") + for which the data unit IDs and geometries will be retrieved. + aggregated_source_id (int): + ID of the source of the aggregated exposure model for which the data unit IDs + and geometries will be retrieved. + db_gde_tiles_config (dict): + Dictionary containing the credentials needed to connect to the SQL database in + which information on the data units is stored. The keys of the dictionary need + to be: + host (str): + SQL database host address. + dbname (str): + Name of the SQL database. + port (int): + Port where the SQL database can be found. + username (str): + User name to connect to the SQL database. + password (str): + Password associated with self.username. + db_table (str): + Name of the table of the SQL database where the data units are stored. It is + assumed that this table contains, at least, the following fields: + data_unit_id (str): + ID of the data unit. + occupancy_case (enum): + SQL enumerated type describing the building occupancy cases. + aggregated_source_id (int): + ID of the source of the aggregated exposure model. + exposure_entity (str): + 3-character code of the exposure entity. + geometry (PSQL geometry): + Geometry of the data unit. + + Returns: + data_units_ids (array of str): + IDs of all data units associated with 'exposure_entity', 'occupancy_case' and + 'aggregated_source_id' in 'db_table', for which a corresponding geometry was + retrieved. + data_units_geometries (array of Shapely Polygons/MultiPolygons): + Geometries associated with the data units whose IDs are 'data_units_ids'. + data_units_ids_no_geometry (array of str): + IDs of data units associated with 'exposure_entity', 'occupancy_case' and + 'aggregated_source_id' in 'db_table', for which no geometry exists in + 'db_table'. + """ + + sql_query = "SELECT data_unit_id, geometry FROM %s " + sql_query += "WHERE (exposure_entity='%s' AND occupancy_case='%s' AND " + sql_query += "aggregated_source_id=%s);" + + db_gde_tiles = Database(**db_gde_tiles_config) + db_gde_tiles.create_connection_and_cursor() + + db_gde_tiles.cursor.execute( + sql_query % (db_table, exposure_entity, occupancy_case, aggregated_source_id) + ) + exec_result = db_gde_tiles.cursor.fetchall() + + db_gde_tiles.close_connection() + + if len(exec_result) > 0: # Entries exist --> retrieve + data_units_ids_raw = numpy.array( + [exec_result[i][0] for i in range(len(exec_result))], dtype="str" + ) + data_units_geometries_raw = numpy.array( + [exec_result[i][1] for i in range(len(exec_result))], + dtype="object", + ) # geometries in WKB format, might include Nones + ( + data_units_ids, + data_units_geometries, + data_units_ids_no_geometry, + ) = DatabaseQueries._post_process_data_units_and_geometries( + data_units_ids_raw, data_units_geometries_raw + ) + else: # No entries found + data_units_ids = numpy.array([], dtype="str") + data_units_geometries = numpy.array([], dtype="object") + data_units_ids_no_geometry = numpy.array([], dtype="str") + + return data_units_ids, data_units_geometries, data_units_ids_no_geometry + + @staticmethod + def _post_process_data_units_and_geometries(ids, geometries): + """This function separates 'ids' and 'geometries' according to whether the geometries + are defined or not (in which case they are "None"). Elements in 'ids' and 'geometries' + are assumed to correspond to one another. + + Args: + ids (array of str): + IDs of elements whose geometry is contained in 'geometries'. + geometries (array of Well-Known Binary (WKB) geometries): + Geometries of the elements with IDs contained in 'ids', in Well-Known Binary + (WKB) format. + + Returns: + ids_processed (array of str): + Elements of 'ids' for which their corresponding element in 'geometries' is not + "None". + geometries_processed (array of Shapely Polygons/MultiPolygons): + Elements of 'geometries' that are not "None" (in correspondence with + 'ids_processed'). + ids_no_geometry (array of str): + Elements of 'ids' for which their corresponding element in 'geometries' is + "None". + """ + + which_not_none = numpy.where(geometries != None)[0] # noqa: E711 + which_none = numpy.where(geometries == None)[0] # noqa: E711 + + ids_processed = ids[which_not_none] + if len(which_not_none) > 0: + geometries_processed = numpy.array( + [ + shapely.wkb.loads(geometries[which_not_none][i], hex=True) + for i in range(len(which_not_none)) + ], + dtype="object", + ) + else: + geometries_processed = numpy.array([], dtype="object") + ids_no_geometry = ids[which_none] + + return ids_processed, geometries_processed, ids_no_geometry diff --git a/gdecore/gdecore.py b/gdecore/gdecore.py index c7cf04799485fdd5b3ee2ec390e1021e859af02c..d305b9d5a7693edb84234ecbebfe0232caaf2cbc 100644 --- a/gdecore/gdecore.py +++ b/gdecore/gdecore.py @@ -68,12 +68,37 @@ def main(): ) # Run by exposure entity and occupancy case - for exposure_entity_name in config.exposure_entities_to_run: + for exposure_entity_code in config.exposure_entities_to_run: for occupancy_case in config.occupancies_to_run: + # Retrieve data unit IDs and geometries + ( + data_units_ids, + data_units_geometries, + data_units_ids_no_geometry, + ) = DatabaseQueries.get_data_unit_ids_geometries_of_entity_and_occupancy_case( + exposure_entity_code, + occupancy_case, + aggregated_source_id, + config.database_gde_tiles, + "data_units", + ) logger.info( - "Running exposure entity '%s', occupancy case '%s'." - % (exposure_entity_name, occupancy_case) + "Running exposure entity '%s', occupancy case '%s': %s data units retrieved." + % (exposure_entity_code, occupancy_case, str(len(data_units_ids))) ) + if len(data_units_ids_no_geometry) > 0: + concatenated_ids = ", ".join(list(data_units_ids_no_geometry)) + logger.warning( + "%s data units associated with exposure entity '%s' and occupancy case '%s'" + " found for which the 'data_units' table contained no geometry. These data" + " units will not be run: %s" + % ( + str(len(data_units_ids_no_geometry)), + exposure_entity_code, + occupancy_case, + concatenated_ids, + ) + ) # Leave the program logger.info("gde-core has finished") diff --git a/tests/data/test_database_set_up.sql b/tests/data/test_database_set_up.sql index fe1e305faec91b93b219361888e5ccc0212ab932..6f95816400ee3d7cea34f8a95bbfb1db067a0e4c 100644 --- a/tests/data/test_database_set_up.sql +++ b/tests/data/test_database_set_up.sql @@ -33,6 +33,26 @@ CREATE TABLE data_units PRIMARY KEY (data_unit_id, occupancy_case, aggregated_source_id) ); +INSERT INTO data_units(data_unit_id, + occupancy_case, + aggregated_source_id, + exposure_entity, + buildings_total, + dwellings_total, + people_census, + cost_total, + geometry) +VALUES ( + 'ABC_10269', 'residential', 2, 'ABC', 0.0, 0.0, 0.0, 0.0, + ST_GeomFromText( + 'POLYGON((15.04625 37.48424,15.05455 37.48424,15.05455 37.475893,15.04625 37.475893,15.04625 37.48424))')), +( + 'ABC_10278', 'residential', 2, 'ABC', 0.0, 0.0, 0.0, 0.0, + ST_GeomFromText('POLYGON((15.05455 37.48424,15.0629 37.48424,15.0629 37.475893,15.05455 37.475893,15.05455 37.48424))')), +( + 'ABC_10277', 'residential', 2, 'ABC', 0.0, 0.0, 0.0, 0.0, + ST_GeomFromText('POLYGON((15.05455 37.475893,15.0629 37.475893,15.0629 37.4675485,15.05455 37.4675485,15.05455 37.475893))')); + INSERT INTO data_units(data_unit_id, occupancy_case, aggregated_source_id, @@ -41,7 +61,4 @@ INSERT INTO data_units(data_unit_id, dwellings_total, people_census, cost_total) -VALUES ('ABC_123456', 'residential', 2, 'ABC', 0.0, 0.0, 0.0, 0.0), -('ABC_123456', 'commercial', 2, 'ABC', 0.0, 0.0, 0.0, 0.0), -('ABC_456789', 'residential', 2, 'ABC', 0.0, 0.0, 0.0, 0.0), -('DEF_456789', 'residential', 2, 'DEF', 0.0, 0.0, 0.0, 0.0); +VALUES ('DEF_00000', 'residential', 2, 'DEF', 0.0, 0.0, 0.0, 0.0); diff --git a/tests/test_database_queries.py b/tests/test_database_queries.py index ad10c46783ff606dd44c58fa39e18d6d4a942ece..83ad1d1b5fb19bbf36413cc1b6f39a019a02ec18 100644 --- a/tests/test_database_queries.py +++ b/tests/test_database_queries.py @@ -17,6 +17,7 @@ # along with this program. If not, see http://www.gnu.org/licenses/. import os +import numpy from gdecore.configuration import Configuration from gdecore.database_queries import DatabaseQueries @@ -70,3 +71,69 @@ def test_retrieve_all_exposure_entities_of_aggregated_source_id(test_db): ) assert len(returned_exposure_entities) == 0 + + +def test_get_data_unit_ids_geometries_of_entity_and_occupancy_case(test_db): + # Database connection (the Configuration class will define the credentials based on whether + # the code is running in the CI or locally) + config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml") + ) + + # data units will be retrieved, all with associated geometries + ( + returned_data_units_ids, + returned_data_units_geometries, + returned_data_units_ids_no_geometry, + ) = DatabaseQueries.get_data_unit_ids_geometries_of_entity_and_occupancy_case( + "ABC", "residential", 2, config.database_gde_tiles, "data_units" + ) + + expected_data_units_ids = ["ABC_10269", "ABC_10278", "ABC_10277"] + expected_bounds_west = [15.04625, 15.05455, 15.05455] + expected_bounds_east = [15.05455, 15.0629, 15.0629] + expected_bounds_south = [37.475893, 37.475893, 37.4675485] + expected_bounds_north = [37.48424, 37.48424, 37.475893] + + assert len(returned_data_units_ids) == 3 + assert len(returned_data_units_geometries) == 3 + assert len(returned_data_units_ids_no_geometry) == 0 + + for i, data_unit_id in enumerate(expected_data_units_ids): + assert data_unit_id in returned_data_units_ids + + which = numpy.where(returned_data_units_ids == data_unit_id)[0][0] + + assert returned_data_units_geometries[which].bounds[0] == expected_bounds_west[i] + assert returned_data_units_geometries[which].bounds[1] == expected_bounds_south[i] + assert returned_data_units_geometries[which].bounds[2] == expected_bounds_east[i] + assert returned_data_units_geometries[which].bounds[3] == expected_bounds_north[i] + + # data units will not be found + ( + returned_data_units_ids, + returned_data_units_geometries, + returned_data_units_ids_no_geometry, + ) = DatabaseQueries.get_data_unit_ids_geometries_of_entity_and_occupancy_case( + "ABC", "commercial", 2, config.database_gde_tiles, "data_units" + ) + + assert len(returned_data_units_ids) == 0 + assert len(returned_data_units_geometries) == 0 + assert len(returned_data_units_ids_no_geometry) == 0 + + # data units will be retrieved, without associated geometries + ( + returned_data_units_ids, + returned_data_units_geometries, + returned_data_units_ids_no_geometry, + ) = DatabaseQueries.get_data_unit_ids_geometries_of_entity_and_occupancy_case( + "DEF", "residential", 2, config.database_gde_tiles, "data_units" + ) + + expected_data_unit_id_no_geometry = "DEF_00000" + + assert len(returned_data_units_ids) == 0 + assert len(returned_data_units_geometries) == 0 + assert len(returned_data_units_ids_no_geometry) == 1 + assert returned_data_units_ids_no_geometry[0] == expected_data_unit_id_no_geometry