Commit a7b5263d authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added feature to retrieve IDs and geometries of data units

parent e438ec23
Pipeline #40666 passed with stage
in 1 minute and 58 seconds
......@@ -58,6 +58,9 @@ class Configuration:
"Exposure Entity 1": "EE1",
"Exposure Entity 2": "XXX"
}
self.occupancies_to_run (list of str):
List of occupancy cases of the input aggregated exposure model for which the code
will be run.
"""
REQUIRES = [
......
......@@ -18,6 +18,7 @@
import logging
import numpy
import shapely
from gdeimporter.tools.database import Database
......@@ -144,3 +145,142 @@ class DatabaseQueries:
exposure_entities = list(numpy.unique(numpy.array(exposure_entities_all)))
return exposure_entities
@staticmethod
def get_data_unit_ids_geometries_of_entity_and_occupancy_case(
exposure_entity, occupancy_case, aggregated_source_id, db_gde_tiles_config, db_table
):
"""This function retrieves the IDs and geometries of all data units associated with
'exposure_entity', 'occupancy_case' and 'aggregated_source_id' in 'db_table' of the
database whose credentials are given in 'db_gde_tiles_config'. IDs of data units for
which no geometry is defined in 'db_table' are returned separately.
Args:
exposure_entity (str):
3-character code of the exposure entity for which the data unit IDs and
geometries will be retrieved.
occupancy_case (str):
Name of the occupancy case (e.g. "residential", "commercial", "industrial")
for which the data unit IDs and geometries will be retrieved.
aggregated_source_id (int):
ID of the source of the aggregated exposure model for which the data unit IDs
and geometries will be retrieved.
db_gde_tiles_config (dict):
Dictionary containing the credentials needed to connect to the SQL database in
which information on the data units is stored. The keys of the dictionary need
to be:
host (str):
SQL database host address.
dbname (str):
Name of the SQL database.
port (int):
Port where the SQL database can be found.
username (str):
User name to connect to the SQL database.
password (str):
Password associated with self.username.
db_table (str):
Name of the table of the SQL database where the data units are stored. It is
assumed that this table contains, at least, the following fields:
data_unit_id (str):
ID of the data unit.
occupancy_case (enum):
SQL enumerated type describing the building occupancy cases.
aggregated_source_id (int):
ID of the source of the aggregated exposure model.
exposure_entity (str):
3-character code of the exposure entity.
geometry (PSQL geometry):
Geometry of the data unit.
Returns:
data_units_ids (array of str):
IDs of all data units associated with 'exposure_entity', 'occupancy_case' and
'aggregated_source_id' in 'db_table', for which a corresponding geometry was
retrieved.
data_units_geometries (array of Shapely Polygons/MultiPolygons):
Geometries associated with the data units whose IDs are 'data_units_ids'.
data_units_ids_no_geometry (array of str):
IDs of data units associated with 'exposure_entity', 'occupancy_case' and
'aggregated_source_id' in 'db_table', for which no geometry exists in
'db_table'.
"""
sql_query = "SELECT data_unit_id, geometry FROM %s "
sql_query += "WHERE (exposure_entity='%s' AND occupancy_case='%s' AND "
sql_query += "aggregated_source_id=%s);"
db_gde_tiles = Database(**db_gde_tiles_config)
db_gde_tiles.create_connection_and_cursor()
db_gde_tiles.cursor.execute(
sql_query % (db_table, exposure_entity, occupancy_case, aggregated_source_id)
)
exec_result = db_gde_tiles.cursor.fetchall()
db_gde_tiles.close_connection()
if len(exec_result) > 0: # Entries exist --> retrieve
data_units_ids_raw = numpy.array(
[exec_result[i][0] for i in range(len(exec_result))], dtype="str"
)
data_units_geometries_raw = numpy.array(
[exec_result[i][1] for i in range(len(exec_result))],
dtype="object",
) # geometries in WKB format, might include Nones
(
data_units_ids,
data_units_geometries,
data_units_ids_no_geometry,
) = DatabaseQueries._post_process_data_units_and_geometries(
data_units_ids_raw, data_units_geometries_raw
)
else: # No entries found
data_units_ids = numpy.array([], dtype="str")
data_units_geometries = numpy.array([], dtype="object")
data_units_ids_no_geometry = numpy.array([], dtype="str")
return data_units_ids, data_units_geometries, data_units_ids_no_geometry
@staticmethod
def _post_process_data_units_and_geometries(ids, geometries):
"""This function separates 'ids' and 'geometries' according to whether the geometries
are defined or not (in which case they are "None"). Elements in 'ids' and 'geometries'
are assumed to correspond to one another.
Args:
ids (array of str):
IDs of elements whose geometry is contained in 'geometries'.
geometries (array of Well-Known Binary (WKB) geometries):
Geometries of the elements with IDs contained in 'ids', in Well-Known Binary
(WKB) format.
Returns:
ids_processed (array of str):
Elements of 'ids' for which their corresponding element in 'geometries' is not
"None".
geometries_processed (array of Shapely Polygons/MultiPolygons):
Elements of 'geometries' that are not "None" (in correspondence with
'ids_processed').
ids_no_geometry (array of str):
Elements of 'ids' for which their corresponding element in 'geometries' is
"None".
"""
which_not_none = numpy.where(geometries != None)[0] # noqa: E711
which_none = numpy.where(geometries == None)[0] # noqa: E711
ids_processed = ids[which_not_none]
if len(which_not_none) > 0:
geometries_processed = numpy.array(
[
shapely.wkb.loads(geometries[which_not_none][i], hex=True)
for i in range(len(which_not_none))
],
dtype="object",
)
else:
geometries_processed = numpy.array([], dtype="object")
ids_no_geometry = ids[which_none]
return ids_processed, geometries_processed, ids_no_geometry
......@@ -68,12 +68,37 @@ def main():
)
# Run by exposure entity and occupancy case
for exposure_entity_name in config.exposure_entities_to_run:
for exposure_entity_code in config.exposure_entities_to_run:
for occupancy_case in config.occupancies_to_run:
# Retrieve data unit IDs and geometries
(
data_units_ids,
data_units_geometries,
data_units_ids_no_geometry,
) = DatabaseQueries.get_data_unit_ids_geometries_of_entity_and_occupancy_case(
exposure_entity_code,
occupancy_case,
aggregated_source_id,
config.database_gde_tiles,
"data_units",
)
logger.info(
"Running exposure entity '%s', occupancy case '%s'."
% (exposure_entity_name, occupancy_case)
"Running exposure entity '%s', occupancy case '%s': %s data units retrieved."
% (exposure_entity_code, occupancy_case, str(len(data_units_ids)))
)
if len(data_units_ids_no_geometry) > 0:
concatenated_ids = ", ".join(list(data_units_ids_no_geometry))
logger.warning(
"%s data units associated with exposure entity '%s' and occupancy case '%s'"
" found for which the 'data_units' table contained no geometry. These data"
" units will not be run: %s"
% (
str(len(data_units_ids_no_geometry)),
exposure_entity_code,
occupancy_case,
concatenated_ids,
)
)
# Leave the program
logger.info("gde-core has finished")
......
......@@ -33,6 +33,26 @@ CREATE TABLE data_units
PRIMARY KEY (data_unit_id, occupancy_case, aggregated_source_id)
);
INSERT INTO data_units(data_unit_id,
occupancy_case,
aggregated_source_id,
exposure_entity,
buildings_total,
dwellings_total,
people_census,
cost_total,
geometry)
VALUES (
'ABC_10269', 'residential', 2, 'ABC', 0.0, 0.0, 0.0, 0.0,
ST_GeomFromText(
'POLYGON((15.04625 37.48424,15.05455 37.48424,15.05455 37.475893,15.04625 37.475893,15.04625 37.48424))')),
(
'ABC_10278', 'residential', 2, 'ABC', 0.0, 0.0, 0.0, 0.0,
ST_GeomFromText('POLYGON((15.05455 37.48424,15.0629 37.48424,15.0629 37.475893,15.05455 37.475893,15.05455 37.48424))')),
(
'ABC_10277', 'residential', 2, 'ABC', 0.0, 0.0, 0.0, 0.0,
ST_GeomFromText('POLYGON((15.05455 37.475893,15.0629 37.475893,15.0629 37.4675485,15.05455 37.4675485,15.05455 37.475893))'));
INSERT INTO data_units(data_unit_id,
occupancy_case,
aggregated_source_id,
......@@ -41,7 +61,4 @@ INSERT INTO data_units(data_unit_id,
dwellings_total,
people_census,
cost_total)
VALUES ('ABC_123456', 'residential', 2, 'ABC', 0.0, 0.0, 0.0, 0.0),
('ABC_123456', 'commercial', 2, 'ABC', 0.0, 0.0, 0.0, 0.0),
('ABC_456789', 'residential', 2, 'ABC', 0.0, 0.0, 0.0, 0.0),
('DEF_456789', 'residential', 2, 'DEF', 0.0, 0.0, 0.0, 0.0);
VALUES ('DEF_00000', 'residential', 2, 'DEF', 0.0, 0.0, 0.0, 0.0);
......@@ -17,6 +17,7 @@
# along with this program. If not, see http://www.gnu.org/licenses/.
import os
import numpy
from gdecore.configuration import Configuration
from gdecore.database_queries import DatabaseQueries
......@@ -70,3 +71,69 @@ def test_retrieve_all_exposure_entities_of_aggregated_source_id(test_db):
)
assert len(returned_exposure_entities) == 0
def test_get_data_unit_ids_geometries_of_entity_and_occupancy_case(test_db):
# Database connection (the Configuration class will define the credentials based on whether
# the code is running in the CI or locally)
config = Configuration(
os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml")
)
# data units will be retrieved, all with associated geometries
(
returned_data_units_ids,
returned_data_units_geometries,
returned_data_units_ids_no_geometry,
) = DatabaseQueries.get_data_unit_ids_geometries_of_entity_and_occupancy_case(
"ABC", "residential", 2, config.database_gde_tiles, "data_units"
)
expected_data_units_ids = ["ABC_10269", "ABC_10278", "ABC_10277"]
expected_bounds_west = [15.04625, 15.05455, 15.05455]
expected_bounds_east = [15.05455, 15.0629, 15.0629]
expected_bounds_south = [37.475893, 37.475893, 37.4675485]
expected_bounds_north = [37.48424, 37.48424, 37.475893]
assert len(returned_data_units_ids) == 3
assert len(returned_data_units_geometries) == 3
assert len(returned_data_units_ids_no_geometry) == 0
for i, data_unit_id in enumerate(expected_data_units_ids):
assert data_unit_id in returned_data_units_ids
which = numpy.where(returned_data_units_ids == data_unit_id)[0][0]
assert returned_data_units_geometries[which].bounds[0] == expected_bounds_west[i]
assert returned_data_units_geometries[which].bounds[1] == expected_bounds_south[i]
assert returned_data_units_geometries[which].bounds[2] == expected_bounds_east[i]
assert returned_data_units_geometries[which].bounds[3] == expected_bounds_north[i]
# data units will not be found
(
returned_data_units_ids,
returned_data_units_geometries,
returned_data_units_ids_no_geometry,
) = DatabaseQueries.get_data_unit_ids_geometries_of_entity_and_occupancy_case(
"ABC", "commercial", 2, config.database_gde_tiles, "data_units"
)
assert len(returned_data_units_ids) == 0
assert len(returned_data_units_geometries) == 0
assert len(returned_data_units_ids_no_geometry) == 0
# data units will be retrieved, without associated geometries
(
returned_data_units_ids,
returned_data_units_geometries,
returned_data_units_ids_no_geometry,
) = DatabaseQueries.get_data_unit_ids_geometries_of_entity_and_occupancy_case(
"DEF", "residential", 2, config.database_gde_tiles, "data_units"
)
expected_data_unit_id_no_geometry = "DEF_00000"
assert len(returned_data_units_ids) == 0
assert len(returned_data_units_geometries) == 0
assert len(returned_data_units_ids_no_geometry) == 1
assert returned_data_units_ids_no_geometry[0] == expected_data_unit_id_no_geometry
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment