Commit 9d969f3d authored by Cecilia Nievas
Browse files

Added feature to retrieve data_unit_tiles

parent 66233723
Pipeline #41239 passed with stage
in 2 minutes and 45 seconds
......@@ -631,3 +631,157 @@ class DatabaseQueries:
)
return building_classes_proportions
@staticmethod
def get_data_unit_tiles_of_data_unit(
    data_unit_id, occupancy_case, aggregated_source_id, db_gde_tiles_config, db_table
):
    """This function retrieves all the data-unit tiles associated with 'data_unit_id',
    'occupancy_case' and 'aggregated_source_id' in 'db_table', in terms of their quadkeys
    and number of buildings in the data-unit tile as per the aggregated exposure model.

    Args:
        data_unit_id (str):
            ID of the data unit for which the data-unit tiles will be retrieved.
        occupancy_case (str):
            Name of the occupancy case (e.g. "residential", "commercial", "industrial")
            for which the data-unit tiles will be retrieved.
        aggregated_source_id (int):
            ID of the source of the aggregated exposure model for which the data-unit tiles
            will be retrieved.
        db_gde_tiles_config (dict):
            Dictionary containing the credentials needed to connect to the SQL database in
            which information on the data-unit tiles is stored. It is unpacked as keyword
            arguments of 'Database'. The keys of the dictionary need to be:
                host (str):
                    SQL database host address.
                dbname (str):
                    Name of the SQL database.
                port (int):
                    Port where the SQL database can be found.
                username (str):
                    User name to connect to the SQL database.
                password (str):
                    Password associated with 'username'.
        db_table (str):
            Name of the table of the SQL database where the data-unit tiles are stored. It
            is assumed that this table contains, at least, the following fields:
                quadkey (str):
                    String indicating the quadkey of a tile.
                aggregated_source_id (int):
                    ID of the source of the aggregated exposure model.
                occupancy_case (enum):
                    SQL enumerated type describing the building occupancy cases.
                data_unit_id (str):
                    ID of the data unit.
                aggregated_buildings (float):
                    Number of buildings in the data-unit tile as per the aggregated exposure
                    model with ID 'aggregated_source_id'.

    Returns:
        quadkeys (array of str):
            Strings indicating the quadkeys of all data-unit tiles associated with
            'data_unit_id', 'occupancy_case' and 'aggregated_source_id'. Empty array if no
            entries are found.
        aggregated_buildings (array of float):
            Number of buildings in the data-unit tiles as per the aggregated exposure
            model with ID 'aggregated_source_id'. Empty array if no entries are found.
    """

    # NOTE(review): the values are interpolated directly into the SQL text, so all arguments
    # must come from trusted sources (configuration/internal IDs). Switching to the driver's
    # parameter binding would be safer, but depends on the paramstyle of the cursor created
    # by 'Database' -- confirm before changing.
    sql_query = "SELECT quadkey, aggregated_buildings FROM %s"
    sql_query += " WHERE (data_unit_id='%s' AND occupancy_case='%s'"
    sql_query += " AND aggregated_source_id='%s');"

    db_gde_tiles = Database(**db_gde_tiles_config)
    db_gde_tiles.create_connection_and_cursor()
    try:
        db_gde_tiles.cursor.execute(
            sql_query % (db_table, data_unit_id, occupancy_case, aggregated_source_id)
        )
        exec_result = db_gde_tiles.cursor.fetchall()
    finally:
        # Release the connection even if the query fails
        db_gde_tiles.close_connection()

    # Building the arrays directly from the rows covers every case: zero rows yield empty
    # arrays, and a single row is kept (a "> 1" length check used to discard it).
    quadkeys = numpy.array([row[0] for row in exec_result], dtype="str")
    aggregated_buildings = numpy.array([row[1] for row in exec_result], dtype="float")

    return quadkeys, aggregated_buildings
@staticmethod
def get_data_unit_tiles_of_data_unit_as_DataFrame(
    data_unit_id, occupancy_case, aggregated_source_id, db_gde_tiles_config, db_table
):
    """Retrieve the data-unit tiles of a data unit and return them as a Pandas DataFrame.

    This is a thin wrapper around 'DatabaseQueries.get_data_unit_tiles_of_data_unit': it
    forwards all its arguments unchanged and packs the two returned arrays (quadkeys and
    number of aggregated buildings) into the columns of a DataFrame.

    Args:
        data_unit_id (str):
            ID of the data unit whose data-unit tiles are requested.
        occupancy_case (str):
            Name of the occupancy case (e.g. "residential", "commercial", "industrial")
            whose data-unit tiles are requested.
        aggregated_source_id (int):
            ID of the source of the aggregated exposure model whose data-unit tiles are
            requested.
        db_gde_tiles_config (dict):
            Credentials needed to connect to the SQL database that stores the data-unit
            tiles. Expected keys:
                host (str):
                    SQL database host address.
                dbname (str):
                    Name of the SQL database.
                port (int):
                    Port where the SQL database can be found.
                username (str):
                    User name to connect to the SQL database.
                password (str):
                    Password associated with 'username'.
        db_table (str):
            Name of the table of the SQL database where the data-unit tiles are stored. It
            is assumed to contain, at least, the fields 'quadkey' (str),
            'aggregated_source_id' (int), 'occupancy_case' (enum), 'data_unit_id' (str) and
            'aggregated_buildings' (float).

    Returns:
        data_unit_tiles (Pandas DataFrame):
            One row per data-unit tile associated with 'data_unit_id', 'occupancy_case' and
            'aggregated_source_id' in 'db_table', with columns:
                quadkey (str):
                    String indicating the quadkey of a tile.
                aggregated_buildings (float):
                    Number of buildings in the data-unit tile as per the aggregated exposure
                    model with ID 'aggregated_source_id'.
    """

    tile_quadkeys, tile_buildings = DatabaseQueries.get_data_unit_tiles_of_data_unit(
        data_unit_id,
        occupancy_case,
        aggregated_source_id,
        db_gde_tiles_config,
        db_table,
    )

    # Column order follows insertion order: quadkey first, then aggregated_buildings
    columns = {}
    columns["quadkey"] = tile_quadkeys
    columns["aggregated_buildings"] = tile_buildings

    return pandas.DataFrame(columns)
......@@ -159,6 +159,19 @@ def main():
% (aux_log_string, str(len(obm_buildings_building_classes.keys())))
)
# Retrieve data-unit tiles (quadkey, aggregated_buildings) as a Pandas DataFrame
data_unit_tiles = DatabaseQueries.get_data_unit_tiles_of_data_unit_as_DataFrame(
data_unit_id,
occupancy_case,
aggregated_source_id,
config.database_gde_tiles,
"data_unit_tiles",
)
logger.info(
"%s: %s data-unit tiles retrieved"
% (aux_log_string, str(data_unit_tiles.shape[0]))
)
# Leave the program
logger.info("gde-core has finished")
sys.exit()
......
......@@ -2,6 +2,7 @@ DROP TABLE IF EXISTS aggregated_sources;
DROP TABLE IF EXISTS data_units;
DROP TABLE IF EXISTS obm_buildings;
DROP TABLE IF EXISTS data_units_buildings;
DROP TABLE IF EXISTS data_unit_tiles;
DROP TYPE IF EXISTS occupancycase;
DROP TYPE IF EXISTS settlement;
DROP EXTENSION IF EXISTS postgis;
......@@ -164,3 +165,35 @@ VALUES ('A1/HBET:1-3', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.2
('C4/HBET:2-3', 'urban', 'Trade', 2, 'ABC', 'commercial', 'ABC_10269', 0.10, 0.0, 0.0, 2, 3),
('C5/HBET:1-2', 'urban', 'Offices', 2, 'ABC', 'commercial', 'ABC_10269', 0.20, 0.0, 0.0, 1, 2),
('C6/HBET:3-5', 'urban', 'Offices', 2, 'ABC', 'commercial', 'ABC_10269', 0.30, 0.0, 0.0, 3, 5);
-- Data-unit tiles: one row per combination of tile (quadkey), aggregated exposure source,
-- occupancy case and data unit, as enforced by the composite primary key below.
CREATE TABLE data_unit_tiles
(
-- Quadkey of the tile (fixed width: 18 characters)
quadkey char(18),
-- ID of the source of the aggregated exposure model
aggregated_source_id SMALLINT,
-- Building occupancy case (enumerated type 'occupancycase')
occupancy_case occupancycase,
-- 3-character code of the exposure entity
exposure_entity char(3),
-- ID of the data unit
data_unit_id varchar,
-- NOTE(review): units and exact definition of the size/fraction columns are not
-- documented here -- confirm against the code that fills this table.
size_data_unit_tile_area FLOAT,
size_data_unit_tile_built_up_area FLOAT,
fraction_data_unit_area FLOAT,
fraction_data_unit_built_up_area FLOAT,
-- Number of buildings in the data-unit tile as per the aggregated exposure model
aggregated_buildings FLOAT,
PRIMARY KEY (quadkey, aggregated_source_id, occupancy_case, data_unit_id)
);
-- Test data for 'data_unit_tiles': three tiles (quadkeys) of data unit 'ABC_10269' for
-- aggregated source 2, each with a 'residential' and a 'commercial' entry. Only
-- 'aggregated_buildings' carries distinct values; the size and fraction columns are all
-- set to 0.0. No 'industrial' rows are inserted.
INSERT INTO data_unit_tiles(quadkey,
aggregated_source_id,
occupancy_case,
exposure_entity,
data_unit_id,
size_data_unit_tile_area,
size_data_unit_tile_built_up_area,
fraction_data_unit_area,
fraction_data_unit_built_up_area,
aggregated_buildings)
VALUES ('122010321033023130', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 15.7),
('122010321033023130', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 23.4),
('122010321033023120', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 39.1),
('122010321033023120', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 17.6),
('122010321033023132', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 34.4),
('122010321033023132', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 11.5);
......@@ -339,3 +339,43 @@ def test_get_building_classes_of_data_unit(test_db):
for col_name in expected_columns:
assert col_name in returned_building_classes.columns
assert round(returned_building_classes["proportions"].sum(), 5) == 0.0
def test_get_data_unit_tiles_of_data_unit_as_DataFrame(test_db):
    """Check the DataFrame of data-unit tiles returned for data unit 'ABC_10269'.

    For the occupancy cases present in the test database ("residential", "commercial") the
    number of rows and the 'aggregated_buildings' value of each expected quadkey are
    verified. For an occupancy case with no entries ("industrial") an empty DataFrame that
    still carries both columns is expected.
    """
    # Database connection (the Configuration class will define the credentials based on whether
    # the code is running in the CI or locally)
    config_path = os.path.join(
        os.path.dirname(__file__), "data", "config_for_testing_good.yml"
    )
    config = Configuration(config_path)

    expected_quadkeys = ["122010321033023130", "122010321033023120", "122010321033023132"]
    # Values listed in the same order as 'expected_quadkeys'
    expected_aggregated_buildings = {
        "residential": [15.7, 39.1, 34.4],
        "commercial": [23.4, 17.6, 11.5],
    }

    for occupancy, expected_values in expected_aggregated_buildings.items():
        tiles = DatabaseQueries.get_data_unit_tiles_of_data_unit_as_DataFrame(
            "ABC_10269", occupancy, 2, config.database_gde_tiles, "data_unit_tiles"
        )

        assert tiles.shape[0] == len(expected_quadkeys)

        for quadkey, expected_value in zip(expected_quadkeys, expected_values):
            rows_of_quadkey = tiles[tiles.quadkey == quadkey]
            returned_value = rows_of_quadkey["aggregated_buildings"].to_numpy()[0]
            assert returned_value == expected_value

    # Test case in which there are no entries to retrieve
    empty_tiles = DatabaseQueries.get_data_unit_tiles_of_data_unit_as_DataFrame(
        "ABC_10269", "industrial", 2, config.database_gde_tiles, "data_unit_tiles"
    )

    assert empty_tiles.shape[0] == 0
    for column_name in ("quadkey", "aggregated_buildings"):
        assert column_name in empty_tiles.columns
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment