Commit 0fc9502d authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added feature to write building classes to database

parent c7e627a1
Pipeline #36063 passed with stage
in 2 minutes and 28 seconds
......@@ -173,7 +173,8 @@ class AggregatedExposureModel(abc.ABC):
def store_data_units(
self,
db_data_units_config,
db_table,
db_table_data_units,
db_table_data_units_buildings,
exposure_entity_name,
occupancy_case,
aggregated_source_id,
......@@ -197,7 +198,7 @@ class AggregatedExposureModel(abc.ABC):
User name to connect to the SQL database.
password (str):
Password associated with self.username.
db_table (str):
db_table_data_units (str):
Name of the table of the SQL database where the data units will be stored. It is
assumed that this table contains, at least, the following fields:
data_unit_id (str):
......@@ -216,6 +217,39 @@ class AggregatedExposureModel(abc.ABC):
Total number of census people in the DataUnit.
cost_total (float):
Total replacement cost of buildings in the DataUnit.
db_table_data_units_buildings (str):
Name of the table of the SQL database where the attributes of the building
classes of the Data Unit will be stored. It is assumed that this table contains,
at least, the following fields:
building_class_name (str):
Building class as per the GEM Building Taxonomy.
settlement_type (enum):
Type of settlement within the Data Unit. Possible values: "urban",
"rural", "big_city", "all".
occupancy_subtype (str):
Details on the occupancy, if relevant to characterise the building
class.
aggregated_source_id (int):
ID of the source of the aggregated exposure model.
exposure_entity (str):
3-character code of the exposure entity.
occupancy_case (enum):
SQL enumerated type describing the building occupancy cases.
data_unit_id (str):
ID of the data unit.
proportions (float):
Proportions in which each of the building classes defined by
building_class_name, settlement_type and occupancy_subtype are
present in the Data Unit. This column needs to add up to 1.0 for all
rows with the same building_class_name, settlement_type,
occupancy_subtype, data_unit_id, occupancy_case and
aggregated_source_id.
census_people_per_building (float):
Number of census-derived people (i.e. not accounting for time of the
day) per building of this class.
total_cost_per_building (float):
Total replacement cost of a building of this class, including costs of
structural and non-structural components as well as contents.
exposure_entity_name (str):
Name of the ExposureEntity whose data units will be stored. It needs to be a
key of self.exposure_entities and self.exposure_entities[exposure_entity_name]
......@@ -233,9 +267,13 @@ class AggregatedExposureModel(abc.ABC):
aggregated_source_id (int):
ID of the source of the aggregated exposure model.
Returns:
This function calls DataUnit.write_data_unit_to_database() to store attributes of
all data units present in self.exposure_entities[exposure_entity_name]
.occupancy_cases[occupancy_case]["data_units"].
This function calls DataUnit.write_data_unit_to_database() to store the following
attributes of all data units present in self.exposure_entities[exposure_entity_name]
.occupancy_cases[occupancy_case]["data_units"]: ID, occupancy_case, total_buildings,
total_dwellings, total_people["Census"] and total_cost["Total"]. This function also
calls DataUnit.write_data_unit_buildings_to_database() to store attributes and
proportions of building classes present in all data units of self.exposure_entities
[exposure_entity_name].occupancy_cases[occupancy_case]["data_units"].
"""
data_units = self.exposure_entities[exposure_entity_name].occupancy_cases[
......@@ -245,7 +283,14 @@ class AggregatedExposureModel(abc.ABC):
for data_unit_id in data_units.keys():
data_units[data_unit_id].write_data_unit_to_database(
db_data_units_config,
db_table,
db_table_data_units,
aggregated_source_id,
occupancy_case,
self.exposure_entities[exposure_entity_name].code,
)
data_units[data_unit_id].write_data_unit_buildings_to_database(
db_data_units_config,
db_table_data_units_buildings,
aggregated_source_id,
occupancy_case,
self.exposure_entities[exposure_entity_name].code,
......
......@@ -18,6 +18,7 @@
import logging
import numpy
from copy import deepcopy
from gdeimporter.tools.database import Database
logger = logging.getLogger()
......@@ -379,3 +380,144 @@ class DataUnit:
db_gde_tiles.close_connection()
return
def write_data_unit_buildings_to_database(
    self,
    db_data_units_config,
    db_table,
    aggregated_source_id,
    occupancy_case,
    exposure_entity_code,
):
    """This function writes the attributes of the building classes of the DataUnit to the
    table with name db_table in the database whose credentials are indicated in
    db_data_units_config. Any rows already stored for this data unit, occupancy case and
    aggregated source are removed first, so that calling the function again replaces the
    previous contents instead of duplicating them.

    Args:
        db_data_units_config (dict):
            Dictionary containing the credentials needed to connect to the SQL database in
            which information on the data units is stored. The keys of the dictionary need
            to be:
                host (str):
                    SQL database host address.
                dbname (str):
                    Name of the SQL database.
                port (int):
                    Port where the SQL database can be found.
                username (str):
                    User name to connect to the SQL database.
                password (str):
                    Password associated with username.
        db_table (str):
            Name of the table of the SQL database where the attributes of the building
            classes of the Data Unit will be stored. The name is inserted verbatim into the
            SQL statements (identifiers cannot be bound as query parameters), so it must
            come from trusted code or configuration. It is assumed that this table
            contains, at least, the following fields:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (enum):
                    Type of settlement within the Data Unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                aggregated_source_id (int):
                    ID of the source of the aggregated exposure model.
                exposure_entity (str):
                    3-character code of the exposure entity.
                occupancy_case (enum):
                    SQL enumerated type describing the building occupancy cases.
                data_unit_id (str):
                    ID of the data unit.
                proportions (float):
                    Proportion in which the building class defined by
                    building_class_name, settlement_type and occupancy_subtype is present
                    in the Data Unit.
                    NOTE(review): the previous wording required this column to add up to
                    1.0 over rows sharing all of building_class_name, settlement_type,
                    occupancy_subtype, data_unit_id, occupancy_case and
                    aggregated_source_id, which identifies a single row. Presumably the
                    sum is meant over all rows with the same data_unit_id, occupancy_case
                    and aggregated_source_id -- to be confirmed.
                census_people_per_building (float):
                    Number of census-derived people (i.e. not accounting for time of the
                    day) per building of this class.
                total_cost_per_building (float):
                    Total replacement cost of a building of this class, including costs of
                    structural and non-structural components as well as contents.
        aggregated_source_id (int):
            ID of the source of the aggregated exposure model.
        occupancy_case (str):
            Name of the occupancy case (e.g. "residential", "commercial", "industrial")
            associated with this data_unit.
        exposure_entity_code (str):
            3-character string containing the ISO3 code of the country, if the exposure
            entity is a country, or a code that the user defines in the configuration file.

    Returns:
        This function writes to the table with name db_table in the database whose
        credentials are indicated in db_data_units_config. It returns None.
    """

    data_unit_full_id = "%s_%s" % (exposure_entity_code, self.id)
    source_id = str(aggregated_source_id)

    # Only the table identifier is formatted into the statement text. All values are passed
    # separately to cursor.execute() so that the database driver quotes them; this keeps
    # values containing apostrophes (or any other SQL syntax) from breaking the statement.
    # NOTE(review): the "%s" placeholders assume the cursor exposed by Database uses the
    # format/pyformat DB-API parameter style (as psycopg2 does) -- confirm.
    sql_delete = (
        "DELETE FROM {table}"
        " WHERE (data_unit_id=%s AND occupancy_case=%s AND aggregated_source_id=%s);"
    ).format(table=db_table)
    sql_insert = (
        "INSERT INTO"
        " {table}(data_unit_id, occupancy_case, aggregated_source_id,"
        " building_class_name, settlement_type, occupancy_subtype,"
        " exposure_entity, proportions,"
        " census_people_per_building, total_cost_per_building)"
        " VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"
    ).format(table=db_table)

    # The stored building classes are only read here, so no copy of them is needed. Each
    # column is converted to an array once instead of once per row.
    buildings_data = self.building_classes_proportions_and_properties
    columns = [
        buildings_data[column_name].to_numpy()
        for column_name in (
            "building_class_name",
            "settlement_type",
            "occupancy_subtype",
            "proportions",
            "census_people_per_building",
            "total_cost_per_building",
        )
    ]

    db_gde_tiles = Database(**db_data_units_config)
    db_gde_tiles.create_connection_and_cursor()

    try:
        # Remove any existing entries for this data unit. Deleting is a no-op when there
        # are none, so no prior existence check is required.
        db_gde_tiles.cursor.execute(
            sql_delete, (data_unit_full_id, occupancy_case, source_id)
        )

        # Insert building classes for this Data Unit. Values are bound in their string
        # form, which is what the previous string-formatted statements sent to the
        # database.
        for class_name, settlement, subtype, proportion, people, cost in zip(*columns):
            db_gde_tiles.cursor.execute(
                sql_insert,
                (
                    data_unit_full_id,
                    occupancy_case,
                    source_id,
                    str(class_name),
                    str(settlement.lower()),
                    str(subtype),
                    exposure_entity_code,
                    str(proportion),
                    str(people),
                    str(cost),
                ),
            )
    finally:
        # Release the connection even if one of the statements above raised
        db_gde_tiles.close_connection()
......@@ -68,6 +68,7 @@ def main():
aem.store_data_units(
config.database_gde_tiles,
"data_units",
"data_units_buildings",
exposure_entity_name,
occupancy_case,
aem_source_id,
......@@ -108,7 +109,11 @@ def main():
)
for case in aem.exposure_entities[exposure_entity].occupancy_cases.keys():
logger.info(" %s:" % case)
for attr in ["data_units_type", "data_units_level", "data_units_definition"]:
for attr in [
"data_units_type",
"data_units_level",
"data_units_definition",
]:
logger.info(
" %s: %s"
% (
......
......@@ -34,6 +34,7 @@ def test_db():
- data_unit_tiles (of the GDE Tiles database)
- exposure_entities_costs_assumptions (of the GDE Tiles database)
- exposure_entities_population_time_distribution (of the GDE Tiles database)
- data_units_buildings (of the GDE Tiles database)
"""
init_test_db()
......@@ -42,8 +43,9 @@ def test_db():
def init_test_db():
"""Populates the test database that simulates to contain obm_built_area_assessments,
aggregated_sources, data_units, data_unit_tiles, exposure_entities_costs_assumptions and
exposure_entities_population_time_distribution with a basic schema and data.
aggregated_sources, data_units, data_unit_tiles, exposure_entities_costs_assumptions,
exposure_entities_population_time_distribution and data_units_buildings with a basic schema
and data.
"""
if "GDEIMPORTER_DB_HOST" in os.environ: # When running the CI pipeline
......
......@@ -4,9 +4,12 @@ DROP TABLE IF EXISTS data_units;
DROP TABLE IF EXISTS data_unit_tiles;
DROP TABLE IF EXISTS exposure_entities_costs_assumptions;
DROP TABLE IF EXISTS exposure_entities_population_time_distribution;
DROP TABLE IF EXISTS data_units_buildings;
DROP TYPE IF EXISTS occupancycase;
DROP TYPE IF EXISTS settlement;
CREATE TYPE occupancycase AS ENUM ('residential', 'commercial', 'industrial');
CREATE TYPE settlement AS ENUM ('urban', 'rural', 'big_city', 'all');
CREATE TABLE obm_built_area_assessments
(
......@@ -17,17 +20,17 @@ CREATE TABLE obm_built_area_assessments
);
INSERT INTO obm_built_area_assessments(quadkey, source_id, built_area_size)
VALUES ('122100203232131100', 1, 7512.38),
('122100203232131101', 1, 3449.81),
('122100203232131102', 1, 1266.45),
('122100203232131103', 1, 5016.22),
('120222331000133202', 1, 1823.553),
('120222331000133203', 1, 3391.246),
('120222331000133212', 1, 7403.582),
('120222331000133213', 1, 5937.64),
('120222331000133220', 1, 2072.341),
('120222331000133221', 1, 1227.524),
('120222331000133230', 1, 2921.676),
('120222331000133231', 1, 4244.951);
('122100203232131101', 1, 3449.81),
('122100203232131102', 1, 1266.45),
('122100203232131103', 1, 5016.22),
('120222331000133202', 1, 1823.553),
('120222331000133203', 1, 3391.246),
('120222331000133212', 1, 7403.582),
('120222331000133213', 1, 5937.64),
('120222331000133220', 1, 2072.341),
('120222331000133221', 1, 1227.524),
('120222331000133230', 1, 2921.676),
('120222331000133231', 1, 4244.951);
CREATE TABLE aggregated_sources
(
......@@ -37,13 +40,13 @@ CREATE TABLE aggregated_sources
);
INSERT INTO aggregated_sources(name, format)
VALUES ('first_source', 'aaa'),
('second_source', 'bbb'),
('third_source', 'ccc'),
('first_source', 'ddd');
('second_source', 'bbb'),
('third_source', 'ccc'),
('first_source', 'ddd');
CREATE TABLE data_unit_tiles
(
quadkey char(18),
quadkey char(18),
aggregated_source_id SMALLINT,
occupancy_case occupancycase,
exposure_entity char(3),
......@@ -55,7 +58,7 @@ CREATE TABLE data_unit_tiles
PRIMARY KEY (quadkey, aggregated_source_id, occupancy_case, data_unit_id)
);
INSERT INTO data_unit_tiles(quadkey,
INSERT INTO data_unit_tiles(quadkey,
aggregated_source_id,
occupancy_case,
exposure_entity,
......@@ -64,7 +67,17 @@ INSERT INTO data_unit_tiles(quadkey,
size_data_unit_tile_built_up_area,
fraction_data_unit_area,
fraction_data_unit_built_up_area)
VALUES ('120222331000133202',2,'residential','GRC','GRC_39821',2532.671,287.720,0.0572566,0.086);
VALUES (
'120222331000133202',
2,
'residential',
'GRC',
'GRC_39821',
2532.671,
287.720,
0.0572566,
0.086
);
CREATE TABLE data_units
(
......@@ -87,7 +100,7 @@ INSERT INTO data_units(data_unit_id,
dwellings_total,
people_census,
cost_total)
VALUES ('ABC_123456','residential',17,'ABC',0.0,0.0,0.0,0.0);
VALUES ('ABC_123456', 'residential', 17, 'ABC', 0.0, 0.0, 0.0, 0.0);
CREATE TABLE exposure_entities_costs_assumptions
(
......@@ -108,7 +121,7 @@ INSERT INTO exposure_entities_costs_assumptions(exposure_entity,
non_structural,
contents,
currency)
VALUES ('GRC','commercial',19,0.0,0.0,0.0,'USD');
VALUES ('GRC', 'commercial', 19, 0.0, 0.0, 0.0, 'USD');
CREATE TABLE exposure_entities_population_time_distribution
(
......@@ -127,4 +140,61 @@ INSERT INTO exposure_entities_population_time_distribution(exposure_entity,
day,
night,
transit)
VALUES ('GRC','commercial',19,0.0,0.0,0.0);
VALUES ('GRC', 'commercial', 19, 0.0, 0.0, 0.0);
-- Building classes present in each data unit, with their proportions and per-building
-- properties. One row per building class (name + settlement type + occupancy subtype) of a
-- data unit, occupancy case and aggregated source.
CREATE TABLE data_units_buildings
(
    -- Building class as per the GEM Building Taxonomy
    building_class_name VARCHAR,
    -- Type of settlement within the data unit ('urban', 'rural', 'big_city', 'all')
    settlement_type settlement,
    -- Details on the occupancy, if relevant to characterise the building class
    occupancy_subtype VARCHAR,
    -- ID of the source of the aggregated exposure model
    aggregated_source_id SMALLINT,
    -- 3-character code of the exposure entity
    exposure_entity CHAR(3),
    -- Building occupancy case ('residential', 'commercial', 'industrial')
    occupancy_case occupancycase,
    -- ID of the data unit
    data_unit_id VARCHAR,
    -- Proportion in which the building class is present in the data unit
    proportions FLOAT,
    -- Number of census-derived people per building of this class
    census_people_per_building FLOAT,
    -- Total replacement cost of a building of this class
    total_cost_per_building FLOAT,
    PRIMARY KEY (
        data_unit_id,
        occupancy_case,
        aggregated_source_id,
        building_class_name,
        settlement_type,
        occupancy_subtype
    )
);
-- Seed data: two building classes of data unit 'EN1_Unit_1' (residential, aggregated
-- source 18) whose proportions, 0.3 and 0.7, add up to 1.0.
INSERT INTO data_units_buildings(
    building_class_name,
    settlement_type,
    occupancy_subtype,
    aggregated_source_id,
    exposure_entity,
    occupancy_case,
    data_unit_id,
    proportions,
    census_people_per_building,
    total_cost_per_building
)
VALUES
    ('SOMECLASS1', 'urban', 'Single', 18, 'EN1', 'residential', 'EN1_Unit_1',
     0.3, 12.7, 1200000.0),
    ('SOMECLASS2', 'urban', 'Single', 18, 'EN1', 'residential', 'EN1_Unit_1',
     0.7, 2.5, 500000.0);
......@@ -356,7 +356,17 @@ def test_ExposureModelESRM20(test_db):
assert difference_returned_expected < 1e-5
# Check building classes, their proportions and properties
# Write data units to the test database
returned_aem.store_data_units(
config.database_gde_tiles,
"data_units",
"data_units_buildings",
entity_names[i],
occupancy_names[i],
18,
)
# Check building classes, their proportions and properties, and entries in the database
for unit_id in unit_ids[i]:
returned_data_unit = returned_aem.exposure_entities[
entity_names[i]
......@@ -374,6 +384,17 @@ def test_ExposureModelESRM20(test_db):
)
assert returned_proportions_and_properties.shape[0] == len(rows_data_unit)
# Query the data_units_buildings database for this data unit
db_queried_proportions_and_properties = query_data_units_buildings(
config.database_gde_tiles,
"EN%s_%s" % (entity_names[i][-1], unit_id),
occupancy_names[i],
18,
)
assert db_queried_proportions_and_properties["number_entries"] == len(
rows_data_unit
)
for j in range(returned_proportions_and_properties.shape[0]):
returned_building_class_name = returned_proportions_and_properties[
"building_class_name"
......@@ -385,6 +406,7 @@ def test_ExposureModelESRM20(test_db):
"occupancy_subtype"
].to_numpy()[j]
# Compare values returned by the class with expected ones
which_in_expected = numpy.where(
numpy.logical_and(
numpy.logical_and(
......@@ -437,14 +459,52 @@ def test_ExposureModelESRM20(test_db):
4,
)
# Write data units to the test database
returned_aem.store_data_units(
config.database_gde_tiles,
"data_units",
entity_names[i],
occupancy_names[i],
18,
)
# Compare values written to the database with expected ones
which_in_db_query = numpy.where(
numpy.logical_and(
numpy.logical_and(
db_queried_proportions_and_properties["building_class_name"]
== returned_building_class_name,
db_queried_proportions_and_properties["settlement_type"]
== returned_settlement_type.lower(),
),
db_queried_proportions_and_properties["occupancy_subtype"]
== returned_occupancy_subtype,
)
)[0][0]
assert round(
db_queried_proportions_and_properties["proportions"][which_in_db_query], 4
) == round(
expected_results_proportions_and_properties["proportions"].to_numpy()[
rows_data_unit
][which_in_expected],
4,
)
assert round(
db_queried_proportions_and_properties["census_people_per_building"][
which_in_db_query
],
4,
) == round(
expected_results_proportions_and_properties[
"census_people_per_building"
].to_numpy()[rows_data_unit][which_in_expected],
4,
)
assert round(
db_queried_proportions_and_properties["total_cost_per_building"][
which_in_db_query
],
4,
) == round(
expected_results_proportions_and_properties[
"total_cost_per_building"
].to_numpy()[rows_data_unit][which_in_expected],
4,
)
# Test that the data units have been stored correctly
for row in range(2, 11):
......@@ -559,3 +619,83 @@ def query_aggregated_sources(db_cursor, source_id):
exec_result = db_cursor.fetchall()
return exec_result[0][0], exec_result[0][1]
def query_data_units_buildings(
credentials, data_unit_id_full, occupancy_case, aggregated_source_id
):
"""This auxiliary function queries the 'data_units_buildings' table of the test database to
find all entries corresponding to 'data_unit_id_full', 'occupancy_case' and
'aggregated_source_id'.
Args:
credentials (dict):
Dictionary containing the credentials needed to connect to the test SQL database.
The keys of the dictionary need to be:
host (str):
SQL database host address.
dbname (str):
Name of the SQL database.
port (int):
Port where the SQL database can be found.
username (str):
User name to connect to the SQL database.
password (str):
Password associated with self.username.
data_unit_id_full (str):
ID of the Data Unit of interest, including the 3-character code of its corresponding
exposure entity.
occupancy_case (str):
Occupancy case associated with 'data_unit_id'.
aggregated_source_id (int):
ID of the aggregated exposure model source associated with 'data_unit_id'.
Returns:
results (dict):
Dictionary of arrays whose length is equal to the number of entries found. The
contents are:
exposure_entity (arr of str):
3-character code of the exposure entity.
building_class_name (arr of str):
Building class as per the GEM Building Taxonomy.
settlement_type (arr of str):
Type of settlement within the Data Unit. Possible values: "urban", "rural",
"big_city", "all".
occupancy_subtype (arr of str):
Details on the occupancy, if relevant to characterise the building class.
proportions (arr of float):
Proportions in which each of the building classes defined by
building_class_name, settlement_type and occupancy_subtype are present in
the Data Unit.
census_people_per_building (arr of float):
Number of census-derived people per building of this class.
total_cost_per_building (arr of float):
Total replacement cost of a building of this class.
"""
sql_command = (
"SELECT exposure_entity, building_class_name, settlement_type, occupancy_subtype,"
" proportions, census_people_per_building, total_cost_per_building"
" FROM data_units_buildings"