Commit 4cf1e7f3 authored by Cecilia Nievas
Browse files

Added feature to store building classes of OBM buildings

parent 00f3e618
Pipeline #41788 passed with stage
in 2 minutes and 28 seconds
......@@ -44,13 +44,14 @@ class DatabaseStorage:
Args:
data_unit_id (str):
ID of the data unit for which the data-unit tiles will be retrieved.
ID of the data unit for which the number of OBM and remainder buildings will be
stored.
occupancy_case (str):
Name of the occupancy case (e.g. "residential", "commercial", "industrial")
for which the data-unit tiles will be retrieved.
for which the number of OBM and remainder buildings will be stored.
aggregated_source_id (int):
ID of the source of the aggregated exposure model for which the data-unit tiles
will be retrieved.
ID of the source of the aggregated exposure model for which the number of OBM
and remainder buildings will be stored.
data_unit_tiles:
Pandas DataFrame with data-unit tiles. It contains the following columns:
quadkey (str):
......@@ -140,6 +141,177 @@ class DatabaseStorage:
"data_unit_id='%s' AND occupancy_case='%s' AND aggregated_source_id='%s'. "
"Numbers of OBM and remainder buildings were not stored "
"for this data-unit tile."
% (quadkey, data_unit_id, occupancy_case, aggregated_source_id)
)
db_gde_tiles.close_connection()
return
@staticmethod
def store_OBM_building_classes(
    data_unit_id,
    occupancy_case,
    aggregated_source_id,
    obm_buildings_building_classes,
    db_gde_tiles_config,
    db_table,
):
    """This function writes to the table with name 'db_table' in the database whose
    credentials are indicated in 'db_gde_tiles_config' the building classes and associated
    probabilities for each of the OBM buildings in 'obm_buildings_building_classes'.
    Reference to the corresponding 'data_unit_id', 'occupancy_case' and
    'aggregated_source_id' is needed to be able (at a later stage) to retrieve attributes of
    the building classes.

    For each OBM building, the existing row identified by ('osm_id',
    'aggregated_source_id') is updated if exactly one exists, a new row is inserted if none
    exists, and an error is logged (and nothing is written) if more than one exists. The
    database connection is closed even if one of the SQL statements raises.

    Args:
        data_unit_id (str):
            ID of the data unit associated with the OBM buildings in
            'obm_buildings_building_classes'.
        occupancy_case (str):
            Name of the occupancy case (e.g. "residential", "commercial", "industrial")
            of the OBM buildings in 'obm_buildings_building_classes'.
        aggregated_source_id (int):
            ID of the source of the aggregated exposure model associated with the building
            classes of the OBM buildings in 'obm_buildings_building_classes'.
        obm_buildings_building_classes (dict):
            Dictionary containing the building classes and their probabilities for each OBM
            building. Dictionary keys correspond to the OSM ID of the building. Each key
            contains a Pandas DataFrame with the following columns:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (str):
                    Type of settlement within the data unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                probabilities (float):
                    Probabilities of the building belonging to the building class (defined
                    by 'building_class_name', 'settlement_type' and 'occupancy_subtype').
        db_gde_tiles_config (dict):
            Dictionary containing the credentials needed to connect to the SQL database in
            which information on the GDE buildings is stored. The keys of the dictionary
            need to be:
                host (str):
                    SQL database host address.
                dbname (str):
                    Name of the SQL database.
                port (int):
                    Port where the SQL database can be found.
                username (str):
                    User name to connect to the SQL database.
                password (str):
                    Password associated with 'username'.
        db_table (str):
            Name of the table of the SQL database where the GDE buildings are stored. It is
            assumed that this table contains, at least, the following fields:
                osm_id (int):
                    ID of the OBM building.
                aggregated_source_id (int):
                    ID of the source of the aggregated exposure model.
                occupancy_case (enum):
                    SQL enumerated type describing the building occupancy cases.
                data_unit_id (str):
                    ID of the data unit the OBM building belongs to.
                building_class_names (array of str):
                    Building class as per the GEM Building Taxonomy.
                settlement_types (array of enum):
                    Type of settlement within the Data Unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtypes (array of str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                probabilities (array of float):
                    Probabilities of the OBM building belonging to each building class.
    """

    def _array_literal(values):
        """Render an iterable of strings as an SQL array literal, e.g. '{"a", "b"}'."""
        return '{"%s"}' % '", "'.join(values)

    def _array_literals(building_classes):
        """Return the four array literals (names, settlements, subtypes, probabilities)
        of one building, in the order in which the SQL statements expect them."""
        return (
            _array_literal(building_classes["building_class_name"].to_numpy()),
            _array_literal(building_classes["settlement_type"].to_numpy()),
            _array_literal(building_classes["occupancy_subtype"].to_numpy()),
            # Probabilities are floats: cast to str so that they can be joined
            _array_literal(building_classes["probabilities"].to_numpy().astype(str)),
        )

    # NOTE(review): all values below are interpolated into the SQL text with '%', so a
    # value containing a quote character would break (or alter) the statement. Switching
    # to driver-side parameter binding would be safer, but depends on the API of the
    # 'Database' wrapper, which is not visible from here.
    sql_commands = {
        "query": (
            "SELECT COUNT(*) FROM %s "
            "WHERE (osm_id=%s AND aggregated_source_id=%s);"
        ),
        "update": (
            "UPDATE %s SET (occupancy_case, data_unit_id, "
            "building_class_names, settlement_types, occupancy_subtypes, "
            "probabilities) = ('%s','%s','%s','%s','%s','%s')"
            " WHERE (osm_id=%s AND aggregated_source_id=%s);"
        ),
        "insert": (
            "INSERT INTO %s(osm_id, aggregated_source_id, occupancy_case, "
            "data_unit_id, building_class_names, settlement_types, "
            "occupancy_subtypes, probabilities) "
            "VALUES (%s, %s, '%s', '%s', '%s', '%s', '%s', '%s');"
        ),
    }

    db_gde_tiles = Database(**db_gde_tiles_config)
    db_gde_tiles.create_connection_and_cursor()

    try:
        for osm_id, building_classes in obm_buildings_building_classes.items():
            db_gde_tiles.cursor.execute(
                sql_commands["query"] % (db_table, osm_id, aggregated_source_id)
            )
            number_entries = db_gde_tiles.cursor.fetchall()[0][0]

            if number_entries == 1:  # One entry exists for this OSM ID --> update
                db_gde_tiles.cursor.execute(
                    sql_commands["update"]
                    % (
                        (db_table, occupancy_case, data_unit_id)
                        + _array_literals(building_classes)
                        + (osm_id, aggregated_source_id)
                    )
                )
            elif number_entries == 0:  # Entry does not exist yet for this OSM ID --> insert
                db_gde_tiles.cursor.execute(
                    sql_commands["insert"]
                    % (
                        (db_table, osm_id, aggregated_source_id, occupancy_case, data_unit_id)
                        + _array_literals(building_classes)
                    )
                )
            else:  # this should not occur
                logger.error(
                    "DatabaseStorage.store_OBM_building_classes() has found more than one "
                    "entry for osm_id=%s and aggregated_source_id=%s. "
                    "Building classes for this building were not stored.",
                    osm_id,
                    aggregated_source_id,
                )
    finally:
        # Release the connection even if one of the statements above failed
        db_gde_tiles.close_connection()
......
......@@ -168,6 +168,16 @@ def main():
% (aux_log_string, str(len(obm_buildings_building_classes.keys())))
)
# Store building classes of OBM buildings
DatabaseStorage.store_OBM_building_classes(
data_unit_id,
occupancy_case,
aggregated_source_id,
obm_buildings_building_classes,
config.database_gde_tiles,
"gde_buildings",
)
# Retrieve data-unit tiles (quadkey, aggregated_buildings) as a Pandas DataFrame
data_unit_tiles = DatabaseQueries.get_data_unit_tiles_of_data_unit_as_DataFrame(
data_unit_id,
......
......@@ -315,11 +315,11 @@ class GDEProcessor:
def assign_building_classes_to_obm_buildings(
obm_buildings, data_unit_building_classes, occupancy_case
):
"""This function assigns building classes and proportions from
"""This function assigns building classes and probabilities from
'data_unit_building_classes' to each of the OBM buildings in 'obm_buildings', by calling
'GDEProcessor.assign_building_classes_to_obm_building'. The latter selects from
'data_unit_building_classes' only the building classes that are compatible with the
attributes of the building contained in 'obm_buildings'. The proportions are
attributes of the building contained in 'obm_buildings'. The probabilities are
recalculated to reflect only the building classes retained.
Args:
......@@ -364,7 +364,7 @@ class GDEProcessor:
Returns:
obm_buildings_building_classes (dict):
Dictionary containing the building classes and their proportions for each OBM
Dictionary containing the building classes and their probabilities for each OBM
building. Dictionary keys correspond to the OSM ID of the building. Each key
contains a Pandas DataFrame with the following columns:
building_class_name (str):
......@@ -375,10 +375,9 @@ class GDEProcessor:
occupancy_subtype (str):
Details on the occupancy, if relevant to characterise the building
class.
proportions (float):
Proportions in which the building class (defined by
'building_class_name', 'settlement_type' and 'occupancy_subtype') is
present in the data unit.
probabilities (float):
Probabilities of the building belonging to the building class (defined
by 'building_class_name', 'settlement_type' and 'occupancy_subtype').
"""
obm_buildings_building_classes = {}
......@@ -400,7 +399,7 @@ class GDEProcessor:
def assign_building_classes_to_obm_building(
osm_id, storeys, occupancy, occupancy_case, data_unit_building_classes
):
"""This function assigns building classes and proportions from
"""This function assigns building classes and probabilities from
'data_unit_building_classes' to the OBM building with ID 'osm_id'. When building
attributes such as 'storeys' or 'occupancy' do not allow for a narrowing down of all the
potential building classes, the output consists of all building classes in the input
......@@ -459,7 +458,7 @@ class GDEProcessor:
Returns:
obm_building_building_classes (Pandas DataFrame):
DataFrame with the building classes and proportions assigned to the input OBM
DataFrame with the building classes and probabilities assigned to the input OBM
building. It contains the following columns:
building_class_name (str):
Building class as per the GEM Building Taxonomy.
......@@ -469,10 +468,9 @@ class GDEProcessor:
occupancy_subtype (str):
Details on the occupancy, if relevant to characterise the building
class.
proportions (float):
Proportions in which the building class (defined by
'building_class_name', 'settlement_type' and 'occupancy_subtype') is
present in the data unit.
probabilities (float):
Probabilities of the building belonging to the building class (defined
by 'building_class_name', 'settlement_type' and 'occupancy_subtype').
"""
obm_building_building_classes = deepcopy(data_unit_building_classes)
......@@ -509,6 +507,10 @@ class GDEProcessor:
obm_building_building_classes = deepcopy(data_unit_building_classes)
obm_building_building_classes = obm_building_building_classes.rename(
columns={"proportions": "probabilities"}
)
return obm_building_building_classes
@staticmethod
......
......@@ -4,6 +4,7 @@ DROP TABLE IF EXISTS obm_buildings;
DROP TABLE IF EXISTS data_units_buildings;
DROP TABLE IF EXISTS data_unit_tiles;
DROP TABLE IF EXISTS obm_built_area_assessments;
DROP TABLE IF EXISTS gde_buildings;
DROP TYPE IF EXISTS occupancycase;
DROP TYPE IF EXISTS settlement;
DROP EXTENSION IF EXISTS postgis;
......@@ -218,3 +219,31 @@ VALUES ('122010321033023130', 1, 0),
('122010321033023132', 1, 1),
('122010321033023121', 1, 0),
('122010321033023123', 1, 1);
-- Table with the building classes (and their probabilities) assigned to each OBM building.
-- One row per OBM building and source of the aggregated exposure model; the four array
-- columns are parallel, i.e. element i of each of them describes the same building class.
CREATE TABLE gde_buildings
(
-- ID of the OBM building
-- NOTE(review): 'integer' is 32-bit; confirm that all OSM IDs to be stored fit in it
osm_id integer,
-- ID of the source of the aggregated exposure model
aggregated_source_id SMALLINT,
-- Occupancy case of the building (enumerated type 'occupancycase')
occupancy_case occupancycase,
-- ID of the data unit the OBM building belongs to
data_unit_id VARCHAR,
-- Building classes as per the GEM Building Taxonomy
building_class_names VARCHAR[],
-- Type of settlement of each building class (enumerated type 'settlement')
settlement_types settlement[],
-- Details on the occupancy of each building class
occupancy_subtypes VARCHAR[],
-- Probabilities of the OBM building belonging to each building class
probabilities FLOAT[],
PRIMARY KEY (osm_id, aggregated_source_id)
);
-- Pre-existing entry for osm_id=-101010 and aggregated_source_id=2, so that storing building
-- classes for this building in the tests finds one existing row (update instead of insert)
INSERT INTO gde_buildings(osm_id,
aggregated_source_id,
occupancy_case,
data_unit_id,
building_class_names,
settlement_types,
occupancy_subtypes,
probabilities)
VALUES (-101010, 2, 'industrial', 'ABC_10269',
'{"CLASS/X/params/H:1", "CLASS/Y/params/H:2"}',
'{"rural", "rural"}',
'{"all", "all"}',
'{0.723, 0.277}'
);
......@@ -17,6 +17,7 @@
# along with this program. If not, see http://www.gnu.org/licenses/.
import os
import numpy
import pandas
from gdeimporter.tools.database import Database
from gdecore.configuration import Configuration
......@@ -121,3 +122,197 @@ def query_obm_and_remainder(
db_test.close_connection()
return obm_buildings, remainder_buildings
def test_store_OBM_building_classes(test_db):
    """Store building classes of OBM buildings with
    'DatabaseStorage.store_OBM_building_classes' and verify, by querying the 'gde_buildings'
    table of the test database, that what was stored matches what was passed in."""

    # Database connection (the Configuration class will define the credentials based on whether
    # the code is running in the CI or locally)
    config = Configuration(
        os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml")
    )

    def store_and_verify(occupancy_case, expected_by_osm_id):
        """Store 'expected_by_osm_id' for data unit "ABC_10269" and aggregated source 2,
        then read every building back and compare it against the input DataFrame."""
        DatabaseStorage.store_OBM_building_classes(
            "ABC_10269",
            occupancy_case,
            2,
            expected_by_osm_id,
            config.database_gde_tiles,
            "gde_buildings",
        )

        for osm_id, expected in expected_by_osm_id.items():
            (
                returned_occupancy_case,
                returned_data_unit_id,
                returned_building_class_names,
                returned_settlement_types,
                returned_occupancy_subtypes,
                returned_probabilities,
            ) = query_OBM_building_classes(config.database_gde_tiles, osm_id, 2)

            expected_names = expected["building_class_name"].to_numpy()
            expected_settlements = expected["settlement_type"].to_numpy()
            expected_subtypes = expected["occupancy_subtype"].to_numpy()
            expected_probabilities = expected["probabilities"].to_numpy()

            assert returned_occupancy_case == occupancy_case
            assert returned_data_unit_id == "ABC_10269"
            assert len(returned_building_class_names) == len(expected_names)

            # The order of the returned classes is not assumed: each returned class is
            # matched to its expected counterpart by building class name
            for position, returned_name in enumerate(returned_building_class_names):
                match = numpy.where(expected_names == returned_name)[0][0]

                assert returned_settlement_types[position] == expected_settlements[match]
                assert returned_occupancy_subtypes[position] == expected_subtypes[match]
                assert round(returned_probabilities[position], 4) == round(
                    expected_probabilities[match], 4
                )

    # Case 1: one residential building
    residential_classes = {
        11223344: pandas.DataFrame(
            {
                "building_class_name": ["A2/HBET:4-6", "B2/H:4"],
                "settlement_type": ["urban", "rural"],
                "occupancy_subtype": ["all", "all"],
                "probabilities": [0.666667, 0.333333],
            }
        )
    }
    store_and_verify("residential", residential_classes)

    # Case 2: two commercial buildings stored in the same call
    commercial_classes = {
        -101010: pandas.DataFrame(
            {
                "building_class_name": ["C6/HBET:3-5"],
                "settlement_type": ["urban"],
                "occupancy_subtype": ["Offices"],
                "probabilities": [1.0],
            }
        ),
        -202020: pandas.DataFrame(
            {
                "building_class_name": [
                    "C1/HBET:1-2",
                    "C2/HBET:3-",
                    "C3/H:1",
                    "C4/HBET:2-3",
                    "C5/HBET:1-2",
                    "C6/HBET:3-5",
                ],
                "settlement_type": ["urban", "urban", "urban", "urban", "urban", "urban"],
                "occupancy_subtype": [
                    "Hotels",
                    "Hotels",
                    "Trade",
                    "Trade",
                    "Offices",
                    "Offices",
                ],
                "probabilities": [0.10, 0.25, 0.05, 0.10, 0.20, 0.30],
            }
        ),
    }
    store_and_verify("commercial", commercial_classes)
def query_OBM_building_classes(credentials, osm_id, aggregated_source_id):
    """Auxiliary function that reads from the 'gde_buildings' table of the test database the
    building classes (and related attributes) stored for the OBM building with ID 'osm_id'
    under the aggregated model with ID 'aggregated_source_id'. Only the first matching row
    is used.

    Args:
        credentials (dict):
            Dictionary containing the credentials needed to connect to the test SQL database.
            The keys of the dictionary need to be:
                host (str):
                    SQL database host address.
                dbname (str):
                    Name of the SQL database.
                port (int):
                    Port where the SQL database can be found.
                username (str):
                    User name to connect to the SQL database.
                password (str):
                    Password associated with 'username'.
        osm_id (int):
            OSM ID of the building to query.
        aggregated_source_id (int):
            ID of the source of the aggregated exposure model associated with the building
            classes.

    Returns:
        occupancy_case (str):
            Occupancy case associated with the OBM building.
        data_unit_id (str):
            ID of the data unit of the 'aggregated_source_id' and 'occupancy_case' associated
            with the OBM building.
        building_class_names (list of str):
            Building classes as per the GEM Building Taxonomy.
        settlement_types (list of str):
            Type of settlements within the data unit. Possible values: "urban", "rural",
            "big_city", "all".
        occupancy_subtypes (list of str):
            Details on the occupancy, if relevant to characterise the building classes.
        probabilities (list of float):
            Probabilities of the building belonging to the building classes.
    """

    query_template = (
        "SELECT occupancy_case, data_unit_id, building_class_names, settlement_types, "
        "occupancy_subtypes, probabilities FROM gde_buildings "
        "WHERE (osm_id=%s AND aggregated_source_id=%s);"
    )
    query = query_template % (osm_id, aggregated_source_id)

    connection = Database(**credentials)
    connection.create_connection_and_cursor()
    connection.cursor.execute(query)
    first_row = connection.cursor.fetchall()[0]

    (
        occupancy_case,
        data_unit_id,
        building_class_names,
        raw_settlement_types,
        occupancy_subtypes,
        probabilities,
    ) = first_row

    # The settlement types are handled as one string of the form "{a,b}" (unlike the other
    # array columns): drop the braces and split it into its elements
    settlement_types = raw_settlement_types.translate(str.maketrans("", "", "{}")).split(",")

    connection.close_connection()

    return (
        occupancy_case,
        data_unit_id,
        building_class_names,
        settlement_types,
        occupancy_subtypes,
        probabilities,
    )
......@@ -198,16 +198,16 @@ def test_assign_building_classes_to_obm_buildings(test_db):
assert case_11223344.shape[0] == 2
expected_class_names = ["A2/HBET:4-6", "B2/H:4"]
expected_proportions = [0.667, 0.333]
expected_probabilities = [0.667, 0.333]
for i in range(len(expected_class_names)):
assert (
round(
case_11223344[case_11223344.building_class_name == expected_class_names[i]][
"proportions"
"probabilities"
].to_numpy()[0],
3,
)
== expected_proportions[i]
== expected_probabilities[i]
)
# Test Case 2: Residential building with unknown number of storeys (all classes returned)
......@@ -249,16 +249,16 @@ def test_assign_building_classes_to_obm_buildings(test_db):
assert case_55667788.shape[0] == 2
expected_class_names = ["C1/HBET:1-2", "C2/HBET:3-"]
expected_proportions = [0.286, 0.714]
expected_probabilities = [0.286, 0.714]
for i in range(len(expected_class_names)):
assert (
round(
case_55667788[case_55667788.building_class_name == expected_class_names[i]][
"proportions"
"probabilities"
].to_numpy()[0],
3,
)
== expected_proportions[i]
== expected_probabilities[i]
)
# Test Case 5: Commercial building (from relation) with 3 storeys, "COM3"
......@@ -267,7 +267,7 @@ def test_assign_building_classes_to_obm_buildings(test_db):
assert (
round(
case_101010[case_101010.building_class_name == "C6/HBET:3-5"][
"proportions"
"probabilities"
].to_numpy()[0],
3,
)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment