Commit 966c0be1 authored by Cecilia Nievas
Browse files

Added feature to retrieve building classes of a data unit

parent 75584e90
Pipeline #40949 passed with stage
in 2 minutes and 22 seconds
......@@ -19,6 +19,7 @@
import logging
import numpy
import shapely
import pandas
import geopandas
from gdeimporter.tools.database import Database
......@@ -476,3 +477,157 @@ class DatabaseQueries:
obm_buildings[column_name] = new_column
return obm_buildings
@staticmethod
def get_building_classes_of_data_unit(
    data_unit_id, occupancy_case, aggregated_source_id, db_gde_tiles_config, db_table
):
    """This function retrieves the building classes and proportions as per
    'aggregated_source_id' associated with a data unit with 'data_unit_id' and
    'occupancy_case', from 'db_table' of the database whose credentials are given in
    'db_gde_tiles_config'. The building classes are defined in terms of three parameters:
    the building_class_name, the settlement_type and the occupancy_subtype.

    If the proportions of the retrieved building classes do not add up to 1.0 (within a
    tolerance of 1e-5), a warning is logged; the values are returned unchanged.

    Args:
        data_unit_id (str):
            ID of the data unit for which the building classes and their proportions will be
            retrieved.
        occupancy_case (str):
            Name of the occupancy case (e.g. "residential", "commercial", "industrial")
            for which the building classes and their proportions will be retrieved.
        aggregated_source_id (int):
            ID of the source of the aggregated exposure model for which the building classes
            and their proportions will be retrieved.
        db_gde_tiles_config (dict):
            Dictionary containing the credentials needed to connect to the SQL database in
            which information on the data units is stored. The keys of the dictionary need
            to be:
                host (str):
                    SQL database host address.
                dbname (str):
                    Name of the SQL database.
                port (int):
                    Port where the SQL database can be found.
                username (str):
                    User name to connect to the SQL database.
                password (str):
                    Password associated with 'username'.
        db_table (str):
            Name of the table of the SQL database where the data-unit buildings are stored.
            It is assumed that this table contains, at least, the following fields:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (enum):
                    Type of settlement within the data unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                storeys_min (int):
                    Minimum number of storeys of the building class.
                storeys_max (int):
                    Maximum number of storeys of the building class.
                aggregated_source_id (int):
                    ID of the source of the aggregated exposure model.
                occupancy_case (enum):
                    SQL enumerated type describing the building occupancy cases.
                data_unit_id (str):
                    ID of the data unit.
                proportions (float):
                    Proportions in which the building class (defined by
                    'building_class_name', 'settlement_type' and 'occupancy_subtype') is
                    present in the data unit.

    Returns:
        building_classes_proportions (Pandas DataFrame):
            DataFrame containing the building classes and their proportions. It has one row
            per building class found (zero rows if no entries match) and comprises the
            following columns:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (enum):
                    Type of settlement within the data unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                storeys_min (int):
                    Minimum number of storeys of the building class (defined by
                    'building_class_name', 'settlement_type' and 'occupancy_subtype').
                storeys_max (int):
                    Maximum number of storeys of the building class (defined by
                    'building_class_name', 'settlement_type' and 'occupancy_subtype').
                proportions (float):
                    Proportions in which the building class (defined by
                    'building_class_name', 'settlement_type' and 'occupancy_subtype') is
                    present in the data unit.
    """

    # Output columns, in the order in which they are selected, with their numpy dtypes
    columns = (
        ("building_class_name", "str"),
        ("settlement_type", "str"),
        ("occupancy_subtype", "str"),
        ("storeys_min", "int"),
        ("storeys_max", "int"),
        ("proportions", "float"),
    )

    # NOTE(review): the filter values are interpolated into the SQL text. If any of them can
    # come from untrusted input, pass them as query parameters of cursor.execute() instead
    # (confirm the placeholder style supported by the driver behind 'Database').
    sql_query = (
        "SELECT building_class_name, settlement_type, occupancy_subtype, "
        "storeys_min, storeys_max, proportions FROM %s "
        "WHERE (data_unit_id='%s' AND occupancy_case='%s' AND "
        "aggregated_source_id=%s);"
    ) % (db_table, data_unit_id, occupancy_case, aggregated_source_id)

    db_gde_tiles = Database(**db_gde_tiles_config)
    db_gde_tiles.create_connection_and_cursor()
    try:
        db_gde_tiles.cursor.execute(sql_query)
        exec_result = db_gde_tiles.cursor.fetchall()
    finally:
        # Release the connection even if the query fails
        db_gde_tiles.close_connection()

    # One array per column; an empty result yields empty arrays of the right dtype, and a
    # single-row result is a valid (one-class) answer
    retrieved = {
        column_name: numpy.array([row[position] for row in exec_result], dtype=column_dtype)
        for position, (column_name, column_dtype) in enumerate(columns)
    }

    # The sum of proportions is only meaningful when at least one entry was found
    proportions = retrieved["proportions"]
    if exec_result and abs(proportions.sum() - 1.0) > 1e-5:
        warning_message = (
            "DatabaseQueries.get_building_classes_of_data_unit: the sum of proportions "
            "of building classes found for 'data_unit_id'=%s, 'occupancy_case'=%s and "
            "'aggregated_source_id'=%s is different from 1.0; actual value is %s."
            % (
                data_unit_id,
                occupancy_case,
                aggregated_source_id,
                "{:.6f}".format(proportions.sum()),
            )
        )
        logger.warning(warning_message)

    building_classes_proportions = pandas.DataFrame(retrieved)

    return building_classes_proportions
......@@ -110,6 +110,8 @@ def main():
)
# Going by data unit so as to minimise intersection operations and because
# building classes are associated with specific data units
# Retrieve OBM buildings
obm_buildings_raw = (
DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case(
occupancy_case,
......@@ -133,6 +135,19 @@ def main():
% (aux_log_string, str(obm_buildings.shape[0]))
)
# Retrieve building classes of this data unit
data_unit_building_classes = DatabaseQueries.get_building_classes_of_data_unit(
data_unit_id,
occupancy_case,
aggregated_source_id,
config.database_gde_tiles,
"data_units_buildings",
)
logger.info(
"%s: %s building classes identified"
% (aux_log_string, str(data_unit_building_classes.shape[0]))
)
# Leave the program
logger.info("gde-core has finished")
sys.exit()
......
-- Reset the test database so that this script can be re-run from scratch:
-- drop the tables first, then the enumerated types and the PostGIS extension.
DROP TABLE IF EXISTS aggregated_sources;
DROP TABLE IF EXISTS data_units;
DROP TABLE IF EXISTS obm_buildings;
DROP TABLE IF EXISTS data_units_buildings;
DROP TYPE IF EXISTS occupancycase;
DROP TYPE IF EXISTS settlement;
DROP EXTENSION IF EXISTS postgis;
-- Re-create the extension and the enumerated types used by the tables below.
CREATE EXTENSION postgis;
-- Occupancy cases accepted in the 'occupancy_case' columns.
CREATE TYPE occupancycase AS ENUM ('residential', 'commercial', 'industrial');
-- Settlement types accepted in the 'settlement_type' column of data_units_buildings.
CREATE TYPE settlement AS ENUM ('urban', 'rural', 'big_city', 'all');
CREATE TABLE aggregated_sources
(
......@@ -109,3 +112,52 @@ INSERT INTO obm_buildings(osm_id, relation_id, occupancy, occupancy_case, quadke
VALUES (
77889900, -202020, 'COM2', 'commercial', '122010321033023130',
ST_GeomFromText('POLYGON((15.0494 37.4805,15.0496 37.4803,15.0494 37.4802,15.0492 37.4804,15.0494 37.4805))'));
-- Building classes associated with each data unit, together with the proportions in
-- which they are present. A building class is identified by the combination of
-- building_class_name, settlement_type and occupancy_subtype.
CREATE TABLE data_units_buildings
(
building_class_name VARCHAR,
settlement_type settlement,
occupancy_subtype VARCHAR,
aggregated_source_id SMALLINT,
exposure_entity CHAR(3),
occupancy_case occupancycase,
data_unit_id VARCHAR,
proportions FLOAT,
census_people_per_building FLOAT,
total_cost_per_building FLOAT,
storeys_min SMALLINT,
storeys_max SMALLINT,
-- One row per building class, per data unit, occupancy case and aggregated source.
PRIMARY KEY (
data_unit_id,
occupancy_case,
aggregated_source_id,
building_class_name,
settlement_type,
occupancy_subtype
)
);
-- Test rows for data unit 'ABC_10269' and aggregated source 2: five residential and six
-- commercial building classes (no industrial ones). The proportions of each occupancy
-- case add up to 1.0.
-- NOTE(review): storeys_max = 9999 appears on the classes whose name has an open-ended
-- height range ('HBET:7-', 'HBET:3-'); presumably it stands for "no upper limit".
INSERT INTO data_units_buildings(building_class_name,
settlement_type,
occupancy_subtype,
aggregated_source_id,
exposure_entity,
occupancy_case,
data_unit_id,
proportions,
census_people_per_building,
total_cost_per_building,
storeys_min,
storeys_max)
VALUES ('A1/HBET:1-3', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.20, 0.0, 0.0, 1, 3),
('A2/HBET:4-6', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.30, 0.0, 0.0, 4, 6),
('A3/HBET:7-', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.10, 0.0, 0.0, 7, 9999),
('B1/HBET:1-3', 'rural', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.25, 0.0, 0.0, 1, 3),
('B2/H:4', 'rural', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.15, 0.0, 0.0, 4, 4),
('C1/HBET:1-2', 'urban', 'Hotels', 2, 'ABC', 'commercial', 'ABC_10269', 0.10, 0.0, 0.0, 1, 2),
('C2/HBET:3-', 'urban', 'Hotels', 2, 'ABC', 'commercial', 'ABC_10269', 0.25, 0.0, 0.0, 3, 9999),
('C3/H:1', 'urban', 'Trade', 2, 'ABC', 'commercial', 'ABC_10269', 0.05, 0.0, 0.0, 1, 1),
('C4/HBET:2-3', 'urban', 'Trade', 2, 'ABC', 'commercial', 'ABC_10269', 0.10, 0.0, 0.0, 2, 3),
('C5/HBET:1-2', 'urban', 'Offices', 2, 'ABC', 'commercial', 'ABC_10269', 0.20, 0.0, 0.0, 1, 2),
('C6/HBET:3-5', 'urban', 'Offices', 2, 'ABC', 'commercial', 'ABC_10269', 0.30, 0.0, 0.0, 3, 5);
......@@ -229,3 +229,113 @@ def test_get_OBM_buildings_in_data_unit_by_occupancy_case(test_db):
)
)
assert "TypeError" in str(excinfo.type)
def test_get_building_classes_of_data_unit(test_db):
    """Check the building classes retrieved for data unit 'ABC_10269' of the test database,
    for the residential, commercial and industrial (no entries) occupancy cases."""

    # Database connection (the Configuration class will define the credentials based on
    # whether the code is running in the CI or locally)
    config_path = os.path.join(
        os.path.dirname(__file__), "data", "config_for_testing_good.yml"
    )
    config = Configuration(config_path)

    expected_columns = [
        "building_class_name",
        "settlement_type",
        "occupancy_subtype",
        "storeys_min",
        "storeys_max",
        "proportions",
    ]

    def retrieve(occupancy_case):
        # Query the test table for the given occupancy case
        return DatabaseQueries.get_building_classes_of_data_unit(
            "ABC_10269",
            occupancy_case,
            2,
            config.database_gde_tiles,
            "data_units_buildings",
        )

    def check_contents(classes, expected_names, expected_total):
        # Number of rows, columns present, sum of proportions and class names
        assert classes.shape[0] == len(expected_names)
        for col_name in expected_columns:
            assert col_name in classes.columns
        assert round(classes["proportions"].sum(), 5) == expected_total
        for name in expected_names:
            assert name in classes["building_class_name"].to_numpy()

    def first_value(classes, class_name, column):
        # Value of 'column' in the first row whose building class is 'class_name'
        matching_rows = classes[classes.building_class_name == class_name]
        return matching_rows[column].to_numpy()[0]

    # Residential buildings
    residential = retrieve("residential")
    check_contents(
        residential,
        ["A1/HBET:1-3", "A2/HBET:4-6", "A3/HBET:7-", "B1/HBET:1-3", "B2/H:4"],
        1.0,
    )
    assert first_value(residential, "A2/HBET:4-6", "storeys_min") == 4
    assert first_value(residential, "A2/HBET:4-6", "storeys_max") == 6
    assert round(first_value(residential, "B1/HBET:1-3", "proportions"), 2) == 0.25

    # Commercial buildings
    commercial = retrieve("commercial")
    check_contents(
        commercial,
        [
            "C1/HBET:1-2",
            "C2/HBET:3-",
            "C3/H:1",
            "C4/HBET:2-3",
            "C5/HBET:1-2",
            "C6/HBET:3-5",
        ],
        1.0,
    )
    assert first_value(commercial, "C2/HBET:3-", "storeys_min") == 3
    assert first_value(commercial, "C2/HBET:3-", "storeys_max") == 9999
    assert round(first_value(commercial, "C5/HBET:1-2", "proportions"), 2) == 0.20

    # Industrial buildings (no entries to be found)
    industrial = retrieve("industrial")
    check_contents(industrial, [], 0.0)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment