Commit 6168e7a2 authored by Cecilia Nievas
Browse files

Added feature to assign building classes to OBM buildings

parent 966c0be1
Pipeline #41181 passed with stage
in 2 minutes and 24 seconds
......@@ -544,7 +544,7 @@ class DatabaseQueries:
the following columns:
building_class_name (str):
Building class as per the GEM Building Taxonomy.
settlement_type (enum):
settlement_type (str):
Type of settlement within the data unit. Possible values: "urban",
"rural", "big_city", "all".
occupancy_subtype (str):
......
......@@ -148,6 +148,17 @@ def main():
% (aux_log_string, str(data_unit_building_classes.shape[0]))
)
# Assign building classes to OBM buildings
obm_buildings_building_classes = (
GDEProcessor.assign_building_classes_to_obm_buildings(
obm_buildings, data_unit_building_classes, occupancy_case
)
)
logger.info(
"%s: %s OBM buildings with assigned building classes"
% (aux_log_string, str(len(obm_buildings_building_classes.keys())))
)
# Leave the program
logger.info("gde-core has finished")
sys.exit()
......
......@@ -307,3 +307,379 @@ class GDEProcessor:
)
return unique_occupancy
@staticmethod
def assign_building_classes_to_obm_buildings(
    obm_buildings, data_unit_building_classes, occupancy_case
):
    """This function assigns building classes and proportions from
    'data_unit_building_classes' to each of the OBM buildings in 'obm_buildings', by calling
    'GDEProcessor.assign_building_classes_to_obm_building' once per building. The latter
    selects from 'data_unit_building_classes' only the building classes that are compatible
    with the attributes of the building, and the proportions are recalculated to reflect
    only the building classes retained.

    Args:
        obm_buildings (Pandas DataFrame):
            DataFrame (or GeoDataFrame) with data on OBM buildings. The columns read by this
            function are:
                osm_id (int):
                    OpenStreetMap (OSM) ID of the building. If the building is represented
                    by a relation, this is the ID of the relation.
                storeys (float):
                    Number of storeys of the building (maximum of all components if building
                    is an OSM relation). Treated as floats so as to be able to use numpy.nan
                    for missing values.
                occupancy (str):
                    Occupancy of the building as per the GEM Building Taxonomy v3.0. Missing
                    values can be "nan" or "None".
        data_unit_building_classes (Pandas DataFrame):
            DataFrame containing the building classes and their proportions. It comprises
            the following columns:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (str):
                    Type of settlement within the data unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                storeys_min (int):
                    Minimum number of storeys of the building class.
                storeys_max (int):
                    Maximum number of storeys of the building class.
                proportions (float):
                    Proportions in which the building class is present in the data unit.
        occupancy_case (str):
            Name of the occupancy case (e.g. "residential", "commercial", "industrial").

    Returns:
        obm_buildings_building_classes (dict):
            Dictionary whose keys are the values of the 'osm_id' column and whose values are
            the Pandas DataFrames returned for each building by
            'GDEProcessor.assign_building_classes_to_obm_building'. If an 'osm_id' appears
            more than once in 'obm_buildings', the last occurrence is the one retained.
    """

    # Nothing to assign; returning here keeps the behaviour for frames without rows (their
    # columns are never accessed).
    if obm_buildings.shape[0] == 0:
        return {}

    # Convert each column to an array once, instead of once per building.
    osm_ids = obm_buildings["osm_id"].to_numpy()
    storeys_per_building = obm_buildings["storeys"].to_numpy()
    occupancy_per_building = obm_buildings["occupancy"].to_numpy()

    return {
        osm_id: GDEProcessor.assign_building_classes_to_obm_building(
            osm_id, storeys, occupancy, occupancy_case, data_unit_building_classes
        )
        for osm_id, storeys, occupancy in zip(
            osm_ids, storeys_per_building, occupancy_per_building
        )
    }
@staticmethod
def assign_building_classes_to_obm_building(
    osm_id, storeys, occupancy, occupancy_case, data_unit_building_classes
):
    """This function assigns building classes and proportions from
    'data_unit_building_classes' to the OBM building with ID 'osm_id'. When building
    attributes such as 'storeys' or 'occupancy' do not allow for a narrowing down of all the
    potential building classes, the output consists of all building classes in the input
    'data_unit_building_classes'. The function applies rules to attempt to narrow down the
    potential building classes. If the application of a rule leads to no building classes
    from 'data_unit_building_classes' being feasible, this means that there is an
    inconsistency between 'data_unit_building_classes' and the attributes of the building;
    the output in this case includes all potential building classes (with their input
    proportions) and a warning is logged.

    The rules that are currently implemented are:
        narrow_down_by_storeys:
            Only the potential building classes whose (ranges of) numbers of storeys are
            compatible with the input 'storeys' are selected (when the number of storeys of
            the OBM building is known).
        narrow_down_by_commercial_occupancy_details:
            ESRM20 commercial classes include details of sub-occupancy such as whether they
            correspond to hotels/motels/guest lodges/etc, restaurants/bars/cafes, offices,
            or retail/wholesale trade. When the input 'occupancy' corresponds to the GEM
            Building Taxonomy representation of one of these cases, only compatible building
            classes are selected. This rule is only applied when 'occupancy_case' is
            "commercial" and the previous rule left at least one building class.

    Args:
        osm_id (int):
            OpenStreetMap (OSM) ID of the building to which building classes will be
            assigned. Only used in the warning message.
        storeys (float):
            Number of storeys of the OBM building with 'osm_id'. Treated as floats so as to
            be able to use numpy.nan for missing values.
        occupancy (str):
            Occupancy of the OBM building with 'osm_id' as per the GEM Building Taxonomy
            v3.0.
        occupancy_case (str):
            Name of the occupancy case (e.g. "residential", "commercial", "industrial").
        data_unit_building_classes (Pandas DataFrame):
            DataFrame containing the building classes and proportions corresponding to the
            data unit and occupancy case to which the OBM building with 'osm_id' belongs.
            It is not modified. It comprises the following columns:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (str):
                    Type of settlement within the data unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                storeys_min (int):
                    Minimum number of storeys of the building class.
                storeys_max (int):
                    Maximum number of storeys of the building class.
                proportions (float):
                    Proportions in which the building class is present in the data unit.

    Returns:
        obm_building_building_classes (Pandas DataFrame):
            DataFrame with the building classes and proportions assigned to the input OBM
            building. It contains the columns of 'data_unit_building_classes' except
            'storeys_min' and 'storeys_max', irrespective of whether the narrowing down
            succeeded or all building classes had to be assigned:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (str):
                    Type of settlement within the data unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                proportions (float):
                    Proportions assigned to the building class for this building.
    """

    obm_building_building_classes = deepcopy(data_unit_building_classes)

    # Narrow down by number of storeys (only possible when the number of storeys is known)
    if not numpy.isnan(storeys):
        obm_building_building_classes = GDEProcessor.narrow_down_by_storeys(
            storeys, obm_building_building_classes
        )

    # Narrow down by commercial occupancy details
    if (
        obm_building_building_classes.shape[0] > 0
        and occupancy_case == "commercial"
        and occupancy in ("RES3", "COM5", "COM3", "COM1", "COM2")
    ):
        obm_building_building_classes = (
            GDEProcessor.narrow_down_by_commercial_occupancy_details(
                occupancy, obm_building_building_classes
            )
        )

    # No building class left: fall back to all the building classes of the data unit
    if obm_building_building_classes.shape[0] < 1:
        warning_message = (
            "Attempt of narrowing down building classes assigned to OBM building with "
            "'osm_id' = %s resulted in no building classes left; all building classes were "
            "assigned to 'osm_id' = %s despite this inconsistency" % (osm_id, osm_id)
        )
        logger.warning(warning_message)
        obm_building_building_classes = deepcopy(data_unit_building_classes)

    # The storeys columns are dropped only now, so that the fallback above yields the same
    # columns as a successful narrowing down.
    return obm_building_building_classes.drop(columns=["storeys_min", "storeys_max"])
@staticmethod
def narrow_down_by_storeys(storeys, building_classes):
    """This function retains from 'building_classes' the rows whose range of storeys
    ('storeys_min' to 'storeys_max', both inclusive) contains 'storeys', and passes them to
    'GDEProcessor._recalculate_building_classes_proportions' so that the proportions of the
    retained building classes exclude the discarded ones.

    If 'storeys' is numpy.nan, no filtering takes place and all 'building_classes' are
    passed on. If no classes from 'building_classes' are compatible with 'storeys', the
    output does not contain any rows.

    Args:
        storeys (float):
            Number of storeys of the OBM building. Treated as floats so as to be able to
            use numpy.nan for missing values.
        building_classes (Pandas DataFrame):
            DataFrame containing the building classes and proportions from which suitable
            classes will be selected for the target OBM building. It comprises the following
            columns:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (str):
                    Type of settlement within the data unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                storeys_min (int):
                    Minimum number of storeys of the building class.
                storeys_max (int):
                    Maximum number of storeys of the building class.
                proportions (float):
                    Proportions in which the building class is present in the data unit.

    Returns:
        obm_building_classes (Pandas DataFrame):
            DataFrame containing the building classes and proportions selected for the
            target OBM building as described above. The columns are the same as those of the
            input 'building_classes'.
    """

    if numpy.isnan(storeys):
        # Unknown number of storeys: every building class remains a candidate
        candidates = deepcopy(building_classes)
    else:
        at_or_above_min = building_classes["storeys_min"] <= storeys
        at_or_below_max = building_classes["storeys_max"] >= storeys
        candidates = building_classes.loc[at_or_above_min & at_or_below_max]

    return GDEProcessor._recalculate_building_classes_proportions(candidates)
@staticmethod
def narrow_down_by_commercial_occupancy_details(occupancy, building_classes):
    """This function retains from 'building_classes' the rows whose 'occupancy_subtype' is
    compatible with the input 'occupancy', and passes them to
    'GDEProcessor._recalculate_building_classes_proportions' so that the proportions of the
    retained building classes exclude the discarded ones.

    ESRM20 commercial classes include details of sub-occupancy such as whether they
    correspond to hotels/motels/guest lodges/etc, restaurants/bars/cafes, offices or
    retail/wholesale trade. When the input 'occupancy' corresponds to the GEM Building
    Taxonomy representation of one of these cases, only compatible building classes are
    selected.

    Values of 'occupancy' that are considered by this function for filtering
    'building_classes', and the 'occupancy_subtype' they select, are:
        - "RES3" (hotels/motels/guest lodges/etc) -> "Hotels"
        - "COM5" (restaurants/bars/cafes) -> "Hotels"
        - "COM3" (offices, professional/technical services) -> "Offices"
        - "COM1" (retail trade) -> "Trade"
        - "COM2" (wholesale trade and storage) -> "Trade"

    Any other values of 'occupancy' result in all 'building_classes' being passed on
    without filtering.

    Args:
        occupancy (str):
            Occupancy of the building as per the GEM Building Taxonomy v3.0.
        building_classes (Pandas DataFrame):
            DataFrame containing the building classes and proportions from which suitable
            classes will be selected for the target OBM building. It comprises the following
            columns:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (str):
                    Type of settlement within the data unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                storeys_min (int):
                    Minimum number of storeys of the building class.
                storeys_max (int):
                    Maximum number of storeys of the building class.
                proportions (float):
                    Proportions in which the building class is present in the data unit.

    Returns:
        obm_building_classes (Pandas DataFrame):
            DataFrame containing the building classes and proportions selected for the
            target OBM building as described above. The columns are the same as those of the
            input 'building_classes'.
    """

    # RES3 and COM5 are both under "Hotels" in ESRM20; COM1 (retail trade) and COM2
    # (wholesale trade and storage) are both under "Trade".
    subtype_by_occupancy = {
        "RES3": "Hotels",
        "COM5": "Hotels",
        "COM3": "Offices",
        "COM1": "Trade",
        "COM2": "Trade",
    }
    wanted_subtype = subtype_by_occupancy.get(occupancy)

    if wanted_subtype is None:
        # Occupancy carries no usable detail: every building class remains a candidate
        candidates = deepcopy(building_classes)
    else:
        has_wanted_subtype = building_classes["occupancy_subtype"] == wanted_subtype
        candidates = building_classes.loc[has_wanted_subtype]

    return GDEProcessor._recalculate_building_classes_proportions(candidates)
@staticmethod
def _recalculate_building_classes_proportions(building_classes):
    """This function recalculates the 'proportions' column of 'building_classes', which may
    or may not add up to one when passed as input to the function, by dividing each value by
    the sum of the column.

    The input is not modified: the rescaling is carried out on a copy. This matters because
    the callers pass filtered selections of other DataFrames, and assigning into such a
    selection is the pattern behind Pandas' chained-assignment warning.

    NOTE: a sum of proportions equal to zero is not treated specially; the division then
    follows NumPy's floating-point rules.

    Args:
        building_classes (Pandas DataFrame):
            DataFrame containing building classes and proportions. The only column read by
            this function is:
                proportions (float):
                    Proportions in which the building class (defined by
                    'building_class_name', 'settlement_type' and 'occupancy_subtype') is
                    present in the data unit.

    Returns:
        Copy of 'building_classes' with re-calculated proportions. If 'building_classes'
        has no rows, the copy is returned without any recalculation.
    """

    rescaled = building_classes.copy()

    if rescaled.shape[0] > 0:
        old_proportions = rescaled["proportions"].to_numpy()
        rescaled["proportions"] = old_proportions / old_proportions.sum()

    return rescaled
......@@ -84,7 +84,10 @@ CREATE TABLE obm_buildings
INSERT INTO obm_buildings(osm_id, storeys, occupancy, occupancy_case, quadkey, geometry)
VALUES (
11223344, 4, 'RES2', 'residential', '122010321033023130',
ST_GeomFromText('POLYGON((15.0487 37.4812,15.0489 37.4810,15.0486 37.4808,15.0484 37.4810,15.0487 37.4812))'));
ST_GeomFromText('POLYGON((15.0487 37.4812,15.0489 37.4810,15.0486 37.4808,15.0484 37.4810,15.0487 37.4812))')),
(
99001122, 15, 'RES2', 'residential', '122010321033023130',
ST_GeomFromText('POLYGON((15.0490 37.4813,15.0490 37.4812,15.0488 37.4812,15.0488 37.4813,15.0490 37.4813))'));
-- Buildings that are not part of a relation and do not have number of storeys
INSERT INTO obm_buildings(osm_id, occupancy, occupancy_case, quadkey, geometry)
VALUES (
......@@ -152,7 +155,7 @@ INSERT INTO data_units_buildings(building_class_name,
storeys_max)
VALUES ('A1/HBET:1-3', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.20, 0.0, 0.0, 1, 3),
('A2/HBET:4-6', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.30, 0.0, 0.0, 4, 6),
('A3/HBET:7-', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.10, 0.0, 0.0, 7, 9999),
('A3/HBET:7-12', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.10, 0.0, 0.0, 7, 12),
('B1/HBET:1-3', 'rural', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.25, 0.0, 0.0, 1, 3),
('B2/H:4', 'rural', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.15, 0.0, 0.0, 4, 4),
('C1/HBET:1-2', 'urban', 'Hotels', 2, 'ABC', 'commercial', 'ABC_10269', 0.10, 0.0, 0.0, 1, 2),
......
......@@ -171,7 +171,7 @@ def test_get_OBM_buildings_in_data_unit_by_occupancy_case(test_db):
"obm_buildings",
)
assert returned_obm_buildings.shape[0] == 2
assert returned_obm_buildings.shape[0] == 3
for col_name in expected_columns:
assert col_name in returned_obm_buildings.columns
assert 11223344 in returned_obm_buildings["osm_id"].to_numpy()
......@@ -255,7 +255,7 @@ def test_get_building_classes_of_data_unit(test_db):
expected_bdg_class_names = [
"A1/HBET:1-3",
"A2/HBET:4-6",
"A3/HBET:7-",
"A3/HBET:7-12",
"B1/HBET:1-3",
"B2/H:4",
]
......
......@@ -19,6 +19,7 @@
import os
import logging
import numpy
import pandas
from gdecore.processor import GDEProcessor
from gdecore.configuration import Configuration
from gdecore.database_queries import DatabaseQueries
......@@ -56,9 +57,10 @@ def test_post_process_obm_relations(test_db):
returned_obm_buildings = GDEProcessor.post_process_obm_relations(raw_obm_buildings)
assert returned_obm_buildings.shape[0] == 2
assert returned_obm_buildings.shape[0] == 3
assert 11223344 in returned_obm_buildings["osm_id"].to_numpy()
assert 22334455 in returned_obm_buildings["osm_id"].to_numpy()
assert 99001122 in returned_obm_buildings["osm_id"].to_numpy()
# Group of commercial buildings, some in relations, some not
raw_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case(
......@@ -114,3 +116,295 @@ def test_select_max_of_array():
def test_ensure_unique_occupancy():
# Tested within test_post_process_obm_relations()
pass
def test_assign_building_classes_to_obm_buildings(test_db):
"""The test cases cover a range of combinations of outcomes from the two rules that are used
to narrow down the building classes that are assigned to an OBM building. The first rule
narrows down as a function of the number of storeys, the second rule narrows down as a
function of occupancy details when the occupancy case is commercial and the occupancy falls
within a set of cases.
The test cases are the following:
- Test Case 1: Residential building with 4 storeys:
- The number of storeys first narrows down the possibilities.
- The second rule does not apply because it is residential.
- Two building classes are assigned at the end.
- Test Case 2: Residential building with unknown number of storeys:
- No narrowing down with the first rule, as the number of storeys is unknown
- The second rule does not apply because it is residential.
- As no narrowing down occurs, all building classes are assigned.
- Test Case 3: Residential building with 15 storeys:
- The number of storeys results in no possible classes left (all classes are incompatible)
- The second rule does not apply because it is residential.
- As all building classes are incompatible, all building classes are assigned.
- Test Case 4: Commercial "RES3" building with unknown number of storeys:
- No narrowing down with the first rule, as the number of storeys is unknown
- The second rule leads to narrowing down.
- Two building classes are assigned at the end.
- Test Case 5: Commercial "COM3" building (from relation) with 3 storeys:
- The number of storeys first narrows down the possibilities.
- The second rule leads to further narrowing down.
- One building class is assigned at the end.
- Test Case 6: Commercial "COM2" building (from relation) with 4 storeys:
- The number of storeys first narrows down the possibilities.
- The second rule leads to further narrowing down but results in no classes left.
- As all building classes are incompatible, all building classes are assigned.
"""
# Database connection (the Configuration class will define the credentials based on whether
# the code is running in the CI or locally)
config = Configuration(
os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml")
)
(
returned_data_units_ids,
returned_data_units_geometries,
_,
) = DatabaseQueries.get_data_unit_ids_geometries_of_entity_and_occupancy_case(
"ABC", "residential", 2, config.database_gde_tiles, "data_units"
) # auxiliary, to retrieve the geometry of the data unit
geometry = returned_data_units_geometries[
numpy.where(returned_data_units_ids == "ABC_10269")[0][0]
]
# Group of residential buildings that do not belong to relations
raw_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case(
"residential",
geometry,
config.database_obm_buildings,
"obm_buildings",
)
obm_buildings = GDEProcessor.post_process_obm_relations(raw_obm_buildings)
data_unit_building_classes = DatabaseQueries.get_building_classes_of_data_unit(
"ABC_10269", "residential", 2, config.database_gde_tiles, "data_units_buildings"
)
returned_obm_buildings_building_classes = (
GDEProcessor.assign_building_classes_to_obm_buildings(
obm_buildings, data_unit_building_classes, "residential"
)
)
assert len(returned_obm_buildings_building_classes.keys()) == 3
# Test Case 1: Residential building with 4 storeys
case_11223344 = returned_obm_buildings_building_classes[11223344]
assert case_11223344.shape[0] == 2
expected_class_names = ["A2/HBET:4-6", "B2/H:4"]
expected_proportions = [0.667, 0.333]
for i in range(len(expected_class_names)):
assert (
round(
case_11223344[case_11223344.building_class_name == expected_class_names[i]][
"proportions"
].to_numpy()[0],
3,
)
== expected_proportions[i]
)
# Test Case 2: Residential building with unknown number of storeys (all classes returned)
case_22334455 = returned_obm_buildings_building_classes[22334455]
assert case_22334455.shape[0] == data_unit_building_classes.shape[0]
for bdg_class_name in data_unit_building_classes["building_class_name"].to_numpy():
assert bdg_class_name in case_22334455["building_class_name"].to_numpy()
# Test Case 3: Residential building with 15 storeys (returns all classes)
case_99001122 = returned_obm_buildings_building_classes[99001122]
assert case_99001122.shape[0] == data_unit_building_classes.shape[0]
for bdg_class_name in data_unit_building_classes["building_class_name"].to_numpy():
assert bdg_class_name in case_99001122["building_class_name"].to_numpy()
# Group of commercial buildings, some in relations, some not
raw_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case(
"commercial",
geometry,
config.database_obm_buildings,
"obm_buildings",
)
obm_buildings = GDEProcessor.post_process_obm_relations(raw_obm_buildings)
data_unit_building_classes = DatabaseQueries.get_building_classes_of_data_unit(
"ABC_10269", "commercial", 2, config.database_gde_tiles, "data_units_buildings"
)
returned_obm_buildings_building_classes = (
GDEProcessor.assign_building_classes_to_obm_buildings(
obm_buildings, data_unit_building_classes, "commercial"
)
)
assert len(returned_obm_buildings_building_classes.keys()) == 3
# Test Case 4: Commercial building with unknown number of storeys, "RES3"
case_55667788 = returned_obm_buildings_building_classes[55667788]
assert case_55667788.shape[0] == 2
expected_class_names = ["C1/HBET:1-2", "C2/HBET:3-"]
expected_proportions = [0.286, 0.714]
for i in range(len(expected_class_names)):
assert (
round(
case_55667788[case_55667788.building_class_name == expected_class_names[i]][
"proportions"
].to_numpy()[0],
3,
)
== expected_proportions[i]
)
# Test Case 5: Commercial building (from relation) with 3 storeys, "COM3"
case_101010 = returned_obm_buildings_building_classes[-101010]