@staticmethod
def assign_building_classes_to_obm_buildings(
    obm_buildings, data_unit_building_classes, occupancy_case
):
    """This function assigns building classes and proportions from
    'data_unit_building_classes' to each of the OBM buildings in 'obm_buildings', by calling
    'GDEProcessor.assign_building_classes_to_obm_building' once per row of 'obm_buildings'
    and collecting the results in a dictionary.

    Args:
        obm_buildings (Pandas DataFrame):
            Data on OBM buildings. The columns read by this function are:
                osm_id (int):
                    OpenStreetMap (OSM) ID of the building. It is used as the key of the
                    output dictionary.
                storeys (float):
                    Number of storeys of the building. Treated as floats so as to be able
                    to use numpy.nan for missing values.
                occupancy (str):
                    Occupancy of the building as per the GEM Building Taxonomy v3.0.
        data_unit_building_classes (Pandas DataFrame):
            DataFrame containing the building classes and their proportions. It is passed
            on unchanged to 'GDEProcessor.assign_building_classes_to_obm_building' for
            every building. It comprises the following columns:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (str):
                    Type of settlement within the data unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                storeys_min (int):
                    Minimum number of storeys of the building class.
                storeys_max (int):
                    Maximum number of storeys of the building class.
                proportions (float):
                    Proportions in which the building class is present in the data unit.
        occupancy_case (str):
            Name of the occupancy case (e.g. "residential", "commercial", "industrial").

    Returns:
        obm_buildings_building_classes (dict):
            Dictionary whose keys are the OSM IDs found in 'obm_buildings' and whose
            values are the Pandas DataFrames returned by
            'GDEProcessor.assign_building_classes_to_obm_building' for each building. If
            an OSM ID appears in more than one row, the result of its last row is kept.
    """

    # Extract each column only once; doing it inside the loop would rebuild the three
    # arrays for every single building.
    osm_ids = obm_buildings["osm_id"].to_numpy()
    storeys = obm_buildings["storeys"].to_numpy()
    occupancies = obm_buildings["occupancy"].to_numpy()

    obm_buildings_building_classes = {
        osm_id: GDEProcessor.assign_building_classes_to_obm_building(
            osm_id, building_storeys, occupancy, occupancy_case, data_unit_building_classes
        )
        for osm_id, building_storeys, occupancy in zip(osm_ids, storeys, occupancies)
    }

    return obm_buildings_building_classes
@staticmethod
def assign_building_classes_to_obm_building(
    osm_id, storeys, occupancy, occupancy_case, data_unit_building_classes
):
    """This function assigns building classes and proportions from
    'data_unit_building_classes' to the OBM building with ID 'osm_id'. Rules are applied to
    attempt to narrow down the potential building classes. When no rule applies, the output
    consists of all building classes in 'data_unit_building_classes'. If the application of
    the rules leaves no building class, there is an inconsistency between
    'data_unit_building_classes' and the attributes of the building; in this case a warning
    is logged and all building classes of 'data_unit_building_classes' are assigned.

    The rules that are currently implemented are:
        narrow_down_by_storeys:
            Applied when 'storeys' is not numpy.nan. Only the building classes whose range
            of number of storeys contains 'storeys' are kept.
        narrow_down_by_commercial_occupancy_details:
            Applied when at least one building class is left, 'occupancy_case' is
            "commercial" and 'occupancy' is one of "RES3", "COM5", "COM3", "COM1", "COM2".
            Only the building classes with a compatible 'occupancy_subtype' are kept.

    Args:
        osm_id (int):
            OpenStreetMap (OSM) ID of the building to which building classes will be
            assigned. Only used for the warning message.
        storeys (float):
            Number of storeys of the OBM building with 'osm_id'. Treated as floats so as to
            be able to use numpy.nan for missing values.
        occupancy (str):
            Occupancy of the OBM building with 'osm_id' as per the GEM Building Taxonomy
            v3.0.
        occupancy_case (str):
            Name of the occupancy case (e.g. "residential", "commercial", "industrial").
        data_unit_building_classes (Pandas DataFrame):
            DataFrame containing the building classes and proportions corresponding to the
            data unit and occupancy case to which the OBM building belongs. It is not
            modified by this function. It comprises the following columns:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (str):
                    Type of settlement within the data unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                storeys_min (int):
                    Minimum number of storeys of the building class.
                storeys_max (int):
                    Maximum number of storeys of the building class.
                proportions (float):
                    Proportions in which the building class is present in the data unit.

    Returns:
        obm_building_building_classes (Pandas DataFrame):
            DataFrame with the building classes and proportions assigned to the input OBM
            building. It contains the columns of 'data_unit_building_classes' except for
            'storeys_min' and 'storeys_max', both when narrowing down succeeds and when
            all building classes are assigned because of an inconsistency.
    """

    obm_building_building_classes = deepcopy(data_unit_building_classes)

    # Narrow down by number of storeys (only when the number of storeys is known)
    if not numpy.isnan(storeys):
        obm_building_building_classes = GDEProcessor.narrow_down_by_storeys(
            storeys, obm_building_building_classes
        )

    # Narrow down by commercial occupancy details (pointless if nothing is left already)
    if (
        obm_building_building_classes.shape[0] > 0
        and occupancy_case == "commercial"
        and occupancy in ("RES3", "COM5", "COM3", "COM1", "COM2")
    ):
        obm_building_building_classes = (
            GDEProcessor.narrow_down_by_commercial_occupancy_details(
                occupancy, obm_building_building_classes
            )
        )

    if obm_building_building_classes.shape[0] < 1:
        logger.warning(
            "Attempt of narrowing down building classes assigned to OBM building with "
            "'osm_id' = %s resulted in no building classes left; all building classes were "
            "assigned to 'osm_id' = %s despite this inconsistency",
            osm_id,
            osm_id,
        )

        obm_building_building_classes = deepcopy(data_unit_building_classes)

    # The storeys columns are dropped only now, after the fall-back above, so that the
    # output has the same columns irrespective of the path followed.
    obm_building_building_classes = obm_building_building_classes.drop(
        columns=["storeys_min", "storeys_max"]
    )

    return obm_building_building_classes
@staticmethod
def narrow_down_by_storeys(storeys, building_classes):
    """This function keeps from 'building_classes' only those that are compatible with the
    input number of storeys ('storeys'), i.e. those for which
    'storeys_min' <= 'storeys' <= 'storeys_max'. The proportions of the retained building
    classes are recalculated by 'GDEProcessor._recalculate_building_classes_proportions'.

    If 'storeys' is numpy.nan, then all 'building_classes' are retained.

    If no classes from 'building_classes' are compatible with 'storeys', the output
    'obm_building_classes' does not contain any rows.

    Args:
        storeys (float):
            Number of storeys of the OBM building. Treated as floats so as to be able to
            use numpy.nan for missing values.
        building_classes (Pandas DataFrame):
            DataFrame containing the building classes and proportions from which suitable
            classes will be selected for the target OBM building. It comprises the
            following columns:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (str):
                    Type of settlement within the data unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                storeys_min (int):
                    Minimum number of storeys of the building class.
                storeys_max (int):
                    Maximum number of storeys of the building class.
                proportions (float):
                    Proportions in which the building class is present in the data unit.

    Returns:
        obm_building_classes (Pandas DataFrame):
            DataFrame containing the building classes and proportions selected for the
            target OBM building as described above. The columns are the same as those of
            the input 'building_classes'.
    """

    if numpy.isnan(storeys):
        building_classes_filtered = building_classes.copy()
    else:
        storeys_within_range = (building_classes["storeys_min"] <= storeys) & (
            building_classes["storeys_max"] >= storeys
        )
        # Explicit copy: the proportions are overwritten afterwards, and assigning into a
        # bare slice of 'building_classes' makes pandas raise SettingWithCopyWarning.
        building_classes_filtered = building_classes.loc[storeys_within_range].copy()

    obm_building_classes = GDEProcessor._recalculate_building_classes_proportions(
        building_classes_filtered
    )

    return obm_building_classes
@staticmethod
def narrow_down_by_commercial_occupancy_details(occupancy, building_classes):
    """This function keeps from 'building_classes' only those whose 'occupancy_subtype' is
    compatible with the input 'occupancy'. The proportions of the retained building classes
    are recalculated by 'GDEProcessor._recalculate_building_classes_proportions'.

    ESRM20 commercial classes include details of sub-occupancy such as whether they
    correspond to hotels/motels/guest lodges/etc, restaurants/bars/cafes, offices or
    retail/wholesale trade. When the input 'occupancy' corresponds to the GEM Building
    Taxonomy representation of one of these cases, only compatible building classes are
    selected.

    Values of 'occupancy' that are considered by this function for filtering
    'building_classes', and the 'occupancy_subtype' they select, are:
        - "RES3" (hotels/motels/guest lodges/etc): "Hotels"
        - "COM5" (restaurants/bars/cafes): "Hotels"
        - "COM3" (offices, professional/technical services): "Offices"
        - "COM1" (retail trade): "Trade"
        - "COM2" (wholesale trade and storage): "Trade"
    Any other value of 'occupancy' results in all 'building_classes' being retained.

    If no classes from 'building_classes' have the selected 'occupancy_subtype', the output
    'obm_building_classes' does not contain any rows.

    Args:
        occupancy (str):
            Occupancy of the building as per the GEM Building Taxonomy v3.0.
        building_classes (Pandas DataFrame):
            DataFrame containing the building classes and proportions from which suitable
            classes will be selected for the target OBM building. It comprises the
            following columns:
                building_class_name (str):
                    Building class as per the GEM Building Taxonomy.
                settlement_type (str):
                    Type of settlement within the data unit. Possible values: "urban",
                    "rural", "big_city", "all".
                occupancy_subtype (str):
                    Details on the occupancy, if relevant to characterise the building
                    class.
                storeys_min (int):
                    Minimum number of storeys of the building class.
                storeys_max (int):
                    Maximum number of storeys of the building class.
                proportions (float):
                    Proportions in which the building class is present in the data unit.

    Returns:
        obm_building_classes (Pandas DataFrame):
            DataFrame containing the building classes and proportions selected for the
            target OBM building as described above. The columns are the same as those of
            the input 'building_classes'.
    """

    # RES3 and COM5 are both under "Hotels" in ESRM20; COM1 and COM2 are both "Trade".
    occupancy_subtype_per_occupancy = {
        "RES3": "Hotels",
        "COM5": "Hotels",
        "COM3": "Offices",
        "COM1": "Trade",
        "COM2": "Trade",
    }
    target_occupancy_subtype = occupancy_subtype_per_occupancy.get(occupancy)

    if target_occupancy_subtype is None:
        building_classes_filtered = building_classes.copy()
    else:
        # Explicit copy: the proportions are overwritten afterwards, and assigning into a
        # bare slice of 'building_classes' makes pandas raise SettingWithCopyWarning.
        building_classes_filtered = building_classes.loc[
            building_classes["occupancy_subtype"] == target_occupancy_subtype
        ].copy()

    obm_building_classes = GDEProcessor._recalculate_building_classes_proportions(
        building_classes_filtered
    )

    return obm_building_classes
@staticmethod
def _recalculate_building_classes_proportions(building_classes):
    """This function recalculates the 'proportions' column of 'building_classes', which may
    or may not add up to one when passed as input to the function, so that the output
    proportions add up to one.

    The input 'building_classes' is not modified; the recalculation is done on a copy. If
    'building_classes' has no rows, or if its proportions add up to zero (in which case
    they cannot be normalised), the copy is returned with its proportions unchanged.

    Args:
        building_classes (Pandas DataFrame):
            DataFrame containing building classes and proportions. The only column used by
            this function is:
                proportions (float):
                    Proportions in which each building class is present in the data unit.
            Any other columns (e.g. 'building_class_name', 'settlement_type',
            'occupancy_subtype', 'storeys_min', 'storeys_max') are carried over as they
            are.

    Returns:
        Copy of 'building_classes' with re-calculated proportions.
    """

    # Work on a copy so that neither the caller's DataFrame nor a slice of it is altered
    recalculated_building_classes = building_classes.copy()

    if recalculated_building_classes.shape[0] > 0:
        old_proportions = recalculated_building_classes["proportions"].to_numpy(dtype=float)
        total_proportions = old_proportions.sum()

        # Dividing by a total of zero would fill the column with NaN
        if total_proportions != 0.0:
            recalculated_building_classes["proportions"] = old_proportions / total_proportions

    return recalculated_building_classes
'ABC_10269', 0.10, 0.0, 0.0, 7, 9999), +('A3/HBET:7-12', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.10, 0.0, 0.0, 7, 12), ('B1/HBET:1-3', 'rural', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.25, 0.0, 0.0, 1, 3), ('B2/H:4', 'rural', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.15, 0.0, 0.0, 4, 4), ('C1/HBET:1-2', 'urban', 'Hotels', 2, 'ABC', 'commercial', 'ABC_10269', 0.10, 0.0, 0.0, 1, 2), diff --git a/tests/test_database_queries.py b/tests/test_database_queries.py index e31291784c1ea8ce9ae708886f93109a32c3ddc4..4daa33a5e5a1a5913b8c22da959969e42eeae31a 100644 --- a/tests/test_database_queries.py +++ b/tests/test_database_queries.py @@ -171,7 +171,7 @@ def test_get_OBM_buildings_in_data_unit_by_occupancy_case(test_db): "obm_buildings", ) - assert returned_obm_buildings.shape[0] == 2 + assert returned_obm_buildings.shape[0] == 3 for col_name in expected_columns: assert col_name in returned_obm_buildings.columns assert 11223344 in returned_obm_buildings["osm_id"].to_numpy() @@ -255,7 +255,7 @@ def test_get_building_classes_of_data_unit(test_db): expected_bdg_class_names = [ "A1/HBET:1-3", "A2/HBET:4-6", - "A3/HBET:7-", + "A3/HBET:7-12", "B1/HBET:1-3", "B2/H:4", ] diff --git a/tests/test_processor.py b/tests/test_processor.py index 727e49e2440928332b4a334f240b1b233846fbc5..f4622f861e969119dd9c42ee01acd2d17ae3ccca 100644 --- a/tests/test_processor.py +++ b/tests/test_processor.py @@ -19,6 +19,7 @@ import os import logging import numpy +import pandas from gdecore.processor import GDEProcessor from gdecore.configuration import Configuration from gdecore.database_queries import DatabaseQueries @@ -56,9 +57,10 @@ def test_post_process_obm_relations(test_db): returned_obm_buildings = GDEProcessor.post_process_obm_relations(raw_obm_buildings) - assert returned_obm_buildings.shape[0] == 2 + assert returned_obm_buildings.shape[0] == 3 assert 11223344 in returned_obm_buildings["osm_id"].to_numpy() assert 22334455 in returned_obm_buildings["osm_id"].to_numpy() + 
def test_ensure_unique_occupancy():
    """Placeholder: 'ensure_unique_occupancy' is tested within
    test_post_process_obm_relations().
    """
def test_assign_building_classes_to_obm_buildings(test_db):
    """The test cases cover a range of combinations of outcomes from the two rules that are used
    to narrow down the building classes that are assigned to an OBM building. The first rule
    narrows down as a function of the number of storeys, the second rule narrows down as a
    function of occupancy details when the occupancy case is commercial and the occupancy falls
    within a set of cases.

    The test cases are the following:

    - Test Case 1: Residential building with 4 storeys:
        - The number of storeys first narrows down the possibilities.
        - The second rule does not apply because it is residential.
        - Two building classes are assigned at the end.
    - Test Case 2: Residential building with unknown number of storeys:
        - No narrowing down with the first rule, as the number of storeys is unknown
        - The second rule does not apply because it is residential.
        - As no narrowing down occurs, all building classes are assigned.
    - Test Case 3: Residential building with 15 storeys:
        - The number of storeys results in no possible classes left (all classes are
          incompatible)
        - The second rule does not apply because it is residential.
        - As all building classes are incompatible, all building classes are assigned.
    - Test Case 4: Commercial "RES3" building with unknown number of storeys:
        - No narrowing down with the first rule, as the number of storeys is unknown
        - The second rule leads to narrowing down.
        - Two building classes are assigned at the end.
    - Test Case 5: Commercial "COM3" building (from relation) with 3 storeys:
        - The number of storeys first narrows down the possibilities.
        - The second rule leads to further narrowing down.
        - One building class is assigned at the end.
    - Test Case 6: Commercial "COM2" building (from relation) with 4 storeys:
        - The number of storeys first narrows down the possibilities.
        - The second rule leads to further narrowing down but results in no classes left.
        - As all building classes are incompatible, all building classes are assigned.
    """

    def proportions_by_class_name(building_classes):
        """Map each building class name to its proportion."""
        return dict(
            zip(
                building_classes["building_class_name"].to_numpy(),
                building_classes["proportions"].to_numpy(),
            )
        )

    def assert_all_classes_assigned(assigned_classes, all_classes):
        """Check that 'assigned_classes' holds every building class of 'all_classes'."""
        assert assigned_classes.shape[0] == all_classes.shape[0]
        assigned_names = assigned_classes["building_class_name"].to_numpy()
        for bdg_class_name in all_classes["building_class_name"].to_numpy():
            assert bdg_class_name in assigned_names

    # Database connection (the Configuration class will define the credentials based on whether
    # the code is running in the CI or locally)
    config = Configuration(
        os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml")
    )

    (
        returned_data_units_ids,
        returned_data_units_geometries,
        _,
    ) = DatabaseQueries.get_data_unit_ids_geometries_of_entity_and_occupancy_case(
        "ABC", "residential", 2, config.database_gde_tiles, "data_units"
    )  # auxiliary, to retrieve the geometry of the data unit

    geometry = returned_data_units_geometries[
        numpy.where(returned_data_units_ids == "ABC_10269")[0][0]
    ]

    # Group of residential buildings that do not belong to relations
    raw_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case(
        "residential",
        geometry,
        config.database_obm_buildings,
        "obm_buildings",
    )

    obm_buildings = GDEProcessor.post_process_obm_relations(raw_obm_buildings)

    data_unit_building_classes = DatabaseQueries.get_building_classes_of_data_unit(
        "ABC_10269", "residential", 2, config.database_gde_tiles, "data_units_buildings"
    )

    returned_obm_buildings_building_classes = (
        GDEProcessor.assign_building_classes_to_obm_buildings(
            obm_buildings, data_unit_building_classes, "residential"
        )
    )

    assert len(returned_obm_buildings_building_classes) == 3

    # Test Case 1: Residential building with 4 storeys
    case_11223344 = returned_obm_buildings_building_classes[11223344]
    assert case_11223344.shape[0] == 2

    returned_proportions = proportions_by_class_name(case_11223344)
    expected_proportions = {"A2/HBET:4-6": 0.667, "B2/H:4": 0.333}
    for bdg_class_name, expected_proportion in expected_proportions.items():
        assert round(returned_proportions[bdg_class_name], 3) == expected_proportion

    # Test Case 2: Residential building with unknown number of storeys (all classes returned)
    assert_all_classes_assigned(
        returned_obm_buildings_building_classes[22334455], data_unit_building_classes
    )

    # Test Case 3: Residential building with 15 storeys (returns all classes)
    assert_all_classes_assigned(
        returned_obm_buildings_building_classes[99001122], data_unit_building_classes
    )

    # Group of commercial buildings, some in relations, some not
    raw_obm_buildings = DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_case(
        "commercial",
        geometry,
        config.database_obm_buildings,
        "obm_buildings",
    )

    obm_buildings = GDEProcessor.post_process_obm_relations(raw_obm_buildings)

    data_unit_building_classes = DatabaseQueries.get_building_classes_of_data_unit(
        "ABC_10269", "commercial", 2, config.database_gde_tiles, "data_units_buildings"
    )

    returned_obm_buildings_building_classes = (
        GDEProcessor.assign_building_classes_to_obm_buildings(
            obm_buildings, data_unit_building_classes, "commercial"
        )
    )

    assert len(returned_obm_buildings_building_classes) == 3

    # Test Case 4: Commercial building with unknown number of storeys, "RES3"
    case_55667788 = returned_obm_buildings_building_classes[55667788]
    assert case_55667788.shape[0] == 2

    returned_proportions = proportions_by_class_name(case_55667788)
    expected_proportions = {"C1/HBET:1-2": 0.286, "C2/HBET:3-": 0.714}
    for bdg_class_name, expected_proportion in expected_proportions.items():
        assert round(returned_proportions[bdg_class_name], 3) == expected_proportion

    # Test Case 5: Commercial building (from relation) with 3 storeys, "COM3"
    case_101010 = returned_obm_buildings_building_classes[-101010]
    assert case_101010.shape[0] == 1
    assert round(proportions_by_class_name(case_101010)["C6/HBET:3-5"], 3) == 1.0

    # Test Case 6: Commercial building (from relation) with 4 storeys, "COM2"
    assert_all_classes_assigned(
        returned_obm_buildings_building_classes[-202020], data_unit_building_classes
    )
def test_narrow_down_by_storeys():
    """Test 'GDEProcessor.narrow_down_by_storeys' with numbers of storeys that are compatible
    with one, two and none of the building classes, and with an unknown number of storeys
    (numpy.nan).
    """

    def proportions_by_class_name(building_classes):
        """Map each building class name to its proportion."""
        return dict(
            zip(
                building_classes["building_class_name"].to_numpy(),
                building_classes["proportions"].to_numpy(),
            )
        )

    building_classes = pandas.DataFrame(
        {
            "building_class_name": ["A", "B", "C", "D"],
            "settlement_type": ["urban", "urban", "urban", "urban"],
            "occupancy_subtype": ["all", "all", "all", "all"],
            "storeys_min": [1, 2, 5, 6],
            "storeys_max": [3, 4, 5, 8],
            "proportions": [0.5, 0.25, 0.20, 0.05],
        }
    )

    # One class possible
    returned_obm_building_classes = GDEProcessor.narrow_down_by_storeys(5, building_classes)

    assert returned_obm_building_classes.shape[0] == 1
    assert round(proportions_by_class_name(returned_obm_building_classes)["C"], 3) == 1.0

    # Two classes possible
    returned_obm_building_classes = GDEProcessor.narrow_down_by_storeys(3, building_classes)

    assert returned_obm_building_classes.shape[0] == 2
    returned_proportions = proportions_by_class_name(returned_obm_building_classes)
    expected_proportions = {"A": 0.667, "B": 0.333}
    for bdg_class_name, expected_proportion in expected_proportions.items():
        assert round(returned_proportions[bdg_class_name], 3) == expected_proportion

    # No class possible
    returned_obm_building_classes = GDEProcessor.narrow_down_by_storeys(12, building_classes)

    assert returned_obm_building_classes.shape[0] == 0

    # Input numpy.nan (all classes are returned)
    returned_obm_building_classes = GDEProcessor.narrow_down_by_storeys(
        numpy.nan, building_classes
    )

    assert returned_obm_building_classes.shape[0] == building_classes.shape[0]
def test_narrow_down_by_commercial_occupancy_details():
    """Test 'GDEProcessor.narrow_down_by_commercial_occupancy_details' with occupancies that
    select one and two of the building classes, and with an occupancy that is not considered
    for filtering (all building classes are returned).
    """

    def proportions_by_class_name(building_classes):
        """Map each building class name to its proportion."""
        return dict(
            zip(
                building_classes["building_class_name"].to_numpy(),
                building_classes["proportions"].to_numpy(),
            )
        )

    building_classes = pandas.DataFrame(
        {
            "building_class_name": ["A", "B", "C", "D"],
            "settlement_type": ["urban", "urban", "urban", "urban"],
            "occupancy_subtype": ["Offices", "Trade", "Offices", "Hotels"],
            "storeys_min": [1, 2, 5, 6],
            "storeys_max": [3, 4, 5, 9999],
            "proportions": [0.5, 0.25, 0.20, 0.05],
        }
    )

    # One class possible
    returned_obm_building_classes = GDEProcessor.narrow_down_by_commercial_occupancy_details(
        "RES3", building_classes
    )

    assert returned_obm_building_classes.shape[0] == 1
    assert round(proportions_by_class_name(returned_obm_building_classes)["D"], 3) == 1.0

    # Two classes possible
    returned_obm_building_classes = GDEProcessor.narrow_down_by_commercial_occupancy_details(
        "COM3", building_classes
    )

    assert returned_obm_building_classes.shape[0] == 2
    returned_proportions = proportions_by_class_name(returned_obm_building_classes)
    expected_proportions = {"A": 0.714, "C": 0.286}
    for bdg_class_name, expected_proportion in expected_proportions.items():
        assert round(returned_proportions[bdg_class_name], 3) == expected_proportion

    # No filtering possible (therefore, return them all)
    returned_obm_building_classes = GDEProcessor.narrow_down_by_commercial_occupancy_details(
        "ALL", building_classes
    )

    assert returned_obm_building_classes.shape[0] == building_classes.shape[0]
    returned_names = returned_obm_building_classes["building_class_name"].to_numpy()
    for bdg_class_name in building_classes["building_class_name"].to_numpy():
        assert bdg_class_name in returned_names