processor.py 33.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
#!/usr/bin/env python3

# Copyright (C) 2022:
#   Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.

import logging
from copy import deepcopy
import numpy
import mercantile
import pandas
import pyproj


logger = logging.getLogger()


class GDEProcessor:
    """This class contains methods that are fundamental to the calculations of `gde-core`."""

    @staticmethod
    def post_process_obm_relations(obm_buildings):
        """This function processes the contents of 'obm_buildings' to identify entries that
        correspond to individual buildings and entries that correspond to parts of buildings
        that belong to the same OpenStreetMap (OSM) relation ID. Data on the former stays the
        same. For the latter (i.e. parts of relations), the function gathers the parts and
        transforms them into one individual entry in which:
        - the 'osm_id' becomes that of the 'relation_id',
        - the number of storeys becomes the maximum of all individual parts,
        - the quadkey of the ensemble is identified,
        - the occupancy of the ensemble is identified.

        Args:
            obm_buildings (GeoPandas GeoDataFrame):
                GeoDataFrame with data on OBM buildings. It comprises the following columns:
                    osm_id (int):
                        OpenStreetMap (OSM) ID of the building. It cannot contain missing values
                        (by definition).
                    relation_id (str):
                        OpenStreetMap (OSM) ID of the relation to which an osm_id belongs, if
                        any. Missing values can be "nan" or "None".
                    quadkey (str):
                        String indicating the quadkey of the tile to which the centroid of the
                        building belongs. Missing values can be "nan" or "None".
                    storeys (float):
                        Number of storeys of the building. Treated as floats so as to be able to
                        use numpy.nan for missing values.
                    occupancy (str):
                        Occupancy of the building as per the GEM Building Taxonomy v3.0. Missing
                        values can be "nan" or "None".
                    geometry (Shapely Polygon or MultiPolygon):
                        Geometry (footprint) of the building, defined in EPSG:4326. It cannot
                        contain missing values (by definition).

        Returns:
            obm_buildings_adjusted (Pandas DataFrame):
                Processed version of 'obm_buildings' in which individual parts of the same
                'relation_id' have been grouped together and relevant values of the other fields
                have been identified. It comprises the following columns:
                    osm_id (int):
                        OpenStreetMap (OSM) ID of the building. If the building is represented
                        by a relation, this is the ID of the relation.
                    quadkey (str):
                        String indicating the quadkey of the tile to which the centroid of the
                        building belongs.
                    storeys (float):
                        Number of storeys of the building (maximum of all components if building
                        is an OSM relation). Treated as floats so as to be able to use numpy.nan
                        for missing values.
                    occupancy (str):
                        Occupancy of the building as per the GEM Building Taxonomy v3.0. Missing
                        values can be "nan" or "None".
        """

        # Identify unique relation IDs in 'obm_buildings'
        unique_relations = numpy.unique(obm_buildings["relation_id"].to_numpy())

        # Get rid of "nan" and "None"
        unique_relations = unique_relations[
            numpy.logical_and(unique_relations != "nan", unique_relations != "None")
        ]

        # Identify rows that contain buildings that are not part of a relation
        which_no_relation = numpy.where(
            numpy.logical_or(
                obm_buildings["relation_id"].to_numpy() == "nan",
                obm_buildings["relation_id"].to_numpy() == "None",
            )
        )[0]

        if len(which_no_relation) > 0:
            # Start output DataFrame with buildings that are not part of relations
            obm_buildings_adjusted = obm_buildings.iloc[which_no_relation, :]
            obm_buildings_adjusted = obm_buildings_adjusted.drop(
                columns=["relation_id", "geometry"]
            )

        # Process buildings that are part of relations
        add_osm_ids = []
        add_quadkey = []
        add_storeys = []
        add_occupancy = []

        for relation_id in unique_relations:
            # Identify components of this relation_id
            which = numpy.where(obm_buildings["relation_id"].to_numpy() == relation_id)[0]
            obm_buildings_of_relation = deepcopy(obm_buildings.iloc[which, :])

            add_osm_ids.append(int(float(relation_id)))

            add_quadkey.append(
                GDEProcessor._identify_unique_quadkey_L18(obm_buildings_of_relation)
            )

            add_storeys.append(
                GDEProcessor._select_max_of_array(
                    obm_buildings_of_relation["storeys"].to_numpy()
                )
            )

            add_occupancy.append(
                GDEProcessor._ensure_unique_occupancy(obm_buildings_of_relation)
            )

        relation_buildings = pandas.DataFrame(
            {
                "osm_id": pandas.Series(numpy.array(add_osm_ids).astype(int), dtype="int"),
                "quadkey": pandas.Series(numpy.array(add_quadkey).astype(str), dtype="str"),
                "storeys": pandas.Series(numpy.array(add_storeys).astype(float), dtype="float"),
                "occupancy": pandas.Series(numpy.array(add_occupancy).astype(str), dtype="str"),
            }
        )

        if len(which_no_relation) > 0:
            obm_buildings_adjusted = pandas.concat([obm_buildings_adjusted, relation_buildings])
        else:
            obm_buildings_adjusted = deepcopy(relation_buildings)

        return obm_buildings_adjusted

    @staticmethod
    def _identify_unique_quadkey_L18(obm_buildings):
        """This function identifies the unique level 18 quadkey associated with the (parts of)
        buildings contained in 'obm_buildings'.

        It is asssumed that 'obm_buildings' contains different OpenStreetMap (OSM) building
        polygons that are linked by the same OSM relation ID. If all values of 'quadkey' within
        'obm_buildings' are the same, this is identified as the sought unique level 18 quadkey.
        If 'obm_buildings' contains different values of 'quadkey', then all geometries of
        'obm_buildings' are aggregated ("dissolved") into one, whose centroid is calculated and
        used to identify the corresponding level 18 quadkey.

        If 'obm_buildings' contains more than one value of 'relation_id', this function returns
        an empty string.

        Args:
            obm_buildings (GeoPandas GeoDataFrame):
                GeoDataFrame with data on OBM buildings. It comprises the following columns:
                    osm_id (int):
                        OpenStreetMap (OSM) ID of the building.
                    relation_id (str):
                        OpenStreetMap (OSM) ID of the relation to which an osm_id belongs, if
                        any.
                    quadkey (str):
                        String indicating the quadkey of the tile to which the centroid of the
                        building belongs.
                    storeys (float):
                        Number of storeys of the building.
                    occupancy (str):
                        Occupancy of the building as per the GEM Building Taxonomy v3.0.
                    geometry (Shapely Polygon or MultiPolygon):
                        Geometry (footprint) of the building, defined in EPSG:4326.

        Returns:
            unique_quadkey (str):
                String indicating the unique quadkey to which the centroid of the geometries in
                'obm_buildings' belongs.
        """

        if len(numpy.unique(obm_buildings["relation_id"].to_numpy())) != 1:
            logger.error(
                "'obm_buildings' passed on to GDEProcessor._identify_unique_quadkey_L18() "
                "contains more than one unique value of 'relation_id'. The program cannot run."
            )
            return ""

        unique_quadkeys = numpy.unique(obm_buildings["quadkey"].to_numpy())

        if len(unique_quadkeys) == 1:
            unique_quadkey = unique_quadkeys[0]
        else:
            obm_buildings_aux = deepcopy(obm_buildings)
            obm_buildings_aux = obm_buildings_aux.drop(
                columns=["osm_id", "quadkey", "storeys", "occupancy"]
            )
            obm_buildings_dissolved = obm_buildings_aux.dissolve(by="relation_id")

            # Project 'obm_buildings_dissolved' onto Albers Equal Area
            lon_w = obm_buildings_dissolved.total_bounds[0]
            lat_s = obm_buildings_dissolved.total_bounds[1]
            lon_e = obm_buildings_dissolved.total_bounds[2]
            lat_n = obm_buildings_dissolved.total_bounds[3]
            projection_string = "+proj=aea +lat_1={} +lat_2={} +lat_0={} +lon_0={}".format(
                lat_s, lat_n, (lat_s + lat_n) / 2.0, (lon_w + lon_e) / 2.0
            )
            obm_buildings_dissolved = obm_buildings_dissolved.to_crs(
                pyproj.CRS(projection_string)
            )

            centroid = obm_buildings_dissolved["geometry"].centroid.to_crs("EPSG:4326")[0]
            unique_quadkey = mercantile.quadkey(mercantile.tile(centroid.x, centroid.y, 18))

        return unique_quadkey

    @staticmethod
    def _select_max_of_array(numbers):
        """This function returns the largest value in 'numbers'. If all elements of 'numbers'
        are NaNs, it returns numpy.nan. If some elements of 'numbers' are NaNs, the NaNs are
        ignored and the maximum of the rest of the elements is returned.

        Note: The purpose of this function is to be able to handle the case in which all
        elements of 'numbers' are NaNs, as numpy.nanmax() returns a RuntimeWarning in this case.

        Args:
            numbers (array of int or float):
                Array of integers or floats. May contain NaNs.

        Returns:
            max_number (int, float or numpy.nan):
                Maximum value of 'numbers'.
        """

        if numpy.all(numpy.isnan(numbers)):
            max_number = numpy.nan
        else:
            max_number = numpy.nanmax(numbers)

        return max_number

    @staticmethod
    def _ensure_unique_occupancy(obm_buildings):
        """This function identifies the unique occupancy associated with the (parts of)
        buildings contained in 'obm_buildings'.

        It is asssumed that 'obm_buildings' contains different OpenStreetMap (OSM) building
        polygons that are linked by the same OSM relation ID. If all values of 'occupancy'
        within 'obm_buildings' are the same, this is identified as the sought unique occupancy.
        If 'obm_buildings' contains different values of 'occupancy', or if 'obm_buildings'
        contains more than one value of 'relation_id', an empty string is returned.

        NOTE: This function assumes that all parts of the same relation are assigned the same
        occupancy by `rabotnik-obm`.

        Args:
            obm_buildings (GeoPandas GeoDataFrame):
                GeoDataFrame with data on OBM buildings. It comprises the following columns:
                    osm_id (int):
                        OpenStreetMap (OSM) ID of the building.
                    relation_id (str):
                        OpenStreetMap (OSM) ID of the relation to which an osm_id belongs, if
                        any.
                    quadkey (str):
                        String indicating the quadkey of the tile to which the centroid of the
                        building belongs.
                    storeys (float):
                        Number of storeys of the building.
                    occupancy (str):
                        Occupancy of the building as per the GEM Building Taxonomy v3.0.
                    geometry (Shapely Polygon or MultiPolygon):
                        Geometry (footprint) of the building, defined in EPSG:4326.

        Returns:
            unique_occupancy (str):
                String indicating the unique occupancy of all the parts of the building
                contained in 'obm_buildings' as per the GEM Building Taxonomy v3.0.
        """

        if len(numpy.unique(obm_buildings["relation_id"].to_numpy())) != 1:
            logger.error(
                "'obm_buildings' passed on to GDEProcessor._ensure_unique_occupancy() contains "
                "more than one unique value of 'relation_id'. The program cannot run."
            )
            return ""

        unique_occupancies = numpy.unique(obm_buildings["occupancy"].to_numpy())

        if len(unique_occupancies) == 1:
            unique_occupancy = unique_occupancies[0]
        else:
            unique_occupancy = ""
            logger.warning(
                "GDEProcessor._ensure_unique_occupancy(): individual parts of 'relation_id' %s "
                "have different values of 'occupancy'. A unique occupancy value could not be "
                "determined." % (numpy.unique(obm_buildings["relation_id"].to_numpy())[0])
            )

        return unique_occupancy
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685

    @staticmethod
    def assign_building_classes_to_obm_buildings(
        obm_buildings, data_unit_building_classes, occupancy_case
    ):
        """This function assigns building classes and proportions from
        'data_unit_building_classes' to each of the OBM buildings in 'obm_buildings', by calling
        'GDEProcessor.assign_building_classes_to_obm_building'. The latter selects from
        'data_unit_building_classes' only the building classes that are compatible with the
        attributes of the building contained in 'obm_buildings'. The proportions are
        recalculated to reflect only the building classes retained.

        Args:
            obm_buildings (Pandas DataFrame):
                GeoDataFrame with data on OBM buildings. It comprises the following columns:
                    osm_id (int):
                        OpenStreetMap (OSM) ID of the building. If the building is represented
                        by a relation, this is the ID of the relation.
                    quadkey (str):
                        String indicating the quadkey of the tile to which the centroid of the
                        building belongs.
                    storeys (float):
                        Number of storeys of the building (maximum of all components if building
                        is an OSM relation). Treated as floats so as to be able to use numpy.nan
                        for missing values.
                    occupancy (str):
                        Occupancy of the building as per the GEM Building Taxonomy v3.0. Missing
                        values can be "nan" or "None".
            data_unit_building_classes (Pandas DataFrame):
                DataFrame containing the building classes and their proportions. It comprises
                the following columns:
                    building_class_name (str):
                        Building class as per the GEM Building Taxonomy.
                    settlement_type (str):
                        Type of settlement within the data unit. Possible values: "urban",
                        "rural", "big_city", "all".
                    occupancy_subtype (str):
                        Details on the occupancy, if relevant to characterise the building
                        class.
                    storeys_min (int):
                        Minimum number of storeys of the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype').
                    storeys_max (int):
                        Maximum number of storeys of the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype').
                    proportions (float):
                        Proportions in which the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype') is
                        present in the data unit.
            occupancy_case (str):
                Name of the occupancy case (e.g. "residential", "commercial", "industrial").

        Returns:
            obm_buildings_building_classes (dict):
                Dictionary containing the building classes and their proportions for each OBM
                building. Dictionary keys correspond to the OSM ID of the building. Each key
                contains a Pandas DataFrame with the following columns:
                    building_class_name (str):
                        Building class as per the GEM Building Taxonomy.
                    settlement_type (str):
                        Type of settlement within the data unit. Possible values: "urban",
                        "rural", "big_city", "all".
                    occupancy_subtype (str):
                        Details on the occupancy, if relevant to characterise the building
                        class.
                    proportions (float):
                        Proportions in which the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype') is
                        present in the data unit.
        """

        obm_buildings_building_classes = {}

        for i in range(obm_buildings.shape[0]):
            osm_id_i = obm_buildings["osm_id"].to_numpy()[i]
            storeys_i = obm_buildings["storeys"].to_numpy()[i]
            occupancy_i = obm_buildings["occupancy"].to_numpy()[i]

            obm_buildings_building_classes[
                osm_id_i
            ] = GDEProcessor.assign_building_classes_to_obm_building(
                osm_id_i, storeys_i, occupancy_i, occupancy_case, data_unit_building_classes
            )

        return obm_buildings_building_classes

    @staticmethod
    def assign_building_classes_to_obm_building(
        osm_id, storeys, occupancy, occupancy_case, data_unit_building_classes
    ):
        """This function assigns building classes and proportions from
        'data_unit_building_classes' to the OBM building with ID 'osm_id'. When building
        attributes such as 'storeys' or 'occupancy' do not allow for a narrowing down of all the
        potential building classes, the output consists of all building classes in the input
        'data_unit_building_classes'. The function applies rules to attempt to narrow down the
        potential building classes. If the application of a rule leads to no building classes
        from 'data_unit_building_classes' being feasible, this means that there is an
        inconsistency between 'data_unit_building_classes' and the attributes of the building;
        the output in this case includes all potential building classes and a warning is logged.

        The rules that are currently implemented are:
            narrow_down_by_storeys:
                Only the potential building classes whose (ranges of) numbers of storeys are
                compatible with the input 'storeys' are selected (when the number of storeys of
                the OBM building is known).
            narrow_down_by_commercial_occupancy_details:
                ESRM20 commercial classes include details of sub-occupancy such as whether they
                correspond to hotels/motels/guest lodges/etc, restaurants/bars/cafes, offices,
                or retail/wholesale trade. When the input 'occupancy' corresponds to the GEM
                Building Taxonomy representation of one of these cases, only compatible building
                classes are selected.

        Args:
            osm_id (int):
                OpenStreetMap (OSM) ID of the building to which building classes will be
                assigned.
            storeys (float):
                Number of storeys of the OBM building with 'osm_id'. Treated as floats so as to
                be able to use numpy.nan for missing values.
            occupancy (str):
                Occupancy of the OBM building with 'osm_id' as per the GEM Building Taxonomy
                v3.0.
            occupancy_case (str):
                Name of the occupancy case (e.g. "residential", "commercial", "industrial").
            data_unit_building_classes (Pandas DataFrame):
                DataFrame containing the building classes and proportions corresponding to the
                data unit and occupancy case to which the OBM building with 'osm_id' belongs.
                It comprises the following columns:
                    building_class_name (str):
                        Building class as per the GEM Building Taxonomy.
                    settlement_type (str):
                        Type of settlement within the data unit. Possible values: "urban",
                        "rural", "big_city", "all".
                    occupancy_subtype (str):
                        Details on the occupancy, if relevant to characterise the building
                        class.
                    storeys_min (int):
                        Minimum number of storeys of the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype').
                    storeys_max (int):
                        Maximum number of storeys of the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype').
                    proportions (float):
                        Proportions in which the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype') is
                        present in the data unit.

        Returns:
            obm_building_building_classes (Pandas DataFrame):
                DataFrame with the building classes and proportions assigned to the input OBM
                building. It contains the following columns:
                    building_class_name (str):
                        Building class as per the GEM Building Taxonomy.
                    settlement_type (str):
                        Type of settlement within the data unit. Possible values: "urban",
                        "rural", "big_city", "all".
                    occupancy_subtype (str):
                        Details on the occupancy, if relevant to characterise the building
                        class.
                    proportions (float):
                        Proportions in which the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype') is
                        present in the data unit.
        """

        obm_building_building_classes = deepcopy(data_unit_building_classes)

        # Narrow down by number of storeys
        if not numpy.isnan(storeys):
            obm_building_building_classes = GDEProcessor.narrow_down_by_storeys(
                storeys, obm_building_building_classes
            )

        # Narrow down by commercial occupancy details
        if (
            obm_building_building_classes.shape[0] > 0
            and occupancy_case == "commercial"
            and occupancy in ("RES3", "COM5", "COM3", "COM1", "COM2")
        ):
            obm_building_building_classes = (
                GDEProcessor.narrow_down_by_commercial_occupancy_details(
                    occupancy, obm_building_building_classes
                )
            )

        obm_building_building_classes = obm_building_building_classes.drop(
            columns=["storeys_min", "storeys_max"]
        )

        if obm_building_building_classes.shape[0] < 1:
            warning_message = (
                "Attempt of narrowing down building classes assigned to OBM building with "
                "'osm_id' = %s resulted in no building classes left; all building classes were "
                "assigned to 'osm_id' = %s despite this inconsistency" % (osm_id, osm_id)
            )
            logger.warning(warning_message)

            obm_building_building_classes = deepcopy(data_unit_building_classes)

        return obm_building_building_classes

    @staticmethod
    def narrow_down_by_storeys(storeys, building_classes):
        """This function keeps from 'building_classes' only those that are compatible with the
        input number of storeys ('storeys'). The proportion of the retained building classes is
        recalculated to exclude the discarded building classes.

        If storeys is numpy.nan, then all 'building_classes' are returned.

        If no classes from 'building_classes' are compatible with 'storeys', the output
        'obm_building_classes' does not contain any rows.

        Args:
            storeys (float):
                Number of storeys of the OBM building. Treated as floats so as to
                be able to use numpy.nan for missing values.
            building_classes (Pandas DataFrame):
                DataFrame containing the building classes and proportions from which suitable
                classes will be selected for the target OBM building. It comprises the following
                columns:
                    building_class_name (str):
                        Building class as per the GEM Building Taxonomy.
                    settlement_type (str):
                        Type of settlement within the data unit. Possible values: "urban",
                        "rural", "big_city", "all".
                    occupancy_subtype (str):
                        Details on the occupancy, if relevant to characterise the building
                        class.
                    storeys_min (int):
                        Minimum number of storeys of the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype').
                    storeys_max (int):
                        Maximum number of storeys of the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype').
                    proportions (float):
                        Proportions in which the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype') is
                        present in the data unit.

        Returns:
            obm_building_classes (Pandas DataFrame):
                DataFrame containing the building classes and proportions selected for the
                target OBM building as described above. The columns are the same as those of the
                input 'building_classes'.
        """

        if not numpy.isnan(storeys):
            building_classes_filtered = building_classes.loc[
                (building_classes["storeys_min"] <= storeys)
                & (building_classes["storeys_max"] >= storeys)
            ]
        else:
            building_classes_filtered = deepcopy(building_classes)

        obm_building_classes = GDEProcessor._recalculate_building_classes_proportions(
            building_classes_filtered
        )

        return obm_building_classes

    @staticmethod
    def narrow_down_by_commercial_occupancy_details(occupancy, building_classes):
        """This function keeps from 'building_classes' only those that are compatible with the
        input 'occupancy'. The proportion of the retained building classes is recalculated to
        exclude the discarded building classes.

        ESRM20 commercial classes include details of sub-occupancy such as whether they
        correspond to hotels/motels/guest lodges/etc, restaurants/bars/cafes, offices or retail/
        /wholesale trade. When the input 'occupancy' corresponds to the GEM Building Taxonomy
        representation of one of these cases, only compatible building classes are selected.

        Values of 'occupancy' that are considered by this function for filtering
        'building_classes' are:
        - "RES3" (hotels/motels/guest lodges/etc)
        - "COM1" (retail trade)
        - "COM2" (wholesale trade and storage)
        - "COM3" (offices, professional/technical services)
        - "COM5" (restaurants/bars/cafes)
        Any other values of 'occupancy' result in 'building_classes' being returned without
        filtering.

        Args:
            occupancy (str):
                Occupancy of the building as per the GEM Building Taxonomy v3.0.
            building_classes (Pandas DataFrame):
                DataFrame containing the building classes and proportions from which suitable
                classes will be selected for the target OBM building. It comprises the following
                columns:
                    building_class_name (str):
                        Building class as per the GEM Building Taxonomy.
                    settlement_type (str):
                        Type of settlement within the data unit. Possible values: "urban",
                        "rural", "big_city", "all".
                    occupancy_subtype (str):
                        Details on the occupancy, if relevant to characterise the building
                        class.
                    storeys_min (int):
                        Minimum number of storeys of the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype').
                    storeys_max (int):
                        Maximum number of storeys of the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype').
                    proportions (float):
                        Proportions in which the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype') is
                        present in the data unit.

        Returns:
            obm_building_classes (Pandas DataFrame):
                DataFrame containing the building classes and proportions selected for the
                target OBM building as described above. The columns are the same as those of the
                input 'building_classes'.
        """

        if occupancy in ("RES3", "COM5"):
            # RES3 is hotels/motels/guest lodges/etc, COM5 is restaurants/bars/cafes.
            # Both are under "Hotels" in ESRM20.
            target_occupancy_subtype = "Hotels"
        elif occupancy == "COM3":
            target_occupancy_subtype = "Offices"
        elif occupancy in ("COM1", "COM2"):
            # COM1 is retail trade, COM2 is wholesale trade and storage (warehouse).
            target_occupancy_subtype = "Trade"
        else:
            target_occupancy_subtype = None

        if target_occupancy_subtype is not None:
            building_classes_filtered = building_classes.loc[
                (building_classes["occupancy_subtype"] == target_occupancy_subtype)
            ]
        else:
            building_classes_filtered = deepcopy(building_classes)

        obm_building_classes = GDEProcessor._recalculate_building_classes_proportions(
            building_classes_filtered
        )

        return obm_building_classes

    @staticmethod
    def _recalculate_building_classes_proportions(building_classes):
        """This function recalculates the 'proportions' column of 'building_classes', which may
        or may not add up to one when passed as input to the function.

        Args:
            building_classes (Pandas DataFrame):
                DataFrames containing building classes and proportions. They comprise the
                following columns:
                    building_class_name (str):
                        Building class as per the GEM Building Taxonomy.
                    settlement_type (str):
                        Type of settlement within the data unit. Possible values: "urban",
                        "rural", "big_city", "all".
                    occupancy_subtype (str):
                        Details on the occupancy, if relevant to characterise the building
                        class.
                    storeys_min (int):
                        Minimum number of storeys of the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype').
                    storeys_max (int):
                        Maximum number of storeys of the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype').
                    proportions (float):
                        Proportions in which the building class (defined by
                        'building_class_name', 'settlement_type' and 'occupancy_subtype') is
                        present in the data unit.

        Returns:
            'building_classes' with re-calculated proportions.
        """

        if building_classes.shape[0] > 0:
            old_proportions = building_classes["proportions"].to_numpy()
            building_classes.loc[:, "proportions"] = old_proportions / old_proportions.sum()

        return building_classes