gdecore.py 9.06 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/usr/bin/env python3

# Copyright (C) 2022:
#   Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.

import logging
import sys
21
from copy import deepcopy
22
23
from gdecore.configuration import Configuration
from gdecore.database_queries import DatabaseQueries
24
from gdecore.database_storage import DatabaseStorage
25
from gdecore.processor import GDEProcessor
26
from gdecore.occupancy_cases import OccupancyCasesESRM20
27
28
29
30
31
32

# Configure the root logger so that messages of level INFO and above (info, warning,
# error) are written to standard output
logger = logging.getLogger()
logger.addHandler(logging.StreamHandler(stream=sys.stdout))
logger.setLevel(level=logging.INFO)

33
34
# Maps the format of an aggregated exposure model (as returned by the database query in
# main()) to the class that handles its occupancy cases
AGGREGATED_MODELS_OCCUPANCY = dict(esrm20=OccupancyCasesESRM20)

35
36

def main():
    """Run the gde-core.

    Reads the configuration from "config.yml", retrieves the ID and format of the
    aggregated exposure model named in the configuration, and then, for each exposure
    entity and occupancy case to run, loops over the associated data units to:

    1. retrieve the OBM buildings of the data unit and group the parts of relations,
    2. count the OBM buildings per quadkey,
    3. retrieve the building classes of the data unit, assign them to the OBM buildings
       and store the result,
    4. retrieve the data-unit tiles, calculate their remainder buildings and store the
       numbers of OBM and remainder buildings.

    Raises:
        OSError: If the ID of the aggregated exposure model cannot be retrieved (the
            query returns a negative ID), or if there are no exposure entities to run.
        KeyError: If the format of the aggregated exposure model is not a key of
            AGGREGATED_MODELS_OCCUPANCY.
        SystemExit: Always raised at the end of a successful run, via sys.exit().
    """

    # Log the start of the run
    logger.info("gde-core has started")

    # Read configuration parameters
    config = Configuration("config.yml")

    (
        aggregated_source_id,
        aggregated_source_format,
    ) = DatabaseQueries.retrieve_aggregated_source_id_and_format(
        config.model_name,
        config.database_gde_tiles,
        "aggregated_sources",
    )

    # A negative ID is treated as "not found or ambiguous" (see the error message)
    if aggregated_source_id < 0:
        error_message = (
            "Error while attempting to retrieve the ID of aggregated exposure model with name "
            "'%s': more than one or no entries were found." % (config.model_name)
        )
        raise OSError(error_message)
    logger.info(
        "aggregated_source_id of aggregated exposure model with name '%s' retrieved: %s",
        config.model_name,
        aggregated_source_id,
    )

    # Initialise occupancy cases class
    occupancy_cases = AGGREGATED_MODELS_OCCUPANCY[aggregated_source_format]()

    # Interpret and update config.exposure_entities_to_run
    config.interpret_exposure_entities_to_run(aggregated_source_id)

    if len(config.exposure_entities_to_run) < 1:
        error_message = "Attribute 'exposure_entities_to_run' of configuration is an empty list"
        raise OSError(error_message)
    logger.info(
        "%s exposure entity(ies) will be run: %s",
        len(config.exposure_entities_to_run),
        ", ".join(config.exposure_entities_to_run),
    )

    # Run by exposure entity and occupancy case
    for exposure_entity_code in config.exposure_entities_to_run:
        for occupancy_case in config.occupancies_to_run:
            # Retrieve data unit IDs and geometries
            (
                data_units_ids,
                data_units_geometries,
                data_units_ids_no_geometry,
            ) = DatabaseQueries.get_data_unit_ids_geometries_of_entity_and_occupancy_case(
                exposure_entity_code,
                occupancy_case,
                aggregated_source_id,
                config.database_gde_tiles,
                "data_units",
            )
            logger.info(
                "Running exposure entity '%s', occupancy case '%s': %s data units retrieved.",
                exposure_entity_code,
                occupancy_case,
                len(data_units_ids),
            )

            if len(data_units_ids_no_geometry) > 0:
                # Convert each ID to str so that joining also works for non-string IDs
                concatenated_ids = ", ".join(map(str, data_units_ids_no_geometry))
                logger.warning(
                    "%s data units associated with exposure entity '%s' and occupancy case '%s'"
                    " found for which the 'data_units' table contained no geometry. These data"
                    " units will not be run: %s",
                    len(data_units_ids_no_geometry),
                    exposure_entity_code,
                    occupancy_case,
                    concatenated_ids,
                )

            for i, data_unit_id in enumerate(data_units_ids):
                # Going by data unit so as to minimise intersection operations, to avoid
                # holding excessively large data in RAM, and because building classes are
                # associated with specific data units
                aux_log_string = (
                    "Data unit '%s' (of exposure entity '%s' and occupancy case '%s')"
                    % (data_unit_id, exposure_entity_code, occupancy_case)
                )

                # Retrieve OBM buildings and assign building classes and probabilities to them
                # Retrieve OBM buildings
                obm_buildings_raw = (
                    DatabaseQueries.get_OBM_buildings_in_data_unit_by_occupancy_types(
                        occupancy_cases.mapping[occupancy_case],
                        data_units_geometries[i],
                        config.database_obm_buildings,
                        "obm_buildings",
                    )
                )
                logger.info(
                    "%s: %s OBM building parts retrieved",
                    aux_log_string,
                    obm_buildings_raw.shape[0],
                )

                if obm_buildings_raw.shape[0] > 0:
                    # Group parts of the same relations existing in 'obm_buildings_raw'
                    obm_buildings = GDEProcessor.post_process_obm_relations(obm_buildings_raw)
                else:
                    # Nothing to group: continue with an independent (empty) copy
                    obm_buildings = deepcopy(obm_buildings_raw)
                logger.info(
                    "%s: %s OBM buildings identified",
                    aux_log_string,
                    obm_buildings.shape[0],
                )

                # The raw building parts are no longer needed; drop the reference
                del obm_buildings_raw

                # Calculate number of OBM buildings per quadkey
                obm_buildings_per_quadkey = GDEProcessor.calculate_buildings_per_quadkey(
                    obm_buildings["quadkey"].to_numpy()
                )

                # Retrieve building classes of this data unit
                data_unit_building_classes = DatabaseQueries.get_building_classes_of_data_unit(
                    data_unit_id,
                    occupancy_case,
                    aggregated_source_id,
                    config.database_gde_tiles,
                    "data_units_buildings",
                )
                logger.info(
                    "%s: %s building classes identified",
                    aux_log_string,
                    data_unit_building_classes.shape[0],
                )

                # Assign building classes to OBM buildings
                obm_buildings_building_classes = (
                    GDEProcessor.assign_building_classes_to_obm_buildings(
                        obm_buildings, data_unit_building_classes, occupancy_case
                    )
                )
                logger.info(
                    "%s: %s OBM buildings with assigned building classes",
                    aux_log_string,
                    len(obm_buildings_building_classes.keys()),
                )

                # Store building classes of OBM buildings
                DatabaseStorage.store_OBM_building_classes(
                    data_unit_id,
                    occupancy_case,
                    aggregated_source_id,
                    obm_buildings_building_classes,
                    config.database_gde_tiles,
                    "gde_buildings",
                )

                # Retrieve data-unit tiles (quadkey, aggregated_buildings) as a Pandas DataFrame
                data_unit_tiles = DatabaseQueries.get_data_unit_tiles_of_data_unit_as_DataFrame(
                    data_unit_id,
                    occupancy_case,
                    aggregated_source_id,
                    config.database_gde_tiles,
                    "data_unit_tiles",
                )
                logger.info(
                    "%s: %s data-unit tiles retrieved",
                    aux_log_string,
                    data_unit_tiles.shape[0],
                )

                # Calculate remainder buildings in data-unit tiles
                data_unit_tiles = GDEProcessor.process_group_data_unit_tiles(
                    data_unit_tiles,
                    obm_buildings_per_quadkey,
                    config.database_completeness,
                    "obm_built_area_assessments",
                    config.number_cores,
                )

                # Store number of OBM and remainder buildings of the data-unit tiles
                DatabaseStorage.store_number_OBM_and_remainder_buildings(
                    data_unit_id,
                    occupancy_case,
                    aggregated_source_id,
                    data_unit_tiles,
                    config.database_gde_tiles,
                    "data_unit_tiles",
                )

    # Leave the program
    logger.info("gde-core has finished")
    sys.exit()


# Run the gde-core only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()