to_openquake.py 13.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python3

# Copyright (C) 2022:
#   Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.

import logging
import os
import mercantile
import pandas
import geopandas
import pyproj
from shapely.geometry import Polygon
from shapely.wkt import loads
from copy import deepcopy
from gdeexporter.tools import write_CSV_file, read_last_written_values_csv


logger = logging.getLogger()


def export_to_OpenQuake_CSV(
    quadtile,
    buildings_to_export,
    cost_cases,
    people_cases,
    output_path,
    quadkeys_group,
    occupancy_case,
42
    export_OBM_footprints=True,
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
):
    """This method exports the contents of a TileExposure object into the OpenQuake Engine CSV
    format and additional GeoPackage (.gpkg) files that contain the geometry of quadtiles and
    OBM buildings (the latter only if "OBM" is one of the type of 'buildings_to_export' and
    'export_OBM_footprints' is True).

    Details on the OpenQuake Engine: https://github.com/gem/oq-engine.

    Note: In an OpenQuake exposure CSV file, the word "asset" is used to describe an individual
    row that corresponds to a number of buildings (any float >0) of a certain building class
    that is considered to exist at a specific location, defined by a longitude-latitude pair.

    Args:
        quadtile (TileExposure object):
            Instance of gdeexporter.tileexposure.TileExposure.
        buildings_to_export (list of str):
            List of types of buildings to export. Currently supported values: OBM, remainder,
            aggregated.
        cost_cases (dict):
            Dictionary whose keys indicate the columns of the buildings attributes of 'quadtile'
            (e.g. quadtile.obm_buildings, quadtile.remainder_buildings, etc) that are associated
            with the replacement costs of the building.
        people_cases (dict):
            Dictionary whose keys indicate the columns of the buildings attributes of 'quadtile'
            (e.g. quadtile.obm_buildings, quadtile.remainder_buildings, etc) that are associated
            with the number of people in the building.
        output_path (str):
            Path to which the output files will be saved.
        quadkeys_group (str):
            Name of the quadkey group that the 'quadtile' is part of. It is used for file naming
            and assigning incremental IDs to the rows of the OpenQuake CSV files.
        occupancy_case (str):
            Occupancy case to which the buildings of 'quadtile' belong. It is used for file
            naming and assigning incremental IDs to the rows of the OpenQuake CSV files.
77
78
79
80
81
82
83
        export_OBM_footprints (bool):
            If True (and if "OBM" is one of the type of 'buildings_to_export'), the geometries
            of OpenBuildingMap buildings will be exported and the OpenQuake CSV files of the OBM
            buildings will indicate their centroids and IDs. If False, geometries will not be
            exported, the coordinates in the OpenQuake CSV files will correspond to the centroid
            of the quadtile, and the "osm_id" column will contain ficticious IDs (generated to
            allow for aggregation of OpenQuake results). Default = True.
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296

    Returns:
        This method writes three kinds of files:
        - OpenQuake CSV file:
            With name pattern [quadkeys_group]_[occupancy_case]_[building_type].csv, where
            'quadkeys_group' and 'occupancy_case' are as defined in the arguments, and
            'building_type' is each of the elements of 'buildings_to_export' (i.e. one such file
            is created per element of 'buildings_to_export'). It contains the following columns:
                - id (str):
                    Unique ID of the asset (required by OpenQuake to be different for all rows
                    of all exposure CSV files that may be run simultaneously).
                - lon (float):
                    Longitude of the asset.
                - lat (float):
                    Latitude of the asset.
                - taxonomy (str):
                    Building class of the asset.
                - number (float):
                    Number of buildings in the asset (any float >0).
                - Columns associated with building replacement costs (float):
                    Names and contents are user-defined ('cost_cases').
                - Columns associated with the number of people in the building at different
                times of the day (float):
                    Names and contents are user-defined ('people_cases').
                - occupancy (str):
                    Occupancy case of the asset (e.g. "residential", "commercial", etc).
                - data_unit_id (str):
                    ID of the data unit that the asset belongs to (useful for post-processing
                    purposes).
                - quadkey (str):
                    Quadkey of the zoom-level 18 tile that the asset belongs to (useful for
                    post-processing purposes).
                - osm_id (str; only in the case of OBM buildings):
                    If 'export_OBM_footprints' is True, it is the OpenStreetMap ID of the
                    building that the asset corresponds to. If 'export_OBM_footprints' is False,
                    it is a ficticious ID. (Useful for post-processing purposes in both cases).
        - GeoPackage (.gpkg) with geometry of the quadtile:
            - quadkey (str): Quadkey of the tile.
            - geometry (geometry): Geometry of the tile.
        - GeoPackage (.gpkg) with geometry of the OBM buildings (created only if "OBM" is in
        'buildings_to_export' and 'export_OBM_footprints' is True):
            - osm_id (str): OpenStreetMap (OSM) ID of the building.
            - geometry (geometry): Geometry of the building.
    """

    # Retrieve quadtile's geometry and centroid
    tile = mercantile.quadkey_to_tile(quadtile.quadkey)
    tile_bounds = mercantile.bounds(tile)
    tile_geometry = Polygon(
        [
            (tile_bounds.west, tile_bounds.south),
            (tile_bounds.east, tile_bounds.south),
            (tile_bounds.east, tile_bounds.north),
            (tile_bounds.west, tile_bounds.north),
        ]
    )
    tile_centroid_lon = (tile_bounds.west + tile_bounds.east) / 2.0
    tile_centroid_lat = (tile_bounds.south + tile_bounds.north) / 2.0

    # File with geometries of quadtiles
    filename_quadtiles = "%s_%s_geometries_quadtiles.gpkg" % (quadkeys_group, occupancy_case)
    quadtile_geom = geopandas.GeoDataFrame(
        {
            "quadkey": pandas.Series([quadtile.quadkey], dtype=str),
            "geometry": [tile_geometry],
        },
        geometry=[tile_geometry],
    )
    quadtile_geom.crs = pyproj.CRS("epsg:4326")
    if os.path.exists(os.path.join(output_path, filename_quadtiles)):  # append
        quadtile_geom.to_file(
            os.path.join(output_path, filename_quadtiles), driver="GPKG", mode="a"
        )
    else:  # create
        quadtile_geom.to_file(os.path.join(output_path, filename_quadtiles), driver="GPKG")

    for building_type in buildings_to_export:
        # Prefix of unique ID
        prefix_id = "%s_%s_%s" % (quadkeys_group, occupancy_case, building_type)

        # File name of the output CSV file
        filename_csv = "%s.csv" % (prefix_id)

        # Retrieve last value of "id" and "osm_id" in 'filename_csv'
        last_values = read_last_written_values_csv(
            os.path.join(output_path, filename_csv), ["id", "osm_id"], sep=","
        )
        last_id_str = last_values[0]
        last_osm_id_str = last_values[1]  # to be used further down

        if last_id_str == "UNKNOWN":
            last_id = 0
        else:
            last_id = int(last_id_str.split("%s_" % (prefix_id))[-1])

        # Identify the attribute of 'quadtile' that 'building_type' corresponds to
        attribute_name = "%s_buildings" % (building_type.lower())
        if hasattr(quadtile, attribute_name):  # check if attribute exists
            data = deepcopy(getattr(quadtile, attribute_name))
        else:
            continue

        # There may be no buildings of this type (e.g. zero remainder buildings)
        if data.shape[0] == 0:
            continue

        # Create additional output columns
        data["id"] = [
            "%s_%s" % (prefix_id, i) for i in range(last_id + 1, last_id + 1 + data.shape[0])
        ]
        data["occupancy"] = [occupancy_case for i in range(data.shape[0])]
        data["quadkey"] = pandas.Series(
            [quadtile.quadkey for i in range(data.shape[0])], dtype=str
        )

        # Longitude and latitude of the points that represent the building/s
        if building_type in ["remainder", "aggregated", "total"]:
            data["lon"] = [tile_centroid_lon for i in range(data.shape[0])]
            data["lat"] = [tile_centroid_lat for i in range(data.shape[0])]

        elif building_type in ["OBM"]:
            if export_OBM_footprints:
                # Use centroids from the building footprints
                data["lon"] = [
                    quadtile.obm_buildings_geometries[osm_id]["centroid_lon"]
                    for osm_id in data["osm_id"].to_numpy()
                ]
                data["lat"] = [
                    quadtile.obm_buildings_geometries[osm_id]["centroid_lat"]
                    for osm_id in data["osm_id"].to_numpy()
                ]

                # File with geometries of footprints
                filename_footprints = "%s_geometries_footprints.gpkg" % (prefix_id)
                obm_geometries = [
                    loads(quadtile.obm_buildings_geometries[osm_id]["footprint"])
                    for osm_id in quadtile.obm_buildings_geometries
                ]
                obm_footprints = geopandas.GeoDataFrame(
                    {
                        "osm_id": pandas.Series(
                            [osm_id for osm_id in quadtile.obm_buildings_geometries], dtype=str
                        ),
                        "geometry": obm_geometries,
                    },
                    geometry=obm_geometries,
                )
                obm_footprints.crs = pyproj.CRS("epsg:4326")
                if os.path.exists(os.path.join(output_path, filename_footprints)):  # append
                    obm_footprints.to_file(
                        os.path.join(output_path, filename_footprints), driver="GPKG", mode="a"
                    )
                else:  # create
                    obm_footprints.to_file(
                        os.path.join(output_path, filename_footprints), driver="GPKG"
                    )

            else:  # export_OBM_footprints is False --> need to anonymise the OBM buildings
                # Use centroids of the tile
                data["lon"] = [tile_centroid_lon for i in range(data.shape[0])]
                data["lat"] = [tile_centroid_lat for i in range(data.shape[0])]

                # Generate OSM IDs that are not the real ones
                # (but allow to gather the rows associated with one building)
                if last_osm_id_str == "UNKNOWN":
                    last_osm_id = 0
                else:
                    last_osm_id = int(last_osm_id_str.split("osm_fake_")[-1])
                # Identify unique OSM IDs
                osm_ids_real = data["osm_id"].unique()
                # Create one ficticious OSM ID per unique real OSM ID
                osm_ids_ficticious = [
                    "osm_fake_%s" % (i)
                    for i in range(last_osm_id + 1, last_osm_id + 1 + len(osm_ids_real))
                ]
                aux_df = pandas.DataFrame(
                    {
                        "osm_id": osm_ids_real,
                        "osm_id_new": osm_ids_ficticious,
                    }
                )

                # Replace the OSM ID in 'data'
                data = (
                    data.set_index("osm_id")
                    .join(aux_df.set_index("osm_id"))
                    .set_index(pandas.Index(range(data.shape[0])))
                    .rename(columns={"osm_id_new": "osm_id"})
                )
                data = data.sort_values(by=["id"])
                del aux_df, osm_ids_real, osm_ids_ficticious

        else:
            data["lon"] = [-999.9 for i in range(data.shape[0])]
            data["lat"] = [-999.9 for i in range(data.shape[0])]

        data = data.rename(columns={"building_class_name": "taxonomy"})

        # Order in which the columns of 'data' will appear in the CSV file
        columns_order = ["id", "lon", "lat", "taxonomy", "number"]
        for col_name in cost_cases:
            columns_order.append(col_name)
        for col_name in people_cases:
            columns_order.append(col_name)
        columns_order.extend(["occupancy", "data_unit_id", "quadkey"])
        if building_type in ["OBM"]:
            columns_order.append("osm_id")

        write_CSV_file(
            data, columns_order, os.path.join(output_path, filename_csv), writemode="a", sep=","
        )

    return