Commit 7d509439 authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added feature to export to OpenQuake format

parent f1fe00cb
Pipeline #46925 passed with stage
in 2 minutes and 20 seconds
......@@ -8,6 +8,13 @@ Exports the Global Dynamic Exposure (GDE) model onto desired output formats.
[gde-core](https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/gde-core) and
exports it onto desired output formats.
Broadly speaking, the GDE model defines exposure in terms of zoom-level 18 quadtiles and
individual building footprints retrieved from OpenStreetMap (OSM) and processed under
OpenBuildingMap (OBM). The latter are usually referred to as "OBM buildings". Quadtiles are
assigned numbers of buildings from aggregated exposure models (referred to as "aggregated
buildings") and a number of "remainder buildings" is calculated when combining the former with
the OBM buildings. OBM and remainder buildings form the GDE model.
## Installing gde-exporter
### Software dependencies
......@@ -29,6 +36,10 @@ Copy the file `config_example.yml` to your working directory as `config.yml` and
necessary parameters. Required parameters are:
- `model_name`: Name of the input aggregated exposure model to be processed.
- `output_path`: Path where export files will be stored (this path needs to exist for the
software to run). The software creates a sub-directory under this path with name
`exporter_YYYY_MM_DD_HH_MM_SS`, so as to avoid involuntary overwriting of previous output
results.
- `occupancies_to_run`: List of occupancies for which the code will be run, separated by ", "
(comma and space). They need to exist for the `exposure format` of `model_name`. Currently
supported values: residential, commercial, industrial.
......@@ -64,9 +75,24 @@ will be produced:
containing quadkeys (either one per row, comma-separared, or a mix).
- `bounding_box`: Required if `selection_mode = bounding_box`. Coordinates `lon_w`, `lon_e`,
`lat_s` and `lat_n` of the bounding box.
- `output_format`: Format/s to which the GDE model will be exported. More than one format can be
listed separating them with ", " (comma and space). Currently supported values:
- `OpenQuake_CSV`: Exposure CSV files compatible with the
[OpenQuake Engine](https://github.com/gem/oq-engine), with complementary GeoPackage (.gpkg)
files that contain the geometry of quadtiles and OBM buildings.
- `buildings_to_export`: Types of buildings to export, separated by ", " (comma and space).
Currently supported values:
- `OBM`: OBM buildings.
- `remainder`: GDE remainder buildings.
- `aggregated`: Buildings as in the input aggregated exposure model.
- `export_OBM_footprints`: True/False. If True, geometries of OBM buildings will be exported and
parameters associated with these buildings will be attached to their original OSM ID. If False,
the geometries will not be exported and anonymous IDs will be assigned to the parameters of the
OBM buildings.
- `database_gde_tiles`: Credentials for the
[GDE Tiles](https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/database-gdetiles)
database where information on the GDE tiles is stored.
- `number_cores`: Number of cores used for parallelisation.
## Running gde-exporter
......
model_name: esrm20 # Needs to exist in 'aggregated_sources' database table
output_path: path # Path where export files will be stored (needs to exist)
occupancies_to_run: residential, commercial, industrial # Need to exist for the indicated `model_name`
exposure_entities_to_run: all # Either "all", a comma-space-separated list of entity names, or a name of a .txt or .csv file
exposure_entities_code: ISO3 # Either "ISO3" in this or a nested structure with exposure entities names and 3-character codes
......@@ -14,6 +15,8 @@ geographic_selection: # Selection of the geographic area for which GDE will be
lon_e: 23.713597
lat_s: 37.965450
lat_n: 37.972561
output_format: OpenQuake_CSV # Currently supported values: OpenQuake_CSV
buildings_to_export: OBM, remainder # Currently supported values: OBM, remainder, aggregated
export_OBM_footprints: True # If True, geometries of OBM buildings will be exported
database_gde_tiles: # Database where info on the GDE tiles is stored
host: localhost
......
......@@ -26,6 +26,12 @@ from gdeexporter.database_queries import DatabaseQueries
logger = logging.getLogger()
# Currently supported output formats
OUTPUT_FORMATS = ["OpenQuake_CSV"]
# Currently supported types of buildings to export
SUPPORTED_BUILDING_TYPES = ["OBM", "remainder", "aggregated"]
class Configuration:
"""This class handles the configuration parameters of the gde-exporter.
......@@ -33,6 +39,8 @@ class Configuration:
Attributes:
self.model_name (str):
Name of the input aggregated model.
self.output_path (str):
Path where the export files will be stored.
self.occupancies_to_run (list of str):
List of occupancy cases of the input aggregated exposure model for which the code
will be run.
......@@ -100,6 +108,12 @@ class Configuration:
available times of the day are: day, night, transit and census. The keys are the
names as they will appear in the output, the values refer to the intrinsic naming in
the model (i.e. the way values are stored in the database).
self.output_format (list of str):
Format to which the GDE model will be exported. Currently supported options:
OpenQuake_CSV.
self.buildings_to_export (list of str):
List of types of buildings to export. Currently supported values: OBM, remainder,
aggregated.
self.export_OBM_footprints (bool):
If True, the geometries of OpenBuildingMap buildings will be retrieved and exported,
if False, they will not.
......@@ -132,12 +146,15 @@ class Configuration:
REQUIRES = [
"model_name",
"output_path",
"occupancies_to_run",
"exposure_entities_to_run",
"exposure_entities_code",
"geographic_selection",
"cost_cases",
"people_cases",
"output_format",
"buildings_to_export",
"export_OBM_footprints",
"database_gde_tiles",
"number_cores",
......@@ -160,6 +177,8 @@ class Configuration:
self.model_name = ConfigurationMethods.assign_parameter(config, "model_name")
self.output_path = ConfigurationMethods.assign_parameter(config, "output_path")
self.occupancies_to_run = ConfigurationMethods.assign_listed_parameters(
config, "occupancies_to_run"
)
......@@ -202,6 +221,30 @@ class Configuration:
)
self.validate_people_cases()
self.output_format = ConfigurationMethods.assign_listed_parameters(
config, "output_format"
)
for output_format in self.output_format:
if output_format not in OUTPUT_FORMATS:
error_message = (
"Error: output format specified in configuration file is not recognised. "
"Currently supported values: %s." % (", ".join(OUTPUT_FORMATS))
)
logger.critical(error_message)
raise OSError(error_message)
self.buildings_to_export = ConfigurationMethods.assign_listed_parameters(
config, "buildings_to_export"
)
for building_type in self.buildings_to_export:
if building_type not in SUPPORTED_BUILDING_TYPES:
error_message = (
"Error: building tpye specified in configuration file is not recognised. "
"Currently supported values: %s." % (", ".join(SUPPORTED_BUILDING_TYPES))
)
logger.critical(error_message)
raise OSError(error_message)
self.export_OBM_footprints = ConfigurationMethods.assign_boolean_parameter(
config, "export_OBM_footprints"
)
......
......@@ -968,17 +968,19 @@ class DatabaseQueries:
obm_geometries (dict):
Dictionary in which each key is a unique 'osm_id' from 'obm_buildings', with the
following subkeys (only if 'get_footprints' is set to True):
centroid (str):
Centroid of the OBM building in Well-Known Text format.
centroid_lon (float):
Longitude of the centroid of the OBM building.
centroid_lat (float):
Latitude of the centroid of the OBM building.
footprint (str):
Footprint of the OBM building in Well-Known Text format.
If 'get_footprints' is False, 'obm_geometries' is an empty dictionary.
"""
sql_query = "SELECT osm_id, building_class_names, settlement_types, occupancy_subtypes,"
sql_query += " probabilities, ST_AsText(ST_Centroid(geometry)), ST_AsText(geometry)"
sql_query += " FROM %s WHERE(quadkey='%s' AND data_unit_id='%s' AND occupancy_case='%s'"
sql_query += " AND aggregated_source_id=%s);"
sql_query += " probabilities, ST_X(ST_Centroid(geometry)), ST_Y(ST_Centroid(geometry)),"
sql_query += " ST_AsText(geometry) FROM %s WHERE(quadkey='%s' AND data_unit_id='%s' AND"
sql_query += " occupancy_case='%s' AND aggregated_source_id=%s);"
db_gde_tiles = Database(**db_gde_tiles_config)
db_gde_tiles.create_connection_and_cursor()
......@@ -1011,14 +1013,22 @@ class DatabaseQueries:
[exec_result[i][4] for i in range(len(exec_result))], dtype="object"
)
if get_footprints:
raw_centroids = numpy.array(
[exec_result[i][5] for i in range(len(exec_result))], dtype="str"
raw_centroids_x = numpy.array(
[exec_result[i][5] for i in range(len(exec_result))], dtype="float"
)
raw_centroids_y = numpy.array(
[exec_result[i][6] for i in range(len(exec_result))], dtype="float"
)
raw_footprints = numpy.array(
[exec_result[i][6] for i in range(len(exec_result))], dtype="str"
[exec_result[i][7] for i in range(len(exec_result))], dtype="str"
)
else:
raw_centroids = numpy.array(["" for i in range(len(exec_result))], dtype="str")
raw_centroids_x = numpy.array(
[0.0 for i in range(len(exec_result))], dtype="float"
)
raw_centroids_y = numpy.array(
[0.0 for i in range(len(exec_result))], dtype="float"
)
raw_footprints = numpy.array(["" for i in range(len(exec_result))], dtype="str")
else: # No entries found
......@@ -1027,7 +1037,8 @@ class DatabaseQueries:
raw_settlement_types = numpy.array([], dtype="object")
raw_occupancy_subtypes = numpy.array([], dtype="object")
raw_probabilities = numpy.array([], dtype="object")
raw_centroids = numpy.array([], dtype="str")
raw_centroids_x = numpy.array([], dtype="float")
raw_centroids_y = numpy.array([], dtype="float")
raw_footprints = numpy.array([], dtype="str")
obm_geometries = {}
......@@ -1040,7 +1051,8 @@ class DatabaseQueries:
for i, osm_id in enumerate(raw_osm_ids):
if get_footprints:
obm_geometries[osm_id] = {}
obm_geometries[osm_id]["centroid"] = raw_centroids[i]
obm_geometries[osm_id]["centroid_lon"] = raw_centroids_x[i]
obm_geometries[osm_id]["centroid_lat"] = raw_centroids_y[i]
obm_geometries[osm_id]["footprint"] = raw_footprints[i]
number_building_classes = len(raw_building_class_names[i])
osm_ids = numpy.hstack(
......
......@@ -18,6 +18,8 @@
import logging
import sys
import os
import datetime
from multiprocessing import Pool
from functools import partial
from gdeexporter.configuration import Configuration
......@@ -39,6 +41,31 @@ def main():
# Read configuration parameters
config = Configuration("config.yml")
# Verify that user-defined output path exists
if not os.path.isdir(config.output_path):
error_message = (
"The output path specified in the configuration file does not exist: %s"
% (config.output_path)
)
raise OSError(error_message)
# Create a sub-directory (to avoid undesired overwriting of previous exports)
now = datetime.datetime.now()
directory_name = "exporter_%s_%s_%s_%s_%s_%s" % (
str(now.year),
str(now.month).zfill(2),
str(now.day).zfill(2),
str(now.hour).zfill(2),
str(now.minute).zfill(2),
str(now.second).zfill(2),
)
output_path_full = os.path.join(config.output_path, directory_name)
os.mkdir(output_path_full)
# Update config.output_path to include the new sub-directory
config.output_path = output_path_full
logger.info("Results will be exported to: %s" % (config.output_path))
# Retrieve ID and format of aggregated source
(
aggregated_source_id,
aggregated_source_format,
......
......@@ -19,10 +19,14 @@
import logging
from gdeexporter.tileexposure import TileExposure
from gdeexporter.database_queries import DatabaseQueries
from gdeexporter.to_openquake import export_to_OpenQuake_CSV
logger = logging.getLogger()
# Exporters
EXPORTERS = {"OpenQuake_CSV": export_to_OpenQuake_CSV}
class ExportHandler:
"""This class handles the main processing activities of the gde-exporter."""
......@@ -194,6 +198,23 @@ class ExportHandler:
# Append obm_geometries to quadtile.obm_buildings_geometries (dictionary)
quadtile.obm_buildings_geometries.update(obm_geometries)
# Export
for output_format in config.output_format:
EXPORTERS[output_format](
quadtile,
config.buildings_to_export,
config.export_OBM_footprints,
config.cost_cases,
config.people_cases,
config.output_path,
quadkeys_group,
occupancy_case,
)
logger.info(
"Quadtile with quadkey '%s' has been exported to format '%s'"
% (quadtile.quadkey, output_format)
)
# Add to summary values
summary_values["aggregated_buildings"] += (
quadtile.aggregated_buildings["number"].to_numpy().sum()
......
......@@ -62,8 +62,10 @@ class TileExposure:
self.obm_buildings_geometries (dict):
Dictionary in which each key is a unique 'osm_id' from self.obm_buildings, with the
following subkeys:
centroid (str):
Centroid of the OBM building in Well-Known Text format.
centroid_lon (float):
Longitude of the centroid of the OBM building.
centroid_lat (float):
Latitude of the centroid of the OBM building.
footprint (str) (only if instructed to retrieve footprints by the user):
Footprint of the OBM building in Well-Known Text format.
self.remainder_buildings (Pandas DataFrame):
......
#!/usr/bin/env python3
# Copyright (C) 2022:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import logging
import os
import mercantile
import pandas
import geopandas
import pyproj
from shapely.geometry import Polygon
from shapely.wkt import loads
from copy import deepcopy
from gdeexporter.tools import write_CSV_file, read_last_written_values_csv
logger = logging.getLogger()
def export_to_OpenQuake_CSV(
quadtile,
buildings_to_export,
export_OBM_footprints,
cost_cases,
people_cases,
output_path,
quadkeys_group,
occupancy_case,
):
"""This method exports the contents of a TileExposure object into the OpenQuake Engine CSV
format and additional GeoPackage (.gpkg) files that contain the geometry of quadtiles and
OBM buildings (the latter only if "OBM" is one of the type of 'buildings_to_export' and
'export_OBM_footprints' is True).
Details on the OpenQuake Engine: https://github.com/gem/oq-engine.
Note: In an OpenQuake exposure CSV file, the word "asset" is used to describe an individual
row that corresponds to a number of buildings (any float >0) of a certain building class
that is considered to exist at a specific location, defined by a longitude-latitude pair.
Args:
quadtile (TileExposure object):
Instance of gdeexporter.tileexposure.TileExposure.
buildings_to_export (list of str):
List of types of buildings to export. Currently supported values: OBM, remainder,
aggregated.
export_OBM_footprints (bool):
If True (and if "OBM" is one of the type of 'buildings_to_export'), the geometries
of OpenBuildingMap buildings will be exported and the OpenQuake CSV files of the OBM
buildings will indicate their centroids and IDs. If False, geometries will not be
exported, the coordinates in the OpenQuake CSV files will correspond to the centroid
of the quadtile, and the "osm_id" column will contain ficticious IDs (generated to
allow for aggregation of OpenQuake results).
cost_cases (dict):
Dictionary whose keys indicate the columns of the buildings attributes of 'quadtile'
(e.g. quadtile.obm_buildings, quadtile.remainder_buildings, etc) that are associated
with the replacement costs of the building.
people_cases (dict):
Dictionary whose keys indicate the columns of the buildings attributes of 'quadtile'
(e.g. quadtile.obm_buildings, quadtile.remainder_buildings, etc) that are associated
with the number of people in the building.
output_path (str):
Path to which the output files will be saved.
quadkeys_group (str):
Name of the quadkey group that the 'quadtile' is part of. It is used for file naming
and assigning incremental IDs to the rows of the OpenQuake CSV files.
occupancy_case (str):
Occupancy case to which the buildings of 'quadtile' belong. It is used for file
naming and assigning incremental IDs to the rows of the OpenQuake CSV files.
Returns:
This method writes three kinds of files:
- OpenQuake CSV file:
With name pattern [quadkeys_group]_[occupancy_case]_[building_type].csv, where
'quadkeys_group' and 'occupancy_case' are as defined in the arguments, and
'building_type' is each of the elements of 'buildings_to_export' (i.e. one such file
is created per element of 'buildings_to_export'). It contains the following columns:
- id (str):
Unique ID of the asset (required by OpenQuake to be different for all rows
of all exposure CSV files that may be run simultaneously).
- lon (float):
Longitude of the asset.
- lat (float):
Latitude of the asset.
- taxonomy (str):
Building class of the asset.
- number (float):
Number of buildings in the asset (any float >0).
- Columns associated with building replacement costs (float):
Names and contents are user-defined ('cost_cases').
- Columns associated with the number of people in the building at different
times of the day (float):
Names and contents are user-defined ('people_cases').
- occupancy (str):
Occupancy case of the asset (e.g. "residential", "commercial", etc).
- data_unit_id (str):
ID of the data unit that the asset belongs to (useful for post-processing
purposes).
- quadkey (str):
Quadkey of the zoom-level 18 tile that the asset belongs to (useful for
post-processing purposes).
- osm_id (str; only in the case of OBM buildings):
If 'export_OBM_footprints' is True, it is the OpenStreetMap ID of the
building that the asset corresponds to. If 'export_OBM_footprints' is False,
it is a ficticious ID. (Useful for post-processing purposes in both cases).
- GeoPackage (.gpkg) with geometry of the quadtile:
- quadkey (str): Quadkey of the tile.
- geometry (geometry): Geometry of the tile.
- GeoPackage (.gpkg) with geometry of the OBM buildings (created only if "OBM" is in
'buildings_to_export' and 'export_OBM_footprints' is True):
- osm_id (str): OpenStreetMap (OSM) ID of the building.
- geometry (geometry): Geometry of the building.
"""
# Retrieve quadtile's geometry and centroid
tile = mercantile.quadkey_to_tile(quadtile.quadkey)
tile_bounds = mercantile.bounds(tile)
tile_geometry = Polygon(
[
(tile_bounds.west, tile_bounds.south),
(tile_bounds.east, tile_bounds.south),
(tile_bounds.east, tile_bounds.north),
(tile_bounds.west, tile_bounds.north),
]
)
tile_centroid_lon = (tile_bounds.west + tile_bounds.east) / 2.0
tile_centroid_lat = (tile_bounds.south + tile_bounds.north) / 2.0
# File with geometries of quadtiles
filename_quadtiles = "%s_%s_geometries_quadtiles.gpkg" % (quadkeys_group, occupancy_case)
quadtile_geom = geopandas.GeoDataFrame(
{
"quadkey": pandas.Series([quadtile.quadkey], dtype=str),
"geometry": [tile_geometry],
},
geometry=[tile_geometry],
)
quadtile_geom.crs = pyproj.CRS("epsg:4326")
if os.path.exists(os.path.join(output_path, filename_quadtiles)): # append
quadtile_geom.to_file(
os.path.join(output_path, filename_quadtiles), driver="GPKG", mode="a"
)
else: # create
quadtile_geom.to_file(os.path.join(output_path, filename_quadtiles), driver="GPKG")
for building_type in buildings_to_export:
# Prefix of unique ID
prefix_id = "%s_%s_%s" % (quadkeys_group, occupancy_case, building_type)
# File name of the output CSV file
filename_csv = "%s.csv" % (prefix_id)
# Retrieve last value of "id" and "osm_id" in 'filename_csv'
last_values = read_last_written_values_csv(
os.path.join(output_path, filename_csv), ["id", "osm_id"], sep=","
)
last_id_str = last_values[0]
last_osm_id_str = last_values[1] # to be used further down
if last_id_str == "UNKNOWN":
last_id = 0
else:
last_id = int(last_id_str.split("%s_" % (prefix_id))[-1])
# Identify the attribute of 'quadtile' that 'building_type' corresponds to
attribute_name = "%s_buildings" % (building_type.lower())
if hasattr(quadtile, attribute_name): # check if attribute exists
data = deepcopy(getattr(quadtile, attribute_name))
else:
continue
# There may be no buildings of this type (e.g. zero remainder buildings)
if data.shape[0] == 0:
continue
# Create additional output columns
data["id"] = [
"%s_%s" % (prefix_id, i) for i in range(last_id + 1, last_id + 1 + data.shape[0])
]
data["occupancy"] = [occupancy_case for i in range(data.shape[0])]
data["quadkey"] = pandas.Series(
[quadtile.quadkey for i in range(data.shape[0])], dtype=str
)
# Longitude and latitude of the points that represent the building/s
if building_type in ["remainder", "aggregated", "total"]:
data["lon"] = [tile_centroid_lon for i in range(data.shape[0])]
data["lat"] = [tile_centroid_lat for i in range(data.shape[0])]
elif building_type in ["OBM"]:
if export_OBM_footprints:
# Use centroids from the building footprints
data["lon"] = [
quadtile.obm_buildings_geometries[osm_id]["centroid_lon"]
for osm_id in data["osm_id"].to_numpy()
]
data["lat"] = [
quadtile.obm_buildings_geometries[osm_id]["centroid_lat"]
for osm_id in data["osm_id"].to_numpy()
]
# File with geometries of footprints
filename_footprints = "%s_geometries_footprints.gpkg" % (prefix_id)
obm_geometries = [
loads(quadtile.obm_buildings_geometries[osm_id]["footprint"])
for osm_id in quadtile.obm_buildings_geometries
]
obm_footprints = geopandas.GeoDataFrame(
{
"osm_id": pandas.Series(
[osm_id for osm_id in quadtile.obm_buildings_geometries], dtype=str
),
"geometry": obm_geometries,
},
geometry=obm_geometries,
)
obm_footprints.crs = pyproj.CRS("epsg:4326")
if os.path.exists(os.path.join(output_path, filename_footprints)): # append
obm_footprints.to_file(
os.path.join(output_path, filename_footprints), driver="GPKG", mode="a"
)
else: # create
obm_footprints.to_file(
os.path.join(output_path, filename_footprints), driver="GPKG"
)
else: # export_OBM_footprints is False --> need to anonymise the OBM buildings
# Use centroids of the tile
data["lon"] = [tile_centroid_lon for i in range(data.shape[0])]
data["lat"] = [tile_centroid_lat for i in range(data.shape[0])]
# Generate OSM IDs that are not the real ones
# (but allow to gather the rows associated with one building)
if last_osm_id_str == "UNKNOWN":
last_osm_id = 0
else:
last_osm_id = int(last_osm_id_str.split("osm_fake_")[-1])
# Identify unique OSM IDs
osm_ids_real = data["osm_id"].unique()
# Create one ficticious OSM ID per unique real OSM ID
osm_ids_ficticious = [
"osm_fake_%s" % (i)
for i in range(last_osm_id + 1, last_osm_id + 1 + len(osm_ids_real))
]
aux_df = pandas.DataFrame(
{
"osm_id": osm_ids_real,
"osm_id_new": osm_ids_ficticious,
}
)
# Replace the OSM ID in 'data'
data = (
data.set_index("osm_id")
.join(aux_df.set_index("osm_id"))
.set_index(pandas.Index(range(data.shape[0])))
.rename(columns={"osm_id_new": "osm_id"})
)
data = data.sort_values(by=["id"])
del aux_df, osm_ids_real, osm_ids_ficticious
else:
data["lon"] = [-999.9 for i in range(data.shape[0])]
data["lat"] = [-999.9 for i in range(data.shape[0])]
data = data.rename(columns={"building_class_name": "taxonomy"})
# Order in which the columns of 'data' will appear in the CSV file
columns_order = ["id", "lon", "lat", "taxonomy", "number"]
for col_name in cost_cases:
columns_order.append(col_name)
for col_name in people_cases:
columns_order.append(col_name)
columns_order.extend(["occupancy", "data_unit_id", "quadkey"])
if building_type in ["OBM"]:
columns_order.append("osm_id")