diff --git a/README.md b/README.md index 9f3549739322fd6f1f2d601a4cc08e0482b8da1d..996de8e562a4c7845f6a78d5e4a75d276a96e90b 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,7 @@ listed separating them with ", " (comma and space). Currently supported values: - `OpenQuake_CSV`: Exposure CSV files compatible with the [OpenQuake Engine](https://github.com/gem/oq-engine), with complementary GeoPackage (.gpkg) files that contain the geometry of quadtiles and OBM buildings. + - `GeoSummary_Tiles`: GeoPackage (GPKG) files showing summary values per tile. - `buildings_to_export`: Types of buildings to export, separated by ", " (comma and space). Currently supported values: - `OBM`: OBM buildings. diff --git a/config_example.yml b/config_example.yml index 7a7fd75ba861e405824e6b6b250a30f0b9f3d344..330058188701ff559d86d8d7116773e61efde61a 100644 --- a/config_example.yml +++ b/config_example.yml @@ -15,7 +15,7 @@ geographic_selection: # Selection of the geographic area for which GDE will be lon_e: 23.713597 lat_s: 37.965450 lat_n: 37.972561 -output_format: OpenQuake_CSV # Currently supported values: OpenQuake_CSV +output_format: OpenQuake_CSV, GeoSummary_Tiles # One or more (separated by ", "). 
Currently supported values: OpenQuake_CSV, GeoSummary_Tiles buildings_to_export: OBM, remainder # Currently supported values: OBM, remainder, aggregated export_OBM_footprints: True # If True, geometries of OBM buildings will be exported database_gde_tiles: # Database where info on the GDE tiles is stored diff --git a/gdeexporter/configuration.py b/gdeexporter/configuration.py index 8cbe60db935494a46aee153361a80cd4485dd83c..0c17d32f3f2ec622a713744c0bb39ea0194765a4 100644 --- a/gdeexporter/configuration.py +++ b/gdeexporter/configuration.py @@ -27,7 +27,7 @@ from gdeexporter.database_queries import DatabaseQueries logger = logging.getLogger() # Currently supported output formats -OUTPUT_FORMATS = ["OpenQuake_CSV"] +OUTPUT_FORMATS = ["OpenQuake_CSV", "GeoSummary_Tiles"] # Currently supported types of buildings to export SUPPORTED_BUILDING_TYPES = ["OBM", "remainder", "aggregated"] @@ -110,7 +110,7 @@ class Configuration: the model (i.e. the way values are stored in the database). self.output_format (list of str): Format to which the GDE model will be exported. Currently supported options: - OpenQuake_CSV. + OpenQuake_CSV, GeoSummary_Tiles. self.buildings_to_export (list of str): List of types of buildings to export. Currently supported values: OBM, remainder, aggregated. 
def export_to_GeoSummary(
    quadtile,
    buildings_to_export,
    cost_cases,
    people_cases,
    output_path,
    quadkeys_group,
    occupancy_case,
    export_OBM_footprints=False,
):
    """This method exports a one-row summary of a TileExposure object into a GeoPackage
    (GPKG) file. If the file already exists, the row associated with this 'quadtile' is
    appended to it; otherwise, the file is created.

    Args:
        quadtile (TileExposure object):
            Instance of gdeexporter.tileexposure.TileExposure. Its 'quadkey' attribute and
            its '[building_type]_buildings' attributes (lower case, e.g. 'obm_buildings',
            'remainder_buildings') are read.
        buildings_to_export (list of str):
            List of types of buildings to export. Currently supported values: OBM, remainder,
            aggregated.
        cost_cases (dict):
            Dictionary whose keys indicate the columns of the buildings attributes of
            'quadtile' that are associated with the replacement costs of the building. Only
            the keys are used.
        people_cases (dict):
            Dictionary whose keys indicate the columns of the buildings attributes of
            'quadtile' that are associated with the number of people in the building. Only
            the keys are used.
        output_path (str):
            Path to which the output file will be saved.
        quadkeys_group (str):
            Name of the quadkey group that the 'quadtile' is part of. It is used for naming
            the output file.
        occupancy_case (str):
            Occupancy case to which the buildings of 'quadtile' belong. It is used for naming
            the output file and the output fields.
        export_OBM_footprints (bool):
            Unused. Kept so that all exporters can be called with the same arguments.
            Default: False.

    Returns:
        None. This method writes (or appends to) one GeoPackage (GPKG) file with name pattern
        [quadkeys_group]_[occupancy_case]_geosummary_tiles.gpkg, where 'quadkeys_group' and
        'occupancy_case' are as defined in the arguments. It contains the following fields:
            - quadkey (str): Quadkey of the tile.
            - geometry (geometry): Geometry of the tile (EPSG:4326).
            - [occupancy_case]_[building_type]_buildings: Sum of the "number" column of the
              buildings of type 'building_type' (each of the elements of
              'buildings_to_export') of the tile.
            - [occupancy_case]_[building_type]_XXX: Sum over the same buildings of column
              XXX, where XXX is each of the keys of 'cost_cases' followed by each of the keys
              of 'people_cases'.
            - [occupancy_case]_number_data_units: Number of unique values of the
              "data_unit_id" column across all exported building types.
        If 'quadtile' does not have the attribute associated with a building type, all the
        fields of that building type are set to 0.0.
    """

    # Retrieve the quadtile's geometry (shapely closes the ring automatically)
    tile_bounds = mercantile.bounds(mercantile.quadkey_to_tile(quadtile.quadkey))
    tile_geometry = Polygon(
        [
            (tile_bounds.west, tile_bounds.south),
            (tile_bounds.east, tile_bounds.south),
            (tile_bounds.east, tile_bounds.north),
            (tile_bounds.west, tile_bounds.north),
        ]
    )

    # Start GeoPandas DataFrame with summary of values for the tile; the "geometry" column
    # is picked up as the active geometry, so it does not need to be passed a second time
    geosummary = geopandas.GeoDataFrame(
        {
            "quadkey": pandas.Series([quadtile.quadkey], dtype=str),
            "geometry": [tile_geometry],
        },
        crs=pyproj.CRS("epsg:4326"),
    )

    # Columns to be summed per building type (costs first, then people, to keep field order)
    summed_columns = list(cost_cases) + list(people_cases)

    data_unit_ids = []

    for building_type in buildings_to_export:
        field_id_prefix = "%s_%s" % (occupancy_case, building_type)

        # Identify the attribute of 'quadtile' that 'building_type' corresponds to
        attribute_name = "%s_buildings" % (building_type.lower())

        if hasattr(quadtile, attribute_name):
            # The buildings are only read, never modified, so no copy is needed
            data = getattr(quadtile, attribute_name)

            geosummary["%s_buildings" % (field_id_prefix)] = data["number"].to_numpy().sum()

            for col_name in summed_columns:
                geosummary["%s_%s" % (field_id_prefix, col_name)] = (
                    data[col_name].to_numpy().sum()
                )

            data_unit_ids.extend(data["data_unit_id"].unique())

        else:
            # All columns need to be created anyway, so as to be able to append to the GPKG
            geosummary["%s_buildings" % (field_id_prefix)] = 0.0

            for col_name in summed_columns:
                geosummary["%s_%s" % (field_id_prefix, col_name)] = 0.0

    unique_data_unit_ids = numpy.unique(numpy.array(data_unit_ids))
    geosummary["%s_number_data_units" % (occupancy_case)] = len(unique_data_unit_ids)

    # Append to the file if it already exists, create it otherwise
    filepath_geosummary = os.path.join(
        output_path, "%s_%s_geosummary_tiles.gpkg" % (quadkeys_group, occupancy_case)
    )
    write_mode = "a" if os.path.exists(filepath_geosummary) else "w"
    geosummary.to_file(filepath_geosummary, index=False, driver="GPKG", mode=write_mode)
quadtile.obm_buildings, quadtile.remainder_buildings, etc) that are associated @@ -81,6 +74,13 @@ def export_to_OpenQuake_CSV( occupancy_case (str): Occupancy case to which the buildings of 'quadtile' belong. It is used for file naming and assigning incremental IDs to the rows of the OpenQuake CSV files. + export_OBM_footprints (bool): + If True (and if "OBM" is one of the type of 'buildings_to_export'), the geometries + of OpenBuildingMap buildings will be exported and the OpenQuake CSV files of the OBM + buildings will indicate their centroids and IDs. If False, geometries will not be + exported, the coordinates in the OpenQuake CSV files will correspond to the centroid + of the quadtile, and the "osm_id" column will contain ficticious IDs (generated to + allow for aggregation of OpenQuake results). Default = True. Returns: This method writes three kinds of files: diff --git a/tests/data/test_geosummary_tiles_expected_output.csv b/tests/data/test_geosummary_tiles_expected_output.csv new file mode 100644 index 0000000000000000000000000000000000000000..eda2eedc5a46473bf96d7e1a898a6b88918419a1 --- /dev/null +++ b/tests/data/test_geosummary_tiles_expected_output.csv @@ -0,0 +1,4 @@ +quadkey;geometry;residential_OBM_buildings;residential_OBM_structural;residential_OBM_day;residential_OBM_night;residential_OBM_transit;residential_remainder_buildings;residential_remainder_structural;residential_remainder_day;residential_remainder_night;residential_remainder_transit;residential_number_data_units +122010321033023130;POLYGON ((15.04852294921875 37.480307245086024, 15.049896240234375 37.480307245086024, 15.049896240234375 37.48139702942733, 15.04852294921875 37.48139702942733, 15.04852294921875 37.480307245086024));3;5929444.4448;6.160245;24.121985;15.11353556;12.7;27749500;30.7358415;120.3538995;75.407266;1 +122010321033023121;POLYGON ((15.047149658203125 37.480307245086024, 15.04852294921875 37.480307245086024, 15.04852294921875 37.48139702942733, 15.047149658203125 
37.48139702942733, 15.047149658203125 37.480307245086024));0;0;0;0;0;26.2;57247000;63.407799;248.289147;155.564596;1 +122010321033023132;POLYGON ((15.04852294921875 37.47921744485059, 15.049896240234375 37.47921744485059, 15.049896240234375 37.480307245086024, 15.04852294921875 37.480307245086024, 15.04852294921875 37.47921744485059));12;50929444.4448;72.499245;283.888985;177.8695356;0;0;0;0;0;1 diff --git a/tests/test_to_geosummary.py b/tests/test_to_geosummary.py new file mode 100644 index 0000000000000000000000000000000000000000..25c27e1b671275ffeae0f3009dd69a2b5c43066d --- /dev/null +++ b/tests/test_to_geosummary.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2022: +# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero +# General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. 
def test_export_to_GeoSummary():
    """Export three quadtiles with export_to_GeoSummary and compare the resulting GeoPackage
    against the expected summary stored in tests/data.

    The output is written to a temporary directory that is removed even when an assertion
    fails, so that a failed run cannot leave files behind and break subsequent runs.
    """
    import tempfile  # only needed for the throw-away output directory of this test

    data_path = os.path.join(os.path.dirname(__file__), "data")

    # User-defined costs and people columns
    cost_cases = {"structural": "total"}
    people_cases = {"day": "day", "night": "night", "transit": "transit"}

    # User-defined input parameters for export_to_GeoSummary
    buildings_to_export = ["OBM", "remainder"]
    quadkeys_group = "quadkeys_list"
    occupancy_case = "residential"

    # Three quadtiles are used to write the output file, each defined as
    # (quadkey, OBM buildings file, remainder buildings file); "" means "no buildings".
    # First quadtile has both remainder and OBM buildings, second quadtile has remainder
    # buildings only, third quadtile has OBM buildings only
    quadtiles_input = [
        (
            "122010321033023130",
            "test_oq_input_OBM_buildings_122010321033023130.csv",
            "test_oq_input_remainder_buildings_122010321033023130.csv",
        ),
        (
            "122010321033023121",
            "",
            "test_oq_input_remainder_buildings_122010321033023121.csv",
        ),
        (
            "122010321033023132",
            "test_oq_input_OBM_buildings_122010321033023132.csv",
            "",
        ),
    ]

    # Expected name and contents of output file
    expected_name_output = "%s_%s_geosummary_tiles.gpkg" % (quadkeys_group, occupancy_case)
    expected_output = pandas.read_csv(
        os.path.join(data_path, "test_geosummary_tiles_expected_output.csv"),
        sep=";",
        dtype={"quadkey": str},
    )

    with tempfile.TemporaryDirectory() as output_path:
        # Create TileExposure objects and call export_to_GeoSummary each time
        for quadkey, obm_filename, remainder_filename in quadtiles_input:
            quadtile = TileExposure(quadkey, cost_cases, people_cases)

            if obm_filename != "":
                quadtile.obm_buildings = pandas.read_csv(
                    os.path.join(data_path, obm_filename)
                )

            if remainder_filename != "":
                quadtile.remainder_buildings = pandas.read_csv(
                    os.path.join(data_path, remainder_filename)
                )

            export_to_GeoSummary(
                quadtile,
                buildings_to_export,
                cost_cases,
                people_cases,
                output_path,
                quadkeys_group,
                occupancy_case,
            )

        # Check that the output file has been created
        output_filepath = os.path.join(output_path, expected_name_output)
        assert os.path.exists(output_filepath)

        # Read the whole file into memory before the temporary directory is deleted
        returned_output = geopandas.read_file(output_filepath)

    # Check contents of file
    assert returned_output.shape[0] == expected_output.shape[0]
    assert returned_output.shape[1] == expected_output.shape[1]

    value_columns = [
        col_name
        for col_name in expected_output.columns
        if col_name not in ("quadkey", "geometry")
    ]

    for j, quadkey in enumerate(expected_output["quadkey"].values):
        assert quadkey in returned_output["quadkey"].values
        returned_row = returned_output[returned_output["quadkey"] == quadkey]

        # Geometries are compared through their bounds, rounded to 5 decimal places
        returned_bounds = returned_row["geometry"].values[0].bounds
        expected_bounds = loads(expected_output["geometry"].values[j]).bounds
        for returned_bound, expected_bound in zip(returned_bounds, expected_bounds):
            assert round(returned_bound, 5) == round(expected_bound, 5)

        for col_name in value_columns:
            assert round(returned_row[col_name].values[0], 5) == round(
                expected_output[col_name].values[j], 5
            )
export_OBM_footprints_vals[k], cost_cases, people_cases, output_path, quadkeys_group, occupancy_case, + export_OBM_footprints_vals[k], ) # Check that output files that need to be created have been created