aggregatedexposuremodel.py 11.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
#!/usr/bin/env python3

# Copyright (C) 2021:
#   Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.

import abc
import logging
import numpy
import pandas
from gdeimporter.exposureentity import ExposureEntity

# Module-level logger shared by the classes in this file. Calling getLogger() without a name
# returns the root logger.
logger = logging.getLogger()


class AggregatedExposureModel(abc.ABC):
    """Base class for input aggregated exposure models.

    The constructor copies the basic properties of the model from the configuration and then
    delegates to retrieve_exposure_entities(), which subclasses override for each supported
    exposure format.

    Attributes:
        self.model_name (str):
            Name of the input aggregated model.
        self.exposure_format (str):
            Format of the input aggregated model. Currently supported values: "esrm20".
        self.occupancy_cases (list of str):
            Names of the occupancy cases for which the input aggregated exposure model is
            defined (the keys of configuration.occupancy_cases).
        self.exposure_entities (dictionary of ExposureEntity):
            ExposureEntity instances keyed by the name of the exposure entity they
            represent, i.e. the entities where the input aggregated exposure model is
            defined. See attributes in description of ExposureEntity.
    """

    def __init__(self, configuration):
        """Initialise the model from an instance of the Configuration class.

        Args:
            configuration (Configuration object):
                Must provide the attributes model_name, exposure_format and occupancy_cases,
                plus whatever the subclass implementation of retrieve_exposure_entities()
                reads.
        """

        self.model_name = configuration.model_name
        self.exposure_format = configuration.exposure_format
        self.occupancy_cases = [*configuration.occupancy_cases.keys()]
        # Assigned last so that the attributes above already exist when the subclass
        # implementation runs.
        self.exposure_entities = self.retrieve_exposure_entities(configuration)

    def retrieve_exposure_entities(self, configuration):
        """Retrieve the exposure entities for which the input aggregated exposure model is
        defined, together with the types of data units used for each occupancy case.

        This base implementation only raises NotImplementedError; reading and processing
        the relevant data is the responsibility of each subclass.

        Exposure entities can be, for example, countries or any other spatial/administrative
        unit for which an aggregated exposure model is defined. Arbitrary polygons can be
        administrative units, Voronoi cells, etc.

        Args:
            configuration (Configuration object):
                Instance of the Configuration class.

        Returns:
            exposure_entities (dictionary of ExposureEntity):
                ExposureEntity instances keyed by the name of the exposure entity they
                represent. See attributes in description of ExposureEntity.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """

        raise NotImplementedError()


class ExposureModelESRM20(AggregatedExposureModel):
    """This class represents the European Seismic Risk Model 2020 (ESRM20) aggregated exposure
    model.

    See details in https://git.gfz-potsdam.de/dynamicexposure/datasources/-/tree/master/esrm20.
    """

    def retrieve_exposure_entities(self, configuration):
        """This function retrieves the exposure entities of the ESRM20 model from the
        corresponding .xlsx metadata file.

        One sheet of the metadata file is read per occupancy case. In each sheet, the first
        column is used as the row index, the row labelled "Variables" provides the names of
        the exposure entities (one per column) and the row labelled with the configured
        "data_units_types_field" provides the description from which the types of data units
        are interpreted (see _map_data_units_types).

        If a sheet contains repeated names of exposure entities, a critical message is
        logged and processing stops at that occupancy case: the dictionary built from the
        occupancy cases processed so far is returned. If the "data_units_types_field" label
        matches more than one row, a critical message is logged and the data units of all
        exposure entities are set to "unknown" for that occupancy case.

        Errors raised by pandas while reading the file or looking up row labels (e.g.
        missing file, sheet or row) are not caught here.

        Args:
            configuration (Configuration object):
                Instance of the Configuration class, with at least the following attributes:
                    exposure_format (string):
                        Format of the input aggregated model. Only used for logging in this
                        method.
                    metadata_filepath (string):
                        Full file path to the .xlsx file that contains metadata on the input
                        aggregated exposure model.
                    occupancy_cases (dictionary):
                        Dictionary in which each first level key corresponds to an occupancy
                        case (e.g. "residential", "commercial", "industrial"). Each first level
                        key contains two sub-keys: "sheet_name" and "data_units_types_field":
                            sheet_name (str):
                                Name of the sheet in the metadata file of the input aggregated
                                exposure model from which info on this occupancy case can be
                                retrieved. E.g.: "RES", "COM", "IND".
                            data_units_types_field (str):
                                Name of the field in sheet_name from which to retrieve
                                information on the types of data_units. E.g.: "Resolution".

        Returns:
            exposure_entities (dictionary of ExposureEntity):
                Dictionary of instances of ExposureEntity objects, each of which represents
                an exposure entity where the input aggregated exposure model is defined. See
                attributes in description of ExposureEntity. The keys of the dictionary are the
                names of the corresponding exposure entities.
        """

        exposure_entities = {}

        logger.info(
            "Retrieving exposure_entities from exposure with format %s "
            "with retrieve_exposure_entities",
            configuration.exposure_format,
        )

        # Needs to go by occupancy case because the names and properties of the exposure
        # entities can only be read from the metadata file for a sheet that is associated
        # with a particular occupancy case:
        for case, case_config in configuration.occupancy_cases.items():

            # Read the file (errors will be handled by pandas)
            metadata = pandas.read_excel(
                configuration.metadata_filepath,
                sheet_name=case_config["sheet_name"],
                header=None,  # Otherwise we cannot handle repeated column names properly
                index_col=0,  # Use first column as index
            )

            # Retrieve the row with the names of the exposure entities. If more than one
            # row is labelled "Variables", .loc returns a DataFrame and the first is used.
            header_row = metadata.loc["Variables", :]
            if len(header_row.shape) > 1:
                header_row = header_row.iloc[0, :]
            read_names = numpy.array(header_row)

            # Check if there are repeated names of exposure entities (terminate if True)
            if len(read_names) != len(numpy.unique(read_names)):
                logger.critical(
                    "ERROR: REPEATED NAMES OF EXPOSURE ENTITIES FOUND "
                    "IN OCCUPANCY CASE %s. "
                    "retrieve_exposure_entities COULD NOT RUN.",
                    case,
                )
                break

            # Use the names as column labels, once it has been confirmed that there are no
            # repeated names of exposure entities
            metadata = metadata[1:]  # Keep the data below the header row
            metadata.columns = header_row  # Set the header row as the new column names

            # Retrieve the row from which the types of data units can be interpreted
            data_units_types_row = metadata.loc[case_config["data_units_types_field"], :]
            if len(data_units_types_row.shape) > 1:  # This should not occur
                # More than one row carries the label, so the description is ambiguous
                logger.critical(
                    "ERROR READING %s: ROW NOT FOUND.", configuration.metadata_filepath
                )
                # Build a fresh row of "unknown" values instead of overwriting a slice of
                # the sheet in place
                data_units_types_row = pandas.Series(
                    "unknown", index=data_units_types_row.columns
                )

            for exposure_entity in read_names:
                # The same exposure entity can appear in several occupancy cases
                if exposure_entity not in exposure_entities:
                    exposure_entities[exposure_entity] = ExposureEntity(exposure_entity)

                (
                    data_units_type,
                    data_units_level,
                    data_units_definition,
                ) = self._map_data_units_types(data_units_types_row.loc[exposure_entity])

                # Write the contents of this occupancy case to the ExposureEntity object
                exposure_entities[exposure_entity].occupancy_cases[case] = {
                    "data_units_type": data_units_type,
                    "data_units_level": data_units_level,
                    "data_units_definition": data_units_definition,
                }

        return exposure_entities

    def _map_data_units_types(self, original_description):
        """This function maps original descriptions of resolution/definition of an input
        aggregated exposure model to attributes of the data units that an ExposureEntity
        comprises.

        Descriptions that are not strings (e.g. the NaN that pandas yields for an empty
        cell) and strings that match none of the known patterns are mapped to "unknown" for
        all three outputs.

        Args:
            original_description (str): String coming from an input aggregated exposure model.

        Returns:
            data_units_type (str):
                Type of data unit used by the ExposureEntity. Currently supported types:
                    "polygon":
                        Polygon of arbitrary shape (e.g. boundaries of an administrative unit,
                        Voronoi cell).
                    "cell":
                        Regular quadrilateral in a specified projection that can be sufficiently
                        and unequivocally defined by knowing (1) the coordinates of a vertex or
                        the centroid, (2) the width, (3) the height, and (4) the projection
                        system. E.g. a 30 arcsec (width) by 30 arcsec cell (height) in the World
                        Geodetic System 1984 (WGS84).
                    "unknown":
                        The description could not be interpreted.
            data_units_level (str):
                Level/resolution of the data units used by the ExposureEntity. Currently
                supported types:
                    "30arcsec_30arcsec":
                        Applies to the "cell" type and defines a 30 arcsec (width) by 30 arcsec
                        (height) cell in the World Geodetic System 1984 (WGS84).
                    Any integer >= 0 (as a string):
                        Applies to the "polygon" type and refers to an administrative unit level
                        as per a classification system. It is taken verbatim from the last
                        whitespace-separated token of the description.
                    "unknown":
                        The description could not be interpreted.
            data_units_definition (str):
                Name of the projection (if "cell" type) or classification system (if "polygon"
                type) in which the data units used by the ExposureEntity are defined. E.g.
                "WGS84" for cells, "NUTS" for polygons that represent administrative units.
                "unknown" if the description could not be interpreted.
        """

        # Empty cells are read as float NaN, on which the substring tests below would raise
        if not isinstance(original_description, str):
            return "unknown", "unknown", "unknown"

        if "admin level" in original_description:
            data_units_type = "polygon"
            # split() without arguments ignores trailing whitespace, so the level is never
            # returned as an empty string
            data_units_level = original_description.split()[-1]
            data_units_definition = "NUTS"
        elif "30 arc seconds" in original_description:
            data_units_type = "cell"
            data_units_level = "30arcsec_30arcsec"
            data_units_definition = "WGS84"
        else:
            data_units_type = "unknown"
            data_units_level = "unknown"
            data_units_definition = "unknown"

        return data_units_type, data_units_level, data_units_definition