Commit 8ab32c13 authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Moved model-specific parameters from config to class

parent ab258de7
Pipeline #25344 passed with stage
in 1 minute and 9 seconds
model_name: esrm20
exposure_format: esrm20 # Only supported value for now
metadata_filepath: full_path_to_xlsx_file_with_model_metadata
occupancy_cases: # Occupancy cases to be processed
residential:
sheet_name: RES # Sheet name in metadata_filepath
data_units_types_field: fieldnameRes # Field name in sheet_name
commercial:
sheet_name: COM # Sheet name in metadata_filepath
data_units_types_field: fieldnameCom # Field name in sheet_name
industrial:
sheet_name: IND # Sheet name in metadata_filepath
data_units_types_field: fieldnameInd # Field name in sheet_name
......@@ -33,21 +33,25 @@ class AggregatedExposureModel(abc.ABC):
Name of the input aggregated model.
self.exposure_format (str):
Format of the input aggregated model. Currently supported values: "esrm20".
self.occupancy_cases (list of str):
List of names of the occupancy cases for which the input aggregated exposure model
is defined.
self.occupancy_cases (dict):
Dictionary in which each first level key corresponds to an occupancy case (e.g.
"residential", "commercial", "industrial") for which the input aggregated exposure
model is defined. The subkeys will point at specific mappings for each sub-class.
self.exposure_entities (dictionary of ExposureEntity):
Dictionary of instances of ExposureEntity objects, each of which represents an
exposure entity where the input aggregated exposure model is defined. See
attributes in description of ExposureEntity. The keys of the dictionary are the
names of the corresponding exposure entities.
self.filename_pattern (str):
Pattern of the names of the files that define the input aggregated exposure model.
"""
def __init__(self, configuration):
self.model_name = configuration.model_name
self.exposure_format = configuration.exposure_format
self.occupancy_cases = list(configuration.occupancy_cases.keys())
self.exposure_entities = self.retrieve_exposure_entities(configuration)
self.occupancy_cases = None
self.exposure_entities = None
self.filename_pattern = None
def retrieve_exposure_entities(self, configuration):
"""This function retrieves the exposure entities for which an input aggregated exposure
......@@ -79,8 +83,56 @@ class ExposureModelESRM20(AggregatedExposureModel):
model.
See details in https://git.gfz-potsdam.de/dynamicexposure/datasources/-/tree/master/esrm20.
Attributes:
self.occupancy_cases (dict):
Dictionary in which each first level key corresponds to an occupancy case (e.g.
"residential", "commercial", "industrial") for which ESRM20 is defined. Each first
level key contains three sub-keys: "short", "sheet_name" and
"data_units_types_field":
short (str):
Name of the occupancy case as found in the ESRM20 model files.
sheet_name (str):
Name of the sheet in the metadata file of the input aggregated exposure
model from which info on this occupancy case can be retrieved.
data_units_types_field (str):
Name of the field in sheet_name from which to retrieve information on the
types of data_units.
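self.filename_pattern (dict):
Pattern of the names of the ESRM20 CSV files. The key "filename" holds the
template of the file name (with "%s" placeholders), while the keys "first" and
"second" name what the first and second placeholders stand for.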
self.exposure_entities (dictionary of ExposureEntity):
Dictionary of instances of ExposureEntity objects, each of which represents an
exposure entity where the input aggregated exposure model is defined. See
attributes in description of ExposureEntity. The keys of the dictionary are the
names of the corresponding exposure entities.
"""
def __init__(self, configuration):
    """Initialise the ESRM20 model with its model-specific parameters.

    The occupancy cases and the file name pattern are fixed properties of ESRM20 and are
    therefore defined here instead of being read from the configuration file. The exposure
    entities are retrieved last, because their retrieval relies on self.occupancy_cases.

    Args:
        configuration:
            Configuration object that is passed on to the parent class and to
            self.retrieve_exposure_entities().
    """

    super().__init__(configuration)

    # One row per occupancy case:
    # (name, short name, sheet name in the metadata file, field with data units types)
    esrm20_cases = (
        ("residential", "Res", "RES", "Admin level resolution/aggregation"),
        ("commercial", "Com", "COM", "Admin level resolution/aggregation"),
        ("industrial", "Ind", "IND", "Resolution"),
    )
    self.occupancy_cases = {
        case_name: {
            "short": short_name,
            "sheet_name": sheet,
            "data_units_types_field": types_field,
        }
        for case_name, short_name, sheet, types_field in esrm20_cases
    }

    # Template of the CSV file names and meaning of each of its two placeholders
    self.filename_pattern = dict(
        filename="Exposure_Model_%s_%s.csv",
        first="entity_name",
        second="occupancy_short",
    )

    # Needs to be called after self.occupancy_cases has been defined
    self.exposure_entities = self.retrieve_exposure_entities(configuration)
def retrieve_exposure_entities(self, configuration):
"""This function retrieves the exposure entities of the ESRM20 model from the
corresponding .xlsx metadata file.
......@@ -94,17 +146,6 @@ class ExposureModelESRM20(AggregatedExposureModel):
metadata_filepath (string):
Full file path to the .xlsx file that contains metadata on the input
aggregated exposure model.
occupancy_cases (dictionary):
Dictionary in which each first level key corresponds to an occupancy
case (e.g. "residential", "commercial", "industrial"). Each first level
key contains two sub-keys: "sheet_name" and "data_units_types_field":
sheet_name (str):
Name of the sheet in the metadata file of the input aggregated
exposure model from which info on this occupancy case can be
retrieved. E.g.: "RES", "COM", "IND".
data_units_types_field (str):
Name of the field in sheet_name from which to retrieve
information on the types of data_units. E.g.: "Resolution".
Returns:
exposure_entities (dictionary of ExposureEntity):
......@@ -124,12 +165,12 @@ class ExposureModelESRM20(AggregatedExposureModel):
# Needs to go by occupancy case because the names and properties of the exposure
# entities can only be read from the metadata file for a sheet that is associated
# with a particular occupancy case:
for case in configuration.occupancy_cases.keys():
for case in self.occupancy_cases.keys():
# Read the file (errors will be handled by pandas)
metadata = pandas.read_excel(
configuration.metadata_filepath,
sheet_name=configuration.occupancy_cases[case]["sheet_name"],
sheet_name=self.occupancy_cases[case]["sheet_name"],
header=None, # Otherwise we cannot handle repeated column names properly
index_col=0, # Use first column as index
)
......@@ -160,7 +201,7 @@ class ExposureModelESRM20(AggregatedExposureModel):
# Retrieve the row from which the types of data units can be interpreted
data_units_types_row = metadata.loc[
configuration.occupancy_cases[case]["data_units_types_field"], :
self.occupancy_cases[case]["data_units_types_field"], :
]
if len(data_units_types_row.shape) > 1: # This should not occur
logger.critical(
......
......@@ -33,20 +33,9 @@ class Configuration:
self.metadata_filepath (str):
Full file path to the .xlsx file that contains metadata on the input aggregated
exposure model.
self.occupancy_cases (dictionary):
Dictionary in which each first level key corresponds to an occupancy case (e.g.
"residential", "commercial", "industrial"). Each first level key contains two
sub-keys: "sheet_name" and "data_units_types_field":
sheet_name (str):
Name of the sheet in the metadata file of the input aggregated exposure
model from which info on this occupancy case can be retrieved. E.g.: "RES",
"COM", "IND".
data_units_types_field (str):
Name of the field in sheet_name from which to retrieve information on the
types of data_units. E.g.: "Resolution".
"""
REQUIRES = ["metadata_filepath", "occupancy_cases"]
REQUIRES = ["metadata_filepath"]
def __init__(self, filepath):
"""
......@@ -61,9 +50,6 @@ class Configuration:
self.exposure_format = self._assign_parameter(config, "exposure_format")
self.metadata_filepath = self._assign_parameter(config, "metadata_filepath")
self.occupancy_cases = self._assign_hierarchical_parameters(
config, "occupancy_cases", ["sheet_name", "data_units_types_field"]
)
# Terminate if critical parameters are missing (not all parameters are critical)
for key_parameter in self.REQUIRES:
......@@ -122,51 +108,3 @@ class Configuration:
assigned_parameter = None
return assigned_parameter
def _assign_hierarchical_parameters(self, config, input_parameter, requested_nested):
    """This function searches for the key input_parameter in the dictionary config, and for
    each of the elements of requested_nested as keys of each of the entries (cases) of
    config[input_parameter].

    If input_parameter cannot be retrieved from config (self._assign_parameter returns
    None), or its content is not a dictionary, the output is None.

    If input_parameter is a key of config, but one of the elements of requested_nested is
    not a key of one of the cases of config[input_parameter], a critical message is logged
    for each missing element and the output is None. A case whose content is not a
    dictionary (e.g. "residential: RES", or an empty entry) is treated as missing all the
    elements of requested_nested.

    Args:
        config (dictionary):
            The configuration file read as a dictionary. It may be an empty dictionary.
        input_parameter (str):
            Name of the desired parameter, to be searched for as a primary key of config.
        requested_nested (list of str):
            List of the names of the desired nested parameters, to be searched for as keys
            of each case of config[input_parameter].

    Returns:
        assigned_parameter (dictionary or None):
            The content of config[input_parameter], if input_parameter is a key of config
            and all elements of requested_nested are keys of every case of
            config[input_parameter], or None otherwise.
    """

    assigned_parameter = self._assign_parameter(config, input_parameter)

    # Covers both the missing parameter (None) and the non-hierarchical parameter
    if not isinstance(assigned_parameter, dict):
        return None

    sub_parameters_missing = False

    for case, case_parameters in assigned_parameter.items():
        # A case that is not a dictionary cannot hold any of the requested sub-parameters;
        # treating it as empty reports them as missing instead of raising AttributeError
        available = case_parameters if isinstance(case_parameters, dict) else {}

        for requested_parameter in requested_nested:
            if requested_parameter not in available:
                logger.critical(
                    "ERROR instantiating Configuration: occupancy case '%s' does not "
                    "contain a '%s' parameter" % (case, requested_parameter)
                )
                # Keep looping so that all missing sub-parameters get reported
                sub_parameters_missing = True

    if sub_parameters_missing:
        return None

    return assigned_parameter
model_name: esrm20
exposure_format: esrm20
metadata_filepath: /some/path/metadata.xlsx
occupancy_cases:
residential:
sheet_names: RES
data_units_types_field: Admin level resolution/aggregation
commercial:
sheet_name: COM
data_units_types_field: Admin level resolution/aggregation
industrial:
sheet_name: IND
data_units_types_field: Resolution
model_name: esrm20
exposure_format: esrm20
metadata_filepath: /some/path/metadata.xlsx
occupancy_cases:
residential:
sheet_name: RES
data_units_types_field: Admin level resolution/aggregation
commercial:
sheet_name: COM
another_name_that_is_wrong: Admin level resolution/aggregation
industrial:
sheet_name: IND
data_units_types_field: Resolution
model_name: esrm20
exposure_format: esrm20
metadata_filepath: /home/cnievas/Documents/GFZ_local/SERA/From_EFEHR_GitLab/TESTING/esrm20_exposure/sources/European_Exposure_Model_Data_Inputs_Sources.xlsx
occupancy_cases: residential
......@@ -29,21 +29,6 @@ def test_Configuration():
assert returned_config.model_name == "esrm20"
assert returned_config.exposure_format == "esrm20"
assert returned_config.metadata_filepath == "/some/path/metadata.xlsx"
assert len(returned_config.occupancy_cases.keys()) == 3
assert returned_config.occupancy_cases["residential"]["sheet_name"] == "RES"
assert (
returned_config.occupancy_cases["residential"]["data_units_types_field"]
== "Admin level resolution/aggregation"
)
assert returned_config.occupancy_cases["commercial"]["sheet_name"] == "COM"
assert (
returned_config.occupancy_cases["commercial"]["data_units_types_field"]
== "Admin level resolution/aggregation"
)
assert returned_config.occupancy_cases["industrial"]["sheet_name"] == "IND"
assert (
returned_config.occupancy_cases["industrial"]["data_units_types_field"] == "Resolution"
)
# Test case in which the file is not found
with pytest.raises(OSError) as excinfo:
......@@ -51,24 +36,3 @@ def test_Configuration():
os.path.join(os.path.dirname(__file__), "data", "doesnotexist.yml")
)
assert "OSError" in str(excinfo.type)
# Test case in which the parameter "sheet_name" is missing
with pytest.raises(OSError) as excinfo:
returned_config = Configuration(
os.path.join(os.path.dirname(__file__), "data", "config_for_testing_missing_01.yml")
)
assert "OSError" in str(excinfo.type)
# Test case in which the parameter "data_units_types_field" is missing
with pytest.raises(OSError) as excinfo:
returned_config = Configuration(
os.path.join(os.path.dirname(__file__), "data", "config_for_testing_missing_02.yml")
)
assert "OSError" in str(excinfo.type)
# Test case in which the parameter "occupancy_cases" is not hierarchical
with pytest.raises(OSError) as excinfo:
returned_config = Configuration(
os.path.join(os.path.dirname(__file__), "data", "config_for_testing_missing_03.yml")
)
assert "OSError" in str(excinfo.type)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment