Commit eda5cc60 authored by Cecilia Nievas
Browse files

Refactored and split Configuration class

parent c3bd441a
Pipeline #40445 failed with stage
in 1 minute and 51 seconds
......@@ -16,12 +16,10 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import os
import sys
import logging
import numpy
import yaml
from dotenv import load_dotenv
from gdeimporter.tools.configuration_methods import ConfigurationMethods
logger = logging.getLogger()
......@@ -137,23 +135,29 @@ class Configuration:
class under certain circumstances. Default: False.
"""
config = self.read_config_file(filepath)
config = ConfigurationMethods.read_config_file(filepath)
self.model_name = self._assign_parameter(config, "model_name")
self.model_name = ConfigurationMethods.assign_parameter(config, "model_name")
self.exposure_format = self._assign_parameter(config, "exposure_format")
self.data_pathname = self._assign_parameter(config, "data_pathname")
self.boundaries_pathname = self._assign_parameter(config, "boundaries_pathname")
self.domain_boundary_filepath = self._assign_parameter(
self.exposure_format = ConfigurationMethods.assign_parameter(config, "exposure_format")
self.data_pathname = ConfigurationMethods.assign_parameter(config, "data_pathname")
self.boundaries_pathname = ConfigurationMethods.assign_parameter(
config, "boundaries_pathname"
)
self.domain_boundary_filepath = ConfigurationMethods.assign_parameter(
config, "domain_boundary_filepath"
)
self.occupancies_to_run = self._assign_listed_parameters(config, "occupancies_to_run")
self.exposure_entities_to_run = self._assign_listed_parameters(
self.occupancies_to_run = ConfigurationMethods.assign_listed_parameters(
config, "occupancies_to_run"
)
self.exposure_entities_to_run = ConfigurationMethods.assign_listed_parameters(
config, "exposure_entities_to_run"
)
try:
self.exposure_entities_code = self._validate_exposure_entities_code(config)
self.exposure_entities_code = ConfigurationMethods.validate_exposure_entities_code(
config
)
except ValueError as e:
error_message = (
"Error: the configuration file assigns unsupported values "
......@@ -169,20 +173,22 @@ class Configuration:
logger.critical(error_message)
sys.exit(1)
self.number_cores = self._assign_integer_parameter(config, "number_cores")
self.database_built_up = self._retrieve_database_credentials(
self.number_cores = ConfigurationMethods.assign_integer_parameter(
config, "number_cores"
)
self.database_built_up = ConfigurationMethods.retrieve_database_credentials(
config, "database_built_up", "test_db_built_up.env", force_config_over_hierarchies
)
self.database_gde_tiles = self._retrieve_database_credentials(
self.database_gde_tiles = ConfigurationMethods.retrieve_database_credentials(
config, "database_gde_tiles", "test_db_gde_tiles.env", force_config_over_hierarchies
)
self.data_units_surface_threshold = self._assign_float_parameter(
self.data_units_surface_threshold = ConfigurationMethods.assign_float_parameter(
config, "data_units_surface_threshold", True, -1e-15, 100.0
)
if self.data_units_surface_threshold is not None:
if numpy.sign(self.data_units_surface_threshold) < -0.5:
self.data_units_surface_threshold = abs(self.data_units_surface_threshold)
self.force_creation_data_units = self._assign_boolean_parameter(
self.force_creation_data_units = ConfigurationMethods.assign_boolean_parameter(
config, "force_creation_data_units"
)
if self.force_creation_data_units is None:
......@@ -191,10 +197,10 @@ class Configuration:
"configuration file and was automatically set to 'False'."
)
self.force_creation_data_units = False
self.data_units_min_admisible_area = self._assign_float_parameter(
self.data_units_min_admisible_area = ConfigurationMethods.assign_float_parameter(
config, "data_units_min_admisible_area", True, 1e-15, 30e12
)
self.data_units_max_admisible_area = self._assign_float_parameter(
self.data_units_max_admisible_area = ConfigurationMethods.assign_float_parameter(
config,
"data_units_max_admisible_area",
True,
......@@ -212,31 +218,6 @@ class Configuration:
logger.critical(error_message)
raise OSError(error_message)
def read_config_file(self, filepath):
    """This function attempts to open and parse the configuration file. If the file is not
    found, it logs a critical error and raises an OSError.

    Args:
        filepath (str):
            Full file path to the .yml configuration file.

    Returns:
        config (dictionary):
            The configuration file read as a dictionary. An empty dictionary is returned
            when the file exists but holds no YAML content.

    Raises:
        OSError:
            If the configuration file does not exist. The original FileNotFoundError is
            attached as the cause.
    """

    try:
        with open(filepath, "r") as ymlfile:
            # NOTE(review): yaml.safe_load is the conservative choice if the
            # configuration file can ever come from an untrusted source.
            config = yaml.load(ymlfile, Loader=yaml.FullLoader)
    except FileNotFoundError as error:
        error_message = "Error instantiating Configuration: configuration file not found"
        logger.critical(error_message)
        raise OSError(error_message) from error

    # An empty YAML document is parsed as None; normalise it so that the key look-ups
    # carried out on the result always operate on a dictionary.
    if config is None:
        return {}

    return config
def interpret_exposure_entities_to_run(self, aggregated_exposure_model):
"""This function interprets the value assigned to self.exposure_entities_to_run from the
configuration file and updates self.exposure_entities_to_run accordingly.
......@@ -287,358 +268,3 @@ class Configuration:
pass
return
def _assign_parameter(self, config, input_parameter):
"""This function searches for the key input_parameter in the dictionary config. If
found, it returns its value (a string or a dictionary). If not found, it returns None.
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
input_parameter (str):
Name of the desired parameter, to be searched for as a primary key of config.
Returns:
assigned_parameter (str, dictionary or None):
The content of config[input_parameter], which can be a string or a dictionary.
It is None if input_parameter is not a key of config.
"""
try:
assigned_parameter = config[input_parameter]
except KeyError:
logger.warning(
"Warning: parameter '%s' is missing from configuration file" % (input_parameter)
)
assigned_parameter = None
return assigned_parameter
def _assign_listed_parameters(self, config, input_parameter):
"""This function searches for the key input_parameter in the dictionary config, and
splits its assigned value as per ", ", i.e. a comma plus space separation.
If input_parameter is not a key of config, the output is None.
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
input_parameter (str):
Name of the desired parameter, to be searched for as a primary key of config.
Returns:
assigned_parameter (list of str):
Each element of the list is an element of config[input_parameter], separated as
per a comma followed by a space (", "). E.g. if 'config[input_parameter]' is
"Name_A, Name_B", 'assigned_parameter' is ["Name_A", "Name_B"].
"""
assigned_parameter = self._assign_parameter(config, input_parameter)
if assigned_parameter is None:
return None
assigned_parameter = assigned_parameter.split(", ")
return assigned_parameter
def _assign_integer_parameter(self, config, input_parameter):
"""This function searches for the key input_parameter in the dictionary config, and
converts it into an integer.
If input_parameter is not a key of config, the output is None.
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
input_parameter (str):
Name of the desired parameter, to be searched for as a primary key of config.
Returns:
assigned_parameter (int):
The content of config[input_parameter] converted into an integer.
"""
assigned_parameter = self._assign_parameter(config, input_parameter)
if assigned_parameter is None:
return None
try:
assigned_parameter = int(assigned_parameter)
except ValueError:
logger.critical(
"Error reading %s from configuration file: not an integer" % (input_parameter)
)
assigned_parameter = None
return assigned_parameter
def _assign_float_parameter(
self, config, input_parameter, check_range, lower_bound, upper_bound
):
"""This function searches for the key input_parameter in the dictionary config, and
converts it into a float.
If input_parameter is not a key of config, the output is None.
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
input_parameter (str):
Name of the desired parameter, to be searched for as a primary key of config.
check_range (bool):
If True, it will be verified that the desired float parameter belongs to the
interval 'bounding_range'.
lower_bound (float):
Lower possible value of the desired float parameter, inclusive. Only verified
if 'check_range' is True.
upper_bound (float):
Upper possible value of the desired float parameter, inclusive. Only verified
if 'check_range' is True.
Returns:
assigned_parameter (float):
The content of config[input_parameter] converted into a float.
"""
assigned_parameter = self._assign_parameter(config, input_parameter)
if assigned_parameter is None:
return None
try:
assigned_parameter = float(assigned_parameter)
if check_range:
if assigned_parameter < lower_bound or assigned_parameter > upper_bound:
error_message = (
"Error reading %s from configuration file: float out of range. "
"Valid range: [%s, %s]"
% (
input_parameter,
"{:.2f}".format(lower_bound),
"{:.2f}".format(upper_bound),
)
)
logger.critical(error_message)
raise ValueError(error_message)
except ValueError:
logger.critical(
"Error reading %s from configuration file: not a float" % (input_parameter)
)
assigned_parameter = None
return assigned_parameter
def _assign_boolean_parameter(self, config, input_parameter):
"""This function searches for the key input_parameter in the dictionary config, and
converts it into a boolean.
If input_parameter is not a key of config, the output is None.
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
input_parameter (str):
Name of the desired parameter, to be searched for as a primary key of config.
Returns:
assigned_parameter (bool):
The content of config[input_parameter] converted into a boolean.
"""
assigned_parameter = self._assign_parameter(config, input_parameter)
if assigned_parameter is None:
return None
if not isinstance(assigned_parameter, bool): # yaml tries to interpret data types
if assigned_parameter.lower() in ["true", "yes"]:
assigned_parameter = True
elif assigned_parameter.lower() in ["false", "no"]:
assigned_parameter = False
else:
logger.critical(
"Error reading %s from configuration file: not a boolean"
% (input_parameter)
)
assigned_parameter = None
return assigned_parameter
def _assign_hierarchical_parameters(self, config, input_parameter, requested_nested=[]):
"""This function searches for the key input_parameter in the dictionary config, and for
each of the elements of requested_nested as keys of config[input_parameter].
If input_parameter is not a key of config, the output is None.
If input_parameter is a key of config, but one of the elements of requested_nested is
not a key of config[input_parameter], the output is None.
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
input_parameter (str):
Name of the desired parameter, to be searched for as a primary key of config.
requested_nested (list of str):
List of the names of the desired nested parameters, to be searched for as keys
of config[input_parameter]. If empty, the function will retrieve all nested
parameters available in 'config'.
Returns:
assigned_parameter (dictionary or None):
The content of config[input_parameter], if input_parameter is a key of config
and all elements of requested_nested are keys of config[input_parameter], or
None otherwise.
"""
assigned_parameter = self._assign_parameter(config, input_parameter)
if assigned_parameter is None:
return None
if not isinstance(assigned_parameter, dict):
return None
if len(requested_nested) == 0:
requested_nested = list(assigned_parameter.keys())
sub_parameters_missing = False
for requested_parameter in requested_nested:
if requested_parameter not in assigned_parameter.keys():
logger.critical(
"ERROR instantiating Configuration: parameter '%s' does not "
"exist in %s" % (requested_parameter, input_parameter)
)
sub_parameters_missing = True
if sub_parameters_missing is True:
return None
return assigned_parameter
def _retrieve_database_credentials(
self, config, input_parameter, env_filename, force_config_over_hierarchies
):
"""This function retrieves the credentials needed to (later) connect to a specific SQL
database. If force_config_over_hierarchies is False, it does so hieararchically, by
giving top priority to environment variables that are created when running the CI
Pipeline, second priority to environment variables that are created locally if a .env
file with name env_filename is provided and, finally, by looking at what has been
indicated in the input configuration file (read as config).
When force_config_over_hierarchies is True, it does not matter where the code is
running, it will always retrieve the credentials from the configuration file.
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
input_parameter (str):
Name of the desired parameter, to be searched for as a primary key of config.
env_filename (str):
Name of a local .env file that will be run to load environment variables.
force_config_over_hierarchies (bool):
If True, the contents of the .yml configuration file specified in filepath will
take precedence over any other hierarchy (e.g. preference of environment
variables if they exist). If False, hierarchies of preference established in
this class are applied. This parameter is used for forcing the testing of this
class under certain circumstances. Default: False.
Returns:
db_config (dict):
Dictionary containing the credentials needed to connect to the desired SQL
database. These are:
host (str):
SQL database host address.
dbname (str):
Name of the SQL database.
port (int):
Port where the SQL database can be found.
username (str):
User name to connect to the SQL database.
password (str):
Password associated with username.
"""
db_config = self._assign_hierarchical_parameters(config, input_parameter)
if "port" in config:
db_config["port"] = int(db_config["port"])
if "GDEIMPORTER_DB_HOST" in os.environ and not (force_config_over_hierarchies):
# Running the CI pipeline
db_config = {
"host": os.environ.get("GDEIMPORTER_DB_HOST"),
"dbname": os.environ.get("GDEIMPORTER_DB"),
"username": os.environ.get("GDEIMPORTER_USER"),
"password": os.environ.get("GDEIMPORTER_PASSWORD"),
"sourceid": os.environ.get("GDEIMPORTER_SOURCEID"),
}
elif os.path.isfile(env_filename) and not (force_config_over_hierarchies):
# Testing locally with a test database
load_dotenv(env_filename)
db_config = {
"host": os.environ.get("GDEIMPORTER_LOCAL_DB_HOST"),
"dbname": os.environ.get("GDEIMPORTER_LOCAL_DB"),
"username": os.environ.get("GDEIMPORTER_LOCAL_USER"),
"password": os.environ.get("GDEIMPORTER_LOCAL_PASSWORD"),
"sourceid": os.environ.get("GDEIMPORTER_LOCAL_SOURCEID"),
}
return db_config
def _validate_exposure_entities_code(self, config):
"""This function retrieves the content of config["exposure_entities_code"], and checks
whether it complies with the following conditions:
- It must be either a string or a dictionary.
- If it is one string, it should be equal to "ISO3".
- If it is a dictionary, the elements within each key should be 3-character strings.
An error is raised if these conditions are not met.
Examples of valid values of config["exposure_entities_code"]:
1) "ISO3"
2) {"Exposure Entity 1": "EE1",
"Exposure Entity 2": "XXX"
}
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
Returns:
assigned_parameter (str, dictionary or None):
The content of config["exposure_entities_code"], which can be a string or a
dictionary.
"""
assigned_parameter = self._assign_parameter(config, "exposure_entities_code")
if assigned_parameter is None:
return None
if isinstance(assigned_parameter, str):
if assigned_parameter.upper() != "ISO3":
raise ValueError("String must be 'ISO3'.")
elif isinstance(assigned_parameter, dict):
for key in assigned_parameter.keys():
if isinstance(assigned_parameter[key], str):
if len(assigned_parameter[key]) != 3:
raise ValueError(
"The content of each dictionary key must be a 3-character string."
)
else:
raise TypeError("The dictionary must contain only one level of keys.")
else:
raise TypeError("The value must be a string or a dictionary.")
return assigned_parameter
#!/usr/bin/env python3
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import os
import logging
import yaml
from dotenv import load_dotenv
logger = logging.getLogger()
class ConfigurationMethods:
"""This class contains methods to handle and interpret configuration parameters."""
@staticmethod
def read_config_file(filepath):
    """This function attempts to open and parse the configuration file. If the file is not
    found, it logs a critical error and raises an OSError.

    Args:
        filepath (str):
            Full file path to the .yml configuration file.

    Returns:
        config (dictionary):
            The configuration file read as a dictionary. An empty dictionary is returned
            when the file exists but holds no YAML content.

    Raises:
        OSError:
            If the configuration file does not exist. The original FileNotFoundError is
            attached as the cause.
    """

    try:
        with open(filepath, "r") as ymlfile:
            # NOTE(review): yaml.safe_load is the conservative choice if the
            # configuration file can ever come from an untrusted source.
            config = yaml.load(ymlfile, Loader=yaml.FullLoader)
    except FileNotFoundError as error:
        error_message = "Error instantiating Configuration: configuration file not found"
        logger.critical(error_message)
        raise OSError(error_message) from error

    # An empty YAML document is parsed as None; normalise it so that the key look-ups
    # carried out on the result always operate on a dictionary.
    if config is None:
        return {}

    return config
@staticmethod
def assign_parameter(config, input_parameter):
"""This function searches for the key input_parameter in the dictionary config. If
found, it returns its value (a string or a dictionary). If not found, it returns None.
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
input_parameter (str):
Name of the desired parameter, to be searched for as a primary key of config.
Returns:
assigned_parameter (str, dictionary or None):
The content of config[input_parameter], which can be a string or a dictionary.
It is None if input_parameter is not a key of config.
"""
try:
assigned_parameter = config[input_parameter]
except KeyError:
logger.warning(
"Warning: parameter '%s' is missing from configuration file" % (input_parameter)
)
assigned_parameter = None
return assigned_parameter
@staticmethod
def assign_listed_parameters(config, input_parameter):
"""This function searches for the key input_parameter in the dictionary config, and
splits its assigned value as per ", ", i.e. a comma plus space separation.
If input_parameter is not a key of config, the output is None.
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
input_parameter (str):
Name of the desired parameter, to be searched for as a primary key of config.
Returns:
assigned_parameter (list of str):
Each element of the list is an element of config[input_parameter], separated as
per a comma followed by a space (", "). E.g. if 'config[input_parameter]' is
"Name_A, Name_B", 'assigned_parameter' is ["Name_A", "Name_B"].
"""
assigned_parameter = ConfigurationMethods.assign_parameter(config, input_parameter)
if assigned_parameter is None:
return None
assigned_parameter = assigned_parameter.split(", ")
return assigned_parameter
@staticmethod
def assign_integer_parameter(config, input_parameter):
"""This function searches for the key input_parameter in the dictionary config, and
converts it into an integer.
If input_parameter is not a key of config, the output is None.
Args:
config (dictionary):
The configuration file read as a dictionary. It may be an empty dictionary.
input_parameter (str):
Name of the desired parameter, to be searched for as a primary key of config.
Returns:
assigned_parameter (int):
The content of config[input_parameter] converted into an integer.
"""
assigned_parameter = ConfigurationMethods.assign_parameter(config, input_parameter)
if assigned_parameter is None:
return None