Commit eda5cc60 authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Refactored and split Configuration class

parent c3bd441a
Pipeline #40445 failed with stage
in 1 minute and 51 seconds
This diff is collapsed.
#!/usr/bin/env python3
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import os
import logging
import yaml
from dotenv import load_dotenv
logger = logging.getLogger()


class ConfigurationMethods:
    """This class contains methods to handle and interpret configuration parameters.

    All methods are static: they receive the configuration (a dictionary, as read from a .yml
    file) together with the name of the parameter to interpret, and they keep no state.
    """

    @staticmethod
    def read_config_file(filepath):
        """This function attempts to open and parse the configuration file. If the file is not
        found, it logs a critical error and raises an OSError.

        Args:
            filepath (str):
                Full file path to the .yml configuration file.

        Returns:
            config (dictionary):
                The parsed content of the configuration file (expected to be a dictionary).
                It is an empty dictionary if the file exists but has no content.

        Raises:
            OSError:
                If the configuration file is not found.
        """

        try:
            with open(filepath, "r") as ymlfile:
                # NOTE(review): consider yaml.safe_load if configuration files can ever come
                # from an untrusted source.
                config = yaml.load(ymlfile, Loader=yaml.FullLoader)
        except FileNotFoundError as error:
            error_message = "Error instantiating Configuration: configuration file not found"
            logger.critical(error_message)
            raise OSError(error_message) from error

        # An empty .yml file is parsed as None; return an empty dictionary instead, so that
        # the other methods of this class can search for keys in it.
        if config is None:
            config = {}

        return config

    @staticmethod
    def assign_parameter(config, input_parameter):
        """This function searches for the key input_parameter in the dictionary config. If
        found, it returns its value as is. If not found, it logs a warning and returns None.

        Args:
            config (dictionary):
                The configuration file read as a dictionary. It may be an empty dictionary.
            input_parameter (str):
                Name of the desired parameter, to be searched for as a primary key of config.

        Returns:
            assigned_parameter (str, dictionary or None):
                The content of config[input_parameter], which can be a string or a dictionary
                (or any other type the .yml parser has produced). It is None if
                input_parameter is not a key of config.
        """

        try:
            assigned_parameter = config[input_parameter]
        except KeyError:
            logger.warning(
                "Warning: parameter '%s' is missing from configuration file", input_parameter
            )
            assigned_parameter = None

        return assigned_parameter

    @staticmethod
    def assign_listed_parameters(config, input_parameter):
        """This function searches for the key input_parameter in the dictionary config, and
        splits its assigned value as per ", ", i.e. a comma plus space separation.

        If input_parameter is not a key of config, the output is None.

        Args:
            config (dictionary):
                The configuration file read as a dictionary. It may be an empty dictionary.
            input_parameter (str):
                Name of the desired parameter, to be searched for as a primary key of config.

        Returns:
            assigned_parameter (list of str or None):
                Each element of the list is an element of config[input_parameter], separated as
                per a comma followed by a space (", "). E.g. if 'config[input_parameter]' is
                "Name_A, Name_B", 'assigned_parameter' is ["Name_A", "Name_B"]. If the .yml
                parser has already produced a sequence (e.g. "[Name_A, Name_B]") or a single
                number (e.g. "2020"), the element(s) are returned as strings.

        Raises:
            TypeError:
                If config[input_parameter] is of a type that cannot be interpreted as a list
                (e.g. a dictionary).
        """

        assigned_parameter = ConfigurationMethods.assign_parameter(config, input_parameter)

        if assigned_parameter is None:
            return None

        if isinstance(assigned_parameter, str):
            return assigned_parameter.split(", ")

        # yaml interprets data types: a bracketed list arrives as a list and a single numeric
        # element arrives as a number, neither of which has a split() method
        if isinstance(assigned_parameter, (list, tuple)):
            return [str(element) for element in assigned_parameter]

        if isinstance(assigned_parameter, (int, float)):
            return [str(assigned_parameter)]

        error_message = "Error reading %s from configuration file: not a list" % (
            input_parameter
        )
        logger.critical(error_message)
        raise TypeError(error_message)

    @staticmethod
    def assign_integer_parameter(config, input_parameter):
        """This function searches for the key input_parameter in the dictionary config, and
        converts it into an integer.

        If input_parameter is not a key of config, the output is None. If the value cannot be
        interpreted as an integer, a critical error is logged and the output is None.

        Args:
            config (dictionary):
                The configuration file read as a dictionary. It may be an empty dictionary.
            input_parameter (str):
                Name of the desired parameter, to be searched for as a primary key of config.

        Returns:
            assigned_parameter (int or None):
                The content of config[input_parameter] converted into an integer. Floats are
                only accepted if they have no fractional part (e.g. 2.0).
        """

        assigned_parameter = ConfigurationMethods.assign_parameter(config, input_parameter)

        if assigned_parameter is None:
            return None

        not_an_integer = "Error reading %s from configuration file: not an integer"

        # bool is a subclass of int in Python: without this check, a value that yaml has
        # interpreted as a boolean (e.g. "yes") would be returned as if it were an integer
        if isinstance(assigned_parameter, bool):
            logger.critical(not_an_integer, input_parameter)
            return None

        if isinstance(assigned_parameter, int):
            return assigned_parameter

        if isinstance(assigned_parameter, float):
            if assigned_parameter.is_integer():
                return int(assigned_parameter)
            logger.critical(not_an_integer, input_parameter)
            return None

        try:
            return int(assigned_parameter)
        except (TypeError, ValueError):
            # ValueError: a string that is not a number; TypeError: a list, dictionary, etc.
            logger.critical(not_an_integer, input_parameter)
            return None

    @staticmethod
    def assign_float_parameter(config, input_parameter, check_range, lower_bound, upper_bound):
        """This function searches for the key input_parameter in the dictionary config, and
        converts it into a float.

        If input_parameter is not a key of config, the output is None.

        Args:
            config (dictionary):
                The configuration file read as a dictionary. It may be an empty dictionary.
            input_parameter (str):
                Name of the desired parameter, to be searched for as a primary key of config.
            check_range (bool):
                If True, it will be verified that the desired float parameter belongs to the
                interval [lower_bound, upper_bound].
            lower_bound (float):
                Lower possible value of the desired float parameter, inclusive. Only verified
                if 'check_range' is True.
            upper_bound (float):
                Upper possible value of the desired float parameter, inclusive. Only verified
                if 'check_range' is True.

        Returns:
            assigned_parameter (float or None):
                The content of config[input_parameter] converted into a float.

        Raises:
            ValueError:
                If config[input_parameter] cannot be interpreted as a float, or if
                'check_range' is True and the value does not belong to the interval
                [lower_bound, upper_bound] (this includes NaN).
        """

        assigned_parameter = ConfigurationMethods.assign_parameter(config, input_parameter)

        if assigned_parameter is None:
            return None

        if isinstance(assigned_parameter, bool):
            # bool is a subclass of int in Python, but a boolean is not a valid float value;
            # it is left untouched so that the "not a float" error below is raised
            pass
        elif isinstance(assigned_parameter, int):
            assigned_parameter = float(assigned_parameter)
        elif isinstance(assigned_parameter, str) and (
            "e" in assigned_parameter or "E" in assigned_parameter
        ):
            # yaml may hand over numbers in scientific notation (e.g. "1e-16") as strings
            try:
                assigned_parameter = float(assigned_parameter)
            except ValueError:
                # Not a number after all: kept as a string so that the "not a float" error
                # below is logged and raised
                pass

        if not isinstance(assigned_parameter, float):
            error_message = "Error reading %s from configuration file: not a float" % (
                input_parameter
            )
            logger.critical(error_message)
            raise ValueError(error_message)

        # Written as a chained comparison so that NaN (for which any comparison is False) is
        # rejected instead of silently passing the check
        if check_range and not (lower_bound <= assigned_parameter <= upper_bound):
            error_message = (
                "Error reading %s from configuration file: float out of range. "
                "Valid range: [%s, %s]"
                % (
                    input_parameter,
                    "{:.2f}".format(lower_bound),
                    "{:.2f}".format(upper_bound),
                )
            )
            logger.critical(error_message)
            raise ValueError(error_message)

        return assigned_parameter

    @staticmethod
    def assign_boolean_parameter(config, input_parameter):
        """This function searches for the key input_parameter in the dictionary config, and
        converts it into a boolean.

        If input_parameter is not a key of config, the output is None. If the value cannot be
        interpreted as a boolean, a critical error is logged and the output is None.

        Args:
            config (dictionary):
                The configuration file read as a dictionary. It may be an empty dictionary.
            input_parameter (str):
                Name of the desired parameter, to be searched for as a primary key of config.

        Returns:
            assigned_parameter (bool or None):
                The content of config[input_parameter] converted into a boolean. The strings
                "true"/"yes" and "false"/"no" are accepted irrespective of their case.
        """

        assigned_parameter = ConfigurationMethods.assign_parameter(config, input_parameter)

        if assigned_parameter is None:
            return None

        # yaml tries to interpret data types, the value may already be a boolean
        if isinstance(assigned_parameter, bool):
            return assigned_parameter

        if not isinstance(assigned_parameter, str):
            logger.critical(
                "Error reading %s from configuration file: not a boolean", input_parameter
            )
            return None

        lowercase_parameter = assigned_parameter.lower()
        if lowercase_parameter in ("true", "yes"):
            return True
        if lowercase_parameter in ("false", "no"):
            return False

        logger.critical(
            "Error reading %s from configuration file: "
            "string '%s' cannot be interpreted as boolean",
            input_parameter,
            assigned_parameter,
        )
        return None

    @staticmethod
    def assign_hierarchical_parameters(config, input_parameter, requested_nested=None):
        """This function searches for the key input_parameter in the dictionary config, and for
        each of the elements of requested_nested as keys of config[input_parameter].

        If input_parameter is not a key of config, the output is None.
        If config[input_parameter] is not a dictionary, the output is None.
        If input_parameter is a key of config, but one of the elements of requested_nested is
        not a key of config[input_parameter], the output is None.

        Args:
            config (dictionary):
                The configuration file read as a dictionary. It may be an empty dictionary.
            input_parameter (str):
                Name of the desired parameter, to be searched for as a primary key of config.
            requested_nested (list of str):
                List of the names of the desired nested parameters, to be searched for as keys
                of config[input_parameter]. If empty or None (default), the function will
                retrieve all nested parameters available in 'config'.

        Returns:
            assigned_parameter (dictionary or None):
                The content of config[input_parameter], if input_parameter is a key of config
                and all elements of requested_nested are keys of config[input_parameter], or
                None otherwise.
        """

        assigned_parameter = ConfigurationMethods.assign_parameter(config, input_parameter)

        if assigned_parameter is None:
            return None

        if not isinstance(assigned_parameter, dict):
            logger.critical(
                "ERROR instantiating Configuration: parameter '%s' does not contain "
                "nested parameters",
                input_parameter,
            )
            return None

        # Nothing explicitly requested means that all available nested parameters are taken,
        # in which case none of them can be missing
        if not requested_nested:
            return assigned_parameter

        missing_parameters = [
            requested_parameter
            for requested_parameter in requested_nested
            if requested_parameter not in assigned_parameter
        ]

        # All missing nested parameters are reported, not just the first one
        for missing_parameter in missing_parameters:
            logger.critical(
                "ERROR instantiating Configuration: parameter '%s' does not exist in %s",
                missing_parameter,
                input_parameter,
            )

        if missing_parameters:
            return None

        return assigned_parameter

    @staticmethod
    def retrieve_database_credentials(
        config, input_parameter, env_filename, force_config_over_hierarchies=False
    ):
        """This function retrieves the credentials needed to (later) connect to a specific SQL
        database. If force_config_over_hierarchies is False, it does so hierarchically, by
        giving top priority to environment variables that are created when running the CI
        Pipeline, second priority to environment variables that are created locally if a .env
        file with name env_filename is provided and, finally, by looking at what has been
        indicated in the input configuration file (read as config).

        When force_config_over_hierarchies is True, it does not matter where the code is
        running, it will always retrieve the credentials from the configuration file.

        Args:
            config (dictionary):
                The configuration file read as a dictionary. It may be an empty dictionary.
            input_parameter (str):
                Name of the desired parameter, to be searched for as a primary key of config.
            env_filename (str):
                Name of a local .env file that will be run to load environment variables. It
                is ignored if it is None or if the file does not exist.
            force_config_over_hierarchies (bool):
                If True, the contents of the .yml configuration file specified in filepath will
                take precedence over any other hierarchy (e.g. preference of environment
                variables if they exist). If False, hierarchies of preference established in
                this class are applied. This parameter is used for forcing the testing of this
                class under certain circumstances. Default: False.

        Returns:
            db_config (dict or None):
                Dictionary containing the credentials needed to connect to the desired SQL
                database. When retrieved from the configuration file, it is a copy of
                config[input_parameter] (or None if that is missing or not a dictionary), in
                which "port", if present, is converted into an integer. The expected keys are:
                    host (str):
                        SQL database host address.
                    dbname (str):
                        Name of the SQL database.
                    port (int):
                        Port where the SQL database can be found.
                    username (str):
                        User name to connect to the SQL database.
                    password (str):
                        Password associated with username.
                When retrieved from environment variables, the keys are "host", "dbname",
                "username", "password" and "sourceid" (no "port"), and the value of any
                variable that is not defined is None.

        Raises:
            ValueError or TypeError:
                If the "port" indicated in the configuration file cannot be converted into an
                integer.
        """

        # The configuration file is always interpreted first, so that missing parameters get
        # reported even when the credentials end up being taken from environment variables
        db_config = ConfigurationMethods.assign_hierarchical_parameters(config, input_parameter)

        if db_config is not None:
            # Work on a copy so that the dictionary of the caller is not modified
            db_config = dict(db_config)
            # The port is a nested parameter: it needs to be searched for in db_config, not
            # among the primary keys of config
            if "port" in db_config:
                db_config["port"] = int(db_config["port"])

        if force_config_over_hierarchies:
            return db_config

        if "GDEIMPORTER_DB_HOST" in os.environ:
            # Running the CI pipeline
            return {
                "host": os.environ.get("GDEIMPORTER_DB_HOST"),
                "dbname": os.environ.get("GDEIMPORTER_DB"),
                "username": os.environ.get("GDEIMPORTER_USER"),
                "password": os.environ.get("GDEIMPORTER_PASSWORD"),
                "sourceid": os.environ.get("GDEIMPORTER_SOURCEID"),
            }

        if env_filename is not None and os.path.isfile(env_filename):
            # Testing locally with a test database
            load_dotenv(env_filename)
            return {
                "host": os.environ.get("GDEIMPORTER_LOCAL_DB_HOST"),
                "dbname": os.environ.get("GDEIMPORTER_LOCAL_DB"),
                "username": os.environ.get("GDEIMPORTER_LOCAL_USER"),
                "password": os.environ.get("GDEIMPORTER_LOCAL_PASSWORD"),
                "sourceid": os.environ.get("GDEIMPORTER_LOCAL_SOURCEID"),
            }

        return db_config

    @staticmethod
    def validate_exposure_entities_code(config):
        """This function retrieves the content of config["exposure_entities_code"], and checks
        whether it complies with the following conditions:

            - It must be either a string or a dictionary.
            - If it is one string, it should be equal to "ISO3" (irrespective of its case).
            - If it is a dictionary, the elements within each key should be 3-character strings.

        An error is raised if these conditions are not met.

        Examples of valid values of config["exposure_entities_code"]:
            1) "ISO3"
            2) {"Exposure Entity 1": "EE1",
                "Exposure Entity 2": "XXX"
               }

        Args:
            config (dictionary):
                The configuration file read as a dictionary. It may be an empty dictionary.

        Returns:
            assigned_parameter (str, dictionary or None):
                The content of config["exposure_entities_code"], which can be a string or a
                dictionary. It is None if "exposure_entities_code" is not a key of config.

        Raises:
            ValueError:
                If the value is a string different from "ISO3", or a dictionary in which a
                string does not have exactly 3 characters.
            TypeError:
                If the value is neither a string nor a dictionary, or a dictionary in which
                the content of a key is not a string.
        """

        assigned_parameter = ConfigurationMethods.assign_parameter(
            config, "exposure_entities_code"
        )

        if assigned_parameter is None:
            return None

        if isinstance(assigned_parameter, str):
            if assigned_parameter.upper() != "ISO3":
                raise ValueError("String must be 'ISO3'.")
            return assigned_parameter

        if not isinstance(assigned_parameter, dict):
            raise TypeError("The value must be a string or a dictionary.")

        for entity_code in assigned_parameter.values():
            # Anything other than a string (e.g. a nested dictionary) is not a valid code
            if not isinstance(entity_code, str):
                raise TypeError("The dictionary must contain only one level of keys.")
            if len(entity_code) != 3:
                raise ValueError(
                    "The content of each dictionary key must be a 3-character string."
                )

        return assigned_parameter
...@@ -196,16 +196,6 @@ def test_Configuration(): ...@@ -196,16 +196,6 @@ def test_Configuration():
) )
assert "OSError" in str(excinfo.type) assert "OSError" in str(excinfo.type)
# Test case in which an expected hierarchical parameter is just a string
filepath = os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml")
returned_config = Configuration(filepath, force_config_over_hierarchies=True)
config = returned_config.read_config_file(filepath)
returned_database_built_up = returned_config._assign_hierarchical_parameters(
config, "database_built_up", ["missing_param"]
)
assert returned_database_built_up is None
# Test case in which exposure_entities_code is ISO3 # Test case in which exposure_entities_code is ISO3
returned_config = Configuration( returned_config = Configuration(
os.path.join( os.path.join(
...@@ -277,7 +267,7 @@ def test_Configuration(): ...@@ -277,7 +267,7 @@ def test_Configuration():
assert round(returned_config.data_units_surface_threshold, 16) == round(1e-16, 16) assert round(returned_config.data_units_surface_threshold, 16) == round(1e-16, 16)
# Test the case in which 'data_units_surface_threshold' is positive but out of range # Test the case in which 'data_units_surface_threshold' is positive but out of range
with pytest.raises(OSError) as excinfo: with pytest.raises(ValueError) as excinfo:
returned_config = Configuration( returned_config = Configuration(
os.path.join( os.path.join(
os.path.dirname(__file__), os.path.dirname(__file__),
...@@ -286,11 +276,11 @@ def test_Configuration(): ...@@ -286,11 +276,11 @@ def test_Configuration():
), ),
force_config_over_hierarchies=True, force_config_over_hierarchies=True,
) )
assert "OSError" in str(excinfo.type) assert "ValueError" in str(excinfo.type)
# Test the case in which 'data_units_min_admisible_area' is larger than # Test the case in which 'data_units_min_admisible_area' is larger than
# 'data_units_max_admisible_area' # 'data_units_max_admisible_area'
with pytest.raises(OSError) as excinfo: with pytest.raises(ValueError) as excinfo:
returned_config = Configuration( returned_config = Configuration(
os.path.join( os.path.join(
os.path.dirname(__file__), os.path.dirname(__file__),
...@@ -299,40 +289,4 @@ def test_Configuration(): ...@@ -299,40 +289,4 @@ def test_Configuration():
), ),
force_config_over_hierarchies=True, force_config_over_hierarchies=True,
) )
assert "OSError" in str(excinfo.type)
def test_Configuration_validate_exposure_entities_code():
    """Verify that Configuration._validate_exposure_entities_code rejects invalid contents of
    'exposure_entities_code': a string different from "ISO3" and a dictionary with strings of
    the wrong length (ValueError), and a dictionary that contains dictionaries (TypeError).
    """

    # Instantiate Configuration
    returned_config = Configuration(
        os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml"),
        force_config_over_hierarchies=True,
    )

    config = {}

    # Test case in which exposure_entities_code is a string different from ISO3
    config["exposure_entities_code"] = "WRONG"

    with pytest.raises(ValueError) as excinfo:
        returned_config._validate_exposure_entities_code(config)
    assert "ValueError" in str(excinfo.type)

    # Test case in which exposure_entities_code is a dictionary with wrong length strings
    config["exposure_entities_code"] = {}
    config["exposure_entities_code"]["Name 1"] = "LONGERSTRING"
    config["exposure_entities_code"]["Name 2"] = "XXX"

    with pytest.raises(ValueError) as excinfo:
        returned_config._validate_exposure_entities_code(config)
    assert "ValueError" in str(excinfo.type)

    # Test case in which exposure_entities_code is a dictionary with dictionaries
    config["exposure_entities_code"] = {}
    config["exposure_entities_code"]["Name 1"] = {}
    config["exposure_entities_code"]["Name 1"]["Something 1"] = "XXX"
    config["exposure_entities_code"]["Name 1"]["Something 2"] = "XXX"
    config["exposure_entities_code"]["Name 2"] = "XXX"

    with pytest.raises(TypeError) as excinfo:
        returned_config._validate_exposure_entities_code(config)
    assert "TypeError" in str(excinfo.type)
#!/usr/bin/env python3
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import pytest
from gdeimporter.tools.configuration_methods import ConfigurationMethods
def test_ConfigurationMethods_assign_parameter():
    """Verify that assign_parameter returns the value of an existing key and None for a key
    that is missing from the configuration."""

    config = {"param_A": "something"}

    # Key that exists in the configuration
    found_parameter = ConfigurationMethods.assign_parameter(config, "param_A")
    assert found_parameter == "something"

    # Key that does not exist in the configuration
    missing_parameter = ConfigurationMethods.assign_parameter(config, "param_B")
    assert missing_parameter is None
def test_ConfigurationMethods_assign_listed_parameters():
    """Verify that assign_listed_parameters splits a comma-plus-space separated string into a
    list of strings, and returns None for a key that is missing from the configuration."""

    config = {"param_A": "A1, A2, a3, b4"}
    expected_results = ["A1", "A2", "a3", "b4"]

    returned_parameter = ConfigurationMethods.assign_listed_parameters(config, "param_A")

    assert isinstance(returned_parameter, list)
    # Comparing the whole lists also verifies the number of elements: an element-by-element
    # loop over the returned list would not notice a result that is too short
    assert returned_parameter == expected_results

    assert ConfigurationMethods.assign_listed_parameters(config, "param_B") is None
def test_ConfigurationMethods_assign_integer_parameter():
    """Verify that assign_integer_parameter returns an integer as is, and None for a float
    with a fractional part and for a string that is not a number."""

    config = {
        "param_A": 7,
        "param_B": 2.3,
        "param_C": "something",
    }

    # Expected results are associated with the parameter names explicitly, instead of relying
    # on the position of the keys in 'config'
    expected_results = {
        "param_A": 7,
        "param_B": None,
        "param_C": None,
    }

    for parameter_name, expected_result in expected_results.items():
        returned_parameter = ConfigurationMethods.assign_integer_parameter(
            config, parameter_name
        )

        if expected_result is None:
            assert returned_parameter is None
        else:
            # Integers are compared by value (and type), not by identity
            assert isinstance(returned_parameter, int)
            assert returned_parameter == expected_result
def test_ConfigurationMethods_assign_float_parameter():
config = {}
config["param_A"] = 7
config["param_B"] = 2.3
config["param_C"] = 1.5e-2
expected_results = [7.0, 2.3, 0.015]
for i in range(len(expected_results)):
returned_parameter = ConfigurationMethods.assign_float_parameter(
config, list(config.keys())[i], True, 0.0, 10.0
)
if isinstance(expected_results[i], float):
assert round(returned_parame