diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 75b7fee42a8b76b147e3c49fcf89a47cd22997e3..4841f0566308d760546760c9d6b56e4d502380bc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,8 +1,19 @@ image: python:3.9-bullseye +services: + - name: $CI_REGISTRY/dynamicexposure/server-components/containers/docker-obm-database:master + alias: postgres + # Make pip cache the installed dependencies variables: PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip" + POSTGRES_DB: testdatabase + POSTGRES_USER: tester + POSTGRES_PASSWORD: somepass + GDEEXPORTER_DB_HOST: postgres + GDEEXPORTER_DB: ${POSTGRES_DB} + GDEEXPORTER_USER: ${POSTGRES_USER} + GDEEXPORTER_PASSWORD: ${POSTGRES_PASSWORD} cache: paths: diff --git a/README.md b/README.md index 71311a7e6b04bd64bce684c17f17bdc0468d6b94..4d82660f742f528f3a2ac07a60e32dd60d392020 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,52 @@ git clone https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/gde-e cd gde-exporter pip3 install -e . ``` +## Preparation + +### Configuration + +Copy the file `config_example.yml` to your working directory as `config.yml` and provide the +necessary parameters. Required parameters are: + +- `model_name`: Name of the input aggregated exposure model to be processed. +- `occupancies_to_run`: List of occupancies for which the code will be run, separated by ", " +(comma and space). They need to exist for the `exposure format` of `model_name`. Currently +supported values: residential, commercial, industrial. +- `exposure_entities_to_run`: List of names of exposure entities for which the code will be run. +It is used even if `selection_mode` (see below) is not `exposure_entity`. +Currently supported options: + - "all": The list of names associated with `model_name` will be retrieved from the + [GDE Tiles](https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/database-gdetiles) +database. + - A comma-space-separated list of entity names: This list of names will be used. 
+ - A full path to a .txt or .csv file: The list of names will be retrieved from the indicated + .txt/.csv file. +- `exposure_entities_code`: Either "ISO3" or a nested structure with exposure entities names +and 3-character codes. When running `model_name=esrm20`, "ISO3" is the preferred option. +- `geographic_selection`: Set of parameters that define the geographic area for which the output +will be produced: + - `selection_mode`: `exposure_entity`, `data_unit_id`, `quadkeys`, or `bounding_box`. In all + cases only data from the exposure entities specified in `exposure_entities_to_run` will be + considered, even if the geographic area includes other exposure entities (they will be + ignored). The meaning of the `selection_mode` options is as follows: + - `exposure_entity`: The quadkeys associated with the exposure entities specified in + `exposure_entities_to_run` will be retrieved from `database_gde_tiles` (see below) and output. + - `data_unit_id`: The quadkeys associated with the data units specified in `data_unit_ids` + (see below) will be retrieved from `database_gde_tiles` and output. + - `quadkeys`: The quadkeys contained in a TXT file whose file path is specified in + `quadkeys_file` (see below) will be retrieved and output. + - `bounding_box`: The quadkeys that contain the bounding box defined by the coordinates + `lon_w`, `lon_e`, `lat_s` and `lat_n`, under `bounding_box` (see below) will be retrieved and + output. + - `data_unit_ids`: Required if `selection_mode = data_unit_id`. List of IDs of data units, + separated by a comma and a space. + - `quadkeys_file`: Required if `selection_mode = quadkeys`. Full path to a TXT file + containing quadkeys (either one per row, comma-separated, or a mix). + - `bounding_box`: Required if `selection_mode = bounding_box`. Coordinates `lon_w`, `lon_e`, + `lat_s` and `lat_n` of the bounding box. 
class Configuration:
    """This class handles the configuration parameters of the gde-exporter.

    Attributes:
        self.model_name (str):
            Name of the input aggregated model.
        self.occupancies_to_run (list of str):
            List of occupancy cases of the input aggregated exposure model for which the code
            will be run.
        self.exposure_entities_to_run (list of str):
            List of names of the exposure entities for which the code will be run.
        self.exposure_entities_code (str or dict):
            If "ISO3" (str), the country ISO3 codes associated with the names of the exposure
            entities will be automatically retrieved and used as their codes. Otherwise it needs
            to be a dictionary whose keys are the names of the exposure entities. The content
            within each key is a 3-character string to be used as the code for the corresponding
            exposure entity. E.g.:
                self.exposure_entities_code = {
                    "Exposure Entity 1": "EE1",
                    "Exposure Entity 2": "XXX"
                    }
        self.geographic_selection (dict):
            Dictionary of parameters that define the selection of quadkeys for which the code
            will be run. It contains (some of) the following keys:
                selection_mode (str):
                    Way in which the quadkeys will be selected. Supported values are:
                        "exposure_entity":
                            All quadkeys that exist in the 'data_unit_tiles' table of the GDE
                            Tiles database and that are associated with the exposure entities
                            indicated in self.exposure_entities_to_run and the aggregated
                            exposure model indicated in self.model_name.
                        "data_unit_id":
                            All quadkeys that exist in the 'data_unit_tiles' table of the GDE
                            Tiles database and that are associated with the data unit IDs
                            indicated in self.geographic_selection["data_unit_ids"] and the
                            aggregated exposure model indicated in self.model_name.
                        "quadkeys":
                            All quadkeys retrieved from a TXT file whose path is indicated in
                            self.geographic_selection["quadkeys_file"]. Only data associated
                            with the exposure entities indicated in
                            self.exposure_entities_to_run and the aggregated exposure model
                            indicated in self.model_name will be processed.
                        "bounding_box":
                            All quadkeys of the tiles that contain the bounding box with limits
                            indicated in self.geographic_selection["bounding_box"]. Only data
                            associated with the exposure entities indicated in
                            self.exposure_entities_to_run and the aggregated exposure model
                            indicated in self.model_name will be processed.
                quadkeys_file (str):
                    The path to a TXT file that contains a list of quadkeys to be processed.
                    Only needed if selection_mode = "quadkeys".
                data_unit_ids (list of str):
                    List of data unit IDs to be processed. Only needed if selection_mode =
                    "data_unit_id".
                bounding_box (dict):
                    Dictionary with the coordinates of the bounding box to be processed. Only
                    needed if selection_mode = "bounding_box". The keys are:
                        "lon_w" (float): West-most longitude.
                        "lon_e" (float): East-most longitude.
                        "lat_s" (float): South-most latitude.
                        "lat_n" (float): North-most latitude.
        self.database_gde_tiles (dict):
            Dictionary containing the credentials needed to connect to the SQL database in which
            information on the GDE tiles is stored. The exact parameters needed depend on the
            database. They can be:
                host (str):
                    SQL database host address.
                dbname (str):
                    Name of the SQL database.
                port (int):
                    Port where the SQL database can be found.
                username (str):
                    User name to connect to the SQL database.
                password (str):
                    Password associated with self.username.
        self.quadkeys_to_process (dict):
            Dictionary whose keys contain lists of quadkeys to be processed. The keys depend on
            self.geographic_selection["selection_mode"] and can be:
                - exposure entity codes
                - data unit IDs
                - "quadkeys_list"
                - "bounding_box"
        self.number_quadkeys_to_process (int):
            Total number of quadkeys to process (from all keys of self.quadkeys_to_process).
    """

    REQUIRES = [
        "model_name",
        "occupancies_to_run",
        "exposure_entities_to_run",
        "exposure_entities_code",
        "geographic_selection",
        "database_gde_tiles",
    ]

    # Zoom level passed to mercantile when converting a bounding box into tiles/quadkeys
    QUADKEY_ZOOM_LEVEL = 18

    def __init__(self, filepath, force_config_over_hierarchies=False):
        """
        Args:
            filepath (str):
                Full file path to the .yml configuration file.
            force_config_over_hierarchies (bool):
                If True, the contents of the .yml configuration file specified in filepath will
                take precedence over any other hierarchy (e.g. preference of environment
                variables if they exist). If False, hierarchies of preference established in
                this class are applied. This parameter is used for forcing the testing of this
                class under certain circumstances. Default: False.

        Raises:
            OSError:
                If any of the parameters listed in self.REQUIRES is None after reading the
                configuration file, or if the parameter needed by the chosen selection mode is
                missing (see interpret_geographic_selection).
            SystemExit:
                If the validation of 'exposure_entities_code' raises ValueError or TypeError.
        """

        config = ConfigurationMethods.read_config_file(filepath)

        self.model_name = ConfigurationMethods.assign_parameter(config, "model_name")

        self.occupancies_to_run = ConfigurationMethods.assign_listed_parameters(
            config, "occupancies_to_run"
        )

        self.exposure_entities_to_run = ConfigurationMethods.assign_listed_parameters(
            config, "exposure_entities_to_run"
        )

        try:
            self.exposure_entities_code = ConfigurationMethods.validate_exposure_entities_code(
                config
            )
        except ValueError as e:
            error_message = (
                "Error: the configuration file assigns unsupported values "
                "to exposure_entities_code. The program cannot run. %s" % (e)
            )
            logger.critical(error_message)
            sys.exit(1)
        except TypeError as e:
            error_message = (
                "Error: the configuration file assigns an unsupported data type "
                "to exposure_entities_code. The program cannot run. %s" % (e)
            )
            logger.critical(error_message)
            sys.exit(1)

        self.geographic_selection = ConfigurationMethods.assign_hierarchical_parameters(
            config, "geographic_selection", requested_nested=["selection_mode"]
        )
        # Only interpret the selection if it could be retrieved; a value of None is reported
        # by the check on self.REQUIRES below instead of failing here with a TypeError.
        # NOTE(review): presumably None is what ConfigurationMethods returns when the section
        # is missing -- confirm against gdeimporter.
        if self.geographic_selection is not None:
            self.interpret_geographic_selection()

        self.database_gde_tiles = ConfigurationMethods.retrieve_database_credentials(
            config,
            "database_gde_tiles",
            "test_db_gde_tiles.env",
            "GDEEXPORTER",
            force_config_over_hierarchies,
        )

        self.quadkeys_to_process = None
        self.number_quadkeys_to_process = None

        # Terminate if critical parameters are missing (not all parameters are critical)
        for key_parameter in self.REQUIRES:
            if getattr(self, key_parameter) is None:
                error_message = (
                    "Error: parameter '%s' could not be retrieved from "
                    "configuration file. The program cannot run." % (key_parameter)
                )
                logger.critical(error_message)
                raise OSError(error_message)

    @staticmethod
    def _read_comma_separated_file(filepath):
        """Return the elements listed in a text file (one per row, comma-separated, or a mix).

        Args:
            filepath (str):
                Path to the .txt/.csv file to read.

        Returns:
            elements (list of str):
                Elements in order of appearance, with surrounding whitespace removed. Empty
                elements (e.g. blank rows or trailing commas) are skipped.
        """

        elements = []
        with open(filepath, "r") as f:
            for row in f:
                for element in row.split(","):
                    element = element.strip()
                    if element:
                        elements.append(element)

        return elements

    def interpret_exposure_entities_to_run(self, aggregated_source_id=0):
        """This function interprets the value assigned to self.exposure_entities_to_run from the
        configuration file and updates self.exposure_entities_to_run accordingly.

        Args:
            aggregated_source_id (int):
                ID of the source of the aggregated exposure model to be run. Only needed if
                'exposure_entities_to_run' is "all" in the configuration file.

        Returns:
            The method updates self.exposure_entities_to_run as a function of its content.
            Possibilities:
                self.exposure_entities_to_run == ["all"]:
                    self.exposure_entities_to_run is updated to contain the list of 3-character
                    codes of all exposure entities associated with 'aggregated_source_id' in the
                    database self.database_gde_tiles.
                self.exposure_entities_to_run contains a list with a path to a .txt or .csv
                file:
                    self.exposure_entities_to_run is updated to contain the list of 3-character
                    codes of the exposure entities listed in the indicated .txt/.csv file.
                self.exposure_entities_to_run contains a list with one or more names of exposure
                entities:
                    self.exposure_entities_to_run is updated to contain the list of 3-character
                    codes of these names.
                Any other case (including an empty list):
                    self.exposure_entities_to_run becomes an empty list.
        """

        # Nothing to interpret: avoid indexing an empty list
        if not self.exposure_entities_to_run:
            self.exposure_entities_to_run = []
            return

        first_entry = self.exposure_entities_to_run[0]

        if first_entry.lower() == "all":
            # Retrieve 3-char codes of all exposure entities associated with
            # aggregated_source_id in the 'data_units' table of self.database_gde_tiles
            self.exposure_entities_to_run = (
                DatabaseQueries.retrieve_all_exposure_entities_of_aggregated_source_id(
                    aggregated_source_id, self.database_gde_tiles, "data_units"
                )
            )
            return

        if first_entry.split(".")[-1] in ("txt", "csv"):
            # Retrieve names of exposure entities from the indicated file
            exposure_entities_full_names = self._read_comma_separated_file(first_entry)
        else:
            # Keep the original content (one or several names are listed)
            exposure_entities_full_names = list(self.exposure_entities_to_run)

        exposure_entities_to_run = []

        if isinstance(self.exposure_entities_code, str):
            # If so, it's already been validated that it's "ISO3"
            for full_name in exposure_entities_full_names:
                iso3_code = ExposureEntity.retrieve_country_ISO3(full_name)
                if iso3_code is not None:
                    exposure_entities_to_run.append(iso3_code)
                else:
                    logger.warning(
                        "ExposureEntity.retrieve_country_ISO3 has returned 'None' for exposure "
                        "entity %s. %s will not be run.",
                        full_name,
                        full_name,
                    )
        elif isinstance(self.exposure_entities_code, dict):
            for full_name in exposure_entities_full_names:
                if full_name in self.exposure_entities_code:
                    exposure_entities_to_run.append(self.exposure_entities_code[full_name])
                else:
                    logger.warning(
                        "'exposure_entities_code' in the configuration file does not contain a "
                        "code for exposure entity %s. %s will not be run.",
                        full_name,
                        full_name,
                    )

        self.exposure_entities_to_run = exposure_entities_to_run

        return

    def interpret_geographic_selection(self):
        """
        This function interprets the contents of self.geographic_selection. Different attributes
        are needed depending on the selection mode specified by the user:
            selection_mode = "exposure_entity" requires no specific attributes
            selection_mode = "data_unit_id" requires self.geographic_selection["data_unit_ids"]
            selection_mode = "quadkeys" requires self.geographic_selection["quadkeys_file"]
            selection_mode = "bounding_box" requires self.geographic_selection["bounding_box"]

        Unnecessary attributes are set to None in self.geographic_selection. If the selection
        mode is not one of the four above, self.geographic_selection is left untouched.

        Raises:
            OSError:
                If the attribute required by the selection mode is missing (or None).
        """

        # Attribute of self.geographic_selection required by each supported selection mode
        required_by_mode = {
            "exposure_entity": None,
            "data_unit_id": "data_unit_ids",
            "quadkeys": "quadkeys_file",
            "bounding_box": "bounding_box",
        }

        selection_mode = self.geographic_selection["selection_mode"].lower()
        if selection_mode not in required_by_mode:
            return

        required_key = required_by_mode[selection_mode]

        # Attributes that belong to other selection modes are not needed
        for key in ("data_unit_ids", "quadkeys_file", "bounding_box"):
            if key != required_key:
                self.geographic_selection[key] = None

        if required_key is None:
            return

        if self.geographic_selection.get(required_key) is None:
            error_message = (
                "Error: selection mode is '%s' but parameter '%s' is missing from "
                "configuration file. The program cannot run." % (selection_mode, required_key)
            )
            logger.critical(error_message)
            raise OSError(error_message)

        if selection_mode == "data_unit_id":
            # Data unit IDs are given as a listed parameter: convert them into a list
            self.geographic_selection[
                "data_unit_ids"
            ] = ConfigurationMethods.assign_listed_parameters(
                self.geographic_selection, "data_unit_ids"
            )

        return

    def determine_quadkeys_to_process(
        self, aggregated_source_id, db_gde_tiles_config, db_table
    ):
        """
        This function identifies the quadkeys to be processed based on the contents of
        self.geographic_selection:
            selection_mode = "exposure_entity":
                This function will retrieve the quadkeys associated with each of the exposure
                entities specified in self.exposure_entities_to_run and 'aggregated_source_id'
                from the 'db_table' of the database whose credentials are given in
                'db_gde_tiles_config'.
            selection_mode = "data_unit_id":
                This function will retrieve the quadkeys associated with each of the data units
                specified in self.geographic_selection["data_unit_ids"] and
                'aggregated_source_id' from the 'db_table' of the database whose credentials are
                given in 'db_gde_tiles_config'.
            selection_mode = "quadkeys":
                This function will retrieve the quadkeys specified in the TXT file whose file
                path is indicated in self.geographic_selection["quadkeys_file"]. Duplicates are
                removed, keeping the order of first appearance.
            selection_mode = "bounding_box":
                This function will identify the quadkeys (at zoom level
                self.QUADKEY_ZOOM_LEVEL) of the tiles returned by mercantile for the bounding
                box defined by the coordinates in self.geographic_selection["bounding_box"].

        Args:
            aggregated_source_id (int):
                ID of the source of the aggregated exposure model for which the data unit IDs
                and geometries will be retrieved.
            db_gde_tiles_config (dict):
                Dictionary containing the credentials needed to connect to the SQL database in
                which information on the data units is stored. If empty or None,
                self.database_gde_tiles is used. The keys of the dictionary need to be:
                    host (str):
                        SQL database host address.
                    dbname (str):
                        Name of the SQL database.
                    port (int):
                        Port where the SQL database can be found.
                    username (str):
                        User name to connect to the SQL database.
                    password (str):
                        Password associated with self.username.
            db_table (str):
                Name of the table of the SQL database where the data units are stored. If empty
                or None, "data_unit_tiles" is used. It is assumed that this table contains, at
                least, the following fields:
                    quadkey (str):
                        String indicating the quadkey of a tile.
                    aggregated_source_id (int):
                        ID of the source of the aggregated exposure model.
                    exposure_entity (str):
                        3-character code of the exposure entity.
                    data_unit_id (str):
                        ID of the data unit.

        Returns:
            This function updates self.quadkeys_to_process and self.number_quadkeys_to_process.

        Raises:
            ValueError:
                If self.geographic_selection["selection_mode"] is not a supported mode.
        """

        selection_mode = self.geographic_selection["selection_mode"].lower()

        if selection_mode in ("exposure_entity", "data_unit_id"):
            # Honour the arguments; fall back to the values previously hard-coded here
            db_config = db_gde_tiles_config or self.database_gde_tiles
            table = db_table or "data_unit_tiles"

            if selection_mode == "exposure_entity":
                group_ids = self.exposure_entities_to_run
                retrieve_quadkeys = (
                    DatabaseQueries.retrieve_quadkeys_by_exposure_entity_aggregated_source_id
                )
            else:
                group_ids = self.geographic_selection["data_unit_ids"]
                retrieve_quadkeys = (
                    DatabaseQueries.retrieve_quadkeys_by_data_unit_id_aggregated_source_id
                )

            quadkeys_to_process = {
                group_id: retrieve_quadkeys(group_id, aggregated_source_id, db_config, table)
                for group_id in group_ids
            }

        elif selection_mode == "quadkeys":
            # Retrieve quadkeys from the indicated file, removing duplicates (order is kept)
            quadkeys_list = list(
                dict.fromkeys(
                    self._read_comma_separated_file(self.geographic_selection["quadkeys_file"])
                )
            )
            quadkeys_to_process = {"quadkeys_list": quadkeys_list}

        elif selection_mode == "bounding_box":
            bounding_box = self.geographic_selection["bounding_box"]
            tiles = mercantile.tiles(
                bounding_box["lon_w"],
                bounding_box["lat_s"],
                bounding_box["lon_e"],
                bounding_box["lat_n"],
                self.QUADKEY_ZOOM_LEVEL,
            )
            quadkeys_to_process = {
                "bounding_box": [mercantile.quadkey(tile) for tile in tiles]
            }

        else:
            raise ValueError(
                "Error: selection mode '%s' is not supported. Supported values are: "
                "exposure_entity, data_unit_id, quadkeys, bounding_box."
                % (self.geographic_selection["selection_mode"])
            )

        self.quadkeys_to_process = quadkeys_to_process
        self.number_quadkeys_to_process = sum(
            len(quadkeys_list) for quadkeys_list in quadkeys_to_process.values()
        )

        return
class DatabaseQueries:
    """This class contains methods used to query the OpenBuildingMap (OBM) and Global Dynamic
    Exposure (GDE) databases.
    """

    @staticmethod
    def _fetch_all(db_gde_tiles_config, sql_query):
        """Run 'sql_query' against the database given by 'db_gde_tiles_config' and return all
        the rows retrieved. The connection is closed even if the query fails.

        Args:
            db_gde_tiles_config (dict):
                Dictionary containing the credentials needed to connect to the SQL database
                (passed as keyword arguments to gdeimporter's Database).
            sql_query (str):
                Complete SQL query to execute.

        Returns:
            exec_result:
                Output of cursor.fetchall() for 'sql_query'.
        """

        db_gde_tiles = Database(**db_gde_tiles_config)
        db_gde_tiles.create_connection_and_cursor()

        try:
            db_gde_tiles.cursor.execute(sql_query)
            exec_result = db_gde_tiles.cursor.fetchall()
        finally:
            # Do not leak the connection if executing or fetching raises
            db_gde_tiles.close_connection()

        return exec_result

    @staticmethod
    def retrieve_aggregated_source_id_and_format(model_name, db_gde_tiles_config, db_table):
        """This function retrieves the ID and the format of the aggregated exposure model source
        whose name is 'model_name'.

        Args:
            model_name (str):
                Name of the source whose ID is to be retrieved.
            db_gde_tiles_config (dict):
                Dictionary containing the credentials needed to connect to the SQL database in
                which information on the aggregated_sources is stored. The keys of the
                dictionary need to be:
                    host (str):
                        SQL database host address.
                    dbname (str):
                        Name of the SQL database.
                    port (int):
                        Port where the SQL database can be found.
                    username (str):
                        User name to connect to the SQL database.
                    password (str):
                        Password associated with self.username.
            db_table (str):
                Name of the table of the SQL database where the aggregated_sources are stored.
                It is assumed that this table contains, at least, the following fields:
                    aggregated_source_id (int):
                        ID of the source of the aggregated exposure model.
                    name (str):
                        Name of the source of the aggregated exposure model.
                    format (str):
                        Format of the aggregated exposure model.

        Returns:
            aggregated_source_id (int):
                ID of the source of the aggregated exposure model with name 'model_name'. If
                'model_name' is not found (or found more than once), 'aggregated_source_id' is
                -999.
            aggregated_source_format (str):
                Format of the aggregated exposure model with name 'model_name'. If 'model_name'
                is not found (or found more than once), 'aggregated_source_format' is "UNKNOWN".
        """

        # NOTE(review): the query is built by string interpolation; values come from the
        # configuration file. Consider passing them as query parameters if the underlying
        # cursor supports it.
        sql_query = "SELECT aggregated_source_id, format FROM %s WHERE name='%s';"

        exec_result = DatabaseQueries._fetch_all(
            db_gde_tiles_config, sql_query % (db_table, model_name)
        )

        if len(exec_result) == 1:  # Entry exists --> retrieve
            aggregated_source_id = exec_result[0][0]
            aggregated_source_format = exec_result[0][1]
        else:  # None or more than one entries found, this is an error
            logger.error(
                "Error in retrieve_aggregated_source_id_and_format: "
                "more than one or no entry found for name = %s",
                model_name,
            )
            aggregated_source_id = -999
            aggregated_source_format = "UNKNOWN"

        return aggregated_source_id, aggregated_source_format

    @staticmethod
    def retrieve_all_exposure_entities_of_aggregated_source_id(
        aggregated_source_id, db_gde_tiles_config, db_table
    ):
        """This function retrieves the 3-character codes of all exposure entities associated
        with 'aggregated_source_id' in 'db_table' of the database whose credentials are given by
        'db_gde_tiles_config'.

        Args:
            aggregated_source_id (int):
                ID of the source of the aggregated exposure model to be run.
            db_gde_tiles_config (dict):
                Dictionary containing the credentials needed to connect to the SQL database in
                which information on exposure entities is stored. The keys of the dictionary
                need to be:
                    host (str):
                        SQL database host address.
                    dbname (str):
                        Name of the SQL database.
                    port (int):
                        Port where the SQL database can be found.
                    username (str):
                        User name to connect to the SQL database.
                    password (str):
                        Password associated with self.username.
            db_table (str):
                Name of the table of the SQL database from which the exposure entities can be
                retrieved. It is assumed that this table contains, at least, the following
                fields:
                    aggregated_source_id (int):
                        ID of the source of the aggregated exposure model.
                    exposure_entity (str):
                        3-character code of the exposure entity.

        Returns:
            exposure_entities (list of str):
                List of 3-character codes of the exposure entities associated with
                'aggregated_source_id' (empty if none are found).
        """

        sql_query = "SELECT DISTINCT(exposure_entity) FROM %s WHERE aggregated_source_id=%s;"

        exec_result = DatabaseQueries._fetch_all(
            db_gde_tiles_config, sql_query % (db_table, aggregated_source_id)
        )

        # Each row contains only one column: the code of the exposure entity
        exposure_entities = [row[0] for row in exec_result]

        return exposure_entities

    @staticmethod
    def retrieve_quadkeys_by_exposure_entity_aggregated_source_id(
        exposure_entity, aggregated_source_id, db_gde_tiles_config, db_table
    ):
        """
        This function retrieves all quadkeys associated with 'exposure_entity' and
        'aggregated_source_id' in 'db_table' of the database whose credentials are given in
        'db_gde_tiles_config'.

        Args:
            exposure_entity (str):
                3-character code of the exposure entity for which the quadkeys will be
                retrieved.
            aggregated_source_id (int):
                ID of the source of the aggregated exposure model for which the quadkeys will be
                retrieved.
            db_gde_tiles_config (dict):
                Dictionary containing the credentials needed to connect to the SQL database in
                which information on the data units is stored. The keys of the dictionary need
                to be:
                    host (str):
                        SQL database host address.
                    dbname (str):
                        Name of the SQL database.
                    port (int):
                        Port where the SQL database can be found.
                    username (str):
                        User name to connect to the SQL database.
                    password (str):
                        Password associated with self.username.
            db_table (str):
                Name of the table of the SQL database where the data units are stored. It is
                assumed that this table contains, at least, the following fields:
                    quadkey (str):
                        String indicating the quadkey of a tile.
                    aggregated_source_id (int):
                        ID of the source of the aggregated exposure model.
                    exposure_entity (str):
                        3-character code of the exposure entity.

        Returns:
            quadkeys (list of str):
                List of all quadkeys associated with 'exposure_entity' and
                'aggregated_source_id' (empty if none are found).
        """

        sql_query = "SELECT DISTINCT(quadkey) FROM %s "
        sql_query += "WHERE exposure_entity='%s' AND aggregated_source_id=%s;"

        exec_result = DatabaseQueries._fetch_all(
            db_gde_tiles_config, sql_query % (db_table, exposure_entity, aggregated_source_id)
        )

        # Each row contains only one column: the quadkey
        quadkeys = [row[0] for row in exec_result]

        return quadkeys

    @staticmethod
    def retrieve_quadkeys_by_data_unit_id_aggregated_source_id(
        data_unit_id, aggregated_source_id, db_gde_tiles_config, db_table
    ):
        """
        This function retrieves all quadkeys associated with 'data_unit_id' and
        'aggregated_source_id' in 'db_table' of the database whose credentials are given in
        'db_gde_tiles_config'.

        Args:
            data_unit_id (str):
                ID of the data unit for which the quadkeys will be retrieved.
            aggregated_source_id (int):
                ID of the source of the aggregated exposure model for which the quadkeys will be
                retrieved.
            db_gde_tiles_config (dict):
                Dictionary containing the credentials needed to connect to the SQL database in
                which information on the data units is stored. The keys of the dictionary need
                to be:
                    host (str):
                        SQL database host address.
                    dbname (str):
                        Name of the SQL database.
                    port (int):
                        Port where the SQL database can be found.
                    username (str):
                        User name to connect to the SQL database.
                    password (str):
                        Password associated with self.username.
            db_table (str):
                Name of the table of the SQL database where the data units are stored. It is
                assumed that this table contains, at least, the following fields:
                    quadkey (str):
                        String indicating the quadkey of a tile.
                    aggregated_source_id (int):
                        ID of the source of the aggregated exposure model.
                    data_unit_id (str):
                        ID of the data unit.

        Returns:
            quadkeys (list of str):
                List of all quadkeys associated with 'data_unit_id' and 'aggregated_source_id'
                (empty if none are found).
        """

        sql_query = "SELECT DISTINCT(quadkey) FROM %s "
        sql_query += "WHERE data_unit_id='%s' AND aggregated_source_id=%s;"

        exec_result = DatabaseQueries._fetch_all(
            db_gde_tiles_config, sql_query % (db_table, data_unit_id, aggregated_source_id)
        )

        # Each row contains only one column: the quadkey
        quadkeys = [row[0] for row in exec_result]

        return quadkeys
while attempting to retrieve the ID of aggregated exposure model with name " + "'%s': more than one or no entries were found." % (config.model_name) + ) + raise OSError(error_message) + logger.info( + "aggregated_source_id of aggregated exposure model with name '%s' " + "and format '%s' retrieved: %s" + % (config.model_name, aggregated_source_format, aggregated_source_id) + ) + + # Interpret and update config.exposure_entities_to_run + config.interpret_exposure_entities_to_run(aggregated_source_id) + + if len(config.exposure_entities_to_run) < 1: + error_message = "Attribute 'exposure_entities_to_run' of configuration is an empty list" + raise OSError(error_message) + logger.info( + "%s exposure entity(ies) will be run: %s" + % ( + str(len(config.exposure_entities_to_run)), + ", ".join(config.exposure_entities_to_run), + ) + ) + + logger.info("Retrieving list of quadkeys to process") + config.determine_quadkeys_to_process( + aggregated_source_id, config.database_gde_tiles, "data_unit_tiles" + ) + + logger.info("%s quadkeys will be processed" % (config.number_quadkeys_to_process)) + + for quadkeys_group in config.quadkeys_to_process.keys(): + logger.info( + "Processing of %s quadkeys from quadkey group '%s' has started" + % (len(config.quadkeys_to_process[quadkeys_group]), quadkeys_group) + ) + # Leave the program logger.info("gde-exporter has finished") sys.exit() diff --git a/setup.py b/setup.py index e8046db658d05a855bf2532c87896285b30a62d9..ccd016553294ef48c2731636e4edb340777a68bf 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,11 @@ setup( keywords="Global Dynamic Exposure, GDE, buildings, exposure model", author="Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ", license="AGPLv3+", - install_requires=["numpy"], + install_requires=[ + "numpy", + # pylint: disable=line-too-long + "gdeimporter@git+https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/gde-importer.git", # noqa: E501 + ], extras_require={ "tests": tests_require, "linters": 
linters_require, diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..bd94e9d901bd2e76005a65c746d736c952788981 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2022: +# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero +# General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. 
+ +import os +from pathlib import Path +import pytest +from dotenv import load_dotenv +from gdeimporter.tools.database import Database + +load_dotenv(Path(".env").resolve()) + + +@pytest.fixture +def test_db(): + """A test database simulating to contain the tables needed to run the gde-exporter.""" + + init_test_db() + return + + +def init_test_db(): + """Populates the test database.""" + + if "GDEEXPORTER_DB_HOST" in os.environ: # When running the CI pipeline + db_built_up_config = { + "host": os.environ.get("GDEEXPORTER_DB_HOST"), + "dbname": os.environ.get("GDEEXPORTER_DB"), + "port": "", + "username": os.environ.get("GDEEXPORTER_USER"), + "password": os.environ.get("GDEEXPORTER_PASSWORD"), + } + + # Create Database instance and establish the connection and cursor + db = Database(**db_built_up_config) + db.create_connection_and_cursor() + + # Create columns and populate the tables + with open("tests/data/test_database_set_up.sql", "r") as file: + for command in file.read().split(";"): + if command != "\n": + db.cursor.execute(command) + + db.close_connection() diff --git a/tests/data/config_for_testing_geographic_bbox.yml b/tests/data/config_for_testing_geographic_bbox.yml new file mode 100644 index 0000000000000000000000000000000000000000..1dae02efeb1742ae18e804ce0a2b2b8a6a304cb7 --- /dev/null +++ b/tests/data/config_for_testing_geographic_bbox.yml @@ -0,0 +1,17 @@ +model_name: esrm20 +occupancies_to_run: residential, commercial +exposure_entities_to_run: Greece +exposure_entities_code: ISO3 +geographic_selection: + selection_mode: bounding_box + bounding_box: + lon_w: 23.703371 + lon_e: 23.703597 + lat_s: 37.965450 + lat_n: 37.972561 + quadkeys_file: /path/to/quadkeys.txt +database_gde_tiles: + host: host.somewhere.xx + dbname: some_database_name + username: some_username + password: some_password diff --git a/tests/data/config_for_testing_geographic_bbox_missing.yml b/tests/data/config_for_testing_geographic_bbox_missing.yml new file mode 100644 index 
0000000000000000000000000000000000000000..a76462bcc8e28d79570485c5fa518ce845dbbff9 --- /dev/null +++ b/tests/data/config_for_testing_geographic_bbox_missing.yml @@ -0,0 +1,12 @@ +model_name: esrm20 +occupancies_to_run: residential, commercial +exposure_entities_to_run: Greece +exposure_entities_code: ISO3 +geographic_selection: + selection_mode: bounding_box + quadkeys_file: /path/to/quadkeys.txt +database_gde_tiles: + host: host.somewhere.xx + dbname: some_database_name + username: some_username + password: some_password diff --git a/tests/data/config_for_testing_geographic_data_units.yml b/tests/data/config_for_testing_geographic_data_units.yml new file mode 100644 index 0000000000000000000000000000000000000000..56f20910ea21e37c36cc0ea9db2bcce3e76db22c --- /dev/null +++ b/tests/data/config_for_testing_geographic_data_units.yml @@ -0,0 +1,13 @@ +model_name: esrm20 +occupancies_to_run: residential, commercial +exposure_entities_to_run: Greece +exposure_entities_code: ISO3 +geographic_selection: + selection_mode: data_unit_id + data_unit_ids: ABC_10278, DEF_00000 + quadkeys_file: /path/to/quadkeys.txt +database_gde_tiles: + host: host.somewhere.xx + dbname: some_database_name + username: some_username + password: some_password diff --git a/tests/data/config_for_testing_geographic_data_units_missing.yml b/tests/data/config_for_testing_geographic_data_units_missing.yml new file mode 100644 index 0000000000000000000000000000000000000000..fc0efbecb30bb00929e9d16a2458278ea4e554aa --- /dev/null +++ b/tests/data/config_for_testing_geographic_data_units_missing.yml @@ -0,0 +1,12 @@ +model_name: esrm20 +occupancies_to_run: residential, commercial +exposure_entities_to_run: Greece +exposure_entities_code: ISO3 +geographic_selection: + selection_mode: data_unit_id + quadkeys_file: /path/to/quadkeys.txt +database_gde_tiles: + host: host.somewhere.xx + dbname: some_database_name + username: some_username + password: some_password diff --git 
a/tests/data/config_for_testing_geographic_quadkeys.yml b/tests/data/config_for_testing_geographic_quadkeys.yml new file mode 100644 index 0000000000000000000000000000000000000000..3d66fdac321404d6b99af76312136f24702f4205 --- /dev/null +++ b/tests/data/config_for_testing_geographic_quadkeys.yml @@ -0,0 +1,12 @@ +model_name: esrm20 +occupancies_to_run: residential, commercial +exposure_entities_to_run: Greece +exposure_entities_code: ISO3 +geographic_selection: + selection_mode: quadkeys + quadkeys_file: /path/to/quadkeys.txt +database_gde_tiles: + host: host.somewhere.xx + dbname: some_database_name + username: some_username + password: some_password diff --git a/tests/data/config_for_testing_geographic_quadkeys_missing.yml b/tests/data/config_for_testing_geographic_quadkeys_missing.yml new file mode 100644 index 0000000000000000000000000000000000000000..3fb4e3aad08bd594c66d1cbd8a964ae5edca3613 --- /dev/null +++ b/tests/data/config_for_testing_geographic_quadkeys_missing.yml @@ -0,0 +1,11 @@ +model_name: esrm20 +occupancies_to_run: residential, commercial +exposure_entities_to_run: Greece +exposure_entities_code: ISO3 +geographic_selection: + selection_mode: quadkeys +database_gde_tiles: + host: host.somewhere.xx + dbname: some_database_name + username: some_username + password: some_password diff --git a/tests/data/config_for_testing_good.yml b/tests/data/config_for_testing_good.yml new file mode 100644 index 0000000000000000000000000000000000000000..caa6f8bf217e46b4d2f2db152ae2d39d12ec8544 --- /dev/null +++ b/tests/data/config_for_testing_good.yml @@ -0,0 +1,11 @@ +model_name: esrm20 +occupancies_to_run: residential, commercial +exposure_entities_to_run: Italy +exposure_entities_code: ISO3 +geographic_selection: + selection_mode: exposure_entity +database_gde_tiles: + host: host.somewhere.xx + dbname: some_database_name + username: some_username + password: some_password diff --git a/tests/data/config_for_testing_missing.yml 
b/tests/data/config_for_testing_missing.yml new file mode 100644 index 0000000000000000000000000000000000000000..190e2032a0a4d28bd6c1ade3e7abd902aa3bff96 --- /dev/null +++ b/tests/data/config_for_testing_missing.yml @@ -0,0 +1,10 @@ +model_name: esrm20 +occupancies_to_run: residential, commercial +exposure_entities_code: ISO3 +geographic_selection: + selection_mode: exposure_entity +database_gde_tiles: + host: host.somewhere.xx + dbname: some_database_name + username: some_username + password: some_password diff --git a/tests/data/exposure_entities.csv b/tests/data/exposure_entities.csv new file mode 100644 index 0000000000000000000000000000000000000000..61567423b516df20f3d8f88e773e63bc57497cf0 --- /dev/null +++ b/tests/data/exposure_entities.csv @@ -0,0 +1 @@ +United_Kingdom,Lithuania,Greece \ No newline at end of file diff --git a/tests/data/exposure_entities.txt b/tests/data/exposure_entities.txt new file mode 100644 index 0000000000000000000000000000000000000000..959d0f006ad3a44f8d574223fb18f9b29ac989d2 --- /dev/null +++ b/tests/data/exposure_entities.txt @@ -0,0 +1,3 @@ +Spain +Turkey +Germany \ No newline at end of file diff --git a/tests/data/exposure_entities_non_countries.txt b/tests/data/exposure_entities_non_countries.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e7b173ea2a6a868f72b75da9b2f0fac1571da6a --- /dev/null +++ b/tests/data/exposure_entities_non_countries.txt @@ -0,0 +1,2 @@ +Entity1 +Entity2 \ No newline at end of file diff --git a/tests/data/test_database_set_up.sql b/tests/data/test_database_set_up.sql new file mode 100644 index 0000000000000000000000000000000000000000..d35b6ca91ce3a5dab496211c90b4adee5e0ffed4 --- /dev/null +++ b/tests/data/test_database_set_up.sql @@ -0,0 +1,108 @@ +DROP TABLE IF EXISTS aggregated_sources; +DROP TABLE IF EXISTS data_units; +DROP TABLE IF EXISTS data_unit_tiles; +DROP TYPE IF EXISTS occupancycase; +DROP EXTENSION IF EXISTS postgis; + +CREATE EXTENSION postgis; + +CREATE TYPE 
occupancycase AS ENUM ('residential', 'commercial', 'industrial'); + +CREATE TABLE aggregated_sources +( + aggregated_source_id SERIAL PRIMARY KEY, + name varchar, + format varchar +); +INSERT INTO aggregated_sources(name, format) +VALUES ('esrm20', 'esrm20'), +('second_source', 'bbb'), +('third_source', 'ccc'), +('first_source', 'ddd'); + +CREATE TABLE data_units +( + data_unit_id VARCHAR, + occupancy_case occupancycase, + aggregated_source_id SMALLINT, + exposure_entity CHAR(3), + buildings_total FLOAT, + dwellings_total FLOAT, + people_census FLOAT, + cost_total FLOAT, + geometry GEOMETRY, + + PRIMARY KEY (data_unit_id, occupancy_case, aggregated_source_id) +); +INSERT INTO data_units(data_unit_id, + occupancy_case, + aggregated_source_id, + exposure_entity, + buildings_total, + dwellings_total, + people_census, + cost_total, + geometry) +VALUES ( + 'ABC_10269', 'residential', 2, 'ABC', 0.0, 0.0, 0.0, 0.0, + ST_GeomFromText( + 'POLYGON((15.04625 37.48424,15.05455 37.48424,15.05455 37.475893,15.04625 37.475893,15.04625 37.48424))')), +( + 'ABC_10278', 'residential', 2, 'ABC', 0.0, 0.0, 0.0, 0.0, + ST_GeomFromText('POLYGON((15.05455 37.48424,15.0629 37.48424,15.0629 37.475893,15.05455 37.475893,15.05455 37.48424))')), +( + 'ABC_10277', 'residential', 2, 'ABC', 0.0, 0.0, 0.0, 0.0, + ST_GeomFromText('POLYGON((15.05455 37.475893,15.0629 37.475893,15.0629 37.4675485,15.05455 37.4675485,15.05455 37.475893))')); + +INSERT INTO data_units(data_unit_id, + occupancy_case, + aggregated_source_id, + exposure_entity, + buildings_total, + dwellings_total, + people_census, + cost_total) +VALUES ('DEF_00000', 'residential', 2, 'DEF', 0.0, 0.0, 0.0, 0.0); + +CREATE TABLE data_unit_tiles +( + quadkey char(18), + aggregated_source_id SMALLINT, + occupancy_case occupancycase, + exposure_entity char(3), + data_unit_id varchar, + size_data_unit_tile_area FLOAT, + size_data_unit_tile_built_up_area FLOAT, + fraction_data_unit_area FLOAT, + fraction_data_unit_built_up_area FLOAT, + 
aggregated_buildings FLOAT, + obm_buildings SMALLINT, + remainder_buildings FLOAT, + + PRIMARY KEY (quadkey, aggregated_source_id, occupancy_case, data_unit_id) +); +INSERT INTO data_unit_tiles(quadkey, + aggregated_source_id, + occupancy_case, + exposure_entity, + data_unit_id, + size_data_unit_tile_area, + size_data_unit_tile_built_up_area, + fraction_data_unit_area, + fraction_data_unit_built_up_area, + aggregated_buildings) +VALUES ('122010321033023130', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 15.7), +('122010321033023130', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 23.4), +('122010321033023120', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 39.1), +('122010321033023120', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 17.6), +('122010321033023132', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 34.4), +('122010321033023132', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 11.5), +('122010321033023121', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 26.2), +('122010321033023121', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 0.0), +('122010321033023123', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 16.5), +('122010321033023123', 2, 'commercial', 'ABC', 'ABC_10269', 0.0, 0.0, 0.0, 0.0, 0.0), +('122010321033032123', 2, 'residential', 'ABC', 'ABC_10278', 0.0, 0.0, 0.0, 0.0, 0.0), +('122010321033032123', 2, 'commercial', 'ABC', 'ABC_10278', 0.0, 0.0, 0.0, 0.0, 0.0), +('122010321033032301', 2, 'commercial', 'ABC', 'ABC_10278', 0.0, 0.0, 0.0, 0.0, 0.0), +('122010321033211220', 2, 'residential', 'DEF', 'DEF_00000', 0.0, 0.0, 0.0, 0.0, 0.0), +('122010321033211220', 2, 'commercial', 'DEF', 'DEF_00000', 0.0, 0.0, 0.0, 0.0, 0.0); diff --git a/tests/data/test_quadkeys.txt b/tests/data/test_quadkeys.txt new file mode 100644 index 0000000000000000000000000000000000000000..91d98d2dfc9f17a8857b0f7aa5d2780feae97c4b --- /dev/null +++ b/tests/data/test_quadkeys.txt @@ -0,0 
+1,5 @@ +120203220301101323,120220011012110003 +120232221130001023 +120210233222032112 +122100203301311323 +120210233222032112 diff --git a/tests/test_configuration.py b/tests/test_configuration.py new file mode 100644 index 0000000000000000000000000000000000000000..69e9ab278069289fd3f90d2a6b672b50938514e0 --- /dev/null +++ b/tests/test_configuration.py @@ -0,0 +1,352 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2022: +# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero +# General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. 
+ +import os +import pytest +from gdeexporter.configuration import Configuration + + +def test_Configuration(): + # Test the non-trivial case using a test data file + returned_config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml"), + force_config_over_hierarchies=True, + ) + assert returned_config.model_name == "esrm20" + assert len(returned_config.database_gde_tiles.keys()) == 4 + assert returned_config.database_gde_tiles["host"] == "host.somewhere.xx" + assert returned_config.database_gde_tiles["dbname"] == "some_database_name" + assert returned_config.database_gde_tiles["username"] == "some_username" + assert returned_config.database_gde_tiles["password"] == "some_password" + assert len(returned_config.exposure_entities_to_run) == 1 + assert returned_config.exposure_entities_to_run[0] == "Italy" + assert returned_config.exposure_entities_code == "ISO3" + assert len(returned_config.occupancies_to_run) == 2 + assert returned_config.occupancies_to_run[0] == "residential" + assert returned_config.occupancies_to_run[1] == "commercial" + assert returned_config.geographic_selection["selection_mode"] == "exposure_entity" + assert returned_config.geographic_selection["quadkeys_file"] is None + assert returned_config.geographic_selection["data_unit_ids"] is None + assert returned_config.geographic_selection["bounding_box"] is None + + # Test case in which the file is not found + with pytest.raises(OSError) as excinfo: + returned_config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "doesnotexist.yml"), + force_config_over_hierarchies=True, + ) + assert "OSError" in str(excinfo.type) + + # Test case in which a critical parameter is missing + with pytest.raises(OSError) as excinfo: + returned_config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_missing.yml"), + force_config_over_hierarchies=True, + ) + assert "OSError" in str(excinfo.type) + + # Test case: 
geographic selection defined by data unit IDs + returned_config = Configuration( + os.path.join( + os.path.dirname(__file__), "data", "config_for_testing_geographic_data_units.yml" + ), + ) + + expected_data_unit_ids = ["ABC_10278", "DEF_00000"] + + assert returned_config.geographic_selection["selection_mode"] == "data_unit_id" + assert len(returned_config.geographic_selection["data_unit_ids"]) == len( + expected_data_unit_ids + ) + for data_unit_id in expected_data_unit_ids: + assert data_unit_id in returned_config.geographic_selection["data_unit_ids"] + assert returned_config.geographic_selection["quadkeys_file"] is None + assert returned_config.geographic_selection["bounding_box"] is None + + # Test case: geographic selection defined by list of quadkeys from file + returned_config = Configuration( + os.path.join( + os.path.dirname(__file__), "data", "config_for_testing_geographic_quadkeys.yml" + ), + ) + + assert returned_config.geographic_selection["selection_mode"] == "quadkeys" + assert returned_config.geographic_selection["bounding_box"] is None + assert returned_config.geographic_selection["data_unit_ids"] is None + + # Test case: geographic selection defined by bounding box + returned_config = Configuration( + os.path.join( + os.path.dirname(__file__), "data", "config_for_testing_geographic_bbox.yml" + ), + ) + + assert returned_config.geographic_selection["selection_mode"] == "bounding_box" + assert round(returned_config.geographic_selection["bounding_box"]["lon_w"], 6) == round( + 23.703371, 6 + ) + assert round(returned_config.geographic_selection["bounding_box"]["lon_e"], 6) == round( + 23.703597, 6 + ) + assert round(returned_config.geographic_selection["bounding_box"]["lat_s"], 6) == round( + 37.965450, 6 + ) + assert round(returned_config.geographic_selection["bounding_box"]["lat_n"], 6) == round( + 37.972561, 6 + ) + assert returned_config.geographic_selection["quadkeys_file"] is None + assert returned_config.geographic_selection["data_unit_ids"] 
is None + + # Test case: geographic selection defined by data unit IDs but sub-parameter missing + with pytest.raises(OSError) as excinfo: + returned_config = Configuration( + os.path.join( + os.path.dirname(__file__), + "data", + "config_for_testing_geographic_data_units_missing.yml", + ), + ) + assert "OSError" in str(excinfo.type) + + # Test case: geographic selection defined by bounding box but sub-parameter missing + with pytest.raises(OSError) as excinfo: + returned_config = Configuration( + os.path.join( + os.path.dirname(__file__), + "data", + "config_for_testing_geographic_bbox_missing.yml", + ), + ) + assert "OSError" in str(excinfo.type) + + # Test case: geographic selection defined by quadkeys from file but sub-parameter missing + with pytest.raises(OSError) as excinfo: + returned_config = Configuration( + os.path.join( + os.path.dirname(__file__), + "data", + "config_for_testing_geographic_quadkeys_missing.yml", + ), + ) + assert "OSError" in str(excinfo.type) + + +def test_Configuration_interpret_exposure_entities_to_run(test_db): + returned_config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml"), + ) + + # One country name provided, 'exposure_entities_code' is ISO3 + returned_config.interpret_exposure_entities_to_run() + assert len(returned_config.exposure_entities_to_run) == 1 + assert returned_config.exposure_entities_to_run[0] == "ITA" + + # Several country names provided, 'exposure_entities_code' is ISO3 + returned_config.exposure_entities_to_run = ["Italy", "France", "Portugal"] + returned_config.interpret_exposure_entities_to_run() + assert len(returned_config.exposure_entities_to_run) == 3 + assert returned_config.exposure_entities_to_run[0] == "ITA" + assert returned_config.exposure_entities_to_run[1] == "FRA" + assert returned_config.exposure_entities_to_run[2] == "PRT" + + # Several country names provided, one does not exist, 'exposure_entities_code' is ISO3 + 
returned_config.exposure_entities_to_run = ["Italy", "France", "England"] + returned_config.interpret_exposure_entities_to_run() + assert len(returned_config.exposure_entities_to_run) == 2 + assert returned_config.exposure_entities_to_run[0] == "ITA" + assert returned_config.exposure_entities_to_run[1] == "FRA" + + # A name of a TXT file is provided, 'exposure_entities_code' is ISO3 + returned_config.exposure_entities_to_run = [ + os.path.join(os.path.dirname(__file__), "data", "exposure_entities.txt") + ] + returned_config.interpret_exposure_entities_to_run() + assert len(returned_config.exposure_entities_to_run) == 3 + assert returned_config.exposure_entities_to_run[0] == "ESP" + assert returned_config.exposure_entities_to_run[1] == "TUR" + assert returned_config.exposure_entities_to_run[2] == "DEU" + + # A name of a CSV file is provided, 'exposure_entities_code' is ISO3 + returned_config.exposure_entities_to_run = [ + os.path.join(os.path.dirname(__file__), "data", "exposure_entities.csv") + ] + returned_config.interpret_exposure_entities_to_run() + assert len(returned_config.exposure_entities_to_run) == 3 + assert returned_config.exposure_entities_to_run[0] == "GBR" + assert returned_config.exposure_entities_to_run[1] == "LTU" + assert returned_config.exposure_entities_to_run[2] == "GRC" + + # "all" is provided, 'exposure_entities_code' is ISO3 + returned_config.exposure_entities_to_run = ["all"] + returned_config.interpret_exposure_entities_to_run(2) + assert len(returned_config.exposure_entities_to_run) == 2 + assert returned_config.exposure_entities_to_run[0] == "ABC" + assert returned_config.exposure_entities_to_run[1] == "DEF" + + # A name of a TXT file is provided, 'exposure_entities_code' is a dictionary + returned_config.exposure_entities_to_run = [ + os.path.join(os.path.dirname(__file__), "data", "exposure_entities_non_countries.txt") + ] + returned_config.exposure_entities_code = { + "Entity1": "EN1", + "Entity2": "ET2", + } + 
returned_config.interpret_exposure_entities_to_run() + assert len(returned_config.exposure_entities_to_run) == 2 + assert returned_config.exposure_entities_to_run[0] == "EN1" + assert returned_config.exposure_entities_to_run[1] == "ET2" + + # A name of a TXT file is provided, 'exposure_entities_code' is a dictionary, one missing + returned_config.exposure_entities_to_run = [ + os.path.join(os.path.dirname(__file__), "data", "exposure_entities_non_countries.txt") + ] + returned_config.exposure_entities_code = { + "Entity1": "EN1", + "Entity3": "ET3", + } + returned_config.interpret_exposure_entities_to_run() + assert len(returned_config.exposure_entities_to_run) == 1 + assert returned_config.exposure_entities_to_run[0] == "EN1" + + +def test_Configuration_determine_quadkeys_to_process(test_db): + # Read a base config file + returned_config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml") + ) + + # Test case: geographic selection defined by exposure entities + # Manually modify the exposure entities to run + returned_config.exposure_entities_to_run = ["ABC", "DEF"] + + returned_config.determine_quadkeys_to_process( + 2, returned_config.database_gde_tiles, "data_unit_tiles" + ) + + expected_quadkeys_to_process = { + "ABC": [ + "122010321033023130", + "122010321033023120", + "122010321033023132", + "122010321033023121", + "122010321033023123", + "122010321033032123", + "122010321033032301", + ], + "DEF": ["122010321033211220"], + } + + assert len(returned_config.quadkeys_to_process.keys()) == len( + expected_quadkeys_to_process.keys() + ) + for exposure_entity_code in expected_quadkeys_to_process.keys(): + assert exposure_entity_code in returned_config.quadkeys_to_process.keys() + for quadkey in expected_quadkeys_to_process[exposure_entity_code]: + assert quadkey in returned_config.quadkeys_to_process[exposure_entity_code] + assert returned_config.number_quadkeys_to_process == 8 + + # Test case: geographic selection 
defined by list of quadkeys from file + returned_config = Configuration( + os.path.join( + os.path.dirname(__file__), "data", "config_for_testing_geographic_quadkeys.yml" + ), + ) + returned_config.geographic_selection["quadkeys_file"] = os.path.join( + os.path.dirname(__file__), "data", "test_quadkeys.txt" + ) + + returned_config.determine_quadkeys_to_process( + 2, returned_config.database_gde_tiles, "data_unit_tiles" + ) + + expected_quadkeys_to_process = { + "quadkeys_list": [ + "120203220301101323", + "120220011012110003", + "120232221130001023", + "120210233222032112", + "122100203301311323", + ], + } + + assert len(returned_config.quadkeys_to_process.keys()) == len( + expected_quadkeys_to_process.keys() + ) + for quadkey in expected_quadkeys_to_process["quadkeys_list"]: + assert quadkey in returned_config.quadkeys_to_process["quadkeys_list"] + assert returned_config.number_quadkeys_to_process == len( + expected_quadkeys_to_process["quadkeys_list"] + ) + + # Test case: geographic selection defined by data unit IDs + returned_config = Configuration( + os.path.join( + os.path.dirname(__file__), "data", "config_for_testing_geographic_data_units.yml" + ), + ) + + returned_config.determine_quadkeys_to_process( + 2, returned_config.database_gde_tiles, "data_unit_tiles" + ) + + expected_quadkeys_to_process = { + "ABC_10278": ["122010321033032123", "122010321033032301"], + "DEF_00000": ["122010321033211220"], + } + + assert len(returned_config.quadkeys_to_process.keys()) == len( + expected_quadkeys_to_process.keys() + ) + for data_unit_id in expected_quadkeys_to_process.keys(): + assert data_unit_id in returned_config.quadkeys_to_process.keys() + for quadkey in expected_quadkeys_to_process[data_unit_id]: + assert quadkey in returned_config.quadkeys_to_process[data_unit_id] + assert returned_config.number_quadkeys_to_process == 3 + + # Test case: geographic selection defined by bounding box + returned_config = Configuration( + os.path.join( + os.path.dirname(__file__), 
"data", "config_for_testing_geographic_bbox.yml" + ), + ) + + returned_config.determine_quadkeys_to_process( + 2, returned_config.database_gde_tiles, "data_unit_tiles" + ) + + expected_quadkeys_to_process = { + "bounding_box": [ + "122100203301121302", + "122100203301121320", + "122100203301121322", + "122100203301123100", + "122100203301123102", + "122100203301123120", + "122100203301123122", + "122100203301123300", + ], + } + + assert len(returned_config.quadkeys_to_process.keys()) == len( + expected_quadkeys_to_process.keys() + ) + for quadkey in expected_quadkeys_to_process["bounding_box"]: + assert quadkey in returned_config.quadkeys_to_process["bounding_box"] + assert returned_config.number_quadkeys_to_process == len( + expected_quadkeys_to_process["bounding_box"] + ) diff --git a/tests/test_database_queries.py b/tests/test_database_queries.py new file mode 100644 index 0000000000000000000000000000000000000000..423cde31ecb3bd34c6e377483c86c738d85edf56 --- /dev/null +++ b/tests/test_database_queries.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2022: +# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero +# General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. 
+ +import os +from gdeexporter.configuration import Configuration +from gdeexporter.database_queries import DatabaseQueries + + +def test_retrieve_aggregated_source_id_and_format(test_db): + # Database connection (the Configuration class will define the credentials based on whether + # the code is running in the CI or locally) + config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml") + ) + + ( + returned_aggregated_source_id, + returned_aggregated_format, + ) = DatabaseQueries.retrieve_aggregated_source_id_and_format( + "second_source", config.database_gde_tiles, "aggregated_sources" + ) + + assert returned_aggregated_source_id == 2 + assert returned_aggregated_format == "bbb" + + +def test_retrieve_all_exposure_entities_of_aggregated_source_id(test_db): + # Database connection (the Configuration class will define the credentials based on whether + # the code is running in the CI or locally) + config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml") + ) + + # aggregated_source_id exists, several exposure entities associated with it + returned_exposure_entities = ( + DatabaseQueries.retrieve_all_exposure_entities_of_aggregated_source_id( + 2, config.database_gde_tiles, "data_units" + ) + ) + + assert len(returned_exposure_entities) == 2 + assert "ABC" in returned_exposure_entities + assert "DEF" in returned_exposure_entities + + # aggregated_source_id exists, no exposure entities associated with it + returned_exposure_entities = ( + DatabaseQueries.retrieve_all_exposure_entities_of_aggregated_source_id( + 3, config.database_gde_tiles, "data_units" + ) + ) + + assert len(returned_exposure_entities) == 0 + + # aggregated_source_id does not exist + returned_exposure_entities = ( + DatabaseQueries.retrieve_all_exposure_entities_of_aggregated_source_id( + 9999, config.database_gde_tiles, "data_units" + ) + ) + + assert len(returned_exposure_entities) == 0 + + +def 
test_retrieve_quadkeys_by_exposure_entity_aggregated_source_id(test_db): + # Database connection (the Configuration class will define the credentials based on whether + # the code is running in the CI or locally) + config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml") + ) + + returned_quadkeys = ( + DatabaseQueries.retrieve_quadkeys_by_exposure_entity_aggregated_source_id( + "ABC", 2, config.database_gde_tiles, "data_unit_tiles" + ) + ) + + expected_quadkeys = [ + "122010321033023130", + "122010321033023120", + "122010321033023132", + "122010321033023121", + "122010321033023123", + "122010321033032123", + "122010321033032301", + ] + + assert len(returned_quadkeys) == len(expected_quadkeys) + + returned_quadkeys = ( + DatabaseQueries.retrieve_quadkeys_by_exposure_entity_aggregated_source_id( + "XYZ", 2, config.database_gde_tiles, "data_unit_tiles" + ) + ) + + assert len(returned_quadkeys) == 0 + + +def test_retrieve_quadkeys_by_data_unit_id_aggregated_source_id(test_db): + # Database connection (the Configuration class will define the credentials based on whether + # the code is running in the CI or locally) + config = Configuration( + os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml") + ) + + returned_quadkeys = DatabaseQueries.retrieve_quadkeys_by_data_unit_id_aggregated_source_id( + "ABC_10269", 2, config.database_gde_tiles, "data_unit_tiles" + ) + + expected_quadkeys = [ + "122010321033023130", + "122010321033023120", + "122010321033023132", + "122010321033023121", + "122010321033023123", + ] + + assert len(returned_quadkeys) == len(expected_quadkeys) + + returned_quadkeys = DatabaseQueries.retrieve_quadkeys_by_data_unit_id_aggregated_source_id( + "ABC_10269", 77, config.database_gde_tiles, "data_unit_tiles" + ) + + assert len(returned_quadkeys) == 0