Commit f61f6d2b authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added feature to define geographic area to process

parent 08fc1d27
Pipeline #42396 passed with stage
in 1 minute and 59 seconds
# Docker image used for the CI jobs
image: python:3.9-bullseye
# Additional container started alongside the jobs; "alias" is the host name under which
# it is reached (the same value is used for GDEEXPORTER_DB_HOST below)
services:
- name: $CI_REGISTRY/dynamicexposure/server-components/containers/docker-obm-database:master
alias: postgres
# Make pip cache the installed dependencies
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
# Name and credentials of the test database
# NOTE(review): presumably consumed by the database service container - confirm
POSTGRES_DB: testdatabase
POSTGRES_USER: tester
POSTGRES_PASSWORD: somepass
# Connection parameters read from the environment by the test suite (tests/conftest.py);
# they reuse the POSTGRES_* values defined above
GDEEXPORTER_DB_HOST: postgres
GDEEXPORTER_DB: ${POSTGRES_DB}
GDEEXPORTER_USER: ${POSTGRES_USER}
GDEEXPORTER_PASSWORD: ${POSTGRES_PASSWORD}
cache:
paths:
......
......@@ -21,6 +21,52 @@ git clone https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/gde-e
cd gde-exporter
pip3 install -e .
```
## Preparation
### Configuration
Copy the file `config_example.yml` to your working directory as `config.yml` and provide the
necessary parameters. Required parameters are:
- `model_name`: Name of the input aggregated exposure model to be processed.
- `occupancies_to_run`: List of occupancies for which the code will be run, separated by ", "
(comma and space). They need to exist for the `exposure format` of `model_name`. Currently
supported values: residential, commercial, industrial.
- `exposure_entities_to_run`: List of names of exposure entities for which the code will be run.
It is used even if `selection_mode` (see below) is not `exposure_entity`.
Currently supported options:
- "all": The list of names associated with `model_name` will be retrieved from the
[GDE Tiles](https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/database-gdetiles)
database.
- A comma-space-separated list of entity names: This list of names will be used.
- A full path to a .txt or .csv file: The list of names will be retrieved from the indicated
.txt/.csv file.
- `exposure_entities_code`: Either "ISO3" or a nested structure with exposure entities names
and 3-character codes. When running `model_name=esrm20`, "ISO3" is the preferred option.
- `geographic_selection`: Set of parameters that define the geographic area for which the output
will be produced:
- `selection_mode`: `exposure_entity`, `data_unit_id`, `quadkeys`, or `bounding_box`. In all
cases only data from the exposure entities specified in `exposure_entities_to_run` will be
considered, even if the geographic area includes other exposure entities (they will be
ignored). The meaning of the `selection_mode` options is as follows:
- `exposure_entity`: The quadkeys associated with the exposure entities specified in
`exposure_entities_to_run` will be retrieved from `database_gde_tiles` (see below) and output.
- `data_unit_id`: The quadkeys associated with the data units specified in `data_unit_ids`
(see below) will be retrieved from `database_gde_tiles` and output.
- `quadkeys`: The quadkeys contained in a TXT file whose file path is specified in
`quadkeys_file` (see below) will be retrieved and output.
- `bounding_box`: The quadkeys that contain the bounding box defined by the coordinates
`lon_w`, `lon_e`, `lat_s` and `lat_n`, under `bounding_box` (see below) will be retrieved and
output.
- `data_unit_ids`: Required if `selection_mode = data_unit_id`. List of IDs of data units,
separated by a comma and a space.
- `quadkeys_file`: Required if `selection_mode = quadkeys`. Full path to a TXT file
containing quadkeys (either one per row, comma-separated, or a mix).
- `bounding_box`: Required if `selection_mode = bounding_box`. Coordinates `lon_w`, `lon_e`,
`lat_s` and `lat_n` of the bounding box.
- `database_gde_tiles`: Credentials for the
[GDE Tiles](https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/database-gdetiles)
database where information on the GDE tiles is stored.
## Running gde-exporter
......
model_name: esrm20 # Needs to exist in 'aggregated_sources' database table
occupancies_to_run: residential, commercial, industrial # Need to exist for the indicated `model_name`
exposure_entities_to_run: all # Either "all", a comma-space-separated list of entity names, or the full path to a .txt or .csv file
exposure_entities_code: ISO3 # Either "ISO3" or a nested structure with exposure entities names and 3-character codes
geographic_selection: # Selection of the geographic area for which GDE will be output
selection_mode: exposure_entity # exposure_entity, data_unit_id, quadkeys, bounding_box
# If selection_mode = quadkeys
quadkeys_file: /path/to/quadkeys.txt
# If selection_mode = data_unit_id
data_unit_ids: data_unit_id_1, data_unit_id_2 # One or several, separated by comma and space
# If selection_mode = bounding_box
bounding_box: # Coordinates that define the bounding box, in degrees
lon_w: 23.703371
lon_e: 23.713597
lat_s: 37.965450
lat_n: 37.972561
database_gde_tiles: # Database where info on the GDE tiles is stored
host: localhost
dbname: gde_tiles_attica_2022_04_12_0900
port: 5432
username: tester
password: somepass
This diff is collapsed.
#!/usr/bin/env python3
# Copyright (C) 2022:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import logging
from gdeimporter.tools.database import Database
logger = logging.getLogger()
class DatabaseQueries:
    """This class contains methods used to query the OpenBuildingMap (OBM) and Global Dynamic
    Exposure (GDE) databases.

    All methods are static. Each call opens its own connection to the database, runs one
    query and closes the connection again.
    """

    @staticmethod
    def _fetch_all(db_config, sql_query, parameters):
        """This function connects to the database defined by 'db_config', executes
        'sql_query' with 'parameters' and returns all the rows of the result. The connection
        is closed even if executing the query or fetching the rows raises an exception.

        The values in 'parameters' are passed to the cursor separately from the text of the
        query, so that quoting and escaping are done by the database driver. This assumes
        that the cursor created by 'Database' follows the Python DB-API and accepts "%s"
        placeholders, as PostgreSQL drivers such as psycopg2 do (to be confirmed against
        gdeimporter.tools.database).

        Args:
            db_config (dict):
                Dictionary containing the credentials needed to connect to the SQL database
                (passed as keyword arguments to 'Database').
            sql_query (str):
                SQL query with one "%s" placeholder per element of 'parameters'.
            parameters (tuple):
                Values to be bound to the placeholders of 'sql_query'.

        Returns:
            rows (list of tuple):
                All rows returned by the query.
        """

        database = Database(**db_config)
        database.create_connection_and_cursor()
        try:
            database.cursor.execute(sql_query, parameters)
            return database.cursor.fetchall()
        finally:
            database.close_connection()

    @staticmethod
    def retrieve_aggregated_source_id_and_format(model_name, db_gde_tiles_config, db_table):
        """This function retrieves the ID and the format of the aggregated exposure model
        source whose name is 'model_name'.

        Args:
            model_name (str):
                Name of the source whose ID and format are to be retrieved.
            db_gde_tiles_config (dict):
                Dictionary containing the credentials needed to connect to the SQL database in
                which information on the aggregated_sources is stored. The keys of the
                dictionary need to be:
                    host (str):
                        SQL database host address.
                    dbname (str):
                        Name of the SQL database.
                    port (int):
                        Port where the SQL database can be found.
                    username (str):
                        User name to connect to the SQL database.
                    password (str):
                        Password associated with 'username'.
            db_table (str):
                Name of the table of the SQL database where the aggregated_sources are stored.
                It is inserted into the text of the query as is. It is assumed that this
                table contains, at least, the following fields:
                    aggregated_source_id (int):
                        ID of the source of the aggregated exposure model.
                    name (str):
                        Name of the source of the aggregated exposure model.
                    format (str):
                        Format of the aggregated exposure model.

        Returns:
            aggregated_source_id (int):
                ID of the source of the aggregated exposure model with name 'model_name'. If
                'model_name' is not found or is found more than once, 'aggregated_source_id'
                is -999.
            aggregated_source_format (str):
                Format of the aggregated exposure model with name 'model_name'. If 'model_name'
                is not found or is found more than once, 'aggregated_source_format' is
                "UNKNOWN".
        """

        # The table name cannot be a bound parameter; "%%s" is left as "%s" for the driver
        sql_query = "SELECT aggregated_source_id, format FROM %s WHERE name=%%s;" % db_table

        exec_result = DatabaseQueries._fetch_all(db_gde_tiles_config, sql_query, (model_name,))

        if len(exec_result) == 1:  # Exactly one entry exists --> retrieve
            aggregated_source_id, aggregated_source_format = exec_result[0][0], exec_result[0][1]
        else:  # No entry or more than one entry found, this is an error
            logger.error(
                "Error in retrieve_aggregated_source_id_and_format: "
                "more than one or no entry found for name = %s",
                model_name,
            )
            aggregated_source_id = -999
            aggregated_source_format = "UNKNOWN"

        return aggregated_source_id, aggregated_source_format

    @staticmethod
    def retrieve_all_exposure_entities_of_aggregated_source_id(
        aggregated_source_id, db_gde_tiles_config, db_table
    ):
        """This function retrieves the 3-character codes of all exposure entities associated
        with 'aggregated_source_id' in 'db_table' of the database whose credentials are given by
        'db_gde_tiles_config'.

        Args:
            aggregated_source_id (int):
                ID of the source of the aggregated exposure model to be run.
            db_gde_tiles_config (dict):
                Dictionary containing the credentials needed to connect to the SQL database in
                which information on exposure entities is stored. The keys of the dictionary
                need to be:
                    host (str):
                        SQL database host address.
                    dbname (str):
                        Name of the SQL database.
                    port (int):
                        Port where the SQL database can be found.
                    username (str):
                        User name to connect to the SQL database.
                    password (str):
                        Password associated with 'username'.
            db_table (str):
                Name of the table of the SQL database from which the exposure entities can be
                retrieved. It is inserted into the text of the query as is. It is assumed that
                this table contains, at least, the following fields:
                    aggregated_source_id (int):
                        ID of the source of the aggregated exposure model.
                    exposure_entity (str):
                        3-character code of the exposure entity.

        Returns:
            exposure_entities (list of str):
                List of 3-character codes of the exposure entities associated with
                'aggregated_source_id'. Empty list if none is found.
        """

        sql_query = (
            "SELECT DISTINCT(exposure_entity) FROM %s WHERE aggregated_source_id=%%s;" % db_table
        )

        exec_result = DatabaseQueries._fetch_all(
            db_gde_tiles_config, sql_query, (aggregated_source_id,)
        )

        # Each row holds one column; an empty result yields an empty list
        exposure_entities = [row[0] for row in exec_result]

        return exposure_entities

    @staticmethod
    def retrieve_quadkeys_by_exposure_entity_aggregated_source_id(
        exposure_entity, aggregated_source_id, db_gde_tiles_config, db_table
    ):
        """
        This function retrieves all quadkeys associated with 'exposure_entity' and
        'aggregated_source_id' in 'db_table' of the database whose credentials are given in
        'db_gde_tiles_config'.

        Args:
            exposure_entity (str):
                3-character code of the exposure entity for which the quadkeys will be
                retrieved.
            aggregated_source_id (int):
                ID of the source of the aggregated exposure model for which the quadkeys will
                be retrieved.
            db_gde_tiles_config (dict):
                Dictionary containing the credentials needed to connect to the SQL database in
                which information on the data units is stored. The keys of the dictionary need
                to be:
                    host (str):
                        SQL database host address.
                    dbname (str):
                        Name of the SQL database.
                    port (int):
                        Port where the SQL database can be found.
                    username (str):
                        User name to connect to the SQL database.
                    password (str):
                        Password associated with 'username'.
            db_table (str):
                Name of the table of the SQL database where the data units are stored. It is
                inserted into the text of the query as is. It is assumed that this table
                contains, at least, the following fields:
                    quadkey (str):
                        String indicating the quadkey of a tile.
                    aggregated_source_id (int):
                        ID of the source of the aggregated exposure model.
                    exposure_entity (str):
                        3-character code of the exposure entity.

        Returns:
            quadkeys (list of str):
                List of all quadkeys associated with 'exposure_entity' and
                'aggregated_source_id'. Empty list if none is found.
        """

        sql_query = "SELECT DISTINCT(quadkey) FROM %s " % db_table
        sql_query += "WHERE exposure_entity=%s AND aggregated_source_id=%s;"

        exec_result = DatabaseQueries._fetch_all(
            db_gde_tiles_config, sql_query, (exposure_entity, aggregated_source_id)
        )

        # Each row holds one column; an empty result yields an empty list
        quadkeys = [row[0] for row in exec_result]

        return quadkeys

    @staticmethod
    def retrieve_quadkeys_by_data_unit_id_aggregated_source_id(
        data_unit_id, aggregated_source_id, db_gde_tiles_config, db_table
    ):
        """
        This function retrieves all quadkeys associated with 'data_unit_id' and
        'aggregated_source_id' in 'db_table' of the database whose credentials are given in
        'db_gde_tiles_config'.

        Args:
            data_unit_id (str):
                ID of the data unit for which the quadkeys will be retrieved.
            aggregated_source_id (int):
                ID of the source of the aggregated exposure model for which the quadkeys will
                be retrieved.
            db_gde_tiles_config (dict):
                Dictionary containing the credentials needed to connect to the SQL database in
                which information on the data units is stored. The keys of the dictionary need
                to be:
                    host (str):
                        SQL database host address.
                    dbname (str):
                        Name of the SQL database.
                    port (int):
                        Port where the SQL database can be found.
                    username (str):
                        User name to connect to the SQL database.
                    password (str):
                        Password associated with 'username'.
            db_table (str):
                Name of the table of the SQL database where the data units are stored. It is
                inserted into the text of the query as is. It is assumed that this table
                contains, at least, the following fields:
                    quadkey (str):
                        String indicating the quadkey of a tile.
                    aggregated_source_id (int):
                        ID of the source of the aggregated exposure model.
                    data_unit_id (str):
                        ID of the data unit.

        Returns:
            quadkeys (list of str):
                List of all quadkeys associated with 'data_unit_id' and 'aggregated_source_id'.
                Empty list if none is found.
        """

        sql_query = "SELECT DISTINCT(quadkey) FROM %s " % db_table
        sql_query += "WHERE data_unit_id=%s AND aggregated_source_id=%s;"

        exec_result = DatabaseQueries._fetch_all(
            db_gde_tiles_config, sql_query, (data_unit_id, aggregated_source_id)
        )

        # Each row holds one column; an empty result yields an empty list
        quadkeys = [row[0] for row in exec_result]

        return quadkeys
......@@ -18,6 +18,8 @@
import logging
import sys
from gdeexporter.configuration import Configuration
from gdeexporter.database_queries import DatabaseQueries
# Add a logger printing error, warning, info and debug messages to the screen
logger = logging.getLogger()
......@@ -31,6 +33,57 @@ def main():
# Log the start of the run
logger.info("gde-exporter has started")
# Read configuration parameters
config = Configuration("config.yml")
(
aggregated_source_id,
aggregated_source_format,
) = DatabaseQueries.retrieve_aggregated_source_id_and_format(
config.model_name,
config.database_gde_tiles,
"aggregated_sources",
)
if aggregated_source_id < 0:
error_message = (
"Error while attempting to retrieve the ID of aggregated exposure model with name "
"'%s': more than one or no entries were found." % (config.model_name)
)
raise OSError(error_message)
logger.info(
"aggregated_source_id of aggregated exposure model with name '%s' "
"and format '%s' retrieved: %s"
% (config.model_name, aggregated_source_format, aggregated_source_id)
)
# Interpret and update config.exposure_entities_to_run
config.interpret_exposure_entities_to_run(aggregated_source_id)
if len(config.exposure_entities_to_run) < 1:
error_message = "Attribute 'exposure_entities_to_run' of configuration is an empty list"
raise OSError(error_message)
logger.info(
"%s exposure entity(ies) will be run: %s"
% (
str(len(config.exposure_entities_to_run)),
", ".join(config.exposure_entities_to_run),
)
)
logger.info("Retrieving list of quadkeys to process")
config.determine_quadkeys_to_process(
aggregated_source_id, config.database_gde_tiles, "data_unit_tiles"
)
logger.info("%s quadkeys will be processed" % (config.number_quadkeys_to_process))
for quadkeys_group in config.quadkeys_to_process.keys():
logger.info(
"Processing of %s quadkeys from quadkey group '%s' has started"
% (len(config.quadkeys_to_process[quadkeys_group]), quadkeys_group)
)
# Leave the program
logger.info("gde-exporter has finished")
sys.exit()
......
......@@ -28,7 +28,11 @@ setup(
keywords="Global Dynamic Exposure, GDE, buildings, exposure model",
author="Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ",
license="AGPLv3+",
install_requires=["numpy"],
install_requires=[
"numpy",
# pylint: disable=line-too-long
"gdeimporter@git+https://git.gfz-potsdam.de/dynamicexposure/globaldynamicexposure/gde-importer.git", # noqa: E501
],
extras_require={
"tests": tests_require,
"linters": linters_require,
......
#!/usr/bin/env python3
# Copyright (C) 2022:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import os
from pathlib import Path
import pytest
from dotenv import load_dotenv
from gdeimporter.tools.database import Database
load_dotenv(Path(".env").resolve())
@pytest.fixture
def test_db():
    """Pytest fixture that prepares the test database by populating it with the tables and
    contents needed to run the gde-exporter tests (see 'init_test_db'). The fixture itself
    provides no value (None) to the tests that request it.
    """

    init_test_db()
def init_test_db():
    """Populates the test database.

    The connection parameters are read from the environment variables GDEEXPORTER_DB_HOST,
    GDEEXPORTER_DB, GDEEXPORTER_USER and GDEEXPORTER_PASSWORD. The SQL statements contained
    in 'tests/data/test_database_set_up.sql' (separated by ";") are then executed one by one.

    Raises:
        RuntimeError:
            If the environment variable GDEEXPORTER_DB_HOST is not defined, as the database
            to connect to is then unknown.
    """

    if "GDEEXPORTER_DB_HOST" not in os.environ:
        # Fail with an explicit message instead of an unbound-variable error further below
        raise RuntimeError(
            "Environment variable 'GDEEXPORTER_DB_HOST' is not defined: "
            "the test database cannot be initialised"
        )

    db_built_up_config = {
        "host": os.environ.get("GDEEXPORTER_DB_HOST"),
        "dbname": os.environ.get("GDEEXPORTER_DB"),
        "port": "",
        "username": os.environ.get("GDEEXPORTER_USER"),
        "password": os.environ.get("GDEEXPORTER_PASSWORD"),
    }

    # Read the statements before connecting, so that no connection is left open if the file
    # cannot be read. Fragments made only of whitespace (e.g. what follows the last ";") are
    # not statements and are skipped.
    with open("tests/data/test_database_set_up.sql", "r") as file:
        commands = [command for command in file.read().split(";") if command.strip()]

    # Create Database instance and establish the connection and cursor
    db = Database(**db_built_up_config)
    db.create_connection_and_cursor()

    # Create columns and populate the tables; always release the connection
    try:
        for command in commands:
            db.cursor.execute(command)
    finally:
        db.close_connection()
# Test configuration: selection_mode = bounding_box, with the four coordinates of the
# bounding box provided. 'quadkeys_file' is also present although the selected mode is
# not 'quadkeys'.
model_name: esrm20
occupancies_to_run: residential, commercial
exposure_entities_to_run: Greece
exposure_entities_code: ISO3
geographic_selection:
selection_mode: bounding_box
bounding_box:
lon_w: 23.703371
lon_e: 23.703597
lat_s: 37.965450
lat_n: 37.972561
quadkeys_file: /path/to/quadkeys.txt
database_gde_tiles:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
# Test configuration: selection_mode = bounding_box, but no 'bounding_box' coordinates
# are given.
# NOTE(review): presumably used to test the handling of the missing parameter - confirm
# against the tests that load this file.
model_name: esrm20
occupancies_to_run: residential, commercial
exposure_entities_to_run: Greece
exposure_entities_code: ISO3
geographic_selection:
selection_mode: bounding_box
quadkeys_file: /path/to/quadkeys.txt
database_gde_tiles:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
# Test configuration: selection_mode = data_unit_id, with two data unit IDs provided in
# 'data_unit_ids'. 'quadkeys_file' is also present although the selected mode is not
# 'quadkeys'.
model_name: esrm20
occupancies_to_run: residential, commercial
exposure_entities_to_run: Greece
exposure_entities_code: ISO3
geographic_selection:
selection_mode: data_unit_id
data_unit_ids: ABC_10278, DEF_00000
quadkeys_file: /path/to/quadkeys.txt
database_gde_tiles:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
# Test configuration: selection_mode = data_unit_id, but no 'data_unit_ids' are given.
# NOTE(review): presumably used to test the handling of the missing parameter - confirm
# against the tests that load this file.
model_name: esrm20
occupancies_to_run: residential, commercial
exposure_entities_to_run: Greece
exposure_entities_code: ISO3
geographic_selection:
selection_mode: data_unit_id
quadkeys_file: /path/to/quadkeys.txt
database_gde_tiles:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
# Test configuration: selection_mode = quadkeys, with the path to the file of quadkeys
# provided in 'quadkeys_file'.
model_name: esrm20
occupancies_to_run: residential, commercial
exposure_entities_to_run: Greece
exposure_entities_code: ISO3
geographic_selection:
selection_mode: quadkeys
quadkeys_file: /path/to/quadkeys.txt
database_gde_tiles:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
# Test configuration: selection_mode = quadkeys, but no 'quadkeys_file' is given.
# NOTE(review): presumably used to test the handling of the missing parameter - confirm
# against the tests that load this file.
model_name: esrm20
occupancies_to_run: residential, commercial
exposure_entities_to_run: Greece
exposure_entities_code: ISO3
geographic_selection:
selection_mode: quadkeys
database_gde_tiles:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password
# Test configuration: selection_mode = exposure_entity, for the exposure entity "Italy".
# No further geographic parameters are given for this mode.
model_name: esrm20
occupancies_to_run: residential, commercial
exposure_entities_to_run: Italy
exposure_entities_code: ISO3
geographic_selection:
selection_mode: exposure_entity
database_gde_tiles:
host: host.somewhere.xx
dbname: some_database_name
username: some_username
password: some_password