Skip to content
Snippets Groups Projects
Commit f90efb43 authored by Danijel Schorlemmer
Browse files

Add program to upload population values into the `Metadata` table

parent cd85cb1c
No related branches found
No related tags found
1 merge request: !138 Resolve "Develop a Python program to upload population values per country to the database"
Pipeline #74268 passed
#!/usr/bin/env python3
# Copyright (C) 2024:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import logging
# Module-level logger. `logging.getLogger()` without a name returns the root logger. No handler
# or level is configured in this module, so where messages end up depends on the logging
# configuration of the importing program.
logger = logging.getLogger()
# ISO 3166-1 alpha-3 codes of all countries/territories accepted by `check_iso_code`, grouped
# by region.
# NOTE(review): a few entries (e.g. `BSA`, `TCY`, `XKO`) do not look like officially assigned
# ISO 3166-1 alpha-3 codes and are presumably project-specific codes -- confirm.
# The `fmt: off` / `fmt: on` markers are formatter directives that keep the compact
# multi-code-per-line layout from being reflowed.
# fmt: off
USED_ISO_CODES = [
    # Europe
    'ALB', 'AND', 'AUT', 'BLR', 'BEL', 'BIH', 'BSA', 'BGR', 'HRV', 'CYP', 'TCY', 'CZE', 'DNK',
    'EST', 'FRO', 'FIN', 'FRA', 'DEU', 'GGY', 'GIB', 'GRC', 'HUN', 'ISL', 'IRL', 'IMN', 'ITA',
    'JEY', 'XKO', 'LVA', 'LIE', 'LTU', 'LUX', 'MLT', 'MDA', 'MCO', 'MNE', 'NLD', 'MKD', 'NOR',
    'POL', 'PRT', 'ROU', 'SMR', 'SRB', 'SVK', 'SVN', 'ESP', 'SJM', 'SWE', 'CHE', 'TUR', 'UKR',
    'GBR', 'VAT',
    # Asia
    'AFG', 'ARM', 'AZE', 'BHR', 'BGD', 'BTN', 'IOT', 'BRN', 'KHM', 'CHN', 'GEO', 'HKG', 'IND',
    'IDN', 'IRN', 'IRQ', 'ISR', 'JPN', 'JOR', 'KAZ', 'KWT', 'KGZ', 'LAO', 'LBN', 'MAC', 'MYS',
    'MDV', 'MNG', 'MMR', 'NPL', 'PRK', 'OMN', 'PAK', 'PSE', 'PHL', 'QAT', 'RUS', 'SAU', 'SGP',
    'KOR', 'LKA', 'SYR', 'TWN', 'TJK', 'THA', 'TLS', 'TKM', 'ARE', 'UZB', 'VNM', 'YEM',
    # North America
    'BMU', 'CAN', 'GRL', 'MEX', 'SPM', 'USA',
    # Central America
    'AIA', 'ATG', 'ABW', 'BHS', 'BRB', 'BLZ', 'BES', 'VGB', 'CYM', 'CUB', 'CUW', 'DMA', 'DOM',
    'SLV', 'GRD', 'GLP', 'GTM', 'HTI', 'HND', 'JAM', 'MTQ', 'MSR', 'NIC', 'PAN', 'PRI', 'BLM',
    'KNA', 'LCA', 'MAF', 'SXM', 'VCT', 'TTO', 'TCA', 'VIR',
    # South America
    'ARG', 'BOL', 'BRA', 'CHL', 'COL', 'ECU', 'FLK', 'GUF', 'GUY', 'PRY', 'PER', 'SGS', 'SUR',
    'URY', 'VEN',
    # Africa
    'DZA', 'AGO', 'BEN', 'BWA', 'BFA', 'BDI', 'CMR', 'CPV', 'CAF', 'TCD', 'COM', 'COG', 'COD',
    'CIV', 'DJI', 'EGY', 'GNQ', 'ERI', 'SWZ', 'ETH', 'ATF', 'GAB', 'GMB', 'GHA', 'GIN', 'GNB',
    'KEN', 'LSO', 'LBR', 'LBY', 'MDG', 'MWI', 'MLI', 'MRT', 'MUS', 'MYT', 'MAR', 'MOZ', 'NAM',
    'NER', 'NGA', 'REU', 'RWA', 'SHN', 'STP', 'SEN', 'SYC', 'SLE', 'SOM', 'ZAF', 'SSD', 'SDN',
    'TZA', 'TGO', 'TUN', 'UGA', 'ZMB', 'ZWE',
    # Oceania and Antarctica
    'ASM', 'AUS', 'CCK', 'COK', 'CXR', 'PYF', 'FJI', 'GUM', 'KIR', 'MHL', 'FSM', 'NRU', 'NCL',
    'NZL', 'NIU', 'NFK', 'MNP', 'PLW', 'PNG', 'PCN', 'WSM', 'SLB', 'TKL', 'TON', 'TUV', 'UMI',
    'VUT', 'WLF', 'ATA', 'BVT', 'HMD',
]
# fmt: on
def check_iso_code(iso_code):
    """
    Validates the ISO 3166-1 alpha-3 code of a country/territory.

    A code is accepted if it is exactly three characters long and listed in
    `USED_ISO_CODES`. Any violation terminates the program by raising `SystemExit`.

    Args:
        iso_code (str):
            ISO 3166-1 alpha-3 code of a country/territory.

    Returns:
        The unchanged ISO 3166-1 alpha-3 code, if it is valid.

    Raises:
        SystemExit:
            If the code is not three characters long or is not in `USED_ISO_CODES`.
    """

    if len(iso_code) == 3:
        # Correct length: the code is valid only if it is one of the used codes.
        if iso_code in USED_ISO_CODES:
            return iso_code
        raise SystemExit(f"ISO code {iso_code} is unknown.")

    # Any other length is rejected before the list of used codes is consulted.
    raise SystemExit(
        f"ISO code is {len(iso_code)} characters long but should only be three characters "
        "long."
    )
def check_is_number_between(
    number: float, min_value: float, max_value: float, inclusive="both"
) -> bool:
    """
    Checks whether the given number `number` is within the interval specified by a minimum
    and a maximum value. The parameter `inclusive` defines which of the interval limits are
    inclusive (possible values are: `both`, `neither`, `left`, `right`). If any check fails,
    the function raises a `ValueError`. If all checks are successful, it returns `True`.

    Args:
        number (float or int):
            Number to be checked for lying within the given interval. NaN is rejected.
        min_value (float or int):
            Minimum value of the interval.
        max_value (float or int):
            Maximum value of the interval.
        inclusive (`both`, `neither`, `left`, `right`):
            Defines which interval limit is considered inclusive.

    Returns:
        True.

    Raises:
        ValueError:
            If `inclusive` has an unsupported value, if `number` is NaN, or if `number`
            lies outside the interval (including on an exclusive interval limit).
    """

    # Check if the `inclusive` parameter is correct.
    if inclusive not in ("both", "neither", "left", "right"):
        raise ValueError(
            "Parameter `inclusive` can be of `both`, `neither`, `left`, `right`"
            f" but `{inclusive}` was given."
        )

    # NaN is the only value that is not equal to itself. Every ordering comparison with NaN
    # is false, so without this guard NaN would slip through all bound checks below and be
    # reported as lying within the interval.
    if number != number:
        raise ValueError(f"Number `{number}` is not a number (NaN).")

    # Check for lower bound of interval.
    if number < min_value:
        raise ValueError(f"Number `{number}` is smaller than minimum value `{min_value}`.")
    if inclusive in ("neither", "right") and number == min_value:
        raise ValueError(f"Number `{number}` is equal to minimum value `{min_value}`.")

    # Check for upper bound of interval.
    if number > max_value:
        raise ValueError(f"Number `{number}` is larger than maximum value `{max_value}`.")
    if inclusive in ("neither", "left") and number == max_value:
        raise ValueError(f"Number `{number}` is equal to maximum value `{max_value}`.")

    return True
#!/usr/bin/env python3
# Copyright (C) 2024:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import logging
import csv
import sys
import argparse
import configparser
import datetime
import os
from exposurelib import PostGISExposure
import exposurelib.checks as checks
# Configure the root logger (`getLogger()` without a name) for this script: messages of level
# INFO and above are emitted, and a stream handler writes them to standard output.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler(sys.stdout))
class UploadPopulation:
    """
    This class uploads population values for each country (identified with its 3-letter
    ISO 3166-1 alpha-3 code) stored in a CSV file into the `Metadata` table as key-value pairs.
    The key is set as `<ISO code>.TotalPopulationReference` and the value is the population
    count. The overwrite option lets the user decide whether values of existing keys should be
    overwritten or not.

    Args:
        population_csv_filepath (str):
            Filepath of the CSV file containing the population values. The CSV file needs to
            contain at least these two columns:
                `country_iso_code`: ISO 3166-1 alpha-3 code of the respective country.
                `population`: Population count of the respective country.
        postgis_config (ConfigParser):
            Configuration read from an INI-file describing the access to the exposure database.
            This configuration needs one section called `Exposure` for the specification of the
            database access. This section has the following keys:
                `host`    : Host name of the server.
                `dbname`  : Name of the database.
                `port`    : Port to access the server.
                `username`: Name of the database user.
                `password`: Password of the database user.
                `timeout` : Database timeout (set to 0 for no timeout).
                `itersize`: Number of rows fetched from the server at one call.
        overwrite (bool):
            If set, overwrite existing values in the `Metadata` table.

    Attributes:
        population_csv_filepath (str):
            See above.
        exposure_db (PostGISExposure):
            Exposure database object, initialized using the configuration in `postgis_config`.
        overwrite (bool):
            See above.
    """

    def __init__(
        self,
        population_csv_filepath,
        postgis_config,
        overwrite=False,
    ):
        self.population_csv_filepath = population_csv_filepath
        # All keys of the `Exposure` section are passed on as keyword arguments.
        self.exposure_db = PostGISExposure(**dict(postgis_config["Exposure"]))
        self.exposure_db.connect()
        self.overwrite = overwrite

    def run(self):
        """
        Runs the upload of population values for each country/territory (identified with its
        3-letter ISO 3166-1 alpha-3 code) into the `Metadata` table of the exposure database.
        Each population value is stored under the key-value pair
            `<country_iso_code>.TotalPopulationReference` = <population value>.
        All values are read from a CSV file. Its filepath is stored in
        `self.population_csv_filepath`. The value of `self.overwrite` is passed on to the
        database layer to decide whether values of existing keys are overwritten.

        Raises:
            KeyError:
                If the CSV file is empty or lacks the `country_iso_code` or `population`
                column.
            ValueError:
                If a population value cannot be converted to a number. The range check in
                `checks.check_is_number_between` presumably raises it as well -- verify
                against `exposurelib.checks`.
        """

        # The `with` block guarantees that the file handle is closed, also if a check fails
        # midway. `newline=""` is the mode the `csv` module asks for, so that newlines inside
        # quoted fields are handled by the CSV parser itself.
        with open(self.population_csv_filepath, newline="") as csv_file:
            csv_reader = csv.DictReader(csv_file, delimiter=",")

            # Check if all necessary columns exist in the CSV file. `fieldnames` is `None`
            # for an empty file; treat that as "no columns" to get the descriptive `KeyError`
            # instead of a `TypeError` from the membership test.
            fieldnames = csv_reader.fieldnames or []
            for column in ("country_iso_code", "population"):
                if column not in fieldnames:
                    raise KeyError(
                        f"Column `{column}` does not exist in `{self.population_csv_filepath}`."
                    )

            # Perform the upload.
            for row in csv_reader:
                # Add type hint for `row` to avoid PyCharm indicating a type conflict.
                row: dict

                # Validate the ISO code and the population value before touching the database.
                country_iso_code = row["country_iso_code"]
                checks.check_iso_code(country_iso_code)
                population = row["population"]
                checks.check_is_number_between(float(population), 0, 1500000000)

                # The population is stored as the string read from the CSV file.
                self.exposure_db.upsert_key_value_pair(
                    f"{country_iso_code}.TotalPopulationReference",
                    str(population),
                    overwrite=self.overwrite,
                )

        self.exposure_db.commit_and_close()
def command_line_interface():
    """
    Command-line interface of the upload-population program. See explanations in the argument
    parser.

    Raises:
        SystemExit:
            Raised by the argument parser if a required argument is missing.
        IOError:
            If the config file or the population file cannot be read.
    """

    # Create the argument parser
    parser = argparse.ArgumentParser(
        description="""
        Upload-Population
        This program uploads population values for each country (identified with its 3-letter
        ISO 3166-1 alpha-3 code) stored in a CSV file into the `Metadata` table as key-value
        pairs. The key is set as `<ISO code>.TotalPopulationReference` and the value is the
        population count.
        """
    )
    parser.add_argument(
        "-c",
        "--config-file",
        required=True,
        type=str,
        help="""
        Config file (INI-type) describing the access to the exposure database. This file needs
        one section called `Exposure` for the specification of the access to the exposure
        database. This section has the following keys:
        `host` : Host name of the server.
        `dbname` : Name of the database.
        `port` : Port to access the server.
        `username`: Name of the database user.
        `password`: Password of the database user.
        `timeout` : Database timeout (set to 0 for no timeout).
        `itersize`: Number of rows fetched from the server at one call.
        """,
    )
    # The population file is mandatory: without it, the readability check below would be
    # called with `None` and fail with an unhelpful `TypeError`.
    parser.add_argument(
        "-p",
        "--population-filepath",
        required=True,
        type=str,
        help="""
        Filepath of the CSV file containing the population values. The CSV file needs to contain
        at least these two columns:
        `country_iso_code`: ISO 3166-1 alpha-3 code of the respective country.
        `population` : Population count of the respective country.
        """,
    )
    parser.add_argument(
        "-o",
        "--overwrite",
        required=False,
        action="store_true",
        help="Overwrite population values in `Metadata` table for already existing keys.",
    )

    # Read arguments from command line.
    args = parser.parse_args()
    population_filepath = args.population_filepath
    overwrite = args.overwrite

    # `ConfigParser.read` silently skips files it cannot open and returns the list of files
    # it parsed successfully, so an empty result means the config file was not read.
    postgis_config = configparser.ConfigParser()
    if not postgis_config.read(args.config_file):
        raise IOError("Config file cannot be read.")

    # Check if population file can be read.
    if not os.access(population_filepath, os.R_OK):
        raise IOError("Population file cannot be read.")

    start_time = datetime.datetime.now()
    logger.info(f"Start time: {start_time}.")
    upload_population = UploadPopulation(population_filepath, postgis_config, overwrite)
    upload_population.run()
    logger.info(f"Execution time: {datetime.datetime.now() - start_time}.")
# Run the command-line interface only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    command_line_interface()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment