diff --git a/.env b/.env new file mode 100644 index 0000000000000000000000000000000000000000..db1281f949517b5de4967b46427c063f2ee72bc9 --- /dev/null +++ b/.env @@ -0,0 +1,6 @@ +POSTGRES_DB=discretizer_database +POSTGRES_USER=postgres +POSTGRES_PASSWORD=docker +POSTGRES_HOST=discretizer-database +TILES_TABLE=tiles +WATERBODIES_DB=discretizer_database diff --git a/database/Dockerfile b/database/Dockerfile index fc5c0e7ec9f1bcadb5cc24a1f8095eca07f9406e..0f796ba5345e42a75601966a421354d829d24c90 100644 --- a/database/Dockerfile +++ b/database/Dockerfile @@ -24,6 +24,13 @@ RUN apt-get update --yes --fix-missing && apt-get upgrade --yes && \ ARG CACHEBUST=1 +# FIXME Kind of hacky, but this block seems to prevent the database container from possibly +# not starting due to missing locale entries +ENV LANGUAGE=en_US.UTF-8 +ENV LANG=en_US.UTF-8 +ENV LC_ALL=en_US.UTF-8 +RUN localedef -i en_US -f UTF-8 en_US.UTF-8 + # Clone into the tiles database migration scripts RUN cd /srv && git clone https://git.gfz-potsdam.de/dynamicexposure/completeness/tiles-database.git diff --git a/discretizer/Dockerfile b/discretizer/Dockerfile index 7c73793d79f608779da6a8df40e82f9a3266f6f8..63cf1a472adab4bf3eb1a831c3d8d77899bcec3b 100644 --- a/discretizer/Dockerfile +++ b/discretizer/Dockerfile @@ -30,6 +30,9 @@ RUN apt-get update --yes --fix-missing && apt-get upgrade --yes && \ ARG CACHEBUST=1 +# Disable `assert` statements +ENV PYTHONOPTIMIZE=1 + # Clone into the Discretizer RUN cd /srv && git clone https://git.gfz-potsdam.de/dynamicexposure/completeness/land-water-discretizer.git land-water-discretizer WORKDIR /srv/land-water-discretizer @@ -37,12 +40,15 @@ WORKDIR /srv/land-water-discretizer # Install imposm3 into `/opt` RUN cd /tmp && wget -q https://github.com/omniscale/imposm3/releases/download/v0.11.1/imposm-0.11.1-linux-x86-64.tar.gz && tar -xvf imposm-0.11.1-linux-x86-64.tar.gz --directory /opt -# Copy `config.ini` to the Discretizer +# Copy `config.ini`, and imposm3's `mapping.yml`, 
`config.json` to the Discretizer COPY ./files/config.ini /srv/land-water-discretizer/discretizer +COPY ./files/mapping.yml /srv/land-water-discretizer/import +COPY ./files/config.json /srv/land-water-discretizer/import # Install the Discretizer package RUN python -m pip install --upgrade pip -RUN pip3 install -e . +RUN pip3 --default-timeout=120 install -e . +# See that the Discretizer is working RUN discretize --help # Create a data directory @@ -50,18 +56,25 @@ RUN mkdir data # Download land polygons into `data` directory RUN wget -q --directory-prefix=data -N https://osmdata.openstreetmap.de/download/land-polygons-split-4326.zip -RUN cd data && unzip land-polygons-split-4326.zip +RUN cd data && unzip -o land-polygons-split-4326.zip # Download water (ocean) polygons into `data` directory RUN wget -q --directory-prefix=data -N https://osmdata.openstreetmap.de/download/water-polygons-split-4326.zip -RUN cd data && unzip water-polygons-split-4326.zip +RUN cd data && unzip -o water-polygons-split-4326.zip # Download coastline linestrings into `data` directory RUN wget -q --directory-prefix=data -N https://osmdata.openstreetmap.de/download/coastlines-split-4326.zip -RUN cd data && unzip coastlines-split-4326.zip +RUN cd data && unzip -o coastlines-split-4326.zip -# Copy tiles database creation script to container +# Copy tiles and waterbodies database creation scripts to container COPY ./files/init-tiles-database.sh /usr/local/bin RUN chmod +x /usr/local/bin/init-tiles-database.sh -CMD ["/usr/local/bin/init-tiles-database.sh", "data/quadkeys.out"] +COPY ./files/init-waterbodies-database.sh /usr/local/bin +RUN chmod +x /usr/local/bin/init-waterbodies-database.sh + +# Copy Discretizer job script to container +COPY ./files/run.sh /usr/local/bin +RUN chmod +x /usr/local/bin/run.sh + +CMD ["/usr/local/bin/run.sh"] # Set the workdir as persistent folder VOLUME ["/srv/land-water-discretizer"] diff --git a/discretizer/files/config.ini b/discretizer/files/config.ini index 
9638df3a46b88cd54171fec1802aca6099e52517..07218439b1416e62f15c2ef8457904c9b552807f 100644 --- a/discretizer/files/config.ini +++ b/discretizer/files/config.ini @@ -1,7 +1,7 @@ [DEFAULT] # Number of threads to spawn -num_threads = 8 +#num_threads = 16 # Whether to also use inland-water bodies with_inland_water = False # Whether to calculate off a db extract file or from the database diff --git a/discretizer/files/config.json b/discretizer/files/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a3ffc55c3e425ed90bf71633c7c762cf1d9558f0 --- /dev/null +++ b/discretizer/files/config.json @@ -0,0 +1,6 @@ +{ + "cachedir": "cache", + "diffdir": "diff", + "mapping": "import/mapping.yml", + "srid": 4326 +} diff --git a/discretizer/files/init-tiles-database.sh b/discretizer/files/init-tiles-database.sh index 082b8da81ca27aa066e819e2803e4ed5647e37cc..60c8bc64dc05b2dd41e24862255214bbdb84ad12 100644 --- a/discretizer/files/init-tiles-database.sh +++ b/discretizer/files/init-tiles-database.sh @@ -20,12 +20,20 @@ set -e out="quadkeys.out" +quadkeys="0 1 2 3" if [[ -z ${1+x} ]]; then # First parameter is the output directory printf 'Using default argument values.\n' else out=${1} + # Condition is true if there is at least a second command line parameter + if [[ "$2" != "" ]]; then + # Collect the command line arguments in an array + args=("$@") + # Second parameter onward should be parent quadkeys for which to create a tilegrid + quadkeys=("${args[@]:1}") + fi fi printf 'Writing quadkeys for calculation to %s.\n' "${out}" @@ -35,9 +43,11 @@ if [ "$(PGPASSWORD="${POSTGRES_PASSWORD}" psql -h "${POSTGRES_HOST}" -U "${POSTG then # Skip creation of tile grid, as there is already one present printf 'A tile grid in database %s exists. 
Skipping initialization.\n' "${POSTGRES_DB}" - init_tilegrid --quadkeys 1 --list-tile-level 9 --init-tile-level 1 --out "${out}" else # Create a new basic tile grid printf 'Initializing basic tile grid in database %s.\n' "${POSTGRES_DB}" - exec init_tilegrid --list-tile-level 9 --init-tile-level 1 --yes --new-tiles-database --out "${out}" + init_tilegrid --list-tile-level 9 --init-tile-level 1 --yes --new-tiles-database --out "${out}" fi + +# Expand `quadkeys` array into a string +init_tilegrid --quadkeys ${quadkeys[*]} --list-tile-level 9 --init-tile-level 1 --out "${out}" diff --git a/discretizer/files/init-waterbodies-database.sh b/discretizer/files/init-waterbodies-database.sh new file mode 100644 index 0000000000000000000000000000000000000000..c0d5f2e8fbef39cb028b7d80ca69f87ac70cb139 --- /dev/null +++ b/discretizer/files/init-waterbodies-database.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Uses OpenStreetMap data to set up a world-wide data set of inland waterbodies +# to be used by the `Discretizer`. Set-up will take a while to execute. + +if [ "$(PGPASSWORD="${POSTGRES_PASSWORD}" psql -h "${POSTGRES_HOST}" -U "${POSTGRES_USER}" -d "${WATERBODIES_DB}" -XtAc "SELECT exists(SELECT 1 FROM information_schema.tables WHERE table_schema = 'public' AND table_name = 'waterbody_polygons')")" = "t" ] +then + # Skip creation of inland waterbody tables when they are already present + printf 'Inland waterbody tables in database %s exist. 
Skipping initialization.\n' "${WATERBODIES_DB}" +else + # Create inland waterbody tables + printf 'Initializing world-wide inland waterbodies in database %s.\n' "${WATERBODIES_DB}" + baseurl="https://download.geofabrik.de" + # OSM region data to import from + regions=( "africa" "antarctica" "asia" "australia-oceania" "europe" "north-america" "central-america" "south-america") + # First download and cache all data + for region in "${regions[@]}" + do + printf 'Downloading region %s.\n' "${region}" + wget -q --directory-prefix=data -N "${baseurl}/${region}-latest.osm.pbf" + printf 'Caching %s.\n' "${region}" + /opt/imposm-0.11.1-linux-x86-64/imposm import -config import/config.json -read "data/${region}-latest.osm.pbf" -diff -connection postgis://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}/${WATERBODIES_DB}?prefix=NONE -appendcache + done + # Import cached data + printf 'Importing inland waterbodies to %s.\n' "${WATERBODIES_DB}" + /opt/imposm-0.11.1-linux-x86-64/imposm import -config import/config.json -write -diff -connection postgis://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}/${WATERBODIES_DB}?prefix=NONE -appendcache -optimize -deployproduction + # Delete downloaded OSM files + for region in "${regions[@]}" + do + printf 'Deleting %s.\n' "${region}" + rm "data/${region}-latest.osm.pbf" + done + printf 'Done. 
Inland waterbody tables in %s are ready.\n' "${WATERBODIES_DB}" +fi diff --git a/discretizer/files/mapping.yml b/discretizer/files/mapping.yml new file mode 100644 index 0000000000000000000000000000000000000000..46835ab520a854dd90e1c6430bc4d3b0d97ca1cf --- /dev/null +++ b/discretizer/files/mapping.yml @@ -0,0 +1,44 @@ +areas: + area_tags: [landuse, natural] +tags: + load_all: true + exclude: [created_by, source, "tiger:*"] +tables: + waterbody_polygons: + type: polygon + columns: + - name: osm_id + type: id + - name: geometry + type: validated_geometry + - name: name + key: name + type: string + - name: tags + type: hstore_tags + mapping: + natural: [water] + water: [__any__] + landuse: [reservoir] + filters: + reject: + water: ['gulf'] + waterbody_relations: + type: relation_member + columns: + - name: osm_id + type: id + - name: geometry + type: validated_geometry + - name: name + key: name + type: string + - name: tags + type: hstore_tags + mapping: + natural: [water] + water: [__any__] + landuse: [reservoir] + filters: + reject: + water: ['gulf'] diff --git a/discretizer/files/run.sh b/discretizer/files/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..91f54a4487c41d43f634d2176a27d25dfee67135 --- /dev/null +++ b/discretizer/files/run.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Sets up inland waterbody data and runs the Land-Water Discretizer + +# Determine available cores or overwrite in `discretizer/files/config.ini`. +# Leave `thresshold` amount of cores free +thresshold=2 +procs=$(nproc) +if [[ "${procs}" -gt "${thresshold}" ]]; then + export NUM_THREADS=$(( $(nproc) - "${thresshold}" )) +else + export NUM_THREADS=1 +fi + +# Automatically set up inland waterbody data world-wide +# +# The `init-waterbodies-database` script sets up a world-wide inland waterbody data set, ready +# to use by the Land-Water Discretizer. To manually set up a region comment out this line and +# follow the instructions in the next step. 
+/usr/local/bin/init-waterbodies-database.sh + +# Manually set up inland water data set +# +# To calculate a selected, isolated or smaller area, OSM region data has to be downloaded first. +# E.g. to extract inland waterbodies from OSM data for the British Isles, download regions and +# sub regions for `Britain and Ireland` +# +#wget -q --directory-prefix=data -N https://download.geofabrik.de/europe/britain-and-ireland-latest.osm.pbf +# +# Import the inland waterbodies from the downloaded data to the Discretizer database +# and cache downloaded data on disk. +# Use flags `-overwritecache` to overwrite or `-appendcache` to append to existing waterbodies, +# respectively. See: https://imposm.org/docs/imposm3/latest/tutorial.html +# E.g. read the downloaded OSM data for the British Isles and write the inland waterbodies to +# the waterbody database using the provided `config.json` configuration file. +# +#/opt/imposm-0.11.1-linux-x86-64/imposm import -config import/config.json -read data/britain-and-ireland-latest.osm.pbf -write -diff -connection postgis://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}/${WATERBODIES_DB}?prefix=NONE -appendcache -optimize -deployproduction + +# Set up tile grid table +# +# If the tiles database is empty, initialize a basic tile grid in the `tiles` table, +# and create a text file with level-9 calculation quadkeys, else only create the file +# and omit the table initialization. +# The calculation text file's path is provided as the first parameter to the script. +# Any additional parameter is a parent tile's quadkey from which to add level-9 +# quadkeys to the text file for calculation by the `Discretizer`. +# E.g. here the parent tiles' quadkeys cover the British Isles for which inland waterbodies +# have been imported to the database in the previous step. 
+# +#/usr/local/bin/init-tiles-database.sh data/quadkeys.out 031132 031133 031310 031311 + +# Determine level-9 quadkeys for Europe providing parent quadkeys that roughly cover +# Western Iceland (03101), Iceland and Scotland (0311), Ireland, England and Bay of Biscay +# (0313), Southern Scandinavia and Central and Eastern Europe (120), Northern Scandinavia (102), +# Spain (03311), Sardinia (12200), Sicily (12201), Greece (12210), Turkey (12211) +/usr/local/bin/init-tiles-database.sh data/quadkeys_europe.out 03101 0311 0313 120 102 03311 12200 12201 12210 12211 +# Alternatively create level-9 quadkeys covering the whole world, though many of them would +# be mostly water tiles without coastline. The Discretizer can handle them without any problems, +# but they still are jobs that use some CPU cycles. +#/usr/local/bin/init-tiles-database.sh data/quadkeys.out 0 1 2 3 + +# Run the Discretizer, telling it to calculate all before-hand determined level-9 quadkeys +discretize -vvv --file data/quadkeys_europe.out diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000000000000000000000000000000000..a6429b3570e16fdce9af6eaedb41a9739b826e62 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,48 @@ +version: '2.1' +services: + database: + container_name: discretizer-database + build: + context: database + dockerfile: Dockerfile + restart: always + env_file: + - .env + ports: + - "8000:5432" + expose: + - "5432" + volumes: + # Keep tiles data + - pg_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -d $${POSTGRES_DB} -U $${POSTGRES_USER}"] + interval: 10s + timeout: 5s + retries: 5 + + discretizer: + container_name: discretizer-app + shm_size: 10gb + build: + context: discretizer + dockerfile: Dockerfile + restart: "no" + env_file: + - .env + depends_on: + database: + condition: service_healthy + # Prepare and run the Discretizer here + command: + - /bin/sh + - -ecx + - | + run.sh + volumes: + # Keep config files, 
log files, etc in the discretizer + - discretizer:/srv/land-water-discretizer + +volumes: + pg_data: + discretizer: