Commit a4eaff3d authored by Felix Delattre's avatar Felix Delattre
Browse files

Switched to Python and augmented diffs

parent 0e5cbb44
Pipeline #28054 passed with stage
in 1 minute and 26 seconds
*.pyc
*.log
*.pkl
*.egg-info
Pipfile
Pipfile.lock
.idea
__pycache__
.cache
build
dist
env
.vscode
# NOTE(review): two `image:` keys follow. They look like the removed and the
# added line of the commit diff shown together - confirm which one is kept.
image: debian:bullseye-slim
image: python:3-bullseye
# Make pip cache the installed dependencies
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
# Paths handed to the CI cache: pip's download cache (see PIP_CACHE_DIR above)
# and the virtualenv created in before_script.
cache:
paths:
- .cache/pip
- venv/
before_script:
# NOTE(review): the apt/Go/shfmt steps below install shell tooling (shellcheck,
# shfmt); they presumably belong to the previous shell-based setup - confirm.
- apt update -y
- apt install -y git make shellcheck golang-go
- export GOPATH="$HOME/go"
- export PATH="$PATH:$GOPATH/bin"
- go get -u github.com/mvdan/sh/cmd/shfmt
# Print the interpreter version, then create and activate a virtualenv in
# `venv/` and install the package together with its `tests` extra.
- python3 -V
- pip3 install virtualenv
- virtualenv venv
- source venv/bin/activate
- pip3 install .
- pip3 install .[tests]
# Job: install the `linters` extra and run the Makefile's `check` target.
linters:
script:
- pip3 install .[linters] --quiet
- make check
# Job: run pytest on the `tests` directory.
tests:
interruptible: true
script:
- pytest tests
# pre-commit configuration: generic file hygiene hooks, black and flake8 for
# code style (both limited to 96 columns) and a locally installed pylint.
fail_fast: false
repos:
  # Generic file checks.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.3.0
    hooks:
      - id: check-yaml
      - id: end-of-file-fixer
      - id: check-builtin-literals
      - id: trailing-whitespace

  # Code formatter.
  - repo: https://github.com/psf/black
    rev: "20.8b1"
    hooks:
      - id: black
        args:
          - "--line-length=96"

  # Style checker.
  - repo: https://github.com/pycqa/flake8
    rev: "3.7.9"
    hooks:
      - id: flake8
        args:
          - "--max-line-length=96"

  # pylint is taken from the active environment (`language: system`) rather
  # than from an environment managed by pre-commit.
  - repo: local
    hooks:
      - id: pylint
        name: pylint
        entry: pylint
        language: system
        args:
          - "--disable=E0611"
          - "-E"
          - "-j4"
        types:
          - python
......@@ -616,4 +616,4 @@ an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
\ No newline at end of file
END OF TERMS AND CONDITIONS
# Shell sources used as prerequisites by the shellcheck/shfmt rules below.
SOURCES=./spearhead includes/*.sh
# NOTE(review): LENGTH is not referenced by any rule visible here - confirm it
# is still needed.
LENGTH=96
# NOTE(review): `check` and `format` are each defined twice in this section.
# This looks like the old (shellcheck/shfmt) and new (pre-commit) revisions of
# the commit diff shown together - confirm which recipes the file keeps. None
# of the targets is declared .PHONY.
# Run every configured pre-commit hook against all files.
check:
pre-commit run --all-files
# Lint the shell sources with shellcheck and print shfmt's formatting diff
# (-d) using a 2-space indent (-i 2).
check: $(SOURCES)
shellcheck -x $^
shfmt -i 2 -d $^
# Install the package with its `tests` and `linters` extras, install it in
# editable mode and register the pre-commit git hook.
install:
pip install .[tests]
pip install .[linters]
pip install -e .
pre-commit install
# Rewrite the shell sources in place (-w) with shfmt, 2-space indent.
format: $(SOURCES)
shfmt -i 2 -w $^
# Run only the `black` pre-commit hook against all files.
format:
pre-commit run --all-files black
=========
spearhead
Spearhead
=========
Continuously download data from `OpenStreetMap <https://openstreetmap.org/>`__
and populate a `PostgreSQL <https://www.postgresql.org/>`__ database using
`osm2pgsql <https://osm2pgsql.org/>`__.
Read augmented diffs from OpenStreetMap's Overpass API and trigger
Rabotnik based on changes to buildings.
Requirements
------------
* python >= 3.6
* osmdiff >= 0.1.9
Installation
------------
pip3 install .
Contributing
------------
Development and linting dependencies can be installed with:
pip3 install .[tests]
pip3 install .[linters]
We use ``black`` and ``flake8`` for consistent code style and ``pylint`` for
general linting. The project's linting setup is pre-defined in
``.pre-commit-config.yaml`` for use with `pre-commit <https://pre-commit.com/>`__.
pre-commit install
Copyright and copyleft
----------------------
......
#!/bin/bash
#
# Helper functions around OpenStreetMap database import
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
#######################################
# Block until the PostgreSQL server answers a query.
# Probes the server by listing its databases and retries every 10 seconds
# for as long as the probe fails.
# Globals:
#   SPEARHEAD_DATABASE_HOST (read by database::execute_command)
#   SPEARHEAD_DATABASE_USER (read by database::execute_command)
# Arguments:
#   None
#######################################
function database::wait_for_connection() {
  local -r probe_query="SELECT datname FROM pg_database;"

  while ! database::execute_command "${probe_query}"; do
    printf "Database is unavailable - trying again in 10s\n"
    sleep 10
  done
}
#######################################
# Run a single command through psql without selecting a database.
# The exit status is the one returned by psql.
# Globals:
#   SPEARHEAD_DATABASE_HOST
#   SPEARHEAD_DATABASE_USER
# Arguments:
#   Command to execute (SQL or psql meta-command)
#######################################
function database::execute_command() {
  local -a psql_args=(
    --host="${SPEARHEAD_DATABASE_HOST}"
    --username="${SPEARHEAD_DATABASE_USER}"
    --command="${1}"
  )

  psql "${psql_args[@]}"
}
#######################################
# Run a single query through psql against the configured database.
# The exit status is the one returned by psql.
# Globals:
#   SPEARHEAD_DATABASE_HOST
#   SPEARHEAD_DATABASE_NAME
#   SPEARHEAD_DATABASE_USER
# Arguments:
#   Query to execute
#######################################
function database::execute_query() {
  local -a psql_args=(
    --host="${SPEARHEAD_DATABASE_HOST}"
    --username="${SPEARHEAD_DATABASE_USER}"
    "${SPEARHEAD_DATABASE_NAME}"
    --command="${1}"
  )

  psql "${psql_args[@]}"
}
#######################################
# Add geospatial and advanced index extensions to a PostgreSQL database.
# Installs postgis, hstore, btree_gin and btree_gist, in that order, and
# terminates the script with status 1 as soon as the database cannot be
# reached or one extension cannot be created.
# Globals:
#   SPEARHEAD_DATABASE_HOST
#   SPEARHEAD_DATABASE_NAME
#   SPEARHEAD_DATABASE_USER
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout, error messages on stderr
#######################################
function database::add_extensions() {
  local entry label extension

  # Make sure the target database is reachable before trying to modify it.
  if ! database::execute_command "\connect ${SPEARHEAD_DATABASE_NAME}"; then
    printf "Error: Unable to connect to database: %s\n" "${SPEARHEAD_DATABASE_NAME}" >&2
    exit 1
  fi

  # Each entry is "<label used in messages>:<extension name>".
  for entry in "PostGIS:postgis" "hstore:hstore" "btree_gin:btree_gin" \
    "btree_gist:btree_gist"; do
    label="${entry%%:*}"
    extension="${entry##*:}"
    if database::execute_query "CREATE EXTENSION ${extension};"; then
      printf " %s extension added to %s database.\n" "${label}" \
        "${SPEARHEAD_DATABASE_NAME}"
    else
      printf " Could not add %s extension to %s database.\nAborting.\n" \
        "${label}" "${SPEARHEAD_DATABASE_NAME}" >&2
      exit 1
    fi
  done
}
#!/bin/bash
#
# Helper functions around OpenStreetMap database import
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
#######################################
# Import an OpenStreetMap data file into the database with osm2pgsql.
# Runs osm2pgsql in --create mode. Terminates the script with status 1 when
# the given file does not exist.
# Globals:
#   SPEARHEAD_DATABASE_HOST
#   SPEARHEAD_DATABASE_USER
#   SPEARHEAD_DATABASE_NAME
#   SPEARHEAD_IMPORT_STYLE
# Arguments:
#   Path of the file to import
#######################################
function import::openstreetmap_data() {
  local -r import_file="${1}"

  # Guard: nothing to do without a readable regular file.
  if [[ ! -f "${import_file}" ]]; then
    printf "Error: Import file '%s' not found.\nAborting.\n" "${import_file}"
    exit 1
  fi

  local -a osm2pgsql_args=(
    --create --verbose
    --host "${SPEARHEAD_DATABASE_HOST}"
    --username "${SPEARHEAD_DATABASE_USER}"
    --database "${SPEARHEAD_DATABASE_NAME}"
    --style "${SPEARHEAD_IMPORT_STYLE}"
    --slim
    --multi-geometry
    --hstore
    --latlong
    --extra-attributes
    --hstore-add-index
  )

  # No trailing newline: the line is continued by osm2pgsql's own output.
  printf "\nImporting OpenStreetMap data with "
  osm2pgsql "${osm2pgsql_args[@]}" "${import_file}"
}
#######################################
# Import an OpenStreetMap changeset file into the database with osm2pgsql.
# Runs osm2pgsql in --append mode. Terminates the script with status 1 when
# the given file does not exist.
# Globals:
#   SPEARHEAD_DATABASE_HOST
#   SPEARHEAD_DATABASE_USER
#   SPEARHEAD_DATABASE_NAME
#   SPEARHEAD_IMPORT_STYLE
# Arguments:
#   Path of the changeset file to import
#######################################
function import::openstreetmap_changeset() {
  local -r changeset_file="${1}"

  # Guard: nothing to do without a readable regular file.
  if [[ ! -f "${changeset_file}" ]]; then
    printf "Error: Changeset file '%s' not found.\nAborting.\n" "${changeset_file}"
    exit 1
  fi

  local -a osm2pgsql_args=(
    --append --verbose
    --host "${SPEARHEAD_DATABASE_HOST}"
    --username "${SPEARHEAD_DATABASE_USER}"
    --database "${SPEARHEAD_DATABASE_NAME}"
    --style "${SPEARHEAD_IMPORT_STYLE}"
    --slim
    --multi-geometry
    --hstore
    --latlong
    --extra-attributes
  )

  # No trailing newline: the line is continued by osm2pgsql's own output.
  printf "\nUpdating OpenStreetMap data with "
  osm2pgsql "${osm2pgsql_args[@]}" "${changeset_file}"
}
#!/usr/bin/env python3
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
from setuptools import setup, find_packages
# Optional dependency groups, exposed below as the ``tests`` and ``linters``
# extras (``pip install .[tests]`` / ``pip install .[linters]``).
tests_require = ["pytest"]
linters_require = ["pylint", "pre-commit"]

setup(
    name="spearhead",
    version="0.1",
    license="AGPLv3+",
    # Adjacent string literals are joined by the parser, so the text contains
    # exactly one space between "on" and "OpenStreetMap's".
    description=(
        "Trigger calculations on the Rabotnik Message Bus based on "
        "OpenStreetMap's augmented diffs"
    ),
    python_requires=">=3.6",
    packages=find_packages(),
    install_requires=["osmdiff>=0.1.9"],
    extras_require=dict(tests=tests_require, linters=linters_require),
    # Installs a `spearhead` command that calls spearhead.spearhead.main().
    entry_points={
        "console_scripts": [
            "spearhead = spearhead.spearhead:main",
        ],
    },
)
#!/bin/bash
#
# spearhead: Import OpenStreetMap data into OpenBuildingMap and update continuously.
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
#######################################
# Import other shell script(s)
#######################################
# shellcheck source=includes/database.sh
source /usr/local/lib/spearhead/includes/database.sh --source-only
# shellcheck source=includes/import.sh
source /usr/local/lib/spearhead/includes/import.sh --source-only
#######################################
# Initialize and populate the database.
# Waits for the server, creates the database with its extensions when it
# does not exist yet, and imports the OpenStreetMap data file unless the
# table `planet_osm_line` is already present in the `public` schema.
# Terminates the script with status 1 when the database cannot be created
# or the table lookup fails.
# Globals:
#   SPEARHEAD_DATABASE_HOST
#   SPEARHEAD_DATABASE_NAME
#   SPEARHEAD_DATABASE_USER
#   SPEARHEAD_IMPORT_FILE
#   SPEARHEAD_REPLICATION_DIRECTORY
#   SPEARHEAD_REPLICATION_SERVER
#   SPEARHEAD_SEQUENCE_FILE
# Arguments:
#   None
#######################################
function initialize() {
  local tables_exist

  printf "Connecting to PostgreSQL on '%s':\n" "${SPEARHEAD_DATABASE_HOST}"
  database::wait_for_connection

  # Initiate new or reuse existing database
  if database::execute_command "\connect ${SPEARHEAD_DATABASE_NAME}"; then
    printf "Reuse existing database: %s\n" "${SPEARHEAD_DATABASE_NAME}"
  else
    if ! database::execute_command "CREATE DATABASE ${SPEARHEAD_DATABASE_NAME};"; then
      printf "Error: Unable to create database: %s\nAborting.\n" \
        "${SPEARHEAD_DATABASE_NAME}" >&2
      exit 1
    fi
    database::add_extensions
    printf "Initiated new database: %s\n" "${SPEARHEAD_DATABASE_NAME}"
  fi

  # Ask the server whether the import has already created its tables. The
  # answer is captured as text ('true'/'false'); a failing psql must abort
  # instead of being mistaken for "already imported".
  if ! tables_exist=$(psql --host="${SPEARHEAD_DATABASE_HOST}" \
    --username="${SPEARHEAD_DATABASE_USER}" "${SPEARHEAD_DATABASE_NAME}" \
    --tuples-only --no-align \
    --command="SELECT CASE WHEN EXISTS \
      (SELECT FROM pg_tables WHERE schemaname = 'public' \
      AND tablename = 'planet_osm_line' LIMIT 1) THEN 'true' ELSE 'false' END;"); then
    printf "Error: Unable to inspect database: %s\nAborting.\n" \
      "${SPEARHEAD_DATABASE_NAME}" >&2
    exit 1
  fi
  # Drop any padding psql may have put around the value.
  tables_exist="${tables_exist//[[:space:]]/}"

  # Import OpenStreetMap data if database is empty
  if [[ "${tables_exist}" == "true" ]]; then
    printf "Assuming already imported database: %s\n" "${SPEARHEAD_DATABASE_NAME}"
  else
    import::openstreetmap_data "${SPEARHEAD_IMPORT_FILE}"
    # Write last sequence number from import file to sequence file
    pyosmium-get-changes -v \
      --start-osm-data "${SPEARHEAD_IMPORT_FILE}" \
      --sequence-file "${SPEARHEAD_REPLICATION_DIRECTORY}/${SPEARHEAD_SEQUENCE_FILE}" \
      --server "${SPEARHEAD_REPLICATION_SERVER}" \
      --ignore-osmosis-headers
    printf "OpenStreetMap data has been imported.\n"
  fi
}
#######################################
# Exit handler: roll the sequence file back to its saved copy.
# When `previous-<sequence file>` exists in the current directory it is moved
# over the sequence file; otherwise nothing is changed and the function
# returns 1.
# Globals:
#   SPEARHEAD_SEQUENCE_FILE
# Arguments:
#   None
#######################################
function onexit() {
  local -r saved_copy="previous-${SPEARHEAD_SEQUENCE_FILE}"

  if [ ! -f "${saved_copy}" ]; then
    # Nothing to restore; report it through the return status.
    return 1
  fi
  mv "${saved_copy}" "${SPEARHEAD_SEQUENCE_FILE}"
}
#######################################
# Constantly download OpenStreetMap data and update the database.
# Loops forever: fetches the next set of changes, imports it and removes old
# downloads. Before each fetch the sequence file is copied to
# `previous-<sequence file>`; the copy is removed after a successful import
# and is what the `onexit` EXIT trap registered here falls back to.
# Terminates the script with status 1 when the replication directory cannot
# be entered.
# Globals:
#   SPEARHEAD_REPLICATION_DIRECTORY
#   SPEARHEAD_REPLICATION_SERVER
#   SPEARHEAD_SEQUENCE_FILE
#   PGOPTIONS (exported)
# Arguments:
#   None
#######################################
function update() {
  local update_file status

  # Change to the spearhead replication data directory. All paths used below
  # are relative to it, so continuing elsewhere would scatter files.
  mkdir -p "${SPEARHEAD_REPLICATION_DIRECTORY}"
  if ! cd "${SPEARHEAD_REPLICATION_DIRECTORY}"; then
    printf "Error: Unable to enter replication directory '%s'.\nAborting.\n" \
      "${SPEARHEAD_REPLICATION_DIRECTORY}" >&2
    exit 1
  fi

  # Disable JIT and parallel workers in PostgreSQL
  export PGOPTIONS="-c jit=off -c max_parallel_workers_per_gather=0"

  # Include an exit handler
  trap onexit EXIT

  printf "\nStart data-update daemon.\n\n"

  # This runs as daemon and loops indefinitely
  while true; do
    update_file="changes-$(cat "${SPEARHEAD_SEQUENCE_FILE}").osc.gz"

    # Save previous sequence file for rollback if needed
    cp "${SPEARHEAD_SEQUENCE_FILE}" "previous-${SPEARHEAD_SEQUENCE_FILE}"

    # Fetch set of OSM changesets
    pyosmium-get-changes -v \
      --server="${SPEARHEAD_REPLICATION_SERVER}" \
      --sequence-file="${SPEARHEAD_SEQUENCE_FILE}" \
      --outfile="${update_file}"

    # Check for exit status
    status=$?
    if [ "${status}" -eq 0 ]; then
      # Exit immediately on error
      set -e
      # No trailing newline: the import prints one before its own message.
      printf "Fetched new data from %s to %s into %s" \
        "$(cat previous-"${SPEARHEAD_SEQUENCE_FILE}")" \
        "$(cat "${SPEARHEAD_SEQUENCE_FILE}")" \
        "${update_file}"
      import::openstreetmap_changeset "${update_file}"

      # After successful import, the previous sequence file can be removed
      rm "previous-${SPEARHEAD_SEQUENCE_FILE}"

      # Delete old downloads
      find . -name 'changes-*.gz' -mmin +300 -exec rm -f {} \;

      # Disable exit immediately on error
      set +e
    elif [ "${status}" -eq 3 ]; then
      # Status 3 is handled as "nothing new yet": retry soon.
      printf "No new data available.\n"
      # Remove file, it is just empty (-f: it may not have been created)
      rm -f "${update_file}"
      sleep 30
    else
      printf "Failed to fetch changeset file.\n"
      # Remove file, it is just empty
      rm -f "${update_file}"
      sleep 300
    fi
  done
}
#######################################
# Script entry point: greet, prepare the database, then keep it updated.
# Globals:
#   None
# Arguments:
#   None
#######################################
function main() {
  printf "\nWelcome to the OpenBuildingMap importer and updater\n\n"

  # Prepare the database first, then enter the update loop
  initialize
  update
}

# Start the script
main
#!/usr/bin/env python3
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
#!/usr/bin/env python3
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import logging
import sys
import osmdiff
# Root logger setup: every record from DEBUG upwards is written to stdout.
_stdout_handler = logging.StreamHandler(stream=sys.stdout)

logger = logging.getLogger()
logger.addHandler(_stdout_handler)
logger.setLevel(logging.DEBUG)
def main():
logger.info("spearhead started")
a = osmdiff.AugmentedDiff()
a.base_url = "https://overpass.openbuildingmap.org/api"
a.get_state()
logger.info(a.sequence_number)
a.retrieve()
building_count = 0
# Check on newly created objects
for n in a.create:
if n.tags.get("building"):
if _is_way_or_relation(n):
building_count += 1
# Check on objects that have been modified
for n in a.modify:
if n["new"].tags.get("building"):
if _is_way_or_relation(n["new"]):
building_count += 1
elif n["old"].tags.get("building"):
if _is_way_or_relation(n["old"]):
building_count += 1
# Check on deleted objects