diff --git a/spearhead/__init__.py b/spearhead/__init__.py index a1a76c7833a94d791d31df13b5ef5bf5e622a8d5..ed8a5fed366b3dc00e0cf033a65f89440f1c976e 100644 --- a/spearhead/__init__.py +++ b/spearhead/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright (C) 2021: +# Copyright (C) 2021-2022: # Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ # # This program is free software: you can redistribute it and/or modify it diff --git a/spearhead/configure.py b/spearhead/configure.py index bee975f98b23495ac971f0006e3ebf602e9d90d2..c8f1d62056f291d2bdde596ac353e24baad55ae3 100644 --- a/spearhead/configure.py +++ b/spearhead/configure.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright (c) 2020-2021: +# Copyright (c) 2020-2022: # Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ # # This program is free software: you can redistribute it and/or modify it @@ -25,9 +25,20 @@ logger.addHandler(logging.StreamHandler(sys.stdout)) class Configuration: - """Configuration. - - Holds configuration data. + """ + The `Configuration` class holds the configuration data. + + Args: + config_file (str): + Filepath to the configuration file that defines the following items: + overpass_base_url (str): + Overpass URL that is used to query the `AugmentedDiffs`. + state_pathname (str): + state_pathname of the file that defines the current `AugmentedDiff` state. + state_filename (str): + Filename of the file that defines the current `AugmentedDiff` state. + rabotnik (dict): + Credentials and host used to connect to the `Rabotnik` instance. """ def __init__(self, config_file: str = "config.yml") -> None: @@ -38,7 +49,7 @@ class Configuration: try: self.overpass_base_url = config["overpass_base_url"] - self.filepath = config["filepath"] + self.state_pathname = config["state_pathname"] self.state_filename = config["state_filename"] self.rabotnik = { "hostname": config["rabotnik"]["hostname"], diff --git a/spearhead/diff_analyzer.py b/spearhead/diff_analyzer.py index b4cd7a9c35e47c79cc28501719a79d034a03f97e..e756859c6ebc528a85f0599bc9d3c1d0d19eb8e1 100644 --- a/spearhead/diff_analyzer.py +++ b/spearhead/diff_analyzer.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright (C) 2021: +# Copyright (C) 2021-2022: # Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ # # This program is free software: you can redistribute it and/or modify it @@ -19,7 +19,8 @@ import logging import sys -import osmdiff +from typing import List +from osmdiff import AugmentedDiff, osm from rabotnik import MessageBus from spearhead.configure import Configuration @@ -30,15 +31,23 @@ logger.addHandler(logging.StreamHandler(sys.stdout)) class DiffAnalyzer: - """DiffAnalyzer. - + """ This class contains the logic for obtaining and interpreting the OpenStreetMap's - "augmented diffs". + `AugmentedDiffs`. + + Args: + diff_provider (AugmentedDiff): + An object containing all information of an augmented diff. + config (Configuration): + The configuration that includes the state_pathname and filename of the + `AugmentedDiff` state file. + message_bus (MessageBus): + Connection instance to the `Rabotnik` MessageBus. """ def __init__( self, - diff_provider: osmdiff.AugmentedDiff, + diff_provider: AugmentedDiff, config: Configuration, message_bus: MessageBus, ): @@ -46,15 +55,23 @@ class DiffAnalyzer: self.file_handler = FileHandler(config) self.message_bus = message_bus - async def get_list_of_new_diffs(self): + async def get_list_of_new_diffs(self) -> List[int]: + """ + Get all `AugmentedDiffs` between the current state of the `Overpass` instance and + the previous state that is logged in the `Configuration`. + + Returns: + A list of states that should be processed. + """ + self.diff_provider.get_state() new_state = self.diff_provider.sequence_number old_state = await self.file_handler.read_state_file() return list(range(old_state, new_state)) - async def get_augmented_diff(self) -> osmdiff.AugmentedDiff: + async def get_augmented_diff(self) -> AugmentedDiff: """ - Obtain an augmented diff from Overpass API + Obtain an augmented diff from Overpass API. Returns: diff_provider (AugmentedDiff): @@ -66,47 +83,69 @@ class DiffAnalyzer: return self.diff_provider async def update_state(self, state: int) -> None: + """ + Update the state that is logged in the `Configuration` to the current state. + + Args: + state (int): + Current `AugmentedDiff` state. + """ - await self.file_handler.update_state_file(str(state)) + await self.file_handler.update_state_file(state) - async def get_buildings(self, augmented_diff) -> None: - """Collect the building IDs of building objects that have changed in a given - augmented diff. + async def get_buildings(self, augmented_diff: AugmentedDiff) -> List[int]: """ + Collect the OSM IDs of buildings objects that have changed in a given augmented diff. + + Args: + augmented_diff (AugmentedDiff): + An object containing all information of an augmented diff. + + Returns: + List of OSM IDs of buildings that are updated in the `AugmentedDiff`. + """ + logger.info(" " + str(augmented_diff)) - numbers = [] + osm_ids = [] # Check on newly created objects for n in augmented_diff.create: if n.tags.get("building"): - if isinstance(n, osmdiff.osm.osm.Way): - numbers.append(int(n.attribs.get("id"))) - elif isinstance(n, osmdiff.osm.osm.Relation): - numbers.append(-int(n.attribs.get("id"))) + if isinstance(n, osm.osm.Way): + osm_ids.append(int(n.attribs.get("id"))) + elif isinstance(n, osm.osm.Relation): + osm_ids.append(-int(n.attribs.get("id"))) # Check on objects that have been modified for n in augmented_diff.modify: if n["new"].tags.get("building"): - if isinstance(n["new"], osmdiff.osm.osm.Way): - numbers.append(int(n["new"].attribs.get("id"))) - elif isinstance(n["new"], osmdiff.osm.osm.Relation): - numbers.append(-int(n["new"].attribs.get("id"))) + if isinstance(n["new"], osm.osm.Way): + osm_ids.append(int(n["new"].attribs.get("id"))) + elif isinstance(n["new"], osm.osm.Relation): + osm_ids.append(-int(n["new"].attribs.get("id"))) elif n["old"].tags.get("building"): - if isinstance(n["old"], osmdiff.osm.osm.Way): - numbers.append(int(n["old"].attribs.get("id"))) - elif isinstance(n["old"], osmdiff.osm.osm.Relation): - numbers.append(-int(n["old"].attribs.get("id"))) + if isinstance(n["old"], osm.osm.Way): + osm_ids.append(int(n["old"].attribs.get("id"))) + elif isinstance(n["old"], osm.osm.Relation): + osm_ids.append(-int(n["old"].attribs.get("id"))) # Check on deleted objects for n in augmented_diff.delete: - if isinstance(n, osmdiff.osm.osm.Way): - numbers.append(int(n.attribs.get("id"))) - elif isinstance(n, osmdiff.osm.osm.Relation): - numbers.append(-int(n.attribs.get("id"))) + if isinstance(n, osm.osm.Way): + osm_ids.append(int(n.attribs.get("id"))) + elif isinstance(n, osm.osm.Relation): + osm_ids.append(-int(n.attribs.get("id"))) + + return osm_ids - return numbers + async def send_buildings_to_message_bus(self, osm_ids: list) -> None: + """ + Send a list of building IDs to the message bus. + + Args: + osm_ids (list): + List of OSM IDs of buildings that are sent to the `MessageBus`. + """ - async def _set_buildings_on_message_bus(self, numbers: list) -> None: - """Set a list of building ids on the message bus""" - for number in numbers: + for number in osm_ids: await self.message_bus.send(message="building", payload={"building_id": number}) diff --git a/spearhead/file_handler.py b/spearhead/file_handler.py index ab0e900580333b946e8055b7fb23b73d505b8616..faecff0fd6164b3fcc7d3617fd9b2fd00e231e2a 100644 --- a/spearhead/file_handler.py +++ b/spearhead/file_handler.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright (C) 2021: +# Copyright (C) 2021-2022: # Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ # # This program is free software: you can redistribute it and/or modify it @@ -18,38 +18,69 @@ import logging import sys - import aiofiles +from spearhead.configure import Configuration + logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler(sys.stdout)) class FileHandler: - """FileHandler.""" + """ + The file handler reads and updates `AugmentedDiff` state files. It can also write the + `AugmentedDiff` in a report. + + Args: + config (Configuration): + The configuration that includes the state_pathname and filename of the + `AugmentedDiff` state file. + """ - def __init__(self, config): + def __init__(self, config: Configuration): self.config = config async def read_state_file(self) -> int: - """Reads state recorded in a text file""" + """ + Reads the `AgumentedDiff` state that is recorded in a text file. + + Returns: + The current `AugmentedDiff` state. + """ + async with aiofiles.open( - self.config.filepath + self.config.state_filename, mode="r" + self.config.state_pathname + self.config.state_filename, mode="r" ) as f: state = await f.readline() return int(state) async def update_state_file(self, state: int) -> None: - """Replaces state sequence_number in state text file""" + """ + Replaces the `AugmentedDiff` state sequence number in state text file. + + Args: + state (int): + The new `AugmentedDiff` state. + """ + async with aiofiles.open( - self.config.filepath + self.config.state_filename, mode="w" + self.config.state_pathname + self.config.state_filename, mode="w" ) as f: - await f.write(state) + await f.write(str(state)) + + async def write_diff_report(self, state: int, osm_ids: list) -> None: + """ + Writes a report of the `DiffAnalyzer` results per augmented diff into a text file. + + Args: + state (int): + Current `AugmentedDiff` state. + osm_ids (list): + List of OSM IDs that are updated in the current `AugmentedDiff` state. + """ - async def write_diff_report(self, identifier: int, numbers: list) -> None: - """Writes a report of analyzer results per augmented diff into a text file""" async with aiofiles.open( - self.config.filepath + "augmented-diff-" + str(identifier) + ".txt", mode="w" + self.config.state_pathname + "augmented-diff-" + str(state) + ".txt", mode="w" ) as f: - for number in numbers: - await f.write(str(number) + "\n") + for osm_id in osm_ids: + await f.write(str(osm_id) + "\n") diff --git a/spearhead/spearhead.py b/spearhead/spearhead.py index 6d3ec2e3d2c66daa46068bf49e0367a65eecd83f..173c6ebf167686e1a017a0353307f898aae8e4ab 100644 --- a/spearhead/spearhead.py +++ b/spearhead/spearhead.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright (C) 2021: +# Copyright (C) 2021-2022: # Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ # # This program is free software: you can redistribute it and/or modify it @@ -32,19 +32,40 @@ logger.addHandler(logging.StreamHandler(sys.stdout)) def main(): - """Wrapper around the main asynchronous program.""" + """ + Wrapper around the main asynchronous program. + """ + asyncio.run(async_main()) async def async_main(): - """Main entrypoint of the program.""" + """ + Main entrypoint of the program. + """ + logger.info("spearhead started - it can be stopped with CTRL+c") config = Configuration("config.yml") - await get_changed_buildings(config) + await spearhead(config) + + +async def spearhead(config: Configuration): + """ + Obtain a list of buildings that have changed since last check. + + Args: + config_file (str): + A configuration file that defines the following items: + overpass_base_url (str): + Overpass URL that is used to query the `AugmentedDiffs`. + state_pathname (str): + state_pathname of the file that defines the current `AugmentedDiff` state. + state_filename (str): + Filename of the file that defines the current `AugmentedDiff` state. + rabotnik (dict): + Credentials and host used to connect to the `Rabotnik` instance. + """ - -async def get_changed_buildings(config: Configuration): - """Obtain a list of buiildings that have changed since last check.""" diff_provider = osmdiff.AugmentedDiff() diff_provider.base_url = config.overpass_base_url logger.info("Using Overpass API: " + diff_provider.base_url) @@ -56,7 +77,6 @@ async def get_changed_buildings(config: Configuration): while True: try: - # Obtain available list of diffs diffs = await diff_analyzer.get_list_of_new_diffs() @@ -67,12 +87,12 @@ async def get_changed_buildings(config: Configuration): diff = await diff_analyzer.get_augmented_diff() # Get identifiers of all buildings that have changed - numbers = await diff_analyzer.get_buildings(diff) + osm_ids = await diff_analyzer.get_buildings(diff) await diff_analyzer.file_handler.write_diff_report( - diff.sequence_number, numbers + diff.sequence_number, osm_ids ) - await diff_analyzer._set_buildings_on_message_bus(numbers) + await diff_analyzer.send_buildings_to_message_bus(osm_ids) await diff_analyzer.update_state(diff_number + 1) # Wait until requesting more data diff --git a/tests/config.yml b/tests/config.yml index fcc161166802e4a95645c7d66454eb19fdf761a8..1287dcd05f11f13de4e6c6cc8be38a140058371d 100644 --- a/tests/config.yml +++ b/tests/config.yml @@ -1,5 +1,5 @@ overpass_base_url: https://overpass.openbuildingmap.org/api -filepath: ./tests/data/ +state_pathname: ./tests/data/ state_filename: state.txt rabotnik: hostname: localhost diff --git a/tests/test_file_handler.py b/tests/test_file_handler.py index 11401c15a4565785177e21801fa1a56a931229dc..84adb601740b17c07f3fa2e57abb8f1880503c5c 100644 --- a/tests/test_file_handler.py +++ b/tests/test_file_handler.py @@ -55,4 +55,4 @@ async def test_update_state_file(file_handler): @pytest.mark.asyncio async def test_write_diff_report(file_handler): await file_handler.write_diff_report("4749545", ["337202175", "17837838", "-387388722"]) - os.remove(file_handler.config.filepath + "augmented-diff-4749545.txt") + os.remove(file_handler.config.state_pathname + "augmented-diff-4749545.txt")