From 29ccaee1ab27d6bb0054c8f2a76a3cd5becb4a89 Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Mon, 9 Aug 2021 16:45:16 +0200 Subject: [PATCH 01/20] Moved docs -> data --- {docs => data}/occupancy.md | 0 {docs => data}/rules/occupancy/GEM_taxonomy_strings.csv | 0 {docs => data}/rules/occupancy/README.md | 0 {docs => data}/rules/occupancy/building_and_PoIs_tags.csv | 0 {docs => data}/rules/occupancy/landuse_tags.csv | 0 {docs => data}/rules/occupancy/overriding_occupancies.csv | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename {docs => data}/occupancy.md (100%) rename {docs => data}/rules/occupancy/GEM_taxonomy_strings.csv (100%) rename {docs => data}/rules/occupancy/README.md (100%) rename {docs => data}/rules/occupancy/building_and_PoIs_tags.csv (100%) rename {docs => data}/rules/occupancy/landuse_tags.csv (100%) rename {docs => data}/rules/occupancy/overriding_occupancies.csv (100%) diff --git a/docs/occupancy.md b/data/occupancy.md similarity index 100% rename from docs/occupancy.md rename to data/occupancy.md diff --git a/docs/rules/occupancy/GEM_taxonomy_strings.csv b/data/rules/occupancy/GEM_taxonomy_strings.csv similarity index 100% rename from docs/rules/occupancy/GEM_taxonomy_strings.csv rename to data/rules/occupancy/GEM_taxonomy_strings.csv diff --git a/docs/rules/occupancy/README.md b/data/rules/occupancy/README.md similarity index 100% rename from docs/rules/occupancy/README.md rename to data/rules/occupancy/README.md diff --git a/docs/rules/occupancy/building_and_PoIs_tags.csv b/data/rules/occupancy/building_and_PoIs_tags.csv similarity index 100% rename from docs/rules/occupancy/building_and_PoIs_tags.csv rename to data/rules/occupancy/building_and_PoIs_tags.csv diff --git a/docs/rules/occupancy/landuse_tags.csv b/data/rules/occupancy/landuse_tags.csv similarity index 100% rename from docs/rules/occupancy/landuse_tags.csv rename to data/rules/occupancy/landuse_tags.csv diff --git a/docs/rules/occupancy/overriding_occupancies.csv 
b/data/rules/occupancy/overriding_occupancies.csv similarity index 100% rename from docs/rules/occupancy/overriding_occupancies.csv rename to data/rules/occupancy/overriding_occupancies.csv -- GitLab From 296a66d390708024a6671395668e2dc29b6f4630 Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Mon, 9 Aug 2021 16:46:02 +0200 Subject: [PATCH 02/20] Added gem taxonomy evaluation and tests --- rabotnikobm/taxonomy/gem_taxonomy.py | 75 ++++++++++++++++++++++++++++ tests/test_gem_taxonomy.py | 54 ++++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 rabotnikobm/taxonomy/gem_taxonomy.py create mode 100644 tests/test_gem_taxonomy.py diff --git a/rabotnikobm/taxonomy/gem_taxonomy.py b/rabotnikobm/taxonomy/gem_taxonomy.py new file mode 100644 index 0000000..64ddf05 --- /dev/null +++ b/rabotnikobm/taxonomy/gem_taxonomy.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2021: +# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero +# General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. 
+ +import csv +from collections import defaultdict + + +def group_taxonomy(taxonomy: list[dict[str, str]]) -> dict[str, dict[str, str]]: + """Extract groups of identical keys and group them in nested dicts.""" + grouped_taxonomy = defaultdict(dict) + + for row in taxonomy: + key = row.pop("key") + value = row.pop("value") + grouped_taxonomy[key].update({value: row}) + + return grouped_taxonomy + + +def read_csv(fn: str) -> list[dict[str, str]]: + """Read content from csv into list of dicts.""" + with open(fn) as csvfile: + taxonomy_reader = list(csv.DictReader(csvfile)) + + return taxonomy_reader + + +class TaxonomyMapper: + + """Map building tags to `GEM_taxonomy_occupancy`""" + + def __init__(self, mapping): + self.mapping = mapping + + def tags_to_taxonomy(self, building_tags: dict[dict[str, str]]) -> list[str]: + """Map `building_tags` to lists of GEM_taxonomy_occupancy as defined in + building_and_POIs_tags.csv and landuse_tags.csv. Tags may be duplicated. + + Args: + building_tags: list of building tag strings + Returns: + list of `GEM_taxonomy_occupancy`s + """ + + occupancies = [] + for building_tag in building_tags: + for key, value in building_tag.items(): + occupancy = self.mapping.get(key, {}).get(value, None) + if occupancy is not None: + occupancies.append(occupancy["GEM_taxonomy_occupancy"]) + + return occupancies + + @classmethod + def from_csv(cls, fn: str): + """Read a csv and initialize a `TaxonomyMapper`.""" + taxonomy = read_csv(fn) + grouped_taxonomy = group_taxonomy(taxonomy) + + return cls(mapping=grouped_taxonomy) diff --git a/tests/test_gem_taxonomy.py b/tests/test_gem_taxonomy.py new file mode 100644 index 0000000..06bebc6 --- /dev/null +++ b/tests/test_gem_taxonomy.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2021: +# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as 
published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero +# General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. + +from rabotnikobm.taxonomy.gem_taxonomy import group_taxonomy, TaxonomyMapper + + +def test_group_taxonomy(): + rows = [ + {"key": "a", "value": 0}, + {"key": "a", "value": 1}, + {"key": "b", "value": 2}, + ] + + grouped = group_taxonomy(rows) + assert grouped == {"a": {0: {}, 1: {}}, "b": {2: {}}} + + +def test_mapper_from_csv(pytestconfig): + demo_file = pytestconfig.rootpath / "data/rules/occupancy/building_and_PoIs_tags.csv" + mapper = TaxonomyMapper.from_csv(demo_file) + assert mapper + + +def test_taxonomy_mapper_building_pois(pytestconfig): + demo_file = pytestconfig.rootpath / "data/rules/occupancy/building_and_PoIs_tags.csv" + mapper = TaxonomyMapper.from_csv(demo_file) + + sample_tags = [{"amenity": "community_centre"}, {"amenity": "cafe"}, {"x": "y"}] + + assert mapper.tags_to_taxonomy(sample_tags) == ["ASS4", "COM5"] + + +def test_taxonomy_mapper_landuse(pytestconfig): + demo_file = pytestconfig.rootpath / "data/rules/occupancy/landuse_tags.csv" + mapper = TaxonomyMapper.from_csv(demo_file) + + sample_tags = [{"amenity": "university", "a": "b"}, {"landuse": "brownfield"}, {"x": "y"}] + + assert mapper.tags_to_taxonomy(sample_tags) == ["EDU3", "UNDECIDABLE"] -- GitLab From d9443876b187781ea5752758243a461fd4a348ba Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Mon, 9 Aug 2021 17:43:24 +0200 Subject: [PATCH 03/20] Renamed TaxonomyMapper --- rabotnikobm/taxonomy/gem_taxonomy.py | 4 ++-- tests/test_gem_taxonomy.py | 8 ++++---- 2 
files changed, 6 insertions(+), 6 deletions(-) diff --git a/rabotnikobm/taxonomy/gem_taxonomy.py b/rabotnikobm/taxonomy/gem_taxonomy.py index 64ddf05..4a35ba4 100644 --- a/rabotnikobm/taxonomy/gem_taxonomy.py +++ b/rabotnikobm/taxonomy/gem_taxonomy.py @@ -40,7 +40,7 @@ def read_csv(fn: str) -> list[dict[str, str]]: return taxonomy_reader -class TaxonomyMapper: +class TaxonomyOccupancyMapper: """Map building tags to `GEM_taxonomy_occupancy`""" @@ -68,7 +68,7 @@ class TaxonomyMapper: @classmethod def from_csv(cls, fn: str): - """Read a csv and initialize a `TaxonomyMapper`.""" + """Read a csv and initialize a `TaxonomyOccupancyMapper`.""" taxonomy = read_csv(fn) grouped_taxonomy = group_taxonomy(taxonomy) diff --git a/tests/test_gem_taxonomy.py b/tests/test_gem_taxonomy.py index 06bebc6..bb6040a 100644 --- a/tests/test_gem_taxonomy.py +++ b/tests/test_gem_taxonomy.py @@ -16,7 +16,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see http://www.gnu.org/licenses/. 
-from rabotnikobm.taxonomy.gem_taxonomy import group_taxonomy, TaxonomyMapper +from rabotnikobm.taxonomy.gem_taxonomy import group_taxonomy, TaxonomyOccupancyMapper def test_group_taxonomy(): @@ -32,13 +32,13 @@ def test_group_taxonomy(): def test_mapper_from_csv(pytestconfig): demo_file = pytestconfig.rootpath / "data/rules/occupancy/building_and_PoIs_tags.csv" - mapper = TaxonomyMapper.from_csv(demo_file) + mapper = TaxonomyOccupancyMapper.from_csv(demo_file) assert mapper def test_taxonomy_mapper_building_pois(pytestconfig): demo_file = pytestconfig.rootpath / "data/rules/occupancy/building_and_PoIs_tags.csv" - mapper = TaxonomyMapper.from_csv(demo_file) + mapper = TaxonomyOccupancyMapper.from_csv(demo_file) sample_tags = [{"amenity": "community_centre"}, {"amenity": "cafe"}, {"x": "y"}] @@ -47,7 +47,7 @@ def test_taxonomy_mapper_building_pois(pytestconfig): def test_taxonomy_mapper_landuse(pytestconfig): demo_file = pytestconfig.rootpath / "data/rules/occupancy/landuse_tags.csv" - mapper = TaxonomyMapper.from_csv(demo_file) + mapper = TaxonomyOccupancyMapper.from_csv(demo_file) sample_tags = [{"amenity": "university", "a": "b"}, {"landuse": "brownfield"}, {"x": "y"}] -- GitLab From af39dccf6466c048a5856b6c0ae5ea3ec8383a80 Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Tue, 10 Aug 2021 18:03:19 +0200 Subject: [PATCH 04/20] Refactored taxonomies --- .../gem_taxonomy.py => occupancy/mapping.py} | 36 +++++++++---------- setup.py | 2 +- ..._taxonomy.py => test_occupancy_mapping.py} | 20 +++++------ 3 files changed, 29 insertions(+), 29 deletions(-) rename rabotnikobm/{taxonomy/gem_taxonomy.py => occupancy/mapping.py} (62%) rename tests/{test_gem_taxonomy.py => test_occupancy_mapping.py} (72%) diff --git a/rabotnikobm/taxonomy/gem_taxonomy.py b/rabotnikobm/occupancy/mapping.py similarity index 62% rename from rabotnikobm/taxonomy/gem_taxonomy.py rename to rabotnikobm/occupancy/mapping.py index 4a35ba4..4ac50da 100644 --- 
a/rabotnikobm/taxonomy/gem_taxonomy.py +++ b/rabotnikobm/occupancy/mapping.py @@ -20,46 +20,46 @@ import csv from collections import defaultdict -def group_taxonomy(taxonomy: list[dict[str, str]]) -> dict[str, dict[str, str]]: - """Extract groups of identical keys and group them in nested dicts.""" - grouped_taxonomy = defaultdict(dict) +def group_tags(occupancy_mapping: list[dict[str, str]]) -> dict[str, dict[str, str]]: + """Extract groups of identical tags and group them in nested dicts.""" + grouped_tag_mapping = defaultdict(dict) - for row in taxonomy: + for row in occupancy_mapping: key = row.pop("key") value = row.pop("value") - grouped_taxonomy[key].update({value: row}) + grouped_tag_mapping[key].update({value: row}) - return grouped_taxonomy + return grouped_tag_mapping def read_csv(fn: str) -> list[dict[str, str]]: """Read content from csv into list of dicts.""" with open(fn) as csvfile: - taxonomy_reader = list(csv.DictReader(csvfile)) + mapping = list(csv.DictReader(csvfile)) - return taxonomy_reader + return mapping -class TaxonomyOccupancyMapper: +class OccupancyMapper: """Map building tags to `GEM_taxonomy_occupancy`""" def __init__(self, mapping): self.mapping = mapping - def tags_to_taxonomy(self, building_tags: dict[dict[str, str]]) -> list[str]: - """Map `building_tags` to lists of GEM_taxonomy_occupancy as defined in + def tags_to_occupancy(self, osm_tags: dict[dict[str, str]]) -> list[str]: + """Map `osm_tags` to lists of GEM_taxonomy_occupancy as defined in building_and_POIs_tags.csv and landuse_tags.csv. Tags may be duplicated. 
Args: - building_tags: list of building tag strings + osm_tags: list of OSM building tag strings associated with a specific building Returns: list of `GEM_taxonomy_occupancy`s """ occupancies = [] - for building_tag in building_tags: - for key, value in building_tag.items(): + for osm_tag in osm_tags: + for key, value in osm_tag.items(): occupancy = self.mapping.get(key, {}).get(value, None) if occupancy is not None: occupancies.append(occupancy["GEM_taxonomy_occupancy"]) @@ -68,8 +68,8 @@ class TaxonomyOccupancyMapper: @classmethod def from_csv(cls, fn: str): - """Read a csv and initialize a `TaxonomyOccupancyMapper`.""" - taxonomy = read_csv(fn) - grouped_taxonomy = group_taxonomy(taxonomy) + """Read a csv and initialize a `OccupancyMapper`.""" + occupancy_mapping = read_csv(fn) + occupancy_mapping_grouped = group_tags(occupancy_mapping) - return cls(mapping=grouped_taxonomy) + return cls(mapping=occupancy_mapping_grouped) diff --git a/setup.py b/setup.py index 9fa49bb..690894f 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ from setuptools import setup, find_packages -tests_require = ["pytest"] +tests_require = ["pytest", "pytest-asyncio"] linters_require = ["pylint", "pre-commit"] setup( diff --git a/tests/test_gem_taxonomy.py b/tests/test_occupancy_mapping.py similarity index 72% rename from tests/test_gem_taxonomy.py rename to tests/test_occupancy_mapping.py index bb6040a..f2fe9f7 100644 --- a/tests/test_gem_taxonomy.py +++ b/tests/test_occupancy_mapping.py @@ -16,39 +16,39 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see http://www.gnu.org/licenses/. 
-from rabotnikobm.taxonomy.gem_taxonomy import group_taxonomy, TaxonomyOccupancyMapper +from rabotnikobm.occupancy.mapping import group_tags, OccupancyMapper -def test_group_taxonomy(): +def test_group_tags(): rows = [ {"key": "a", "value": 0}, {"key": "a", "value": 1}, {"key": "b", "value": 2}, ] - grouped = group_taxonomy(rows) + grouped = group_tags(rows) assert grouped == {"a": {0: {}, 1: {}}, "b": {2: {}}} def test_mapper_from_csv(pytestconfig): demo_file = pytestconfig.rootpath / "data/rules/occupancy/building_and_PoIs_tags.csv" - mapper = TaxonomyOccupancyMapper.from_csv(demo_file) + mapper = OccupancyMapper.from_csv(demo_file) assert mapper -def test_taxonomy_mapper_building_pois(pytestconfig): +def test_occupancy_mapper_building_pois(pytestconfig): demo_file = pytestconfig.rootpath / "data/rules/occupancy/building_and_PoIs_tags.csv" - mapper = TaxonomyOccupancyMapper.from_csv(demo_file) + mapper = OccupancyMapper.from_csv(demo_file) sample_tags = [{"amenity": "community_centre"}, {"amenity": "cafe"}, {"x": "y"}] - assert mapper.tags_to_taxonomy(sample_tags) == ["ASS4", "COM5"] + assert mapper.tags_to_occupancy(sample_tags) == ["ASS4", "COM5"] -def test_taxonomy_mapper_landuse(pytestconfig): +def test_occupancy_mapper_landuse(pytestconfig): demo_file = pytestconfig.rootpath / "data/rules/occupancy/landuse_tags.csv" - mapper = TaxonomyOccupancyMapper.from_csv(demo_file) + mapper = OccupancyMapper.from_csv(demo_file) sample_tags = [{"amenity": "university", "a": "b"}, {"landuse": "brownfield"}, {"x": "y"}] - assert mapper.tags_to_taxonomy(sample_tags) == ["EDU3", "UNDECIDABLE"] + assert mapper.tags_to_occupancy(sample_tags) == ["EDU3", "UNDECIDABLE"] -- GitLab From d2db330174bf2c710391c1c1b906967d7b2372c5 Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Wed, 11 Aug 2021 18:15:41 +0200 Subject: [PATCH 05/20] Refactored csv and adopted changed key --- data/rules/occupancy/README.md | 2 +- data/rules/occupancy/building_and_PoIs_tags.csv | 2 +- 
data/rules/occupancy/landuse_tags.csv | 2 +- rabotnikobm/occupancy/mapping.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data/rules/occupancy/README.md b/data/rules/occupancy/README.md index 47c384d..bb79ea1 100644 --- a/data/rules/occupancy/README.md +++ b/data/rules/occupancy/README.md @@ -14,5 +14,5 @@ In the first two CSV files, the columns are the following: - `key`: OpenStreetMap key. - `value`: value of the key (together, `key=value` are a `tag`). - `taginfo_description`: description of the tag according to [taginfo](https://taginfo.openstreetmap.org/). -- `GEM_taxonomy_occupancy`: the occupancy string according to the GEM Taxonomy. If more than one value is possible, they are separated by "|". +- `GEM_occupancy`: the occupancy string according to the GEM Taxonomy. If more than one value is possible, they are separated by "|". - `comments`: comments associated with the established mapping. diff --git a/data/rules/occupancy/building_and_PoIs_tags.csv b/data/rules/occupancy/building_and_PoIs_tags.csv index 82e6327..3cf9471 100644 --- a/data/rules/occupancy/building_and_PoIs_tags.csv +++ b/data/rules/occupancy/building_and_PoIs_tags.csv @@ -1,4 +1,4 @@ -key,value,taginfo_description,GEM_taxonomy_occupancy,comments +key,value,taginfo_description,GEM_occupancy,comments aerialway,station,"A station where passengers and/or goods can enter and/or leave the aerialway (forms of transport that use wires, including cable-cars, chair-lifts and drag-lifts)",COM,COM contains other kinds of transport stations but not one specific for aerialways aeroway,hangar,A large airport building with extensive floor areas for housing aircraft or spacecraft,COM10,Unclear if hangar is treated as standard part of an airport or not aeroway,terminal,An airport passenger building,COM10, diff --git a/data/rules/occupancy/landuse_tags.csv b/data/rules/occupancy/landuse_tags.csv index 6c450ff..6806b73 100644 --- a/data/rules/occupancy/landuse_tags.csv +++ 
b/data/rules/occupancy/landuse_tags.csv @@ -1,4 +1,4 @@ -key,value,taginfo_description,GEM_taxonomy_occupancy,comments +key,value,taginfo_description,GEM_occupancy,comments amenity,university,"An educational institution designed for instruction, examination, or both, of students in many branches of advanced learning.",EDU3, amenity,school,A primary or secondary school (pupils typically aged 6 to 18).,EDU2, amenity,college,"A place for further education, a post-secondary education institution which is not a University",EDU3, diff --git a/rabotnikobm/occupancy/mapping.py b/rabotnikobm/occupancy/mapping.py index 4ac50da..4244a1b 100644 --- a/rabotnikobm/occupancy/mapping.py +++ b/rabotnikobm/occupancy/mapping.py @@ -62,7 +62,7 @@ class OccupancyMapper: for key, value in osm_tag.items(): occupancy = self.mapping.get(key, {}).get(value, None) if occupancy is not None: - occupancies.append(occupancy["GEM_taxonomy_occupancy"]) + occupancies.append(occupancy["GEM_occupancy"]) return occupancies -- GitLab From 866e09cce85a9855b588729dbd449a1701006681 Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Wed, 11 Aug 2021 18:28:48 +0200 Subject: [PATCH 06/20] Added building occupancy rule --- rabotnikobm/occupancy/mapping.py | 57 ++++++++++++++++----- rabotnikobm/rules/get_building_occupancy.py | 44 ++++++++++++++++ tests/conftest.py | 16 ++++++ tests/test_get_building_occupancy.py | 31 +++++++++++ tests/test_occupancy_mapping.py | 29 +++++------ 5 files changed, 146 insertions(+), 31 deletions(-) create mode 100644 rabotnikobm/rules/get_building_occupancy.py create mode 100644 tests/test_get_building_occupancy.py diff --git a/rabotnikobm/occupancy/mapping.py b/rabotnikobm/occupancy/mapping.py index 4244a1b..56c9a2d 100644 --- a/rabotnikobm/occupancy/mapping.py +++ b/rabotnikobm/occupancy/mapping.py @@ -15,6 +15,7 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see http://www.gnu.org/licenses/. 
+from __future__ import annotations import csv from collections import defaultdict @@ -32,29 +33,21 @@ def group_tags(occupancy_mapping: list[dict[str, str]]) -> dict[str, dict[str, s return grouped_tag_mapping -def read_csv(fn: str) -> list[dict[str, str]]: - """Read content from csv into list of dicts.""" - with open(fn) as csvfile: - mapping = list(csv.DictReader(csvfile)) - - return mapping - - class OccupancyMapper: - """Map building tags to `GEM_taxonomy_occupancy`""" + """Map building tags to `GEM_occupancy`""" def __init__(self, mapping): self.mapping = mapping - def tags_to_occupancy(self, osm_tags: dict[dict[str, str]]) -> list[str]: - """Map `osm_tags` to lists of GEM_taxonomy_occupancy as defined in + def apply(self, osm_tags: dict[dict[str, str]]) -> list[str]: + """Map `osm_tags` to lists of GEM_occupancy as defined in building_and_POIs_tags.csv and landuse_tags.csv. Tags may be duplicated. Args: osm_tags: list of OSM building tag strings associated with a specific building Returns: - list of `GEM_taxonomy_occupancy`s + list of `GEM_occupancy`s """ occupancies = [] @@ -67,9 +60,45 @@ class OccupancyMapper: return occupancies @classmethod - def from_csv(cls, fn: str): + def read_csv(cls, fn: str) -> list[dict[str, str]]: + """Read content from csv into list of dicts.""" + with open(fn) as csvfile: + mapping = list(csv.DictReader(csvfile)) + + return mapping + + @classmethod + def from_csv(cls, fn: str) -> OccupancyMapper: """Read a csv and initialize a `OccupancyMapper`.""" - occupancy_mapping = read_csv(fn) + occupancy_mapping = cls.read_csv(fn) occupancy_mapping_grouped = group_tags(occupancy_mapping) return cls(mapping=occupancy_mapping_grouped) + + +class OverridingOccupancy: + def __init__(self, mapping): + self.mapping = mapping + + def apply(self, occupancies: list[str]): + for candidate, occupancy in self.mapping.items(): + if candidate in occupancies: + return occupancy + + else: + raise KeyError("No overriding occupancy matched") + + 
@classmethod + def read_csv(cls, fn: str) -> dict[str, str]: + """Read content from csv into list of dicts.""" + with open(fn) as csvfile: + mapping = {k.strip(): v.strip() for (k, v) in csv.reader(csvfile)} + + return mapping + + @classmethod + def from_csv(cls, fn: str) -> OverridingOccupancy: + """Read a csv and initialize a `OverridingOccupancy` instance.""" + occupancy_mapping = cls.read_csv(fn) + + return cls(mapping=occupancy_mapping) diff --git a/rabotnikobm/rules/get_building_occupancy.py b/rabotnikobm/rules/get_building_occupancy.py new file mode 100644 index 0000000..363dc18 --- /dev/null +++ b/rabotnikobm/rules/get_building_occupancy.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2021: +# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero +# General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. 
+ +import logging + +from rabotnik import Rule +from rabotnik.storages.base import StorageBase +from rabotnikobm.occupancy.mapping import OccupancyMapper + +logger = logging.getLogger() + + +class GetBuildingTaxonomy(Rule): + """A rule to map OSM tags to building occupancies.""" + + def __init__(self, storage: StorageBase, occupancy_mapper: OccupancyMapper): + self.storage = storage + self.occupancy_mapper = occupancy_mapper + + async def evaluate(self, payload: dict): + building_id = payload["building_id"] + logger.debug("Processing building: %s", building_id) + tags = await self.storage.expect_one( + f"SELECT tags FROM osm_building_relations WHERE osm_id={building_id} AND index=0" + ) + + occupancies = self.occupancy_mapper.apply(tags) + logger.debug("occupancies %s: %s", building_id, occupancies) + return occupancies diff --git a/tests/conftest.py b/tests/conftest.py index d4ec532..b99a082 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,8 @@ import logging import pytest import rabotnik +from rabotnikobm.occupancy.mapping import OccupancyMapper + logger = logging.getLogger(__name__) @@ -48,6 +50,20 @@ def storage_consumer(pytestconfig): storage.disconnect() +@pytest.fixture +def building_poi_mapper(pytestconfig): + fn_mapping = pytestconfig.rootpath / "data/rules/occupancy/building_and_PoIs_tags.csv" + mapper = OccupancyMapper.from_csv(fn_mapping) + yield mapper + + +@pytest.fixture +def landuse_mapper(pytestconfig): + fn_mapping = pytestconfig.rootpath / "data/rules/occupancy/landuse_tags.csv" + mapper = OccupancyMapper.from_csv(fn_mapping) + yield mapper + + def pytest_collection_modifyitems(config, items): storage_configuration = config.getoption("storage_contributor") if storage_configuration: diff --git a/tests/test_get_building_occupancy.py b/tests/test_get_building_occupancy.py new file mode 100644 index 0000000..d05222b --- /dev/null +++ b/tests/test_get_building_occupancy.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +# Copyright (C) 
2021: +# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero +# General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. + +import pytest + +from rabotnikobm.rules.get_building_occupancy import GetBuildingTaxonomy + + +@pytest.mark.requires_storage +@pytest.mark.asyncio +async def test_get_building_taxonomy(connected_storage, building_poi_mapper): + rule = GetBuildingTaxonomy(storage=connected_storage, occupancy_mapper=building_poi_mapper) + + payload = {"building_id": -6744517} + result = await rule.evaluate(payload=payload) + assert result == ["ASS4", "UNDECIDABLE"] diff --git a/tests/test_occupancy_mapping.py b/tests/test_occupancy_mapping.py index f2fe9f7..4259d88 100644 --- a/tests/test_occupancy_mapping.py +++ b/tests/test_occupancy_mapping.py @@ -16,7 +16,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see http://www.gnu.org/licenses/. 
-from rabotnikobm.occupancy.mapping import group_tags, OccupancyMapper +from rabotnikobm.occupancy.mapping import group_tags, OverridingOccupancy def test_group_tags(): @@ -30,25 +30,20 @@ def test_group_tags(): assert grouped == {"a": {0: {}, 1: {}}, "b": {2: {}}} -def test_mapper_from_csv(pytestconfig): - demo_file = pytestconfig.rootpath / "data/rules/occupancy/building_and_PoIs_tags.csv" - mapper = OccupancyMapper.from_csv(demo_file) - assert mapper - - -def test_occupancy_mapper_building_pois(pytestconfig): - demo_file = pytestconfig.rootpath / "data/rules/occupancy/building_and_PoIs_tags.csv" - mapper = OccupancyMapper.from_csv(demo_file) - +def test_mapper_building_pois(building_poi_mapper): sample_tags = [{"amenity": "community_centre"}, {"amenity": "cafe"}, {"x": "y"}] + assert building_poi_mapper.apply(sample_tags) == ["ASS4", "COM5"] - assert mapper.tags_to_occupancy(sample_tags) == ["ASS4", "COM5"] +def test_mapper_landuse(landuse_mapper): + sample_tags = [{"amenity": "university", "a": "b"}, {"landuse": "brownfield"}, {"x": "y"}] + assert landuse_mapper.apply(sample_tags) == ["EDU3", "UNDECIDABLE"] -def test_occupancy_mapper_landuse(pytestconfig): - demo_file = pytestconfig.rootpath / "data/rules/occupancy/landuse_tags.csv" - mapper = OccupancyMapper.from_csv(demo_file) - sample_tags = [{"amenity": "university", "a": "b"}, {"landuse": "brownfield"}, {"x": "y"}] +def test_occupancy_mapper(pytestconfig): + fn_mapping = pytestconfig.rootpath / "data/rules/occupancy/overriding_occupancies.csv" + overriding_mapping = OverridingOccupancy.from_csv(fn_mapping) - assert mapper.tags_to_occupancy(sample_tags) == ["EDU3", "UNDECIDABLE"] + demo_tags = ["ASS1", "COM10"] + occupancy = overriding_mapping.apply(demo_tags) + assert occupancy == "airport" -- GitLab From 7ba3a17ccee90c1d99455fe7bad0fa7bd610805c Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Thu, 12 Aug 2021 10:31:56 +0200 Subject: [PATCH 07/20] Changed ordering of overriding occupancies --- 
data/rules/occupancy/overriding_occupancies.csv | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/data/rules/occupancy/overriding_occupancies.csv b/data/rules/occupancy/overriding_occupancies.csv index df17dc8..7f21514 100644 --- a/data/rules/occupancy/overriding_occupancies.csv +++ b/data/rules/occupancy/overriding_occupancies.csv @@ -1,13 +1,13 @@ -ASS1, religious gathering -ASS2, arena +COM10, airport +COM9, railway station +COM8, bus station COM4, hospital/medical clinic +GOV2, government emergency response +GOV1, government general services COM6, public building (gallery;museum;monument building;library) -COM8, bus station -COM9, railway station -COM10, airport -RES3, temporary lodging (hotels;motels;guest lodges;cabins) +ASS2, arena EDU2, school EDU3, offices and/or classrooms of college/university EDU4, research facilities and/or labs of college/university -GOV1, government general services -GOV2, government emergency response +RES3, temporary lodging (hotels;motels;guest lodges;cabins) +ASS1, religious gathering -- GitLab From ff3abd157da69f095e0ccd34cbffdf3d878c4116 Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Thu, 12 Aug 2021 11:33:07 +0200 Subject: [PATCH 08/20] Added documentation and cleanup --- rabotnikobm/occupancy/mapping.py | 11 +++++++++-- rabotnikobm/rules/get_building_occupancy.py | 2 +- tests/test_get_building_occupancy.py | 6 +++--- tests/test_occupancy_mapping.py | 22 ++++++++++++++++----- 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/rabotnikobm/occupancy/mapping.py b/rabotnikobm/occupancy/mapping.py index 56c9a2d..fe834e4 100644 --- a/rabotnikobm/occupancy/mapping.py +++ b/rabotnikobm/occupancy/mapping.py @@ -35,7 +35,7 @@ def group_tags(occupancy_mapping: list[dict[str, str]]) -> dict[str, dict[str, s class OccupancyMapper: - """Map building tags to `GEM_occupancy`""" + """Map osm tags to `GEM_occupancy` categories.""" def __init__(self, mapping): self.mapping = mapping @@ 
-77,6 +77,13 @@ class OccupancyMapper: class OverridingOccupancy: + + """Takes precedence over other mappings. + + If `OverridingOccupancy.apply` returns a result this will be the designated occupancy. + Otherwise will return None. + """ + def __init__(self, mapping): self.mapping = mapping @@ -86,7 +93,7 @@ class OverridingOccupancy: return occupancy else: - raise KeyError("No overriding occupancy matched") + return None @classmethod def read_csv(cls, fn: str) -> dict[str, str]: diff --git a/rabotnikobm/rules/get_building_occupancy.py b/rabotnikobm/rules/get_building_occupancy.py index 363dc18..ae79583 100644 --- a/rabotnikobm/rules/get_building_occupancy.py +++ b/rabotnikobm/rules/get_building_occupancy.py @@ -25,7 +25,7 @@ from rabotnikobm.occupancy.mapping import OccupancyMapper logger = logging.getLogger() -class GetBuildingTaxonomy(Rule): +class GetBuildingOccupancy(Rule): """A rule to map OSM tags to building occupancies.""" def __init__(self, storage: StorageBase, occupancy_mapper: OccupancyMapper): diff --git a/tests/test_get_building_occupancy.py b/tests/test_get_building_occupancy.py index d05222b..eca172b 100644 --- a/tests/test_get_building_occupancy.py +++ b/tests/test_get_building_occupancy.py @@ -18,13 +18,13 @@ import pytest -from rabotnikobm.rules.get_building_occupancy import GetBuildingTaxonomy +from rabotnikobm.rules.get_building_occupancy import GetBuildingOccupancy @pytest.mark.requires_storage @pytest.mark.asyncio -async def test_get_building_taxonomy(connected_storage, building_poi_mapper): - rule = GetBuildingTaxonomy(storage=connected_storage, occupancy_mapper=building_poi_mapper) +async def test_rule_get_building_occupancy(connected_storage, building_poi_mapper): + rule = GetBuildingOccupancy(storage=connected_storage, occupancy_mapper=building_poi_mapper) payload = {"building_id": -6744517} result = await rule.evaluate(payload=payload) diff --git a/tests/test_occupancy_mapping.py b/tests/test_occupancy_mapping.py index 4259d88..f010396 
100644 --- a/tests/test_occupancy_mapping.py +++ b/tests/test_occupancy_mapping.py @@ -16,9 +16,18 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see http://www.gnu.org/licenses/. +import pytest from rabotnikobm.occupancy.mapping import group_tags, OverridingOccupancy +@pytest.fixture() +def overriding_occupancies(pytestconfig): + fn_mapping = pytestconfig.rootpath / "data/rules/occupancy/overriding_occupancies.csv" + overriding_occupancies = OverridingOccupancy.from_csv(fn_mapping) + + yield overriding_occupancies + + def test_group_tags(): rows = [ {"key": "a", "value": 0}, @@ -40,10 +49,13 @@ def test_mapper_landuse(landuse_mapper): assert landuse_mapper.apply(sample_tags) == ["EDU3", "UNDECIDABLE"] -def test_occupancy_mapper(pytestconfig): - fn_mapping = pytestconfig.rootpath / "data/rules/occupancy/overriding_occupancies.csv" - overriding_mapping = OverridingOccupancy.from_csv(fn_mapping) - +def test_overriding_occupancy(overriding_occupancies): demo_tags = ["ASS1", "COM10"] - occupancy = overriding_mapping.apply(demo_tags) + occupancy = overriding_occupancies.apply(demo_tags) assert occupancy == "airport" + + +def test_overriding_occupancy_unknown(overriding_occupancies): + demo_tags = ["unknown tag"] + occupancy = overriding_occupancies.apply(demo_tags) + assert occupancy is None -- GitLab From c13648b1426c3de11da5252bb91444956d457633 Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Fri, 13 Aug 2021 09:15:43 +0200 Subject: [PATCH 09/20] Add blank lines after doc-strings --- rabotnikobm/occupancy/mapping.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/rabotnikobm/occupancy/mapping.py b/rabotnikobm/occupancy/mapping.py index fe834e4..f4d9cd9 100644 --- a/rabotnikobm/occupancy/mapping.py +++ b/rabotnikobm/occupancy/mapping.py @@ -23,6 +23,7 @@ from collections import defaultdict def group_tags(occupancy_mapping: list[dict[str, str]]) -> dict[str, dict[str, 
str]]: """Extract groups of identical tags and group them in nested dicts.""" + grouped_tag_mapping = defaultdict(dict) for row in occupancy_mapping: @@ -62,6 +63,7 @@ class OccupancyMapper: @classmethod def read_csv(cls, fn: str) -> list[dict[str, str]]: """Read content from csv into list of dicts.""" + with open(fn) as csvfile: mapping = list(csv.DictReader(csvfile)) @@ -70,6 +72,7 @@ class OccupancyMapper: @classmethod def from_csv(cls, fn: str) -> OccupancyMapper: """Read a csv and initialize a `OccupancyMapper`.""" + occupancy_mapping = cls.read_csv(fn) occupancy_mapping_grouped = group_tags(occupancy_mapping) @@ -88,6 +91,8 @@ class OverridingOccupancy: self.mapping = mapping def apply(self, occupancies: list[str]): + """Apply the loaded mapping to a list of `occupancies`.""" + for candidate, occupancy in self.mapping.items(): if candidate in occupancies: return occupancy @@ -98,6 +103,7 @@ class OverridingOccupancy: @classmethod def read_csv(cls, fn: str) -> dict[str, str]: """Read content from csv into list of dicts.""" + with open(fn) as csvfile: mapping = {k.strip(): v.strip() for (k, v) in csv.reader(csvfile)} @@ -106,6 +112,6 @@ class OverridingOccupancy: @classmethod def from_csv(cls, fn: str) -> OverridingOccupancy: """Read a csv and initialize a `OverridingOccupancy` instance.""" - occupancy_mapping = cls.read_csv(fn) + occupancy_mapping = cls.read_csv(fn) return cls(mapping=occupancy_mapping) -- GitLab From a716baf8d25c0a17ff20c2ba8f846a7e89247ea8 Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Mon, 16 Aug 2021 19:45:41 +0200 Subject: [PATCH 10/20] refactor --- rabotnikobm/rules/gem_occupancy/__init__.py | 0 .../data}/GEM_taxonomy_strings.csv | 0 .../rules/gem_occupancy/data}/README.md | 0 .../data}/building_and_PoIs_tags.csv | 2 +- .../gem_occupancy/data}/landuse_tags.csv | 2 +- .../data}/overriding_occupancies.csv | 0 .../get_building_occupancy.py | 37 ++++++++++++++- .../gem_occupancy}/mapping.py | 46 ++----------------- 
tests/conftest.py | 15 ++++-- ...occupancy.py => test_get_gem_occupancy.py} | 28 +++++++++-- tests/test_occupancy_mapping.py | 23 +--------- 11 files changed, 77 insertions(+), 76 deletions(-) create mode 100644 rabotnikobm/rules/gem_occupancy/__init__.py rename {data/rules/occupancy => rabotnikobm/rules/gem_occupancy/data}/GEM_taxonomy_strings.csv (100%) rename {data/rules/occupancy => rabotnikobm/rules/gem_occupancy/data}/README.md (100%) rename {data/rules/occupancy => rabotnikobm/rules/gem_occupancy/data}/building_and_PoIs_tags.csv (99%) rename {data/rules/occupancy => rabotnikobm/rules/gem_occupancy/data}/landuse_tags.csv (99%) rename {data/rules/occupancy => rabotnikobm/rules/gem_occupancy/data}/overriding_occupancies.csv (100%) rename rabotnikobm/rules/{ => gem_occupancy}/get_building_occupancy.py (58%) rename rabotnikobm/{occupancy => rules/gem_occupancy}/mapping.py (65%) rename tests/{test_get_building_occupancy.py => test_get_gem_occupancy.py} (52%) diff --git a/rabotnikobm/rules/gem_occupancy/__init__.py b/rabotnikobm/rules/gem_occupancy/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/data/rules/occupancy/GEM_taxonomy_strings.csv b/rabotnikobm/rules/gem_occupancy/data/GEM_taxonomy_strings.csv similarity index 100% rename from data/rules/occupancy/GEM_taxonomy_strings.csv rename to rabotnikobm/rules/gem_occupancy/data/GEM_taxonomy_strings.csv diff --git a/data/rules/occupancy/README.md b/rabotnikobm/rules/gem_occupancy/data/README.md similarity index 100% rename from data/rules/occupancy/README.md rename to rabotnikobm/rules/gem_occupancy/data/README.md diff --git a/data/rules/occupancy/building_and_PoIs_tags.csv b/rabotnikobm/rules/gem_occupancy/data/building_and_PoIs_tags.csv similarity index 99% rename from data/rules/occupancy/building_and_PoIs_tags.csv rename to rabotnikobm/rules/gem_occupancy/data/building_and_PoIs_tags.csv index 3cf9471..656f6f7 100644 --- a/data/rules/occupancy/building_and_PoIs_tags.csv +++ 
b/rabotnikobm/rules/gem_occupancy/data/building_and_PoIs_tags.csv @@ -1,4 +1,4 @@ -key,value,taginfo_description,GEM_occupancy,comments +key,value,taginfo_description,occupancy,comments aerialway,station,"A station where passengers and/or goods can enter and/or leave the aerialway (forms of transport that use wires, including cable-cars, chair-lifts and drag-lifts)",COM,COM contains other kinds of transport stations but not one specific for aerialways aeroway,hangar,A large airport building with extensive floor areas for housing aircraft or spacecraft,COM10,Unclear if hangar is treated as standard part of an airport or not aeroway,terminal,An airport passenger building,COM10, diff --git a/data/rules/occupancy/landuse_tags.csv b/rabotnikobm/rules/gem_occupancy/data/landuse_tags.csv similarity index 99% rename from data/rules/occupancy/landuse_tags.csv rename to rabotnikobm/rules/gem_occupancy/data/landuse_tags.csv index 6806b73..f259d8d 100644 --- a/data/rules/occupancy/landuse_tags.csv +++ b/rabotnikobm/rules/gem_occupancy/data/landuse_tags.csv @@ -1,4 +1,4 @@ -key,value,taginfo_description,GEM_occupancy,comments +key,value,taginfo_description,occupancy,comments amenity,university,"An educational institution designed for instruction, examination, or both, of students in many branches of advanced learning.",EDU3, amenity,school,A primary or secondary school (pupils typically aged 6 to 18).,EDU2, amenity,college,"A place for further education, a post-secondary education institution which is not a University",EDU3, diff --git a/data/rules/occupancy/overriding_occupancies.csv b/rabotnikobm/rules/gem_occupancy/data/overriding_occupancies.csv similarity index 100% rename from data/rules/occupancy/overriding_occupancies.csv rename to rabotnikobm/rules/gem_occupancy/data/overriding_occupancies.csv diff --git a/rabotnikobm/rules/get_building_occupancy.py b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py similarity index 58% rename from 
rabotnikobm/rules/get_building_occupancy.py rename to rabotnikobm/rules/gem_occupancy/get_building_occupancy.py index ae79583..a454c2b 100644 --- a/rabotnikobm/rules/get_building_occupancy.py +++ b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py @@ -16,15 +16,48 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see http://www.gnu.org/licenses/. +from __future__ import annotations + import logging +import csv +from typing import Optional from rabotnik import Rule from rabotnik.storages.base import StorageBase -from rabotnikobm.occupancy.mapping import OccupancyMapper +from rabotnikobm.rules.gem_occupancy.mapping import OccupancyMapper logger = logging.getLogger() +class OverridingOccupancy: + + """Takes precedence over other mappings. + + If `OverridingOccupancy.apply` returns a result this will be the designated occupancy. + Otherwise will return None. + """ + + def __init__(self, mapping): + self.mapping = mapping + + def apply(self, occupancies: list[str]) -> Optional[str]: + """Apply the loaded mapping to a list of `occupancies`.""" + + occupancies = set(occupancies) + for candidate in self.mapping.keys(): + if candidate in occupancies: + return candidate + + @classmethod + def from_csv(cls, fn: str) -> OverridingOccupancy: + """Read a csv and initialize a `OverridingOccupancy` instance.""" + + with open(fn) as csvfile: + occupancy_mapping = {k.strip(): v.strip() for (k, v) in csv.reader(csvfile)} + + return cls(mapping=occupancy_mapping) + + class GetBuildingOccupancy(Rule): """A rule to map OSM tags to building occupancies.""" @@ -32,7 +65,7 @@ class GetBuildingOccupancy(Rule): self.storage = storage self.occupancy_mapper = occupancy_mapper - async def evaluate(self, payload: dict): + async def evaluate(self, payload: dict) -> list[str]: building_id = payload["building_id"] logger.debug("Processing building: %s", building_id) tags = await self.storage.expect_one( diff --git 
a/rabotnikobm/occupancy/mapping.py b/rabotnikobm/rules/gem_occupancy/mapping.py similarity index 65% rename from rabotnikobm/occupancy/mapping.py rename to rabotnikobm/rules/gem_occupancy/mapping.py index f4d9cd9..29b12a7 100644 --- a/rabotnikobm/occupancy/mapping.py +++ b/rabotnikobm/rules/gem_occupancy/mapping.py @@ -36,19 +36,19 @@ def group_tags(occupancy_mapping: list[dict[str, str]]) -> dict[str, dict[str, s class OccupancyMapper: - """Map osm tags to `GEM_occupancy` categories.""" + """Map osm tags to `occupancy` categories.""" def __init__(self, mapping): self.mapping = mapping def apply(self, osm_tags: dict[dict[str, str]]) -> list[str]: - """Map `osm_tags` to lists of GEM_occupancy as defined in + """Map `osm_tags` to lists of occupancies as defined in building_and_POIs_tags.csv and landuse_tags.csv. Tags may be duplicated. Args: osm_tags: list of OSM building tag strings associated with a specific building Returns: - list of `GEM_occupancy`s + list of `occupancy`s """ occupancies = [] @@ -56,7 +56,7 @@ class OccupancyMapper: for key, value in osm_tag.items(): occupancy = self.mapping.get(key, {}).get(value, None) if occupancy is not None: - occupancies.append(occupancy["GEM_occupancy"]) + occupancies.append(occupancy["occupancy"]) return occupancies @@ -77,41 +77,3 @@ class OccupancyMapper: occupancy_mapping_grouped = group_tags(occupancy_mapping) return cls(mapping=occupancy_mapping_grouped) - - -class OverridingOccupancy: - - """Takes precedence over other mappings. - - If `OverridingOccupancy.apply` returns a result this will be the designated occupancy. - Otherwise will return None. 
- """ - - def __init__(self, mapping): - self.mapping = mapping - - def apply(self, occupancies: list[str]): - """Apply the loaded mapping to a list of `occupancies`.""" - - for candidate, occupancy in self.mapping.items(): - if candidate in occupancies: - return occupancy - - else: - return None - - @classmethod - def read_csv(cls, fn: str) -> dict[str, str]: - """Read content from csv into list of dicts.""" - - with open(fn) as csvfile: - mapping = {k.strip(): v.strip() for (k, v) in csv.reader(csvfile)} - - return mapping - - @classmethod - def from_csv(cls, fn: str) -> OverridingOccupancy: - """Read a csv and initialize a `OverridingOccupancy` instance.""" - - occupancy_mapping = cls.read_csv(fn) - return cls(mapping=occupancy_mapping) diff --git a/tests/conftest.py b/tests/conftest.py index b99a082..c45e765 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,7 @@ import logging import pytest import rabotnik -from rabotnikobm.occupancy.mapping import OccupancyMapper +from rabotnikobm.rules.gem_occupancy.mapping import OccupancyMapper logger = logging.getLogger(__name__) @@ -51,15 +51,20 @@ def storage_consumer(pytestconfig): @pytest.fixture -def building_poi_mapper(pytestconfig): - fn_mapping = pytestconfig.rootpath / "data/rules/occupancy/building_and_PoIs_tags.csv" +def gem_data_path(pytestconfig): + yield pytestconfig.rootpath / "rabotnikobm/rules/gem_occupancy/data" + + +@pytest.fixture +def building_poi_mapper(gem_data_path): + fn_mapping = gem_data_path / "building_and_PoIs_tags.csv" mapper = OccupancyMapper.from_csv(fn_mapping) yield mapper @pytest.fixture -def landuse_mapper(pytestconfig): - fn_mapping = pytestconfig.rootpath / "data/rules/occupancy/landuse_tags.csv" +def landuse_mapper(gem_data_path): + fn_mapping = gem_data_path / "landuse_tags.csv" mapper = OccupancyMapper.from_csv(fn_mapping) yield mapper diff --git a/tests/test_get_building_occupancy.py b/tests/test_get_gem_occupancy.py similarity index 52% rename from 
tests/test_get_building_occupancy.py rename to tests/test_get_gem_occupancy.py index eca172b..fa977d8 100644 --- a/tests/test_get_building_occupancy.py +++ b/tests/test_get_gem_occupancy.py @@ -17,15 +17,37 @@ # along with this program. If not, see http://www.gnu.org/licenses/. import pytest +from rabotnikobm.rules.gem_occupancy.get_building_occupancy import ( + OverridingOccupancy, + GetBuildingOccupancy, +) -from rabotnikobm.rules.get_building_occupancy import GetBuildingOccupancy + +@pytest.fixture() +def overriding_occupancies(gem_data_path): + fn_mapping = gem_data_path / "overriding_occupancies.csv" + overriding_occupancies = OverridingOccupancy.from_csv(fn_mapping) + + yield overriding_occupancies @pytest.mark.requires_storage @pytest.mark.asyncio -async def test_rule_get_building_occupancy(connected_storage, building_poi_mapper): - rule = GetBuildingOccupancy(storage=connected_storage, occupancy_mapper=building_poi_mapper) +async def test_rule_get_building_occupancy(storage_consumer, building_poi_mapper): + rule = GetBuildingOccupancy(storage=storage_consumer, occupancy_mapper=building_poi_mapper) payload = {"building_id": -6744517} result = await rule.evaluate(payload=payload) assert result == ["ASS4", "UNDECIDABLE"] + + +def test_overriding_occupancy(overriding_occupancies): + demo_tags = ["ASS1", "COM10"] + occupancy = overriding_occupancies.apply(demo_tags) + assert occupancy == "COM10" + + +def test_overriding_occupancy_unknown(overriding_occupancies): + demo_tags = ["unknown tag"] + occupancy = overriding_occupancies.apply(demo_tags) + assert occupancy is None diff --git a/tests/test_occupancy_mapping.py b/tests/test_occupancy_mapping.py index f010396..d6b5946 100644 --- a/tests/test_occupancy_mapping.py +++ b/tests/test_occupancy_mapping.py @@ -16,16 +16,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see http://www.gnu.org/licenses/. 
-import pytest -from rabotnikobm.occupancy.mapping import group_tags, OverridingOccupancy - - -@pytest.fixture() -def overriding_occupancies(pytestconfig): - fn_mapping = pytestconfig.rootpath / "data/rules/occupancy/overriding_occupancies.csv" - overriding_occupancies = OverridingOccupancy.from_csv(fn_mapping) - - yield overriding_occupancies +from rabotnikobm.rules.gem_occupancy.mapping import group_tags def test_group_tags(): @@ -47,15 +38,3 @@ def test_mapper_building_pois(building_poi_mapper): def test_mapper_landuse(landuse_mapper): sample_tags = [{"amenity": "university", "a": "b"}, {"landuse": "brownfield"}, {"x": "y"}] assert landuse_mapper.apply(sample_tags) == ["EDU3", "UNDECIDABLE"] - - -def test_overriding_occupancy(overriding_occupancies): - demo_tags = ["ASS1", "COM10"] - occupancy = overriding_occupancies.apply(demo_tags) - assert occupancy == "airport" - - -def test_overriding_occupancy_unknown(overriding_occupancies): - demo_tags = ["unknown tag"] - occupancy = overriding_occupancies.apply(demo_tags) - assert occupancy is None -- GitLab From 838b17b17f23145c713c6530cc1b7a58165e72c5 Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Fri, 20 Aug 2021 01:54:37 +0200 Subject: [PATCH 11/20] add rules --- .../gem_occupancy/get_building_occupancy.py | 20 ++++- rabotnikobm/rules/gem_occupancy/mapping.py | 88 ++++++++++++++++++- tests/test_get_gem_occupancy.py | 15 ++++ tests/test_occupancy_mapping.py | 32 ++++++- 4 files changed, 149 insertions(+), 6 deletions(-) diff --git a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py index a454c2b..c67289b 100644 --- a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py +++ b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py @@ -17,18 +17,22 @@ # along with this program. If not, see http://www.gnu.org/licenses/. 
from __future__ import annotations - import logging import csv from typing import Optional from rabotnik import Rule from rabotnik.storages.base import StorageBase -from rabotnikobm.rules.gem_occupancy.mapping import OccupancyMapper +from rabotnikobm.rules.gem_occupancy.mapping import OccupancyMapper, TagStatistics, GEMTag logger = logging.getLogger() +class TagResult(Exception): + def __init__(self, tag: GEMTag): + self.tag = tag + + class OverridingOccupancy: """Takes precedence over other mappings. @@ -58,6 +62,18 @@ class OverridingOccupancy: return cls(mapping=occupancy_mapping) +def check_exactly_one_unique_tag(occupancies: TagStatistics): + """raises `TagResult` if there is exactly one unique type""" + if occupancies.exactly_one_unique_tag: + raise TagResult(occupancies.tags[0]) + + +def check_one_unique_sub_type(occupancies: TagStatistics): + if occupancies.exactly_one_unique_type() and occupancies.number_of_sub_groups == 1: + # TODO: find raise TagResult(occupancies....) + raise TagResult("ASDASDF") + + class GetBuildingOccupancy(Rule): """A rule to map OSM tags to building occupancies.""" diff --git a/rabotnikobm/rules/gem_occupancy/mapping.py b/rabotnikobm/rules/gem_occupancy/mapping.py index 29b12a7..5716200 100644 --- a/rabotnikobm/rules/gem_occupancy/mapping.py +++ b/rabotnikobm/rules/gem_occupancy/mapping.py @@ -16,9 +16,91 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see http://www.gnu.org/licenses/. 
from __future__ import annotations - +import re import csv -from collections import defaultdict +from collections import defaultdict, namedtuple, Counter +from dataclasses import dataclass +from typing import Optional + + +# Regular expression to extract group (0-N characters at beginning) +# and sub_group (0-N digits at end) of GEM tags +REGEX_GEM_GROUP = r"(?P<group>^[A-Z]*)(?P<sub_group>[0-9]*$)" + + +@dataclass +class GEMTag: + """Represents a single GEM classification + + Args: + group (str): multi-character GEM classifier + sub_group (int): multi-digit GEM classifier sub-group + """ + + group: str + sub_group: Optional[int] + + @classmethod + def from_string(cls, tag_as_string: str) -> GEMTag: + """Instantiate `GEMClassification`s from GEM occupancy string.""" + matched = re.search(REGEX_GEM_GROUP, tag_as_string, re.IGNORECASE) + + group = str(matched.group("group")) + assert len(group) > 0, f"could not extract a GEM group from {tag_as_string}" + + sub_group = matched.group("sub_group") + sub_group = int(sub_group) if sub_group != "" else None + + return cls(group, sub_group) + + def __hash__(self): + return hash((self.group, self.sub_group)) + + +class TagStatistics( + namedtuple( + "TagStatistics", + [ + "tags", + "tags_counter", + "types_counter", + "subtypes_counter", + ], + ) +): + def exactly_one_unique_tag(self) -> bool: + return self.number_of_unique_tags == 1 + + @property + def number_of_unique_tags(self) -> int: + return len(self.tags_counter) + + @property + def number_of_unique_types(self) -> int: + return len(self.tags_counter) + + @property + def number_of_unique_subtypes(self) -> int: + return len(self.subtypes_counter) + + @classmethod + def from_tags_string(cls, tags: str): + """Analyse a string containing tags separated by `|` and do statistics.""" + tags = [GEMTag.from_string(tag) for tag in tags.split("|")] + tags_counter = Counter(tags) + + types = [tag.group for tag in tags] + types_counter = Counter(types) + + subtypes = [tag.sub_group for tag in tags] 
+ subtypes_counter = Counter(subtypes) + + return cls( + tags=tags, + tags_counter=tags_counter, + types_counter=types_counter, + subtypes_counter=subtypes_counter, + ) def group_tags(occupancy_mapping: list[dict[str, str]]) -> dict[str, dict[str, str]]: @@ -43,7 +125,7 @@ class OccupancyMapper: def apply(self, osm_tags: dict[dict[str, str]]) -> list[str]: """Map `osm_tags` to lists of occupancies as defined in - building_and_POIs_tags.csv and landuse_tags.csv. Tags may be duplicated. + `building_and_POIs_tags.csv` and `landuse_tags.csv`. Tags may be duplicated. Args: osm_tags: list of OSM building tag strings associated with a specific building diff --git a/tests/test_get_gem_occupancy.py b/tests/test_get_gem_occupancy.py index fa977d8..6693b37 100644 --- a/tests/test_get_gem_occupancy.py +++ b/tests/test_get_gem_occupancy.py @@ -20,6 +20,9 @@ import pytest from rabotnikobm.rules.gem_occupancy.get_building_occupancy import ( OverridingOccupancy, GetBuildingOccupancy, + check_exactly_one_unique_tag, + TagStatistics, + TagResult, ) @@ -42,12 +45,24 @@ async def test_rule_get_building_occupancy(storage_consumer, building_poi_mapper def test_overriding_occupancy(overriding_occupancies): + """Rule #1""" demo_tags = ["ASS1", "COM10"] occupancy = overriding_occupancies.apply(demo_tags) assert occupancy == "COM10" def test_overriding_occupancy_unknown(overriding_occupancies): + """Rule #1""" demo_tags = ["unknown tag"] occupancy = overriding_occupancies.apply(demo_tags) assert occupancy is None + + +def test_unique_tags(): + """Rule #2""" + tags = TagStatistics.from_tags_string("COM|COM") + + with pytest.raises(TagResult) as e: + check_exactly_one_unique_tag(tags) + + assert e.value.tag.group == "COM" diff --git a/tests/test_occupancy_mapping.py b/tests/test_occupancy_mapping.py index d6b5946..faa9fad 100644 --- a/tests/test_occupancy_mapping.py +++ b/tests/test_occupancy_mapping.py @@ -16,7 +16,37 @@ # You should have received a copy of the GNU Affero General Public 
License # along with this program. If not, see http://www.gnu.org/licenses/. -from rabotnikobm.rules.gem_occupancy.mapping import group_tags +from rabotnikobm.rules.gem_occupancy.mapping import group_tags, GEMTag, TagStatistics +import pytest + + +def test_tag_statistics(): + stats = TagStatistics.from_tags_string("COM|COM") + assert stats.exactly_one_unique_tag() is True + + stats = TagStatistics.from_tags_string("COM|ASS") + assert stats.exactly_one_unique_tag() is False + + +def test_tag_hash(): + assert hash(GEMTag.from_string("COM")) == hash(GEMTag.from_string("COM")) + assert hash(GEMTag.from_string("COM1")) != hash(GEMTag.from_string("COM")) + + +def test_gem_classification_parser(): + assert GEMTag.from_string("COM").group == "COM" + assert GEMTag.from_string("COM").sub_group is None + + assert GEMTag.from_string("COM1").group == "COM" + assert GEMTag.from_string("COM1").sub_group == 1 + + assert GEMTag.from_string("COM11").sub_group == 11 + + assert GEMTag.from_string("UNDECIDABLE").group == "UNDECIDABLE" + assert GEMTag.from_string("UNDECIDABLE").sub_group is None + + with pytest.raises(AssertionError): + GEMTag.from_string("1") def test_group_tags(): -- GitLab From 6b072fa583312bd25697a5d9b868551279cdd6c4 Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Mon, 23 Aug 2021 23:00:44 +0200 Subject: [PATCH 12/20] refactor --- .../gem_occupancy/get_building_occupancy.py | 12 +++++---- rabotnikobm/rules/gem_occupancy/mapping.py | 25 +++++++++++++------ tests/test_get_gem_occupancy.py | 7 +++--- tests/test_occupancy_mapping.py | 5 ++-- 4 files changed, 31 insertions(+), 18 deletions(-) diff --git a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py index c67289b..70eae5c 100644 --- a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py +++ b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py @@ -29,6 +29,9 @@ logger = logging.getLogger() class TagResult(Exception): + + """Raise 
this exception with the identified occupancy""" + def __init__(self, tag: GEMTag): self.tag = tag @@ -44,13 +47,12 @@ class OverridingOccupancy: def __init__(self, mapping): self.mapping = mapping - def apply(self, occupancies: list[str]) -> Optional[str]: + def apply(self, occupancies: TagStatistics) -> None: """Apply the loaded mapping to a list of `occupancies`.""" - occupancies = set(occupancies) for candidate in self.mapping.keys(): - if candidate in occupancies: - return candidate + if candidate in occupancies.tags: + raise TagResult(GEMTag(candidate)) @classmethod def from_csv(cls, fn: str) -> OverridingOccupancy: @@ -64,7 +66,7 @@ class OverridingOccupancy: def check_exactly_one_unique_tag(occupancies: TagStatistics): """raises `TagResult` if there is exactly one unique type""" - if occupancies.exactly_one_unique_tag: + if occupancies.exactly_one_unique_tag(): raise TagResult(occupancies.tags[0]) diff --git a/rabotnikobm/rules/gem_occupancy/mapping.py b/rabotnikobm/rules/gem_occupancy/mapping.py index 5716200..fad02cd 100644 --- a/rabotnikobm/rules/gem_occupancy/mapping.py +++ b/rabotnikobm/rules/gem_occupancy/mapping.py @@ -23,9 +23,10 @@ from dataclasses import dataclass from typing import Optional -# Regular expression to extract group (0-N characters at beginning) -# and sub_group (0-N digits at end) of GEM tags -REGEX_GEM_GROUP = r"(?P<group>^[A-Z]*)(?P<sub_group>[0-9]*$)" +# Regular expression to extract group (0-N characters at beginning), +# sub_group (0-N digits) and sub_sub_group (0-N characters and end) of GEM tags +# Can be letters-number-letters(-numbers) +REGEX_GEM_GROUP = r"(?P<group>^[A-Z]*)(?P<sub_group>[0-9]*)(?P<sub_sub_group>[A-Z]*$)" @dataclass @@ -35,10 +36,12 @@ class GEMTag: Args: group (str): multi-character GEM classifier sub_group (int): multi-digit GEM classifier sub-group + sub_sub_group (str): multi-char GEM classifier sub-sub-group """ group: str sub_group: Optional[int] + sub_sub_group: Optional[str] @classmethod def from_string(cls, tag_as_string: str) -> GEMTag: @@ -51,10 
+54,13 @@ class GEMTag: sub_group = matched.group("sub_group") sub_group = int(sub_group) if sub_group != "" else None - return cls(group, sub_group) + sub_sub_group = matched.group("sub_sub_group") + sub_sub_group = sub_sub_group if sub_sub_group != "" else None + + return cls(group, sub_group, sub_sub_group) def __hash__(self): - return hash((self.group, self.sub_group)) + return hash((self.group, self.sub_group, self.sub_sub_group)) class TagStatistics( @@ -84,9 +90,12 @@ class TagStatistics( return len(self.subtypes_counter) @classmethod - def from_tags_string(cls, tags: str): - """Analyse a string containing tags separated by `|` and do statistics.""" - tags = [GEMTag.from_string(tag) for tag in tags.split("|")] + def from_strings(cls, tags: list[str]) -> TagStatistics: + tags = [GEMTag.from_string(tag_as_string) for tag_as_string in tags] + return cls.from_tags(tags) + + @classmethod + def from_tags(cls, tags: list[GEMTag]) -> TagStatistics: tags_counter = Counter(tags) types = [tag.group for tag in tags] diff --git a/tests/test_get_gem_occupancy.py b/tests/test_get_gem_occupancy.py index 6693b37..18b171a 100644 --- a/tests/test_get_gem_occupancy.py +++ b/tests/test_get_gem_occupancy.py @@ -24,6 +24,7 @@ from rabotnikobm.rules.gem_occupancy.get_building_occupancy import ( TagStatistics, TagResult, ) +from rabotnikobm.rules.gem_occupancy.mapping import GEMTag @pytest.fixture() @@ -46,22 +47,22 @@ async def test_rule_get_building_occupancy(storage_consumer, building_poi_mapper def test_overriding_occupancy(overriding_occupancies): """Rule #1""" - demo_tags = ["ASS1", "COM10"] + demo_tags = TagStatistics.from_strings(["ASS1", "COM10"]) occupancy = overriding_occupancies.apply(demo_tags) assert occupancy == "COM10" def test_overriding_occupancy_unknown(overriding_occupancies): """Rule #1""" - demo_tags = ["unknown tag"] + demo_tags = TagStatistics.from_strings(["unknown tag"]) occupancy = overriding_occupancies.apply(demo_tags) assert occupancy is None def 
test_unique_tags(): """Rule #2""" - tags = TagStatistics.from_tags_string("COM|COM") + tags = TagStatistics.from_tags([GEMTag.from_string("COM"), GEMTag.from_string("COM")]) with pytest.raises(TagResult) as e: check_exactly_one_unique_tag(tags) diff --git a/tests/test_occupancy_mapping.py b/tests/test_occupancy_mapping.py index faa9fad..9a02024 100644 --- a/tests/test_occupancy_mapping.py +++ b/tests/test_occupancy_mapping.py @@ -21,10 +21,10 @@ import pytest def test_tag_statistics(): - stats = TagStatistics.from_tags_string("COM|COM") + stats = TagStatistics.from_tags([GEMTag.from_string("COM"), GEMTag.from_string("COM")]) assert stats.exactly_one_unique_tag() is True - stats = TagStatistics.from_tags_string("COM|ASS") + stats = TagStatistics.from_tags([GEMTag.from_string("COM"), GEMTag.from_string("COM1")]) assert stats.exactly_one_unique_tag() is False @@ -41,6 +41,7 @@ def test_gem_classification_parser(): assert GEMTag.from_string("COM1").sub_group == 1 assert GEMTag.from_string("COM11").sub_group == 11 + assert GEMTag.from_string("COM11A").sub_sub_group == "A" assert GEMTag.from_string("UNDECIDABLE").group == "UNDECIDABLE" assert GEMTag.from_string("UNDECIDABLE").sub_group is None -- GitLab From 5f4a1124c96a369f8b24bff075e41ac9d009d7e1 Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Thu, 9 Sep 2021 17:08:21 +0200 Subject: [PATCH 13/20] fix tests --- rabotnikobm/rules/gem_occupancy/get_building_occupancy.py | 5 +++-- tests/test_get_gem_occupancy.py | 8 ++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py index 70eae5c..217efbe 100644 --- a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py +++ b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py @@ -19,7 +19,6 @@ from __future__ import annotations import logging import csv -from typing import Optional from rabotnik import Rule from 
rabotnik.storages.base import StorageBase @@ -52,7 +51,7 @@ class OverridingOccupancy: for candidate in self.mapping.keys(): if candidate in occupancies.tags: - raise TagResult(GEMTag(candidate)) + raise TagResult(candidate) @classmethod def from_csv(cls, fn: str) -> OverridingOccupancy: @@ -61,6 +60,8 @@ class OverridingOccupancy: with open(fn) as csvfile: occupancy_mapping = {k.strip(): v.strip() for (k, v) in csv.reader(csvfile)} + occupancy_mapping = {GEMTag.from_string(k): v for k, v in occupancy_mapping.items()} + return cls(mapping=occupancy_mapping) diff --git a/tests/test_get_gem_occupancy.py b/tests/test_get_gem_occupancy.py index 18b171a..c219156 100644 --- a/tests/test_get_gem_occupancy.py +++ b/tests/test_get_gem_occupancy.py @@ -48,8 +48,12 @@ async def test_rule_get_building_occupancy(storage_consumer, building_poi_mapper def test_overriding_occupancy(overriding_occupancies): """Rule #1""" demo_tags = TagStatistics.from_strings(["ASS1", "COM10"]) - occupancy = overriding_occupancies.apply(demo_tags) - assert occupancy == "COM10" + try: + occupancy = overriding_occupancies.apply(demo_tags) + except TagResult as e: + occupancy = e.tag + + assert occupancy == GEMTag.from_string("COM10") def test_overriding_occupancy_unknown(overriding_occupancies): -- GitLab From 4edb8fde6af0cbbd8989f32b8ef144be924bc322 Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Thu, 16 Sep 2021 16:50:01 +0200 Subject: [PATCH 14/20] fix tags --- rabotnikobm/rules/gem_occupancy/mapping.py | 3 ++- tests/test_get_gem_occupancy.py | 8 +++----- tests/test_occupancy_mapping.py | 5 +++++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/rabotnikobm/rules/gem_occupancy/mapping.py b/rabotnikobm/rules/gem_occupancy/mapping.py index fad02cd..8ff1302 100644 --- a/rabotnikobm/rules/gem_occupancy/mapping.py +++ b/rabotnikobm/rules/gem_occupancy/mapping.py @@ -47,9 +47,10 @@ class GEMTag: def from_string(cls, tag_as_string: str) -> GEMTag: """Instantiate `GEMClassification`s 
from GEM occupancy string.""" matched = re.search(REGEX_GEM_GROUP, tag_as_string, re.IGNORECASE) + assert matched is not None, f"could not convert '{tag_as_string}' to GEMTag" group = str(matched.group("group")) - assert len(group) > 0, f"could not extract a GEM group from {tag_as_string}" + assert group, f"could not extract `group` from {tag_as_string}" sub_group = matched.group("sub_group") sub_group = int(sub_group) if sub_group != "" else None diff --git a/tests/test_get_gem_occupancy.py b/tests/test_get_gem_occupancy.py index c219156..b166f0b 100644 --- a/tests/test_get_gem_occupancy.py +++ b/tests/test_get_gem_occupancy.py @@ -56,11 +56,9 @@ def test_overriding_occupancy(overriding_occupancies): assert occupancy == GEMTag.from_string("COM10") -def test_overriding_occupancy_unknown(overriding_occupancies): - """Rule #1""" - demo_tags = TagStatistics.from_strings(["unknown tag"]) - occupancy = overriding_occupancies.apply(demo_tags) - assert occupancy is None +def test_overriding_occupancy_unknown(): + with pytest.raises(AssertionError): + TagStatistics.from_strings(["unknown tag"]) def test_unique_tags(): diff --git a/tests/test_occupancy_mapping.py b/tests/test_occupancy_mapping.py index 9a02024..c08b3a8 100644 --- a/tests/test_occupancy_mapping.py +++ b/tests/test_occupancy_mapping.py @@ -47,6 +47,11 @@ def test_gem_classification_parser(): assert GEMTag.from_string("UNDECIDABLE").sub_group is None with pytest.raises(AssertionError): + # Tag must not contain whitespace + GEMTag.from_string("unknown tag") + + with pytest.raises(AssertionError): + # Tag must start with character (group) GEMTag.from_string("1") -- GitLab From 3dce5c7b0b5a626c06ef553a8bf45c9cea3196df Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Fri, 15 Oct 2021 18:38:52 +0200 Subject: [PATCH 15/20] fix rules 1 and 2 --- rabotnikobm/instance.py | 20 ++++- .../gem_occupancy/get_building_occupancy.py | 24 ++++-- rabotnikobm/rules/gem_occupancy/mapping.py | 78 +++++++++++++++---- 
tests/test_get_gem_occupancy.py | 17 +++- tests/test_occupancy_mapping.py | 29 ++++++- 5 files changed, 141 insertions(+), 27 deletions(-) diff --git a/rabotnikobm/instance.py b/rabotnikobm/instance.py index 6165d37..0dc7087 100644 --- a/rabotnikobm/instance.py +++ b/rabotnikobm/instance.py @@ -61,12 +61,30 @@ async def start_rabotnik_obm(message_bus): await message_bus.subscribe("building", rules.run) +async def start_rabotnik_gem_occupancy( + message_bus, storage_consumer, storage_contributor, n_processes_max +): + + rules = [ + GetBuilding(storage_consumer, storage_contributor), + GetFloorspace(storage_consumer, storage_contributor), + GetBuildingLandUse(storage_consumer), + GetPointsInBuilding(storage_consumer), + ] + rules = Assembly(rules=rules, n_processes_max=n_processes_max) + + await message_bus.subscribe("building", rules.run) + + async def start_rabotnik(args): logger.info("start rabotnik") message_bus = await connected_message_bus(args.config_message_bus) - await start_rabotnik_obm(message_bus) + # await start_rabotnik_obm( + # message_bus, storage_consumer, storage_contributor, args.n_processes_max + # ) + await def main(): diff --git a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py index 217efbe..f29823f 100644 --- a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py +++ b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py @@ -17,18 +17,19 @@ # along with this program. If not, see http://www.gnu.org/licenses/. 
from __future__ import annotations -import logging + import csv +import logging from rabotnik import Rule from rabotnik.storages.base import StorageBase + from rabotnikobm.rules.gem_occupancy.mapping import OccupancyMapper, TagStatistics, GEMTag logger = logging.getLogger() class TagResult(Exception): - """Raise this exception with the identified occupancy""" def __init__(self, tag: GEMTag): @@ -36,7 +37,6 @@ class TagResult(Exception): class OverridingOccupancy: - """Takes precedence over other mappings. If `OverridingOccupancy.apply` returns a result this will be the designated occupancy. @@ -66,15 +66,23 @@ class OverridingOccupancy: def check_exactly_one_unique_tag(occupancies: TagStatistics): - """raises `TagResult` if there is exactly one unique type""" + """raises `TagResult` if there is exactly one unique group""" if occupancies.exactly_one_unique_tag(): raise TagResult(occupancies.tags[0]) -def check_one_unique_sub_type(occupancies: TagStatistics): - if occupancies.exactly_one_unique_type() and occupancies.number_of_sub_groups == 1: - # TODO: find raise TagResult(occupancies....) 
- raise TagResult("ASDASDF") +def check_one_unique_sub_group(occupancies: TagStatistics): + print(occupancies) + print(occupancies.exactly_one_unique_tag()) + print(occupancies.number_of_unique_groups) + if occupancies.number_of_unique_groups == 1 and occupancies.number_of_unique_subgroups <= 2: + raise TagResult( + GEMTag( + group=occupancies.unique_group, + sub_group=occupancies.unique_sub_group, + sub_sub_group=occupancies.unique_sub_sub_group, + ) + ) class GetBuildingOccupancy(Rule): diff --git a/rabotnikobm/rules/gem_occupancy/mapping.py b/rabotnikobm/rules/gem_occupancy/mapping.py index 8ff1302..02db7be 100644 --- a/rabotnikobm/rules/gem_occupancy/mapping.py +++ b/rabotnikobm/rules/gem_occupancy/mapping.py @@ -29,6 +29,10 @@ from typing import Optional REGEX_GEM_GROUP = r"(?P^[A-Z]*)(?P[0-9]*)(?P[A-Z]*$)" +class GEMTagException(Exception): + pass + + @dataclass class GEMTag: """Represents a single GEM classification @@ -70,25 +74,69 @@ class TagStatistics( [ "tags", "tags_counter", - "types_counter", - "subtypes_counter", + "groups_counter", + "subgroups_counter", + "subsubgroups_counter", ], ) ): def exactly_one_unique_tag(self) -> bool: return self.number_of_unique_tags == 1 + @property + def subsubgroups(self): + """list of available sub_sub_groups without None""" + return list(filter(None, self.subsubgroups_counter.keys())) + + @property + def subgroups(self): + """list of available sub_groups without None""" + return list(filter(None, self.subgroups_counter.keys())) + @property def number_of_unique_tags(self) -> int: - return len(self.tags_counter) + return len(set(self.tags_counter)) @property - def number_of_unique_types(self) -> int: - return len(self.tags_counter) + def number_of_unique_groups(self) -> int: + return len(list(self.groups_counter)) @property - def number_of_unique_subtypes(self) -> int: - return len(self.subtypes_counter) + def number_of_unique_subgroups(self) -> int: + return len(self.subgroups_counter) + + @property + def 
unique_group(self) -> str: + """Get the unique group ID. In case of multiple available groups, raise GEMTagException.""" + + if self.number_of_unique_groups > 1: + raise GEMTagException(f"{self} has more than unique group.") + + return list(self.groups_counter.keys())[0] + + @property + def unique_sub_group(self) -> Optional[int]: + """Get the unique group ID. In case of multiple available groups, raise GEMTagException.""" + + if len(self.subgroups) > 1: + raise GEMTagException(f"{self} has more than unique sub_group.") + + if len(self.subgroups) == 0: + return None + + return self.subgroups[0] + + @property + def unique_sub_sub_group(self) -> Optional[str]: + """Get the unique group ID. In case of multiple available groups, raise GEMTagException.""" + + if len(self.subsubgroups) > 1: + raise GEMTagException(f"{self} has more than unique sub_sub_group.") + + if len(self.subsubgroups) == 0: + return None + + return self.subsubgroups[0] @classmethod def from_strings(cls, tags: list[str]) -> TagStatistics: @@ -99,17 +147,21 @@ class TagStatistics( def from_tags(cls, tags: list[GEMTag]) -> TagStatistics: tags_counter = Counter(tags) - types = [tag.group for tag in tags] - types_counter = Counter(types) + groups = [tag.group for tag in tags] + groups_counter = Counter(groups) + + subgroups = [tag.sub_group for tag in tags] + subgroups_counter = Counter(subgroups) - subtypes = [tag.sub_group for tag in tags] - subtypes_counter = Counter(subtypes) + subsubgroups = [tag.sub_sub_group for tag in tags] + subsubgroups_counter = Counter(subsubgroups) return cls( tags=tags, tags_counter=tags_counter, - types_counter=types_counter, - subtypes_counter=subtypes_counter, + groups_counter=groups_counter, + subgroups_counter=subgroups_counter, + subsubgroups_counter=subsubgroups_counter, ) diff --git a/tests/test_get_gem_occupancy.py b/tests/test_get_gem_occupancy.py index b166f0b..872733a 100644 --- a/tests/test_get_gem_occupancy.py +++ b/tests/test_get_gem_occupancy.py @@ -23,6 
+23,7 @@ from rabotnikobm.rules.gem_occupancy.get_building_occupancy import ( check_exactly_one_unique_tag, TagStatistics, TagResult, + check_one_unique_sub_group, ) from rabotnikobm.rules.gem_occupancy.mapping import GEMTag @@ -57,15 +58,27 @@ def test_overriding_occupancy(overriding_occupancies): def test_overriding_occupancy_unknown(): + """Test Rule #1 """ with pytest.raises(AssertionError): TagStatistics.from_strings(["unknown tag"]) def test_unique_tags(): - """Rule #2""" + """Rule""" - tags = TagStatistics.from_tags([GEMTag.from_string("COM"), GEMTag.from_string("COM")]) + tags = TagStatistics.from_strings(["COM", "COM"]) with pytest.raises(TagResult) as e: check_exactly_one_unique_tag(tags) assert e.value.tag.group == "COM" + + +def test_rule2(): + """Rule #2""" + + tags = TagStatistics.from_strings(["RES", "RES1"]) + with pytest.raises(TagResult) as e: + check_one_unique_sub_group(tags) + + assert e.value.tag.group == "RES" + assert e.value.tag.sub_group == 1 diff --git a/tests/test_occupancy_mapping.py b/tests/test_occupancy_mapping.py index c08b3a8..4cbb5f9 100644 --- a/tests/test_occupancy_mapping.py +++ b/tests/test_occupancy_mapping.py @@ -16,18 +16,41 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see http://www.gnu.org/licenses/. 
-from rabotnikobm.rules.gem_occupancy.mapping import group_tags, GEMTag, TagStatistics +from rabotnikobm.rules.gem_occupancy.mapping import ( + group_tags, + GEMTag, + TagStatistics, + GEMTagException, +) import pytest def test_tag_statistics(): - stats = TagStatistics.from_tags([GEMTag.from_string("COM"), GEMTag.from_string("COM")]) + stats = TagStatistics.from_strings(["COM", "COM"]) assert stats.exactly_one_unique_tag() is True - stats = TagStatistics.from_tags([GEMTag.from_string("COM"), GEMTag.from_string("COM1")]) + stats = TagStatistics.from_strings(["COM", "COM1"]) assert stats.exactly_one_unique_tag() is False +def test_tag_statistics_unique_getters(): + stats = TagStatistics.from_strings(["COM", "COM"]) + assert stats.unique_group == "COM" + assert stats.unique_sub_group is None + + stats = TagStatistics.from_strings(["COM", "COM1"]) + assert stats.unique_group == "COM" + assert stats.unique_sub_group is 1 + assert stats.unique_sub_sub_group is None + + stats = TagStatistics.from_strings(["COM", "COM1", "RES2"]) + with pytest.raises(GEMTagException): + assert stats.unique_group != "COM" + + with pytest.raises(GEMTagException): + assert stats.unique_sub_group != 1 + + def test_tag_hash(): assert hash(GEMTag.from_string("COM")) == hash(GEMTag.from_string("COM")) assert hash(GEMTag.from_string("COM1")) != hash(GEMTag.from_string("COM")) -- GitLab From 93562cbe92c9d715996ef3403de2c2394c91f05b Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Mon, 18 Oct 2021 11:35:25 +0200 Subject: [PATCH 16/20] GEM rule refactoring --- rabotnikobm/instance.py | 20 ++++---- .../gem_occupancy/get_building_occupancy.py | 49 ++++++++++++------- rabotnikobm/rules/gem_occupancy/mapping.py | 36 +++++++++----- tests/conftest.py | 15 ++---- tests/test_get_gem_occupancy.py | 26 +++++----- 5 files changed, 80 insertions(+), 66 deletions(-) diff --git a/rabotnikobm/instance.py b/rabotnikobm/instance.py index 0dc7087..ddb8791 100644 --- a/rabotnikobm/instance.py +++ 
b/rabotnikobm/instance.py @@ -24,11 +24,12 @@ import argparse from rabotnik import Rabotnik, Assembly from rabotnik.bus import MessageBus -# These need to be absolute imports. Otherwise, celery will fail to load them -from rabotnikobm.rules import GetBuilding -from rabotnikobm.rules import GetFloorspace -from rabotnikobm.rules import GetBuildingLandUse -from rabotnikobm.rules import GetPointsInBuilding +from rabotnikobm.rules.gem_occupancy.mapping import OccupancyMapper + +from .rules import GetBuilding +from .rules import GetFloorspace +from .rules import GetBuildingLandUse +from .rules import GetPointsInBuilding logger = logging.getLogger(__name__) @@ -65,11 +66,11 @@ async def start_rabotnik_gem_occupancy( message_bus, storage_consumer, storage_contributor, n_processes_max ): + fn_mapping = "building_and_PoIs_tags.csv" + occupancy_mapper = OccupancyMapper.from_csv(fn=fn_mapping) + rules = [ - GetBuilding(storage_consumer, storage_contributor), - GetFloorspace(storage_consumer, storage_contributor), - GetBuildingLandUse(storage_consumer), - GetPointsInBuilding(storage_consumer), + GetBuildingOccupancy(storage_consumer, occupancy_mapper=occupancy_mapper), ] rules = Assembly(rules=rules, n_processes_max=n_processes_max) @@ -84,7 +85,6 @@ async def start_rabotnik(args): # await start_rabotnik_obm( # message_bus, storage_consumer, storage_contributor, args.n_processes_max # ) - await def main(): diff --git a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py index f29823f..0c22205 100644 --- a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py +++ b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py @@ -20,13 +20,16 @@ from __future__ import annotations import csv import logging +from pathlib import Path from rabotnik import Rule from rabotnik.storages.base import StorageBase from rabotnikobm.rules.gem_occupancy.mapping import OccupancyMapper, TagStatistics, GEMTag -logger = 
logging.getLogger() +logger = logging.getLogger(__name__) + +MODULE_PATH = Path(__file__).parent class TagResult(Exception): @@ -54,7 +57,7 @@ class OverridingOccupancy: raise TagResult(candidate) @classmethod - def from_csv(cls, fn: str) -> OverridingOccupancy: + def from_csv(cls, fn: Path) -> OverridingOccupancy: """Read a csv and initialize a `OverridingOccupancy` instance.""" with open(fn) as csvfile: @@ -64,25 +67,32 @@ class OverridingOccupancy: return cls(mapping=occupancy_mapping) + @classmethod + def overriding_occupancies(cls): + fn = MODULE_PATH / "data/overriding_occupancies.csv" + return cls.from_csv(fn) + -def check_exactly_one_unique_tag(occupancies: TagStatistics): - """raises `TagResult` if there is exactly one unique group""" - if occupancies.exactly_one_unique_tag(): - raise TagResult(occupancies.tags[0]) +class RuleOneUniqueTag: + def apply(self, occupancies: TagStatistics) -> None: + """raises `TagResult` if there is exactly one unique group""" + if occupancies.exactly_one_unique_tag(): + raise TagResult(occupancies.tags[0]) -def check_one_unique_sub_group(occupancies: TagStatistics): - print(occupancies) - print(occupancies.exactly_one_unique_tag()) - print(occupancies.number_of_unique_groups) - if occupancies.number_of_unique_groups == 1 and occupancies.number_of_unique_subgroups <= 2: - raise TagResult( - GEMTag( - group=occupancies.unique_group, - sub_group=occupancies.unique_sub_group, - sub_sub_group=occupancies.unique_sub_sub_group, +class RulesOneUniqueSubGroup: + def apply(self, occupancies: TagStatistics) -> None: + if ( + occupancies.number_of_unique_groups == 1 + and occupancies.number_of_unique_subgroups <= 2 + ): + raise TagResult( + GEMTag( + group=occupancies.unique_group, + sub_group=occupancies.unique_sub_group, + sub_sub_group=occupancies.unique_sub_sub_group, + ) ) - ) class GetBuildingOccupancy(Rule): @@ -92,6 +102,10 @@ class GetBuildingOccupancy(Rule): self.storage = storage self.occupancy_mapper = occupancy_mapper + 
self.candidates = [ + OverridingOccupancy.overriding_occupancies(), + ] + async def evaluate(self, payload: dict) -> list[str]: building_id = payload["building_id"] logger.debug("Processing building: %s", building_id) @@ -100,5 +114,6 @@ class GetBuildingOccupancy(Rule): ) occupancies = self.occupancy_mapper.apply(tags) + logger.debug("occupancies %s: %s", building_id, occupancies) return occupancies diff --git a/rabotnikobm/rules/gem_occupancy/mapping.py b/rabotnikobm/rules/gem_occupancy/mapping.py index 02db7be..ccbf1b4 100644 --- a/rabotnikobm/rules/gem_occupancy/mapping.py +++ b/rabotnikobm/rules/gem_occupancy/mapping.py @@ -18,6 +18,7 @@ from __future__ import annotations import re import csv +from pathlib import Path from collections import defaultdict, namedtuple, Counter from dataclasses import dataclass from typing import Optional @@ -29,6 +30,9 @@ from typing import Optional REGEX_GEM_GROUP = r"(?P^[A-Z]*)(?P[0-9]*)(?P[A-Z]*$)" +MODULE_PATH = Path(__file__).parent + + class GEMTagException(Exception): pass @@ -107,7 +111,8 @@ class TagStatistics( @property def unique_group(self) -> str: - """Get the unique group ID. In case of multiple available groups, raise GEMTagException.""" + """Get the unique group ID. In case of multiple available groups, + raises GEMTagException.""" if self.number_of_unique_groups > 1: raise GEMTagException(f"{self} has more than unique group.") @@ -116,7 +121,8 @@ class TagStatistics( @property def unique_sub_group(self) -> Optional[int]: - """Get the unique group ID. In case of multiple available groups, raise GEMTagException.""" + """Get the unique group ID. In case of multiple available groups, + raises GEMTagException.""" if len(self.subgroups) > 1: raise GEMTagException(f"{self} has more than unique sub_group.") @@ -128,7 +134,8 @@ class TagStatistics( @property def unique_sub_sub_group(self) -> Optional[str]: - """Get the unique group ID. 
In case of multiple available groups, raise GEMTagException.""" + """Get the unique group ID. In case of multiple available groups, + raises GEMTagException.""" if len(self.subsubgroups) > 1: raise GEMTagException(f"{self} has more than unique sub_sub_group.") @@ -205,19 +212,22 @@ class OccupancyMapper: return occupancies @classmethod - def read_csv(cls, fn: str) -> list[dict[str, str]]: - """Read content from csv into list of dicts.""" + def from_csv(cls, fn: Path) -> OccupancyMapper: + """Read a csv and initialize a `OccupancyMapper`.""" with open(fn) as csvfile: - mapping = list(csv.DictReader(csvfile)) + occupancy_mapping = list(csv.DictReader(csvfile)) - return mapping - - @classmethod - def from_csv(cls, fn: str) -> OccupancyMapper: - """Read a csv and initialize a `OccupancyMapper`.""" - - occupancy_mapping = cls.read_csv(fn) occupancy_mapping_grouped = group_tags(occupancy_mapping) return cls(mapping=occupancy_mapping_grouped) + + @classmethod + def landuse_mapper(cls: OccupancyMapper): + fn_mapping = MODULE_PATH / "data/landuse_tags.csv" + return cls.from_csv(fn_mapping) + + @classmethod + def building_poi_mapper(cls: OccupancyMapper): + fn_mapping = MODULE_PATH / "data/building_and_PoIs_tags.csv" + return cls.from_csv(fn_mapping) diff --git a/tests/conftest.py b/tests/conftest.py index c45e765..779703e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -51,21 +51,14 @@ def storage_consumer(pytestconfig): @pytest.fixture -def gem_data_path(pytestconfig): - yield pytestconfig.rootpath / "rabotnikobm/rules/gem_occupancy/data" - - -@pytest.fixture -def building_poi_mapper(gem_data_path): - fn_mapping = gem_data_path / "building_and_PoIs_tags.csv" - mapper = OccupancyMapper.from_csv(fn_mapping) +def building_poi_mapper(): + mapper = OccupancyMapper.building_poi_mapper() yield mapper @pytest.fixture -def landuse_mapper(gem_data_path): - fn_mapping = gem_data_path / "landuse_tags.csv" - mapper = OccupancyMapper.from_csv(fn_mapping) +def landuse_mapper(): 
+ mapper = OccupancyMapper.landuse_mapper() yield mapper diff --git a/tests/test_get_gem_occupancy.py b/tests/test_get_gem_occupancy.py index 872733a..d2e4cc5 100644 --- a/tests/test_get_gem_occupancy.py +++ b/tests/test_get_gem_occupancy.py @@ -20,20 +20,17 @@ import pytest from rabotnikobm.rules.gem_occupancy.get_building_occupancy import ( OverridingOccupancy, GetBuildingOccupancy, - check_exactly_one_unique_tag, + RuleOneUniqueTag, TagStatistics, TagResult, - check_one_unique_sub_group, + RulesOneUniqueSubGroup, ) from rabotnikobm.rules.gem_occupancy.mapping import GEMTag @pytest.fixture() -def overriding_occupancies(gem_data_path): - fn_mapping = gem_data_path / "overriding_occupancies.csv" - overriding_occupancies = OverridingOccupancy.from_csv(fn_mapping) - - yield overriding_occupancies +def overriding_occupancies(): + yield OverridingOccupancy.overriding_occupancies() @pytest.mark.requires_storage @@ -46,15 +43,14 @@ async def test_rule_get_building_occupancy(storage_consumer, building_poi_mapper assert result == ["ASS4", "UNDECIDABLE"] -def test_overriding_occupancy(overriding_occupancies): +def test_overriding_occupancy(overriding_occupancies: OverridingOccupancy): """Rule #1""" demo_tags = TagStatistics.from_strings(["ASS1", "COM10"]) try: - occupancy = overriding_occupancies.apply(demo_tags) + overriding_occupancies.apply(demo_tags) except TagResult as e: occupancy = e.tag - - assert occupancy == GEMTag.from_string("COM10") + assert occupancy == GEMTag.from_string("COM10") def test_overriding_occupancy_unknown(): @@ -65,20 +61,20 @@ def test_overriding_occupancy_unknown(): def test_unique_tags(): """Rule""" - + rule = RuleOneUniqueTag() tags = TagStatistics.from_strings(["COM", "COM"]) with pytest.raises(TagResult) as e: - check_exactly_one_unique_tag(tags) + rule.apply(tags) assert e.value.tag.group == "COM" def test_rule2(): """Rule #2""" - + rule = RulesOneUniqueSubGroup() tags = TagStatistics.from_strings(["RES", "RES1"]) with 
pytest.raises(TagResult) as e: - check_one_unique_sub_group(tags) + rule.apply(tags) assert e.value.tag.group == "RES" assert e.value.tag.sub_group == 1 -- GitLab From b6484603c2529b0fac31c0ab7f1de267a85d447f Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Mon, 18 Oct 2021 13:01:10 +0200 Subject: [PATCH 17/20] restructure the tag generation --- .../gem_occupancy/get_building_occupancy.py | 43 ++++++++++++++++--- tests/test_get_gem_occupancy.py | 3 +- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py index 0c22205..7b242bd 100644 --- a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py +++ b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py @@ -21,6 +21,7 @@ from __future__ import annotations import csv import logging from pathlib import Path +from typing import Optional from rabotnik import Rule from rabotnik.storages.base import StorageBase @@ -39,6 +40,14 @@ class TagResult(Exception): self.tag = tag +def apply_rules(rules, occupancies: TagStatistics) -> Optional[GEMTag]: + for rule in rules: + try: + rule.apply(occupancies) + except TagResult as result: + return result.tag + + class OverridingOccupancy: """Takes precedence over other mappings. 
@@ -74,14 +83,16 @@ class OverridingOccupancy: class RuleOneUniqueTag: - def apply(self, occupancies: TagStatistics) -> None: + @staticmethod + def apply(occupancies: TagStatistics) -> None: """raises `TagResult` if there is exactly one unique group""" if occupancies.exactly_one_unique_tag(): raise TagResult(occupancies.tags[0]) class RulesOneUniqueSubGroup: - def apply(self, occupancies: TagStatistics) -> None: + @staticmethod + def apply(occupancies: TagStatistics) -> None: if ( occupancies.number_of_unique_groups == 1 and occupancies.number_of_unique_subgroups <= 2 @@ -100,20 +111,38 @@ class GetBuildingOccupancy(Rule): def __init__(self, storage: StorageBase, occupancy_mapper: OccupancyMapper): self.storage = storage - self.occupancy_mapper = occupancy_mapper + + self.mappers = [ + occupancy_mapper.landuse_mapper(), + occupancy_mapper.building_poi_mapper(), + ] self.candidates = [ OverridingOccupancy.overriding_occupancies(), + RuleOneUniqueTag, + RulesOneUniqueSubGroup, ] - async def evaluate(self, payload: dict) -> list[str]: + async def evaluate(self, payload: dict) -> GEMTag: building_id = payload["building_id"] logger.debug("Processing building: %s", building_id) tags = await self.storage.expect_one( f"SELECT tags FROM osm_building_relations WHERE osm_id={building_id} AND index=0" ) - occupancies = self.occupancy_mapper.apply(tags) + # Mapping to GEM taxonomies + occupancies = [] + for mapper in self.mappers: + occupancies.extend(mapper.apply(tags)) + + occupancies = TagStatistics.from_strings(occupancies) + + # Apply rules to find ultimate taxonomy + occupancy = apply_rules(self.candidates, occupancies) + + logger.debug("occupancies %s: %s", building_id, occupancy) + return occupancy - logger.debug("occupancies %s: %s", building_id, occupancies) - return occupancies + async def apply_mappings(self, tags): + for rule in self.candidates: + rule.apply(tags) diff --git a/tests/test_get_gem_occupancy.py b/tests/test_get_gem_occupancy.py index d2e4cc5..7c51490 
100644 --- a/tests/test_get_gem_occupancy.py +++ b/tests/test_get_gem_occupancy.py @@ -40,7 +40,8 @@ async def test_rule_get_building_occupancy(storage_consumer, building_poi_mapper payload = {"building_id": -6744517} result = await rule.evaluate(payload=payload) - assert result == ["ASS4", "UNDECIDABLE"] + + assert result == ["ASS4"] def test_overriding_occupancy(overriding_occupancies: OverridingOccupancy): -- GitLab From 212b25cf55f1118724b96a70c61f63b00cd1d77b Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Mon, 18 Oct 2021 22:30:15 +0200 Subject: [PATCH 18/20] include csv in package --- MANIFEST.in | 2 ++ rabotnikobm/instance.py | 30 ++++++++++++------- .../gem_occupancy/get_building_occupancy.py | 11 +++---- setup.py | 3 +- 4 files changed, 29 insertions(+), 17 deletions(-) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..d7bd71f --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include rules/gem_occupancy/data/*.csv +include LICENSE diff --git a/rabotnikobm/instance.py b/rabotnikobm/instance.py index ddb8791..8ab5994 100644 --- a/rabotnikobm/instance.py +++ b/rabotnikobm/instance.py @@ -24,7 +24,7 @@ import argparse from rabotnik import Rabotnik, Assembly from rabotnik.bus import MessageBus -from rabotnikobm.rules.gem_occupancy.mapping import OccupancyMapper +from rabotnikobm.rules.gem_occupancy.get_building_occupancy import GetBuildingOccupancy from .rules import GetBuilding from .rules import GetFloorspace @@ -62,15 +62,10 @@ async def start_rabotnik_obm(message_bus): await message_bus.subscribe("building", rules.run) -async def start_rabotnik_gem_occupancy( - message_bus, storage_consumer, storage_contributor, n_processes_max -): - - fn_mapping = "building_and_PoIs_tags.csv" - occupancy_mapper = OccupancyMapper.from_csv(fn=fn_mapping) +async def start_rabotnik_gem_occupancy(message_bus, storage_consumer, _, n_processes_max): rules = [ - GetBuildingOccupancy(storage_consumer, 
occupancy_mapper=occupancy_mapper), + GetBuildingOccupancy(storage_consumer), ] rules = Assembly(rules=rules, n_processes_max=n_processes_max) @@ -82,9 +77,13 @@ async def start_rabotnik(args): message_bus = await connected_message_bus(args.config_message_bus) - # await start_rabotnik_obm( - # message_bus, storage_consumer, storage_contributor, args.n_processes_max - # ) + rabotnik_args = (message_bus, storage_consumer, storage_contributor, args.n_processes_max) + + if args.target in ("all", "obm"): + asyncio.create_task(start_rabotnik_obm(*rabotnik_args)) + + if args.target in ("all", "gem"): + asyncio.create_task(start_rabotnik_gem_occupancy(*rabotnik_args)) def main(): @@ -108,6 +107,15 @@ def main(): "--start-celery-worker", action="store_true", ) + + parser.add_argument( + "-t", + "--target", + default="all", + type=str, + help="Process to start (defaults to 'all')", + ) + args = parser.parse_args() default_log_level = logging.WARNING diff --git a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py index 7b242bd..29ebd41 100644 --- a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py +++ b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py @@ -109,12 +109,12 @@ class RulesOneUniqueSubGroup: class GetBuildingOccupancy(Rule): """A rule to map OSM tags to building occupancies.""" - def __init__(self, storage: StorageBase, occupancy_mapper: OccupancyMapper): + def __init__(self, storage: StorageBase): self.storage = storage self.mappers = [ - occupancy_mapper.landuse_mapper(), - occupancy_mapper.building_poi_mapper(), + OccupancyMapper.landuse_mapper(), + OccupancyMapper.building_poi_mapper(), ] self.candidates = [ @@ -130,14 +130,15 @@ class GetBuildingOccupancy(Rule): f"SELECT tags FROM osm_building_relations WHERE osm_id={building_id} AND index=0" ) - # Mapping to GEM taxonomies + # Mapping to GEM taxonomy strings occupancies = [] for mapper in self.mappers: 
occupancies.extend(mapper.apply(tags)) + # Convert derived occupancies to GEMTags occupancies = TagStatistics.from_strings(occupancies) - # Apply rules to find ultimate taxonomy + # Apply rules to find final taxonomy occupancy = apply_rules(self.candidates, occupancies) logger.debug("occupancies %s: %s", building_id, occupancy) diff --git a/setup.py b/setup.py index 690894f..dc717a1 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,8 @@ setup( "tests": tests_require, "linters": linters_require, }, - packages=find_packages(), entry_points={"console_scripts": ["rabotnikobm = rabotnikobm.instance:main"]}, python_requires=">=3.6", + packages=find_packages(), + include_package_data=True, ) -- GitLab From 01df53443c56ba41045d003637d0cd615c1a89dd Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Tue, 19 Oct 2021 10:55:00 +0200 Subject: [PATCH 19/20] catch empty results --- MANIFEST.in | 2 +- rabotnikobm/rules/gem_occupancy/get_building_occupancy.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index d7bd71f..9b6a330 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,2 @@ -include rules/gem_occupancy/data/*.csv +include rabotnikobm/rules/gem_occupancy/data/*.csv include LICENSE diff --git a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py index 29ebd41..97dcbb8 100644 --- a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py +++ b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py @@ -130,6 +130,10 @@ class GetBuildingOccupancy(Rule): f"SELECT tags FROM osm_building_relations WHERE osm_id={building_id} AND index=0" ) + logger.debug(f"working on tags: {tags}") + if not tags: + return + # Mapping to GEM taxonomy strings occupancies = [] for mapper in self.mappers: -- GitLab From fd6102da84efa5b189bef8954b847d11394a09ec Mon Sep 17 00:00:00 2001 From: Marius Kriegerowski Date: Fri, 18 Feb 2022 13:29:03 +0100 Subject: [PATCH 20/20] rebase 
and adapt to new celery implementation --- .../rules/gem_occupancy/get_building_occupancy.py | 12 ++++-------- rabotnikobm/rules/get_building.py | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py index 97dcbb8..82ef276 100644 --- a/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py +++ b/rabotnikobm/rules/gem_occupancy/get_building_occupancy.py @@ -106,6 +106,7 @@ class RulesOneUniqueSubGroup: ) +@Rule.app.task(bind=True, base=Rule) class GetBuildingOccupancy(Rule): """A rule to map OSM tags to building occupancies.""" @@ -123,16 +124,15 @@ class GetBuildingOccupancy(Rule): RulesOneUniqueSubGroup, ] - async def evaluate(self, payload: dict) -> GEMTag: - building_id = payload["building_id"] + def evaluate(self, building_id: int) -> GEMTag | None: logger.debug("Processing building: %s", building_id) - tags = await self.storage.expect_one( + tags = self.storage.expect_one( f"SELECT tags FROM osm_building_relations WHERE osm_id={building_id} AND index=0" ) logger.debug(f"working on tags: {tags}") if not tags: - return + return # Mapping to GEM taxonomy strings occupancies = [] @@ -147,7 +147,3 @@ class GetBuildingOccupancy(Rule): logger.debug("occupancies %s: %s", building_id, occupancy) return occupancy - - async def apply_mappings(self, tags): - for rule in self.candidates: - rule.apply(tags) diff --git a/rabotnikobm/rules/get_building.py b/rabotnikobm/rules/get_building.py index 9705639..1bbec14 100644 --- a/rabotnikobm/rules/get_building.py +++ b/rabotnikobm/rules/get_building.py @@ -37,7 +37,7 @@ class GetBuilding(Rule): """A rule to copy a building entry from a source database to a destination database.""" @Rule.app.task(bind=True, base=Rule) - def evaluate(self, building_id): + def evaluate(self, building_id: int): logger.info("called building task") -- GitLab