From 012748f35efcbdc246962948ba8eb7c00a5cfdc3 Mon Sep 17 00:00:00 2001
From: Danijel Schorlemmer <ds@gfz-potsdam.de>
Date: Tue, 9 Aug 2022 17:39:44 +0200
Subject: [PATCH] Remove the `occupancy` field in `Asset` table and add
 occupancy to taxonomy

---
 exposurelib/database.py | 157 +++++++++++++++++++++++++++-------------
 exposurelib/utils.py    |  39 +++++++++-
 2 files changed, 143 insertions(+), 53 deletions(-)

diff --git a/exposurelib/database.py b/exposurelib/database.py
index 96d629b7..ca9bad84 100644
--- a/exposurelib/database.py
+++ b/exposurelib/database.py
@@ -23,6 +23,8 @@ import glob
 import shutil
 import os.path
 from exposurelib.utils import get_geom_of_quadkey
+from exposurelib.utils import add_occupancy_to_taxonomy
+from exposurelib.utils import split_taxonomy
 from databaselib.database import SpatiaLiteDatabase
 from pygeotile.tile import Tile
 from collections import namedtuple
@@ -103,8 +105,7 @@ class ExposureDatabase(SpatiaLiteDatabase):
         sql_statement += "taxonomy_id  INTEGER, "
         sql_statement += "number       REAL, "
         sql_statement += "structural   REAL, "
-        sql_statement += "night        REAL, "
-        sql_statement += "occupancy    TEXT)"
+        sql_statement += "night        REAL)"
         self.connection.execute(sql_statement)
         logger.debug("Table Asset created")
 
@@ -128,7 +129,7 @@ class ExposureDatabase(SpatiaLiteDatabase):
         sql_statement += "built_up_ratio    REAL, "
         sql_statement += "completeness      INTEGER)"
         self.connection.execute(sql_statement)
-        sql_statement = "SELECT AddGeometryColumn('Tile', 'geom', 4326, 'POLYGON', 'XY')"
+        sql_statement = "SELECT AddGeometryColumn('Tile', 'geom', 4326, 'MULTIPOLYGON', 'XY')"
         self.connection.execute(sql_statement)
         logger.debug("Table Tile created")
 
@@ -504,7 +505,7 @@ class ExposureDatabase(SpatiaLiteDatabase):
         entity_id = self.cursor.fetchone()[0]
         return entity_id
 
-    def insert_asset(self, entity_id, taxonomy_id, number, structural, night, occupancy):
+    def insert_asset(self, entity_id, taxonomy_id, number, structural, night):
         """
         Inserts an asset into the `Asset` table. The `entity_id` points to the entity
         (either a building or a tile) to which this asset belongs.
@@ -520,19 +521,16 @@ class ExposureDatabase(SpatiaLiteDatabase):
                 Structural value of the asset
             night (float):
                 Number of people in the asset (sum)
-            occupancy (str):
-                Occupancy type of the asset
         """
 
         sql_statement = "INSERT INTO Asset "
-        sql_statement += "(entity_id, taxonomy_id, number, structural, night, occupancy) "
-        sql_statement += "VALUES (%d, %d, %f, %f, %f, '%s')" % (
+        sql_statement += "(entity_id, taxonomy_id, number, structural, night) "
+        sql_statement += "VALUES (%d, %d, %f, %f, %f)" % (
             entity_id,
             taxonomy_id,
             number,
             structural,
             night,
-            occupancy,
         )
         self.cursor.execute(sql_statement)
 
@@ -672,7 +670,7 @@ class ExposureDatabase(SpatiaLiteDatabase):
             comment,
         )
 
-        # The RETURNING statement is not supported for SQLite versions<3.34.
+        # The RETURNING statement is not supported for SQLite versions < 3.35.0.
         # Therefore, we do a SELECT statement.
         if self.old_sqlite_version:
             self.cursor.execute(sql_statement)
@@ -1012,7 +1010,9 @@ class ExposureDatabase(SpatiaLiteDatabase):
                 next(reader)
                 line_number = 0
                 for asset in reader:
-                    taxonomy = asset[constants.taxonomy]
+                    taxonomy = add_occupancy_to_taxonomy(
+                        asset[constants.taxonomy], str(asset[constants.occupancy]).upper()
+                    )
                     if not self.taxonomy_string_exists(taxonomy):
                         self.insert_taxonomy(taxonomy)
                     taxonomy_id = self.get_taxonomy_id(taxonomy)
@@ -1022,10 +1022,11 @@ class ExposureDatabase(SpatiaLiteDatabase):
                         quadkey, wkt = self.get_quadkey_and_geom(osm_id)
                         geom = "GeomFromText('%s', 4326)" % wkt
                         self.insert_building_entity(quadkey, osm_id, geom)
+                        entity_id = self.get_entity_id(osm_id=osm_id)
                     else:
                         quadkey = asset[constants.origin_id][5:]
                         self.insert_tile_entity(quadkey)
-                    entity_id = self.get_entity_id(quadkey)
+                        entity_id = self.get_entity_id(quadkey=quadkey)
 
                     self.insert_asset(
                         entity_id,
@@ -1033,7 +1034,6 @@ class ExposureDatabase(SpatiaLiteDatabase):
                         float(asset[constants.number]),
                         float(asset[constants.structural]),
                         float(asset[constants.night]),
-                        asset[constants.occupancy],
                     )
                     if not self.quadkey_exists(quadkey):
                         self.insert_tile_from_quadkey(quadkey)
@@ -1081,56 +1081,109 @@ class ExposureDatabase(SpatiaLiteDatabase):
         writer.writerow(fieldnames)
 
         # Building asset exposure output
-        sql_statement = "SELECT "
-        sql_statement += "Asset.entity_id, "
-        sql_statement += "X(ST_Centroid(Building.geom)), "
-        sql_statement += "Y(ST_Centroid(Building.geom)), "
-        sql_statement += "Taxonomy.taxonomy_string, "
-        sql_statement += "Asset.number, "
-        sql_statement += "Asset.structural, "
-        sql_statement += "Asset.night, "
-        sql_statement += "Asset.occupancy, "
-        sql_statement += "Tile.quadkey, "
-        sql_statement += "ST_AsText(Tile.geom), "
-        sql_statement += "Building.osm_id, "
-        sql_statement += "ST_AsText(Building.geom) "
-        sql_statement += "FROM Entity "
-        sql_statement += "INNER JOIN Asset ON Asset.entity_id = Entity.id "
-        sql_statement += "INNER JOIN Tile ON Entity.quadkey = Tile.quadkey "
-        sql_statement += "INNER JOIN Taxonomy ON Asset.taxonomy_id = Taxonomy.id "
-        sql_statement += "INNER JOIN Building ON Entity.osm_id = Building.osm_id "
-        sql_statement += "WHERE Entity.osm_id IS NOT NULL"
+        sql_statement = """
+            SELECT
+                Asset.entity_id,
+                X(ST_Centroid(Building.geom)),
+                Y(ST_Centroid(Building.geom)),
+                Taxonomy.taxonomy_string,
+                Asset.number,
+                Asset.structural,
+                Asset.night,
+                Tile.quadkey,
+                ST_AsText(Tile.geom),
+                Building.osm_id,
+                ST_AsText(Building.geom)
+            FROM Entity
+            INNER JOIN Asset ON Asset.entity_id = Entity.id
+            INNER JOIN Tile ON Entity.quadkey = Tile.quadkey
+            INNER JOIN Taxonomy ON Asset.taxonomy_id = Taxonomy.id
+            INNER JOIN Building ON Entity.osm_id = Building.osm_id
+            WHERE Entity.osm_id IS NOT NULL
+            """
         self.cursor.execute(sql_statement)
 
         line_number = 0
-        for asset in self.cursor:
-            writer.writerow(asset[0:12])
+        for (
+            entity_id,
+            lon,
+            lat,
+            full_taxonomy,
+            number,
+            structural,
+            night,
+            quadkey,
+            tile_geom,
+            osm_id,
+            building_geom,
+        ) in self.cursor:
+            taxonomy, occupancy = split_taxonomy(full_taxonomy)
+            write_asset = [
+                entity_id,
+                lon,
+                lat,
+                taxonomy,
+                number,
+                structural,
+                night,
+                occupancy,
+                quadkey,
+                tile_geom,
+                osm_id,
+                building_geom,
+            ]
+            writer.writerow(write_asset)
             line_number += 1
             if line_number % 100000 == 0:
                 logger.info("Exported asset " + str(line_number))
         logger.info("Building exposure exported")
 
         # Tile asset exposure output
-        sql_statement = "SELECT "
-        sql_statement += "Asset.entity_id, "
-        sql_statement += "X(ST_Centroid(Tile.geom)), "
-        sql_statement += "Y(ST_Centroid(Tile.geom)), "
-        sql_statement += "Taxonomy.taxonomy_string, "
-        sql_statement += "Asset.number, "
-        sql_statement += "Asset.structural, "
-        sql_statement += "Asset.night, "
-        sql_statement += "Asset.occupancy, "
-        sql_statement += "Tile.quadkey, "
-        sql_statement += "ST_AsText(Tile.geom)"
-        sql_statement += "FROM Entity "
-        sql_statement += "INNER JOIN Asset ON Asset.entity_id = Entity.id "
-        sql_statement += "INNER JOIN Tile ON Entity.quadkey = Tile.quadkey "
-        sql_statement += "INNER JOIN Taxonomy ON Asset.taxonomy_id = Taxonomy.id "
-        sql_statement += "WHERE Entity.osm_id IS NULL"
+        sql_statement = """
+            SELECT
+                Asset.entity_id,
+                X(ST_Centroid(Tile.geom)),
+                Y(ST_Centroid(Tile.geom)),
+                Taxonomy.taxonomy_string,
+                Asset.number,
+                Asset.structural,
+                Asset.night,
+                Tile.quadkey,
+                ST_AsText(Tile.geom)
+            FROM Entity
+            INNER JOIN Asset ON Asset.entity_id = Entity.id
+            INNER JOIN Tile ON Entity.quadkey = Tile.quadkey
+            INNER JOIN Taxonomy ON Asset.taxonomy_id = Taxonomy.id
+            WHERE Entity.osm_id IS NULL
+            """
         self.cursor.execute(sql_statement)
 
-        for asset in self.cursor:
-            write_asset = list(asset[0:10]) + ["-1", "POINT EMPTY"]
+        for (
+            entity_id,
+            lon,
+            lat,
+            full_taxonomy,
+            number,
+            structural,
+            night,
+            quadkey,
+            tile_geom,
+        ) in self.cursor:
+            taxonomy, occupancy = split_taxonomy(full_taxonomy)
+            write_asset = [
+                entity_id,
+                lon,
+                lat,
+                taxonomy,
+                number,
+                structural,
+                night,
+                occupancy,
+                quadkey,
+                tile_geom,
+                "-1",
+                "POINT EMPTY",
+            ]
             writer.writerow(write_asset)
             line_number += 1
             if line_number % 100000 == 0:
diff --git a/exposurelib/utils.py b/exposurelib/utils.py
index c0170e2e..1b5d146e 100644
--- a/exposurelib/utils.py
+++ b/exposurelib/utils.py
@@ -21,7 +21,6 @@ from pygeotile.tile import Tile
 from shapely.geometry import box
 from shapely.geometry.multipolygon import MultiPolygon
 
-
 logger = logging.getLogger(__name__)
 
 
@@ -45,3 +44,41 @@ def get_geom_of_quadkey(quadkey):
         tile.bounds[1].latitude,
     )
     return MultiPolygon([tile_polygon]).wkt
+
+
+def add_occupancy_to_taxonomy(taxonomy, occupancy):
+    """
+    Appends the occupancy to the taxonomy string as its last element, joined by the `/`
+    separator as defined by the GEM taxonomy. No validation or normalization is done.
+
+    Args:
+        taxonomy (str):
+            Taxonomy string without the occupancy element
+        occupancy (str):
+            Occupancy string to append; it becomes the element after the final `/`
+
+    Returns:
+        The string `<taxonomy>/<occupancy>` (taxonomy, `/` separator, occupancy)
+    """
+
+    return f"{taxonomy}/{occupancy}"
+
+
+def split_taxonomy(full_taxonomy):
+    """
+    Splits the occupancy off the `full_taxonomy` string: the string is tokenized using
+    `/` as a separator and the last element is taken as the occupancy. If the string
+    contains no `/`, the whole string is returned as occupancy with an empty taxonomy.
+
+    Args:
+        full_taxonomy (str):
+            Full taxonomy string with the occupancy as its last `/`-separated element
+
+    Returns:
+        A tuple of the taxonomy string without occupancy and the occupancy string
+    """
+
+    full_taxonomy_list = full_taxonomy.split("/")
+    occupancy = full_taxonomy_list[-1]
+    taxonomy = "/".join(full_taxonomy_list[:-1])
+    return taxonomy, occupancy
-- 
GitLab