From 012748f35efcbdc246962948ba8eb7c00a5cfdc3 Mon Sep 17 00:00:00 2001 From: Danijel Schorlemmer <ds@gfz-potsdam.de> Date: Tue, 9 Aug 2022 17:39:44 +0200 Subject: [PATCH] Remove the `occupancy` field in `Asset` table and add occupancy to taxonomy --- exposurelib/database.py | 157 +++++++++++++++++++++++++++------------- exposurelib/utils.py | 39 +++++++++- 2 files changed, 143 insertions(+), 53 deletions(-) diff --git a/exposurelib/database.py b/exposurelib/database.py index 96d629b7..ca9bad84 100644 --- a/exposurelib/database.py +++ b/exposurelib/database.py @@ -23,6 +23,8 @@ import glob import shutil import os.path from exposurelib.utils import get_geom_of_quadkey +from exposurelib.utils import add_occupancy_to_taxonomy +from exposurelib.utils import split_taxonomy from databaselib.database import SpatiaLiteDatabase from pygeotile.tile import Tile from collections import namedtuple @@ -103,8 +105,7 @@ class ExposureDatabase(SpatiaLiteDatabase): sql_statement += "taxonomy_id INTEGER, " sql_statement += "number REAL, " sql_statement += "structural REAL, " - sql_statement += "night REAL, " - sql_statement += "occupancy TEXT)" + sql_statement += "night REAL)" self.connection.execute(sql_statement) logger.debug("Table Asset created") @@ -128,7 +129,7 @@ class ExposureDatabase(SpatiaLiteDatabase): sql_statement += "built_up_ratio REAL, " sql_statement += "completeness INTEGER)" self.connection.execute(sql_statement) - sql_statement = "SELECT AddGeometryColumn('Tile', 'geom', 4326, 'POLYGON', 'XY')" + sql_statement = "SELECT AddGeometryColumn('Tile', 'geom', 4326, 'MULTIPOLYGON', 'XY')" self.connection.execute(sql_statement) logger.debug("Table Tile created") @@ -504,7 +505,7 @@ class ExposureDatabase(SpatiaLiteDatabase): entity_id = self.cursor.fetchone()[0] return entity_id - def insert_asset(self, entity_id, taxonomy_id, number, structural, night, occupancy): + def insert_asset(self, entity_id, taxonomy_id, number, structural, night): """ Inserts an asset into the 
`Asset` table. The `entity_id` points to the entity (either a building or a tile) to which this asset belongs. @@ -520,19 +521,16 @@ class ExposureDatabase(SpatiaLiteDatabase): Structural value of the asset night (float): Number of people in the asset (sum) - occupancy (str): - Occupancy type of the asset """ sql_statement = "INSERT INTO Asset " - sql_statement += "(entity_id, taxonomy_id, number, structural, night, occupancy) " - sql_statement += "VALUES (%d, %d, %f, %f, %f, '%s')" % ( + sql_statement += "(entity_id, taxonomy_id, number, structural, night) " + sql_statement += "VALUES (%d, %d, %f, %f, %f)" % ( entity_id, taxonomy_id, number, structural, night, - occupancy, ) self.cursor.execute(sql_statement) @@ -672,7 +670,7 @@ class ExposureDatabase(SpatiaLiteDatabase): comment, ) - # The RETURNING statement is not supported for SQLite versions<3.34. + # The RETURNING statement is not supported for SQLite versions < 3.35.0. # Therefore, we do a SELECT statement. if self.old_sqlite_version: self.cursor.execute(sql_statement) @@ -1012,7 +1010,9 @@ class ExposureDatabase(SpatiaLiteDatabase): next(reader) line_number = 0 for asset in reader: - taxonomy = asset[constants.taxonomy] + taxonomy = add_occupancy_to_taxonomy( + asset[constants.taxonomy], str(asset[constants.occupancy]).upper() + ) if not self.taxonomy_string_exists(taxonomy): self.insert_taxonomy(taxonomy) taxonomy_id = self.get_taxonomy_id(taxonomy) @@ -1022,10 +1022,11 @@ class ExposureDatabase(SpatiaLiteDatabase): quadkey, wkt = self.get_quadkey_and_geom(osm_id) geom = "GeomFromText('%s', 4326)" % wkt self.insert_building_entity(quadkey, osm_id, geom) + entity_id = self.get_entity_id(osm_id=osm_id) else: quadkey = asset[constants.origin_id][5:] self.insert_tile_entity(quadkey) - entity_id = self.get_entity_id(quadkey) + entity_id = self.get_entity_id(quadkey=quadkey) self.insert_asset( entity_id, @@ -1033,7 +1034,6 @@ class ExposureDatabase(SpatiaLiteDatabase): float(asset[constants.number]), 
float(asset[constants.structural]), float(asset[constants.night]), - asset[constants.occupancy], ) if not self.quadkey_exists(quadkey): self.insert_tile_from_quadkey(quadkey) @@ -1081,56 +1081,109 @@ class ExposureDatabase(SpatiaLiteDatabase): writer.writerow(fieldnames) # Building asset exposure output - sql_statement = "SELECT " - sql_statement += "Asset.entity_id, " - sql_statement += "X(ST_Centroid(Building.geom)), " - sql_statement += "Y(ST_Centroid(Building.geom)), " - sql_statement += "Taxonomy.taxonomy_string, " - sql_statement += "Asset.number, " - sql_statement += "Asset.structural, " - sql_statement += "Asset.night, " - sql_statement += "Asset.occupancy, " - sql_statement += "Tile.quadkey, " - sql_statement += "ST_AsText(Tile.geom), " - sql_statement += "Building.osm_id, " - sql_statement += "ST_AsText(Building.geom) " - sql_statement += "FROM Entity " - sql_statement += "INNER JOIN Asset ON Asset.entity_id = Entity.id " - sql_statement += "INNER JOIN Tile ON Entity.quadkey = Tile.quadkey " - sql_statement += "INNER JOIN Taxonomy ON Asset.taxonomy_id = Taxonomy.id " - sql_statement += "INNER JOIN Building ON Entity.osm_id = Building.osm_id " - sql_statement += "WHERE Entity.osm_id IS NOT NULL" + sql_statement = """ + SELECT + Asset.entity_id, + X(ST_Centroid(Building.geom)), + Y(ST_Centroid(Building.geom)), + Taxonomy.taxonomy_string, + Asset.number, + Asset.structural, + Asset.night, + Tile.quadkey, + ST_AsText(Tile.geom), + Building.osm_id, + ST_AsText(Building.geom) + FROM Entity + INNER JOIN Asset ON Asset.entity_id = Entity.id + INNER JOIN Tile ON Entity.quadkey = Tile.quadkey + INNER JOIN Taxonomy ON Asset.taxonomy_id = Taxonomy.id + INNER JOIN Building ON Entity.osm_id = Building.osm_id + WHERE Entity.osm_id IS NOT NULL + """ self.cursor.execute(sql_statement) line_number = 0 - for asset in self.cursor: - writer.writerow(asset[0:12]) + for ( + entity_id, + lon, + lat, + full_taxonomy, + number, + structural, + night, + quadkey, + tile_geom, + 
osm_id, + building_geom, + ) in self.cursor: + taxonomy, occupancy = split_taxonomy(full_taxonomy) + write_asset = [ + entity_id, + lon, + lat, + taxonomy, + number, + structural, + night, + occupancy, + quadkey, + tile_geom, + osm_id, + building_geom, + ] + writer.writerow(write_asset) line_number += 1 if line_number % 100000 == 0: logger.info("Exported asset " + str(line_number)) logger.info("Building exposure exported") # Tile asset exposure output - sql_statement = "SELECT " - sql_statement += "Asset.entity_id, " - sql_statement += "X(ST_Centroid(Tile.geom)), " - sql_statement += "Y(ST_Centroid(Tile.geom)), " - sql_statement += "Taxonomy.taxonomy_string, " - sql_statement += "Asset.number, " - sql_statement += "Asset.structural, " - sql_statement += "Asset.night, " - sql_statement += "Asset.occupancy, " - sql_statement += "Tile.quadkey, " - sql_statement += "ST_AsText(Tile.geom)" - sql_statement += "FROM Entity " - sql_statement += "INNER JOIN Asset ON Asset.entity_id = Entity.id " - sql_statement += "INNER JOIN Tile ON Entity.quadkey = Tile.quadkey " - sql_statement += "INNER JOIN Taxonomy ON Asset.taxonomy_id = Taxonomy.id " - sql_statement += "WHERE Entity.osm_id IS NULL" + sql_statement = """ + SELECT + Asset.entity_id, + X(ST_Centroid(Tile.geom)), + Y(ST_Centroid(Tile.geom)), + Taxonomy.taxonomy_string, + Asset.number, + Asset.structural, + Asset.night, + Tile.quadkey, + ST_AsText(Tile.geom) + FROM Entity + INNER JOIN Asset ON Asset.entity_id = Entity.id + INNER JOIN Tile ON Entity.quadkey = Tile.quadkey + INNER JOIN Taxonomy ON Asset.taxonomy_id = Taxonomy.id + WHERE Entity.osm_id IS NULL + """ self.cursor.execute(sql_statement) - for asset in self.cursor: - write_asset = list(asset[0:10]) + ["-1", "POINT EMPTY"] + for ( + entity_id, + lon, + lat, + full_taxonomy, + number, + structural, + night, + quadkey, + tile_geom, + ) in self.cursor: + taxonomy, occupancy = split_taxonomy(full_taxonomy) + write_asset = [ + entity_id, + lon, + lat, + taxonomy, + 
number, + structural, + night, + occupancy, + quadkey, + tile_geom, + "-1", + "POINT EMPTY", + ] writer.writerow(write_asset) line_number += 1 if line_number % 100000 == 0: diff --git a/exposurelib/utils.py b/exposurelib/utils.py index c0170e2e..1b5d146e 100644 --- a/exposurelib/utils.py +++ b/exposurelib/utils.py @@ -21,7 +21,6 @@ from pygeotile.tile import Tile from shapely.geometry import box from shapely.geometry.multipolygon import MultiPolygon - logger = logging.getLogger(__name__) @@ -45,3 +44,41 @@ def get_geom_of_quadkey(quadkey): tile.bounds[1].latitude, ) return MultiPolygon([tile_polygon]).wkt + + +def add_occupancy_to_taxonomy(taxonomy, occupancy): + """ + Extends the taxonomy string with the occupancy using the `/` separator as defined by + the GEM taxonomy. + + Args: + taxonomy (str): + Given taxonomy string + occupancy (str): + Given occupancy string + + Returns: + The taxonomy string extended by the occupancy string using the `/` separator + """ + + return f"{taxonomy}/{occupancy}" + + +def split_taxonomy(full_taxonomy): + """ + Splits the occupancy element from the `full_taxonomy` string by tokenizing the string + using `/` as a separator and assuming the occupancy is the last element. Then the + remaining taxonomy string and the occupancy are returned. + + Args: + full_taxonomy (str): + Full taxonomy string containing the occupancy at the end + + Returns: + A tuple of the taxonomy string without occupancy and the occupancy string + """ + + full_taxonomy_list = full_taxonomy.split("/") + occupancy = full_taxonomy_list[-1] + taxonomy = "/".join(full_taxonomy_list[:-1]) + return taxonomy, occupancy -- GitLab