Commit 5207384b authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added storing of quadkey and geometry of OBM buildings

parent 3ec46122
Pipeline #44354 failed with stage
in 20 seconds
......@@ -154,6 +154,7 @@ class DatabaseStorage:
occupancy_case,
aggregated_source_id,
obm_buildings_building_classes,
obm_buildings_quadkeys_geometry,
db_gde_tiles_config,
db_table,
):
......@@ -174,6 +175,18 @@ class DatabaseStorage:
aggregated_source_id (int):
ID of the source of the aggregated exposure model associated with the building
classes of the OBM buildings in 'obm_buildings_building_classes'.
obm_buildings_quadkeys_geometry (Pandas DataFrame):
DataFrame indicating the footprints and quadkeys associated with the centroids
of the OBM buildings in 'obm_buildings_building_classes'. It is assumed to have
at least the following columns:
osm_id (int):
OpenStreetMap (OSM) ID of the building. If the building is represented
by a relation, this is the ID of the relation.
quadkey (str):
String indicating the quadkey of the tile to which the centroid of the
building belongs.
geometry (str):
Footprint of the building in Well-Known Text format and EPSG:4326.
obm_buildings_building_classes (dict):
Dictionary containing the building classes and their probabilities for each OBM
building. Dictionary keys correspond to the OSM ID of the building. Each key
......@@ -214,6 +227,9 @@ class DatabaseStorage:
SQL enumerated type describing the building occupancy cases.
data_unit_id (str):
ID of the data unit the OBM building belongs to.
quadkey (str):
Quadkey of the zoom-level 18 tile to which the centroid of the building
belongs.
building_class_names (array of str):
Building class as per the GEM Building Taxonomy.
settlement_types (array of enum):
......@@ -224,6 +240,8 @@ class DatabaseStorage:
class.
probabilities (array of float):
Probabilities of the OBM building belonging to each building class.
geometry (PSQL geometry):
Footprint of the OBM building.
"""
sql_commands = {}
......@@ -231,15 +249,17 @@ class DatabaseStorage:
sql_commands["query"] = "SELECT COUNT(*) FROM %s "
sql_commands["query"] += "WHERE (osm_id=%s AND aggregated_source_id=%s);"
sql_commands["update"] = "UPDATE %s SET (occupancy_case, data_unit_id, "
sql_commands["update"] = "UPDATE %s SET (occupancy_case, data_unit_id, quadkey, "
sql_commands["update"] += "building_class_names, settlement_types, occupancy_subtypes, "
sql_commands["update"] += "probabilities) = ('%s','%s','%s','%s','%s','%s')"
sql_commands["update"] += " WHERE (osm_id=%s AND aggregated_source_id=%s);"
sql_commands["update"] += "probabilities, geometry) = "
sql_commands["update"] += "('%s','%s','%s','%s','%s','%s','%s','%s') "
sql_commands["update"] += "WHERE (osm_id=%s AND aggregated_source_id=%s);"
sql_commands["insert"] = "INSERT INTO %s(osm_id, aggregated_source_id, occupancy_case, "
sql_commands["insert"] += "data_unit_id, building_class_names, settlement_types, "
sql_commands["insert"] += "occupancy_subtypes, probabilities) "
sql_commands["insert"] += "VALUES (%s, %s, '%s', '%s', '%s', '%s', '%s', '%s');"
sql_commands["insert"] += "data_unit_id, quadkey, building_class_names, "
sql_commands["insert"] += "settlement_types, occupancy_subtypes, probabilities, "
sql_commands["insert"] += "geometry) VALUES ( "
sql_commands["insert"] += "%s, %s, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');"
db_gde_tiles = Database(**db_gde_tiles_config)
db_gde_tiles.create_connection_and_cursor()
......@@ -259,6 +279,9 @@ class DatabaseStorage:
db_table,
occupancy_case,
data_unit_id,
obm_buildings_quadkeys_geometry.loc[
obm_buildings_quadkeys_geometry["osm_id"] == osm_id, "quadkey"
].to_numpy()[0],
'{"%s"}'
% (
'", "'.join(
......@@ -275,6 +298,9 @@ class DatabaseStorage:
list(building_classes["probabilities"].to_numpy().astype(str))
)
),
obm_buildings_quadkeys_geometry.loc[
obm_buildings_quadkeys_geometry["osm_id"] == osm_id, "geometry"
].to_numpy()[0],
osm_id,
aggregated_source_id,
)
......@@ -288,6 +314,9 @@ class DatabaseStorage:
aggregated_source_id,
occupancy_case,
data_unit_id,
obm_buildings_quadkeys_geometry.loc[
obm_buildings_quadkeys_geometry["osm_id"] == osm_id, "quadkey"
].to_numpy()[0],
'{"%s"}'
% (
'", "'.join(
......@@ -304,6 +333,9 @@ class DatabaseStorage:
list(building_classes["probabilities"].to_numpy().astype(str))
)
),
obm_buildings_quadkeys_geometry.loc[
obm_buildings_quadkeys_geometry["osm_id"] == osm_id, "geometry"
].to_numpy()[0],
)
)
else: # this should not occur
......
......@@ -84,6 +84,8 @@ class GDEProcessor:
occupancy (str):
Occupancy of the building as per the GEM Building Taxonomy v3.0. Missing
values can be "nan" or "None".
geometry (Polygon or MultiPolygon in Well-Known Text format):
Geometry (footprint) of the building, defined in EPSG:4326.
"""
# Identify unique relation IDs in 'obm_buildings'
......@@ -105,15 +107,14 @@ class GDEProcessor:
if len(which_no_relation) > 0:
# Start output DataFrame with buildings that are not part of relations
obm_buildings_adjusted = obm_buildings.iloc[which_no_relation, :]
obm_buildings_adjusted = obm_buildings_adjusted.drop(
columns=["relation_id", "geometry"]
)
obm_buildings_adjusted = obm_buildings_adjusted.drop(columns=["relation_id"])
# Process buildings that are part of relations
add_osm_ids = []
add_quadkey = []
add_storeys = []
add_occupancy = []
add_geometry = []
for relation_id in unique_relations:
# Identify components of this relation_id
......@@ -136,12 +137,17 @@ class GDEProcessor:
GDEProcessor._ensure_unique_occupancy(obm_buildings_of_relation)
)
add_geometry.append(
GDEProcessor._define_geometry_of_relation(obm_buildings_of_relation)
)
relation_buildings = pandas.DataFrame(
{
"osm_id": pandas.Series(numpy.array(add_osm_ids).astype(int), dtype="int"),
"quadkey": pandas.Series(numpy.array(add_quadkey).astype(str), dtype="str"),
"storeys": pandas.Series(numpy.array(add_storeys).astype(float), dtype="float"),
"occupancy": pandas.Series(numpy.array(add_occupancy).astype(str), dtype="str"),
"geometry": pandas.Series(numpy.array(add_geometry).astype(str), dtype="str"),
}
)
......@@ -310,6 +316,25 @@ class GDEProcessor:
return unique_occupancy
@staticmethod
def _define_geometry_of_relation(obm_buildings):
    """
    Dissolve the geometries contained in 'obm_buildings' into one geometry and return it
    as a Well-Known Text (WKT) string.

    Args:
        obm_buildings (GeoPandas GeoDataFrame):
            GeoDataFrame with at least a geometry column.

    Returns:
        merged_geometry (str):
            Geometry resulting from dissolving 'obm_buildings', in Well-Known Text (WKT)
            format.
    """

    # Dissolve without grouping and read the geometry stored in the first row of the result
    dissolved = obm_buildings.dissolve()
    geometries = dissolved["geometry"].to_numpy()

    return geometries[0].wkt
@staticmethod
def assign_building_classes_to_obm_buildings(
obm_buildings, data_unit_building_classes, occupancy_case
......@@ -901,6 +926,7 @@ class GDEProcessor:
occupancy_case,
aggregated_source_id,
obm_buildings_building_classes,
obm_buildings,
configuration.database_gde_tiles,
"gde_buildings",
)
......
......@@ -224,23 +224,26 @@ CREATE TABLE gde_buildings
osm_id integer,
aggregated_source_id SMALLINT,
occupancy_case occupancycase,
data_unit_id VARCHAR,
data_unit_id VARCHAR,
quadkey CHAR(18),
building_class_names VARCHAR[],
settlement_types settlement[],
occupancy_subtypes VARCHAR[],
probabilities FLOAT[],
geometry GEOMETRY,
PRIMARY KEY (osm_id, aggregated_source_id)
);
INSERT INTO gde_buildings(osm_id,
aggregated_source_id,
occupancy_case,
data_unit_id,
data_unit_id,
quadkey,
building_class_names,
settlement_types,
occupancy_subtypes,
probabilities)
VALUES (-101010, 2, 'industrial', 'ABC_10269',
VALUES (-101010, 2, 'industrial', 'ABC_10269', '333333333333333333',
'{"CLASS/X/params/H:1", "CLASS/Y/params/H:2"}',
'{"rural", "rural"}',
'{"all", "all"}',
......
......@@ -19,6 +19,7 @@
import os
import numpy
import pandas
import shapely
from gdeimporter.tools.database import Database
from gdecore.configuration import Configuration
from gdecore.database_storage import DatabaseStorage
......@@ -141,11 +142,23 @@ def test_store_OBM_building_classes(test_db):
}
)
obm_bdg_quadkey_geom = pandas.DataFrame(
{
"osm_id": [11223344],
"quadkey": ["122010321033023130"],
"geometry": [
"POLYGON((15.0487 37.4812,15.0489 37.4810,15.0486 37.4808,15.0484 37.4810,"
"15.0487 37.4812))"
],
}
)
DatabaseStorage.store_OBM_building_classes(
"ABC_10269",
"residential",
2,
obm_bdg_classes,
obm_bdg_quadkey_geom,
config.database_gde_tiles,
"gde_buildings",
)
......@@ -153,14 +166,28 @@ def test_store_OBM_building_classes(test_db):
(
returned_occupancy_case,
returned_data_unit_id,
returned_quadkey,
returned_building_class_names,
returned_settlement_types,
returned_occupancy_subtypes,
returned_probabilities,
returned_geometry,
) = query_OBM_building_classes(config.database_gde_tiles, 11223344, 2)
assert returned_occupancy_case == "residential"
assert returned_data_unit_id == "ABC_10269"
assert returned_quadkey == "122010321033023130"
expected_geometry = shapely.wkt.loads(
obm_bdg_quadkey_geom.loc[
obm_bdg_quadkey_geom["osm_id"] == 11223344, "geometry"
].to_numpy()[0]
)
assert round(returned_geometry.bounds[0], 2) == round(expected_geometry.bounds[0], 2)
assert round(returned_geometry.bounds[1], 2) == round(expected_geometry.bounds[1], 2)
assert round(returned_geometry.bounds[2], 2) == round(expected_geometry.bounds[2], 2)
assert round(returned_geometry.bounds[3], 2) == round(expected_geometry.bounds[3], 2)
assert len(returned_building_class_names) == len(
obm_bdg_classes[11223344]["building_class_name"].to_numpy()
)
......@@ -205,11 +232,25 @@ def test_store_OBM_building_classes(test_db):
}
)
obm_bdg_quadkey_geom = pandas.DataFrame(
{
"osm_id": [-101010, -202020],
"quadkey": ["122010321033023130", "122010321033023132"],
"geometry": [
"POLYGON((15.0491 37.4811,15.0494 37.4814,15.0495 37.4813,15.0497 37.4812,"
"15.0495 37.4811,15.0494 37.4812,15.0492 37.4810,15.0491 37.4811))",
"POLYGON((15.0490 37.4802,15.04924 37.48036,15.0492004 37.4804,15.0494 37.4805,"
"15.0496 37.4803,15.0494 37.4802,15.0491 37.4800,15.0490 37.4802))",
],
}
)
DatabaseStorage.store_OBM_building_classes(
"ABC_10269",
"commercial",
2,
obm_bdg_classes,
obm_bdg_quadkey_geom,
config.database_gde_tiles,
"gde_buildings",
)
......@@ -218,14 +259,34 @@ def test_store_OBM_building_classes(test_db):
(
returned_occupancy_case,
returned_data_unit_id,
returned_quadkey,
returned_building_class_names,
returned_settlement_types,
returned_occupancy_subtypes,
returned_probabilities,
returned_geometry,
) = query_OBM_building_classes(config.database_gde_tiles, osm_id, 2)
assert returned_occupancy_case == "commercial"
assert returned_data_unit_id == "ABC_10269"
assert (
returned_quadkey
== obm_bdg_quadkey_geom.loc[
obm_bdg_quadkey_geom["osm_id"] == osm_id, "quadkey"
].to_numpy()[0]
)
expected_geometry = shapely.wkt.loads(
obm_bdg_quadkey_geom.loc[
obm_bdg_quadkey_geom["osm_id"] == osm_id, "geometry"
].to_numpy()[0]
)
assert round(returned_geometry.bounds[0], 2) == round(expected_geometry.bounds[0], 2)
assert round(returned_geometry.bounds[1], 2) == round(expected_geometry.bounds[1], 2)
assert round(returned_geometry.bounds[2], 2) == round(expected_geometry.bounds[2], 2)
assert round(returned_geometry.bounds[3], 2) == round(expected_geometry.bounds[3], 2)
assert len(returned_building_class_names) == len(
obm_bdg_classes[osm_id]["building_class_name"].to_numpy()
)
......@@ -277,6 +338,8 @@ def query_OBM_building_classes(credentials, osm_id, aggregated_source_id):
data_unit_id (str):
ID of the data unit of the 'aggregated_source_id' and 'occupancy_case' associated
with the OBM building.
quadkey (str):
Quadkey of the zoom-level 18 tile to which the centroid of the OBM building belongs.
building_class_names (list of str):
Building classes as per the GEM Building Taxonomy.
settlement_types (list of str):
......@@ -286,11 +349,14 @@ def query_OBM_building_classes(credentials, osm_id, aggregated_source_id):
Details on the occupancy, if relevant to characterise the building classes.
probabilities (list of float):
Probabilities of the building belonging to the building classes.
geometry (Shapely Polygon or Multipolygon):
Geometry of the OBM building.
"""
sql_command = (
"SELECT occupancy_case, data_unit_id, building_class_names, settlement_types, "
"occupancy_subtypes, probabilities FROM gde_buildings "
"SELECT occupancy_case, data_unit_id, quadkey, building_class_names, settlement_types, "
"occupancy_subtypes, probabilities, geometry FROM gde_buildings "
"WHERE (osm_id=%s AND aggregated_source_id=%s);" % (osm_id, aggregated_source_id)
)
......@@ -301,18 +367,22 @@ def query_OBM_building_classes(credentials, osm_id, aggregated_source_id):
occupancy_case = result[0][0]
data_unit_id = result[0][1]
building_class_names = result[0][2]
settlement_types = result[0][3].replace("{", "").replace("}", "").split(",")
occupancy_subtypes = result[0][4]
probabilities = result[0][5]
quadkey = result[0][2]
building_class_names = result[0][3]
settlement_types = result[0][4].replace("{", "").replace("}", "").split(",")
occupancy_subtypes = result[0][5]
probabilities = result[0][6]
geometry = shapely.wkb.loads(result[0][7], hex=True)
db_test.close_connection()
return (
occupancy_case,
data_unit_id,
quadkey,
building_class_names,
settlement_types,
occupancy_subtypes,
probabilities,
geometry,
)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment