Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Dynamic Exposure
Global Dynamic Exposure
gde-core
Commits
75584e90
Commit
75584e90
authored
Mar 31, 2022
by
Cecilia Nievas
Browse files
Added feature to unify building parts of relations
parent
7d337956
Pipeline
#40933
passed with stage
in 2 minutes and 25 seconds
Changes
6
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
gdecore/database_queries.py
View file @
75584e90
...
...
@@ -345,21 +345,24 @@ class DatabaseQueries:
obm_buildings (GeoPandas GeoDataFrame):
GeoDataFrame with data on the OBM buildings of 'occupancy_case' whose centroids
fall within 'geographic_area'. It comprises the following columns:
osm_id (str):
OpenStreetMap (OSM) ID of the building.
relation_id (int):
osm_id (int):
OpenStreetMap (OSM) ID of the building. It cannot contain missing values
(by definition).
relation_id (str):
OpenStreetMap (OSM) ID of the relation to which an osm_id belongs, if
any.
quadkey (
array of
str):
any.
Missing values can be "nan" or "None".
quadkey (str):
String indicating the quadkey of the tile to which the centroid of the
building belongs.
storeys (int):
Number of storeys of the building.
building belongs. Missing values can be "nan" or "None".
storeys (float):
Number of storeys of the building. Treated as floats so as to be able to
use numpy.nan for missing values.
occupancy (str):
Occupancy of the building as per the GEM Building Taxonomy v3.0.
Occupancy of the building as per the GEM Building Taxonomy v3.0. Missing
values can be "nan" or "None".
geometry (Shapely Polygon or MultiPolygon):
Geometry (footprint) of the building, defined in EPSG:4326.
Any missing attributes of a building are returned as numpy.nan
.
Geometry (footprint) of the building, defined in EPSG:4326.
It cannot
contain missing values (by definition)
.
"""
if
(
...
...
@@ -389,4 +392,87 @@ class DatabaseQueries:
db_obm_buildings
.
close_connection
()
# Force data types (dtype is not a parameter in geopandas.GeoDataFrame.from_postgis)
obm_buildings
=
DatabaseQueries
.
_force_data_types_in_OBM_buildings_GeoDataFrame
(
obm_buildings
)
return
obm_buildings
@
staticmethod
def
_force_data_types_in_OBM_buildings_GeoDataFrame
(
obm_buildings
):
"""This function forces the datatypes of the columns of 'obm_buildings'. This is useful
when 'obm_buildings' has been retrieved with geopandas.GeoDataFrame.from_postgis(), as
'geopandas' automatically interprets data types and does not take 'dtype' as an
argument. The interpreted data types depend on the data retrieved with the PSQL query.
For example, if the query yields all NULL values of "storeys", 'geopandas' will assign
them "None" values instead of floats or integers.
This method does not force the data type of the "geometry" column of 'obm_buildings'.
Args:
obm_buildings (GeoPandas GeoDataFrame):
GeoDataFrame with data on the OBM buildings, assumed to have been retrieved with
geopandas.GeoDataFrame.from_postgis() and to contain the following columns (data
type not specified):
osm_id:
OpenStreetMap (OSM) ID of the building.
relation_id:
OpenStreetMap (OSM) ID of the relation to which an osm_id belongs, if
any.
quadkey:
String indicating the quadkey of the tile to which the centroid of the
building belongs.
storeys:
Number of storeys of the building.
occupancy:
Occupancy of the building as per the GEM Building Taxonomy v3.0.
geometry (Shapely Polygon or MultiPolygon):
Geometry (footprint) of the building, defined in EPSG:4326.
Returns:
obm_buildings (GeoPandas GeoDataFrame):
GeoDataFrame with the same contents as the input 'obm_buildings' but the
following data types:
osm_id (int):
OpenStreetMap (OSM) ID of the building. It cannot contain missing values
(by definition).
relation_id (str):
OpenStreetMap (OSM) ID of the relation to which an osm_id belongs, if
any. Missing values can be "nan" or "None".
quadkey (str):
String indicating the quadkey of the tile to which the centroid of the
building belongs. Missing values can be "nan" or "None".
storeys (float):
Number of storeys of the building. Treated as floats so as to be able to
use numpy.nan for missing values.
occupancy (str):
Occupancy of the building as per the GEM Building Taxonomy v3.0. Missing
values can be "nan" or "None".
geometry (Shapely Polygon or MultiPolygon):
Geometry (footprint) of the building, defined in EPSG:4326. It cannot
contain missing values (by definition).
"""
if
obm_buildings
.
shape
[
0
]
==
0
:
return
obm_buildings
# osm_id
if
not
isinstance
(
obm_buildings
[
"osm_id"
].
to_numpy
()[
0
],
numpy
.
int64
):
new_osm_id
=
obm_buildings
[
"osm_id"
].
astype
(
numpy
.
int64
)
obm_buildings
[
"osm_id"
]
=
new_osm_id
# storeys
if
not
isinstance
(
obm_buildings
[
"storeys"
].
to_numpy
()[
0
],
numpy
.
float64
):
# If all elements of "storeys" are None, turning them into floats converts them
# into numpy.nan
new_storeys
=
obm_buildings
[
"storeys"
].
astype
(
numpy
.
float64
)
obm_buildings
[
"storeys"
]
=
new_storeys
# relation_id, quadkey, occupancy
for
column_name
in
[
"relation_id"
,
"quadkey"
,
"occupancy"
]:
if
not
isinstance
(
obm_buildings
[
column_name
].
to_numpy
()[
0
],
str
):
# None elements become "None", numpy.nan elements become "nan"
new_column
=
obm_buildings
[
column_name
].
astype
(
str
)
obm_buildings
[
column_name
]
=
new_column
return
obm_buildings
gdecore/gdecore.py
View file @
75584e90
...
...
@@ -18,8 +18,10 @@
import
logging
import
sys
from
copy
import
deepcopy
from
gdecore.configuration
import
Configuration
from
gdecore.database_queries
import
DatabaseQueries
from
gdecore.processor
import
GDEProcessor
# Add a logger printing error, warning, info and debug messages to the screen
logger
=
logging
.
getLogger
()
...
...
@@ -102,6 +104,10 @@ def main():
# Retrieve OBM buildings and assign building classes and probabilities to them
for
i
,
data_unit_id
in
enumerate
(
data_units_ids
):
aux_log_string
=
(
"Data unit '%s' (of exposure entity '%s' and occupancy case '%s')"
%
(
data_unit_id
,
exposure_entity_code
,
occupancy_case
)
)
# Going by data unit so as to minimise intersection operations and because
# building classes are associated with specific data units
obm_buildings_raw
=
(
...
...
@@ -113,8 +119,18 @@ def main():
)
)
logger
.
info
(
"Data unit '%s': %s OBM buildings retrieved"
%
(
data_unit_id
,
str
(
obm_buildings_raw
.
shape
[
0
]))
"%s: %s OBM building parts retrieved"
%
(
aux_log_string
,
str
(
obm_buildings_raw
.
shape
[
0
]))
)
if
obm_buildings_raw
.
shape
[
0
]
>
0
:
# Group parts of the same relations existing in 'obm_buildings_raw'
obm_buildings
=
GDEProcessor
.
post_process_obm_relations
(
obm_buildings_raw
)
else
:
obm_buildings
=
deepcopy
(
obm_buildings_raw
)
logger
.
info
(
"%s: %s OBM buildings identified"
%
(
aux_log_string
,
str
(
obm_buildings
.
shape
[
0
]))
)
# Leave the program
...
...
gdecore/processor.py
0 → 100644
View file @
75584e90
#!/usr/bin/env python3
# Copyright (C) 2022:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import
logging
from
copy
import
deepcopy
import
numpy
import
mercantile
import
pandas
import
pyproj
logger
=
logging
.
getLogger
()
class GDEProcessor:
    """This class contains methods that are fundamental to the calculations of `gde-core`."""

    @staticmethod
    def post_process_obm_relations(obm_buildings):
        """This function processes the contents of 'obm_buildings' to identify entries that
        correspond to individual buildings and entries that correspond to parts of buildings
        that belong to the same OpenStreetMap (OSM) relation ID. Data on the former stays the
        same. For the latter (i.e. parts of relations), the function gathers the parts and
        transforms them into one individual entry in which:
            - the 'osm_id' becomes that of the 'relation_id',
            - the number of storeys becomes the maximum of all individual parts,
            - the quadkey of the ensemble is identified,
            - the occupancy of the ensemble is identified.

        Args:
            obm_buildings (GeoPandas GeoDataFrame):
                GeoDataFrame with data on OBM buildings. It comprises the following columns:
                    osm_id (int):
                        OpenStreetMap (OSM) ID of the building. It cannot contain missing values
                        (by definition).
                    relation_id (str):
                        OpenStreetMap (OSM) ID of the relation to which an osm_id belongs, if
                        any. Missing values can be "nan" or "None".
                    quadkey (str):
                        String indicating the quadkey of the tile to which the centroid of the
                        building belongs. Missing values can be "nan" or "None".
                    storeys (float):
                        Number of storeys of the building. Treated as floats so as to be able to
                        use numpy.nan for missing values.
                    occupancy (str):
                        Occupancy of the building as per the GEM Building Taxonomy v3.0. Missing
                        values can be "nan" or "None".
                    geometry (Shapely Polygon or MultiPolygon):
                        Geometry (footprint) of the building, defined in EPSG:4326. It cannot
                        contain missing values (by definition).

        Returns:
            obm_buildings_adjusted (Pandas DataFrame):
                Processed version of 'obm_buildings' in which individual parts of the same
                'relation_id' have been grouped together and relevant values of the other fields
                have been identified. Rows of buildings that are not part of a relation come
                first and keep their original index labels; rows of relations follow, labelled
                from 0, so index labels are not guaranteed to be unique. It comprises the
                following columns:
                    osm_id (int):
                        OpenStreetMap (OSM) ID of the building. If the building is represented
                        by a relation, this is the ID of the relation.
                    quadkey (str):
                        String indicating the quadkey of the tile to which the centroid of the
                        building belongs.
                    storeys (float):
                        Number of storeys of the building (maximum of all components if building
                        is an OSM relation). Treated as floats so as to be able to use numpy.nan
                        for missing values.
                    occupancy (str):
                        Occupancy of the building as per the GEM Building Taxonomy v3.0. Missing
                        values can be "nan" or "None".
        """

        # Retrieved once, as it is needed both for the masks and inside the loop
        relation_ids = obm_buildings["relation_id"].to_numpy()

        # Identify unique relation IDs in 'obm_buildings' and get rid of "nan" and "None"
        unique_relations = numpy.unique(relation_ids)
        unique_relations = unique_relations[
            numpy.logical_and(unique_relations != "nan", unique_relations != "None")
        ]

        # Identify rows that contain buildings that are not part of a relation
        which_no_relation = numpy.where(
            numpy.logical_or(relation_ids == "nan", relation_ids == "None")
        )[0]

        # Process buildings that are part of relations
        add_osm_ids = []
        add_quadkey = []
        add_storeys = []
        add_occupancy = []

        for relation_id in unique_relations:
            # Identify components of this relation_id (indexing with an array of positions
            # yields a new object, so the helpers below cannot alter 'obm_buildings')
            which = numpy.where(relation_ids == relation_id)[0]
            obm_buildings_of_relation = obm_buildings.iloc[which, :]

            # 'relation_id' may be the string of a float (e.g. "-101010.0")
            add_osm_ids.append(int(float(relation_id)))
            add_quadkey.append(
                GDEProcessor._identify_unique_quadkey_L18(obm_buildings_of_relation)
            )
            add_storeys.append(
                GDEProcessor._select_max_of_array(
                    obm_buildings_of_relation["storeys"].to_numpy()
                )
            )
            add_occupancy.append(
                GDEProcessor._ensure_unique_occupancy(obm_buildings_of_relation)
            )

        relation_buildings = pandas.DataFrame(
            {
                "osm_id": pandas.Series(numpy.array(add_osm_ids).astype(int), dtype="int"),
                "quadkey": pandas.Series(numpy.array(add_quadkey).astype(str), dtype="str"),
                "storeys": pandas.Series(
                    numpy.array(add_storeys).astype(float), dtype="float"
                ),
                "occupancy": pandas.Series(
                    numpy.array(add_occupancy).astype(str), dtype="str"
                ),
            }
        )

        if len(which_no_relation) == 0:
            # All buildings are part of relations
            return relation_buildings

        # Start output DataFrame with buildings that are not part of relations
        obm_buildings_adjusted = obm_buildings.iloc[which_no_relation, :].drop(
            columns=["relation_id", "geometry"]
        )

        return pandas.concat([obm_buildings_adjusted, relation_buildings])

    @staticmethod
    def _identify_unique_quadkey_L18(obm_buildings):
        """This function identifies the unique level 18 quadkey associated with the (parts of)
        buildings contained in 'obm_buildings'.

        It is assumed that 'obm_buildings' contains different OpenStreetMap (OSM) building
        polygons that are linked by the same OSM relation ID. If all values of 'quadkey' within
        'obm_buildings' are the same, this is identified as the sought unique level 18 quadkey.
        If 'obm_buildings' contains different values of 'quadkey', then all geometries of
        'obm_buildings' are aggregated ("dissolved") into one, whose centroid is calculated and
        used to identify the corresponding level 18 quadkey.

        If the number of unique values of 'relation_id' in 'obm_buildings' is different from
        one, this function logs an error and returns an empty string.

        Args:
            obm_buildings (GeoPandas GeoDataFrame):
                GeoDataFrame with data on OBM buildings. It comprises the following columns:
                    osm_id (int):
                        OpenStreetMap (OSM) ID of the building.
                    relation_id (str):
                        OpenStreetMap (OSM) ID of the relation to which an osm_id belongs, if
                        any.
                    quadkey (str):
                        String indicating the quadkey of the tile to which the centroid of the
                        building belongs.
                    storeys (float):
                        Number of storeys of the building.
                    occupancy (str):
                        Occupancy of the building as per the GEM Building Taxonomy v3.0.
                    geometry (Shapely Polygon or MultiPolygon):
                        Geometry (footprint) of the building, defined in EPSG:4326.

        Returns:
            unique_quadkey (str):
                String indicating the unique quadkey to which the centroid of the geometries in
                'obm_buildings' belongs.
        """

        if len(numpy.unique(obm_buildings["relation_id"].to_numpy())) != 1:
            logger.error(
                "'obm_buildings' passed on to GDEProcessor._identify_unique_quadkey_L18() "
                "contains more than one unique value of 'relation_id'. The program cannot run."
            )
            return ""

        unique_quadkeys = numpy.unique(obm_buildings["quadkey"].to_numpy())

        if len(unique_quadkeys) == 1:
            return unique_quadkeys[0]

        # Different quadkeys: merge all parts into one geometry ('drop' returns a new object,
        # the input 'obm_buildings' is not modified)
        obm_buildings_dissolved = obm_buildings.drop(
            columns=["osm_id", "quadkey", "storeys", "occupancy"]
        ).dissolve(by="relation_id")

        # Project 'obm_buildings_dissolved' onto Albers Equal Area, with the standard
        # parallels and the origin defined from the bounding box of the geometry
        lon_w, lat_s, lon_e, lat_n = obm_buildings_dissolved.total_bounds
        projection_string = "+proj=aea +lat_1={} +lat_2={} +lat_0={} +lon_0={}".format(
            lat_s, lat_n, (lat_s + lat_n) / 2.0, (lon_w + lon_e) / 2.0
        )
        obm_buildings_dissolved = obm_buildings_dissolved.to_crs(
            pyproj.CRS(projection_string)
        )

        # After dissolving, the index holds the (string) 'relation_id', so the only row is
        # selected explicitly by position rather than with the integer key [0]
        centroid = obm_buildings_dissolved["geometry"].centroid.to_crs("EPSG:4326").iloc[0]

        return mercantile.quadkey(mercantile.tile(centroid.x, centroid.y, 18))

    @staticmethod
    def _select_max_of_array(numbers):
        """This function returns the largest value in 'numbers'. If all elements of 'numbers'
        are NaNs, it returns numpy.nan. If some elements of 'numbers' are NaNs, the NaNs are
        ignored and the maximum of the rest of the elements is returned.

        Note: The purpose of this function is to be able to handle the case in which all
        elements of 'numbers' are NaNs, as numpy.nanmax() returns a RuntimeWarning in this case.

        Args:
            numbers (array of int or float):
                Array of integers or floats. May contain NaNs.

        Returns:
            max_number (int, float or numpy.nan):
                Maximum value of 'numbers'.
        """

        if numpy.all(numpy.isnan(numbers)):
            return numpy.nan

        return numpy.nanmax(numbers)

    @staticmethod
    def _ensure_unique_occupancy(obm_buildings):
        """This function identifies the unique occupancy associated with the (parts of)
        buildings contained in 'obm_buildings'.

        It is assumed that 'obm_buildings' contains different OpenStreetMap (OSM) building
        polygons that are linked by the same OSM relation ID. If all values of 'occupancy'
        within 'obm_buildings' are the same, this is identified as the sought unique occupancy.
        If 'obm_buildings' contains different values of 'occupancy' (a warning is logged), or
        if the number of unique values of 'relation_id' in 'obm_buildings' is different from
        one (an error is logged), an empty string is returned.

        NOTE: This function assumes that all parts of the same relation are assigned the same
        occupancy by `rabotnik-obm`.

        Args:
            obm_buildings (GeoPandas GeoDataFrame):
                GeoDataFrame with data on OBM buildings. It comprises the following columns:
                    osm_id (int):
                        OpenStreetMap (OSM) ID of the building.
                    relation_id (str):
                        OpenStreetMap (OSM) ID of the relation to which an osm_id belongs, if
                        any.
                    quadkey (str):
                        String indicating the quadkey of the tile to which the centroid of the
                        building belongs.
                    storeys (float):
                        Number of storeys of the building.
                    occupancy (str):
                        Occupancy of the building as per the GEM Building Taxonomy v3.0.
                    geometry (Shapely Polygon or MultiPolygon):
                        Geometry (footprint) of the building, defined in EPSG:4326.

        Returns:
            unique_occupancy (str):
                String indicating the unique occupancy of all the parts of the building
                contained in 'obm_buildings' as per the GEM Building Taxonomy v3.0.
        """

        unique_relation_ids = numpy.unique(obm_buildings["relation_id"].to_numpy())

        if len(unique_relation_ids) != 1:
            logger.error(
                "'obm_buildings' passed on to GDEProcessor._ensure_unique_occupancy() contains "
                "more than one unique value of 'relation_id'. The program cannot run."
            )
            return ""

        unique_occupancies = numpy.unique(obm_buildings["occupancy"].to_numpy())

        if len(unique_occupancies) == 1:
            return unique_occupancies[0]

        logger.warning(
            "GDEProcessor._ensure_unique_occupancy(): individual parts of 'relation_id' %s "
            "have different values of 'occupancy'. A unique occupancy value could not be "
            "determined.",
            unique_relation_ids[0],
        )

        return ""
tests/data/test_database_set_up.sql
View file @
75584e90
...
...
@@ -77,10 +77,12 @@ CREATE TABLE obm_buildings
PRIMARY
KEY
(
osm_id
)
);
-- Residential building with number of storeys, not part of relation
INSERT
INTO
obm_buildings
(
osm_id
,
storeys
,
occupancy
,
occupancy_case
,
quadkey
,
geometry
)
VALUES
(
11223344
,
4
,
'RES2'
,
'residential'
,
'122010321033023130'
,
ST_GeomFromText
(
'POLYGON((15.0487 37.4812,15.0489 37.4810,15.0486 37.4808,15.0484 37.4810,15.0487 37.4812))'
));
-- Buildings that are not part of a relation and do not have number of storeys
INSERT
INTO
obm_buildings
(
osm_id
,
occupancy
,
occupancy_case
,
quadkey
,
geometry
)
VALUES
(
22334455
,
'RES1'
,
'residential'
,
'122010321033023130'
,
...
...
@@ -89,12 +91,21 @@ VALUES (
55667788
,
'RES3'
,
'commercial'
,
'122010321033023130'
,
ST_GeomFromText
(
'POLYGON((15.0495 37.4810,15.0498 37.4810,15.0498 37.4808,15.0495 37.4808,15.0495 37.4810))'
)),
(
88990011
,
'RES'
,
'residential'
,
'1220103210330231
3
0'
,
88990011
,
'RES'
,
'residential'
,
'1220103210330231
2
0'
,
ST_GeomFromText
(
'POLYGON((15.0463 37.4809,15.0463 37.4808,15.0461 37.4808,15.0461 37.4809,15.0463 37.4809))'
));
-- Commercial buildings that are part of a relation, with number of storeys
INSERT
INTO
obm_buildings
(
osm_id
,
storeys
,
relation_id
,
occupancy
,
occupancy_case
,
quadkey
,
geometry
)
VALUES
(
33445566
,
2
,
-
101010
,
'COM3'
,
'commercial'
,
'122010321033023130'
,
ST_GeomFromText
(
'POLYGON((15.0491 37.4811,15.0494 37.4814,15.0495 37.4813,15.0492 37.4810,15.0491 37.4811))'
)),
(
44556677
,
3
,
-
101010
,
'COM3'
,
'commercial'
,
'122010321033023130'
,
ST_GeomFromText
(
'POLYGON((15.0495 37.4813,15.0497 37.4812,15.0495 37.4811,15.0494 37.4812,15.0495 37.4813))'
));
ST_GeomFromText
(
'POLYGON((15.0495 37.4813,15.0497 37.4812,15.0495 37.4811,15.0494 37.4812,15.0495 37.4813))'
)),
(
66778899
,
4
,
-
202020
,
'COM2'
,
'commercial'
,
'122010321033023132'
,
ST_GeomFromText
(
'POLYGON((15.0490 37.4802,15.0493 37.4804,15.0494 37.4802,15.0491 37.4800,15.0490 37.4802))'
));
-- Commercial buildings that are part of a relation, without number of storeys
INSERT
INTO
obm_buildings
(
osm_id
,
relation_id
,
occupancy
,
occupancy_case
,
quadkey
,
geometry
)
VALUES
(
77889900
,
-
202020
,
'COM2'
,
'commercial'
,
'122010321033023130'
,
ST_GeomFromText
(
'POLYGON((15.0494 37.4805,15.0496 37.4803,15.0494 37.4802,15.0492 37.4804,15.0494 37.4805))'
));
tests/test_database_queries.py
View file @
75584e90
...
...
@@ -179,6 +179,11 @@ def test_get_OBM_buildings_in_data_unit_by_occupancy_case(test_db):
# Check building whose footprint intersects the data unit but whose centroid is outside
assert
88990011
not
in
returned_obm_buildings
[
"osm_id"
].
to_numpy
()
# Check that relation_id, quadkey and occupancy are strings
for
column_name
in
[
"relation_id"
,
"quadkey"
,
"occupancy"
]:
for
i
in
range
(
returned_obm_buildings
.
shape
[
0
]):
assert
isinstance
(
returned_obm_buildings
[
column_name
].
to_numpy
()[
i
],
str
)
returned_obm_buildings
=
DatabaseQueries
.
get_OBM_buildings_in_data_unit_by_occupancy_case
(
"commercial"
,
geometry
,
...
...
@@ -186,14 +191,18 @@ def test_get_OBM_buildings_in_data_unit_by_occupancy_case(test_db):
"obm_buildings"
,
)
assert
returned_obm_buildings
.
shape
[
0
]
==
3
assert
returned_obm_buildings
.
shape
[
0
]
==
5
for
col_name
in
expected_columns
:
assert
col_name
in
returned_obm_buildings
.
columns
assert
33445566
in
returned_obm_buildings
[
"osm_id"
].
to_numpy
()
assert
44556677
in
returned_obm_buildings
[
"osm_id"
].
to_numpy
()
assert
55667788
in
returned_obm_buildings
[
"osm_id"
].
to_numpy
()
assert
numpy
.
isnan
(
returned_obm_buildings
.
loc
[
0
,
"storeys"
])
assert
numpy
.
isnan
(
returned_obm_buildings
.
loc
[
0
,
"relation_id"
])
expected_osm_ids
=
[
33445566
,
44556677
,
55667788
,
66778899
,
77889900
]
for
expected_id
in
expected_osm_ids
:
assert
expected_id
in
returned_obm_buildings
[
"osm_id"
].
to_numpy
()
which_55667788
=
numpy
.
where
(
returned_obm_buildings
[
"osm_id"
].
to_numpy
()
==
55667788
)[
0
][
0
]
assert
numpy
.
isnan
(
returned_obm_buildings
[
"storeys"
].
to_numpy
()[
which_55667788
])
assert
(
returned_obm_buildings
[
"relation_id"
].
to_numpy
()[
which_55667788
]
==
"nan"
)
or
(
returned_obm_buildings
[
"relation_id"
].
to_numpy
()[
which_55667788
]
==
"None"
)
# Test case in which no buildings will be retrieved
geometry
=
returned_data_units_geometries
[
...
...
tests/test_processor.py
0 → 100644
View file @
75584e90
#!/usr/bin/env python3
# Copyright (C) 2022:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import
os
import
logging
import
numpy
from
gdecore.processor
import
GDEProcessor
from
gdecore.configuration
import
Configuration
from
gdecore.database_queries
import
DatabaseQueries
logger
=
logging
.
getLogger
()
def
test_post_process_obm_relations
(
test_db
):
# Preliminary steps to retrieve the relevant data from the test database
# Database connection (the Configuration class will define the credentials based on whether
# the code is running in the CI or locally)
config
=
Configuration
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"data"
,
"config_for_testing_good.yml"
)
)
(
returned_data_units_ids
,
returned_data_units_geometries
,
_
,
)
=
DatabaseQueries
.
get_data_unit_ids_geometries_of_entity_and_occupancy_case
(
"ABC"
,
"residential"
,
2
,
config
.
database_gde_tiles
,
"data_units"
)
# auxiliary, to retrieve the geometry of the data unit
geometry
=
returned_data_units_geometries
[
numpy
.
where
(
returned_data_units_ids
==
"ABC_10269"
)[
0
][
0
]
]
# Group of residential buildings that do not belong to relations
raw_obm_buildings
=
DatabaseQueries
.
get_OBM_buildings_in_data_unit_by_occupancy_case
(
"residential"
,
geometry
,
config
.
database_obm_buildings
,
"obm_buildings"
,
)
returned_obm_buildings
=
GDEProcessor
.
post_process_obm_relations
(
raw_obm_buildings
)
assert
returned_obm_buildings
.
shape
[
0
]
==
2
assert
11223344
in
returned_obm_buildings
[
"osm_id"
].
to_numpy
()
assert
22334455
in
returned_obm_buildings
[
"osm_id"
].
to_numpy
()
# Group of commercial buildings, some in relations, some not
raw_obm_buildings
=
DatabaseQueries
.
get_OBM_buildings_in_data_unit_by_occupancy_case
(
"commercial"
,
geometry
,