test_database_storage.py 11.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#!/usr/bin/env python3

# Copyright (C) 2022:
#   Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.

import os
20
import numpy
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import pandas
from gdeimporter.tools.database import Database
from gdecore.configuration import Configuration
from gdecore.database_storage import DatabaseStorage


def test_store_number_OBM_and_remainder_buildings(test_db):
    # Database connection (the Configuration class will define the credentials based on whether
    # the code is running in the CI or locally)
    config = Configuration(
        os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml")
    )

    data_unit_tiles = pandas.DataFrame(
        {
            "quadkey": [
                "122010321033023120",
                "122010321033023132",
                "122010321033023130",
                "122010321033023121",
                "122010321033023123",
            ],
            "aggregated_buildings": [39.1, 34.4, 15.7, 26.2, 16.5],
            "obm_buildings": [41, 12, 3, 0, 0],
            "remainder_buildings": [0.0, 0.0, 12.7, 26.2, 0.0],
        }
    )

    DatabaseStorage.store_number_OBM_and_remainder_buildings(
        "ABC_10269",
        "residential",
        2,
        data_unit_tiles,
        config.database_gde_tiles,
        "data_unit_tiles",
    )

    for i, quadkey in enumerate(data_unit_tiles["quadkey"].to_numpy()):
        returned_obm_buildings, returned_remainder_buildings = query_obm_and_remainder(
            config.database_gde_tiles, quadkey, "ABC_10269", "residential", 2
        )

        assert returned_obm_buildings == data_unit_tiles["obm_buildings"].to_numpy()[i]
        assert round(returned_remainder_buildings, 2) == round(
            data_unit_tiles["remainder_buildings"].to_numpy()[i], 2
        )


def query_obm_and_remainder(
    credentials, quadkey, data_unit_id_full, occupancy_case, aggregated_source_id
):
    """This auxiliary function queries the 'data_unit_tiles' table of the test database to
    retrieve the number of OBM and remainder buildings of the entry corresponding to 'quadkey',
    'data_unit_id_full', 'occupancy_case' and 'aggregated_source_id'.

    Args:
        credentials (dict):
            Dictionary containing the credentials needed to connect to the test SQL database.
            The keys of the dictionary need to be:
                host (str):
                    SQL database host address.
                dbname (str):
                    Name of the SQL database.
                port (int):
                    Port where the SQL database can be found.
                username (str):
                    User name to connect to the SQL database.
                password (str):
                    Password associated with self.username.
        quadkey (str):
            Zoom level 18 tile identifier of the data-unit tile.
        data_unit_id_full (str):
            ID of the Data Unit of the data-unit tile, including the 3-character code of its
            corresponding exposure entity.
        occupancy_case (str):
            Occupancy case of the data-unit tile.
        aggregated_source_id (int):
            ID of the aggregated exposure model source associated with the data-unit tile.

    Returns:
        obm_buildings (int):
            Number of OBM buildings in the data-unit tile.
        remainder_buildings (float):
            Number of remainder buildings in the data-unit tile.
    """

    sql_command = (
        "SELECT obm_buildings, remainder_buildings FROM data_unit_tiles "
        "WHERE (quadkey='%s' AND data_unit_id='%s' AND occupancy_case='%s' "
        "AND aggregated_source_id='%s');"
        % (quadkey, data_unit_id_full, occupancy_case, aggregated_source_id)
    )

    db_test = Database(**credentials)
    db_test.create_connection_and_cursor()
    db_test.cursor.execute(sql_command)
    result = db_test.cursor.fetchall()

    obm_buildings = result[0][0]
    remainder_buildings = result[0][1]

    db_test.close_connection()

    return obm_buildings, remainder_buildings
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318


def test_store_OBM_building_classes(test_db):
    # Database connection (the Configuration class will define the credentials based on whether
    # the code is running in the CI or locally)
    config = Configuration(
        os.path.join(os.path.dirname(__file__), "data", "config_for_testing_good.yml")
    )

    obm_bdg_classes = {}
    obm_bdg_classes[11223344] = pandas.DataFrame(
        {
            "building_class_name": ["A2/HBET:4-6", "B2/H:4"],
            "settlement_type": ["urban", "rural"],
            "occupancy_subtype": ["all", "all"],
            "probabilities": [0.666667, 0.333333],
        }
    )

    DatabaseStorage.store_OBM_building_classes(
        "ABC_10269",
        "residential",
        2,
        obm_bdg_classes,
        config.database_gde_tiles,
        "gde_buildings",
    )

    (
        returned_occupancy_case,
        returned_data_unit_id,
        returned_building_class_names,
        returned_settlement_types,
        returned_occupancy_subtypes,
        returned_probabilities,
    ) = query_OBM_building_classes(config.database_gde_tiles, 11223344, 2)

    assert returned_occupancy_case == "residential"
    assert returned_data_unit_id == "ABC_10269"
    assert len(returned_building_class_names) == len(
        obm_bdg_classes[11223344]["building_class_name"].to_numpy()
    )
    for i, bdg_class_name in enumerate(returned_building_class_names):
        which = numpy.where(
            obm_bdg_classes[11223344]["building_class_name"].to_numpy() == bdg_class_name
        )[0][0]
        assert (
            returned_settlement_types[i]
            == obm_bdg_classes[11223344]["settlement_type"].to_numpy()[which]
        )
        assert (
            returned_occupancy_subtypes[i]
            == obm_bdg_classes[11223344]["occupancy_subtype"].to_numpy()[which]
        )
        assert round(returned_probabilities[i], 4) == round(
            obm_bdg_classes[11223344]["probabilities"].to_numpy()[which], 4
        )

    obm_bdg_classes = {}
    obm_bdg_classes[-101010] = pandas.DataFrame(
        {
            "building_class_name": ["C6/HBET:3-5"],
            "settlement_type": ["urban"],
            "occupancy_subtype": ["Offices"],
            "probabilities": [1.0],
        }
    )
    obm_bdg_classes[-202020] = pandas.DataFrame(
        {
            "building_class_name": [
                "C1/HBET:1-2",
                "C2/HBET:3-",
                "C3/H:1",
                "C4/HBET:2-3",
                "C5/HBET:1-2",
                "C6/HBET:3-5",
            ],
            "settlement_type": ["urban", "urban", "urban", "urban", "urban", "urban"],
            "occupancy_subtype": ["Hotels", "Hotels", "Trade", "Trade", "Offices", "Offices"],
            "probabilities": [0.10, 0.25, 0.05, 0.10, 0.20, 0.30],
        }
    )

    DatabaseStorage.store_OBM_building_classes(
        "ABC_10269",
        "commercial",
        2,
        obm_bdg_classes,
        config.database_gde_tiles,
        "gde_buildings",
    )

    for osm_id in obm_bdg_classes.keys():
        (
            returned_occupancy_case,
            returned_data_unit_id,
            returned_building_class_names,
            returned_settlement_types,
            returned_occupancy_subtypes,
            returned_probabilities,
        ) = query_OBM_building_classes(config.database_gde_tiles, osm_id, 2)

        assert returned_occupancy_case == "commercial"
        assert returned_data_unit_id == "ABC_10269"
        assert len(returned_building_class_names) == len(
            obm_bdg_classes[osm_id]["building_class_name"].to_numpy()
        )
        for i, bdg_class_name in enumerate(returned_building_class_names):
            which = numpy.where(
                obm_bdg_classes[osm_id]["building_class_name"].to_numpy() == bdg_class_name
            )[0][0]
            assert (
                returned_settlement_types[i]
                == obm_bdg_classes[osm_id]["settlement_type"].to_numpy()[which]
            )
            assert (
                returned_occupancy_subtypes[i]
                == obm_bdg_classes[osm_id]["occupancy_subtype"].to_numpy()[which]
            )
            assert round(returned_probabilities[i], 4) == round(
                obm_bdg_classes[osm_id]["probabilities"].to_numpy()[which], 4
            )


def query_OBM_building_classes(credentials, osm_id, aggregated_source_id):
    """This auxiliary function queries the 'gde_buildings' table of the test database to
    retrieve the building classes (and related attributes) associated with an OBM building with
    ID 'osm_id' as per an aggregated model with 'aggregated_source_id'.

    Args:
        credentials (dict):
            Dictionary containing the credentials needed to connect to the test SQL database.
            The keys of the dictionary need to be:
                host (str):
                    SQL database host address.
                dbname (str):
                    Name of the SQL database.
                port (int):
                    Port where the SQL database can be found.
                username (str):
                    User name to connect to the SQL database.
                password (str):
                    Password associated with self.username.
        osm_id (int):
            OSM ID of the building to query.
        aggregated_source_id (int):
            ID of the source of the aggregated exposure model associated with the building
            classes.

    Returns:
        occupancy_case (str):
            Occupancy case associated with the OBM building.
        data_unit_id (str):
            ID of the data unit of the 'aggregated_source_id' and 'occupancy_case' associated
            with the OBM building.
        building_class_names (list of str):
            Building classes as per the GEM Building Taxonomy.
        settlement_types (list of str):
            Type of settlements within the data unit. Possible values: "urban", "rural",
            "big_city", "all".
        occupancy_subtypes (list of str):
            Details on the occupancy, if relevant to characterise the building classes.
        probabilities (list of float):
            Probabilities of the building belonging to the building classes.
    """

    sql_command = (
        "SELECT occupancy_case, data_unit_id, building_class_names, settlement_types, "
        "occupancy_subtypes, probabilities FROM gde_buildings "
        "WHERE (osm_id=%s AND aggregated_source_id=%s);" % (osm_id, aggregated_source_id)
    )

    db_test = Database(**credentials)
    db_test.create_connection_and_cursor()
    db_test.cursor.execute(sql_command)
    result = db_test.cursor.fetchall()

    occupancy_case = result[0][0]
    data_unit_id = result[0][1]
    building_class_names = result[0][2]
    settlement_types = result[0][3].replace("{", "").replace("}", "").split(",")
    occupancy_subtypes = result[0][4]
    probabilities = result[0][5]

    db_test.close_connection()

    return (
        occupancy_case,
        data_unit_id,
        building_class_names,
        settlement_types,
        occupancy_subtypes,
        probabilities,
    )