Commit 79215d60 authored by Cecilia Nievas
Browse files

Optimised ExportHandler to avoid unnecessary (re-)retrieval of parameters

parent 214a2d39
Pipeline #47867 passed with stage
in 2 minutes and 10 seconds
......@@ -471,6 +471,14 @@ class DatabaseQueries:
number_aggregated = exec_result[0][0]
number_obm = exec_result[0][1]
number_remainder = exec_result[0][2]
if number_aggregated is None: # The field is NULL (dependencies have not been run)
number_aggregated = -999.9
if number_obm is None: # The field is NULL (dependencies have not been run)
number_obm = -999.9
if number_remainder is None: # The field is NULL (dependencies have not been run)
number_remainder = -999.9
elif len(exec_result) == 0: # No entry found
number_aggregated = -999.9
number_obm = -999
......
......@@ -126,7 +126,12 @@ def main():
p.close()
p.join()
print(summary_values)
for i in range(len(quadkeys_occupancy_groups)):
logger.info(
"Summary results for quadkey group '%s' and occupancy case '%s':"
% (quadkeys_occupancy_groups[i][0], quadkeys_occupancy_groups[i][1])
)
logger.info(summary_values[i])
# Leave the program
logger.info("gde-exporter has finished")
......
......@@ -17,6 +17,7 @@
# along with this program. If not, see http://www.gnu.org/licenses/.
import logging
from copy import deepcopy
from gdeexporter.tileexposure import TileExposure
from gdeexporter.database_queries import DatabaseQueries
from gdeexporter.to_openquake import export_to_OpenQuake_CSV
......@@ -102,8 +103,46 @@ class ExportHandler:
config.database_gde_tiles,
"data_unit_tiles",
)
# Make sure data unit IDs with the same exposure entity code are consecutive
# (to avoid re-retrieving data associated with the exposure entities in each loop)
data_unit_ids.sort()
# Avoid re-retrieving data from the database if not necessary
previous_exposure_entity_code = ""
for data_unit_id in data_unit_ids:
# Retrieve number of aggregated, OBM and remainder buildings in the tile
(
number_aggregated,
number_obm,
number_remainder,
) = DatabaseQueries.get_numbers_buildings_for_data_unit_tile(
quadkey,
aggregated_source_id,
occupancy_case,
data_unit_id,
config.database_gde_tiles,
"data_unit_tiles",
)
if number_aggregated < -1.0 or number_obm < -1.0 or number_remainder < -1.0:
logger.error(
"get_numbers_buildings_for_data_unit_tile could not retrieve number of "
"aggregated and/or remainder and/or OBM buildings for quadkey = '%s' "
"and data unit ID = '%s'" % (quadkey, data_unit_id)
)
# Retrieve data only if there are any aggregated, OBM or remainder buildings in
# this data unit tile
building_classes = None # Clear any previous results (safety net)
if (
number_aggregated <= 1e-6
and number_obm <= 1e-6
and number_remainder <= 1e-6
):
# Avoid running the rest of the code if this data-unit tile is empty
continue
# Retrieve building classes associated with this data unit, occupancy case and
# aggregated source ID
building_classes = DatabaseQueries.get_building_classes_of_data_unit(
......@@ -114,47 +153,31 @@ class ExportHandler:
"data_units_buildings",
)
# Retrieve needed parameters at the level of the exposure entity
exposure_entity_code = data_unit_id[:3]
# Retrieve cost assumptions
cost_assumptions = DatabaseQueries.get_exposure_entities_costs_assumptions(
config.cost_cases,
exposure_entity_code,
occupancy_case,
aggregated_source_id,
config.database_gde_tiles,
"exposure_entities_costs_assumptions",
)
# Retrieve distribution of people at different times of the day
people_distribution = (
DatabaseQueries.get_exposure_entities_population_time_distribution(
config.people_cases,
if exposure_entity_code != previous_exposure_entity_code:
# Retrieve cost assumptions
cost_assumptions = DatabaseQueries.get_exposure_entities_costs_assumptions(
config.cost_cases,
exposure_entity_code,
occupancy_case,
aggregated_source_id,
config.database_gde_tiles,
"exposure_entities_population_time_distribution",
"exposure_entities_costs_assumptions",
)
)
# Retrieve number of aggregated, OBM and remainder buildings in the tile
(
number_aggregated,
number_obm,
number_remainder,
) = DatabaseQueries.get_numbers_buildings_for_data_unit_tile(
quadkey,
aggregated_source_id,
occupancy_case,
data_unit_id,
config.database_gde_tiles,
"data_unit_tiles",
)
if number_aggregated < -1.0 or number_obm < -1.0 or number_remainder < -1.0:
logger.error(
"get_numbers_buildings_for_data_unit_tile could not retrieve number "
"of aggregated, remainder and OBM buildings for quadkey = '%s' and "
"data unit ID = '%s'" % (quadkey, data_unit_id)
# Retrieve distribution of people at different times of the day
people_distribution = (
DatabaseQueries.get_exposure_entities_population_time_distribution(
config.people_cases,
exposure_entity_code,
occupancy_case,
aggregated_source_id,
config.database_gde_tiles,
"exposure_entities_population_time_distribution",
)
)
previous_exposure_entity_code = deepcopy(exposure_entity_code)
# Append aggregated buildings to quadtile.aggregated_buildings
if number_aggregated > 1e-6: # If smaller, consider equal to zero
......
......@@ -76,7 +76,9 @@ def export_to_GeoSummary(
- quadkey (str): Quadkey of the tile.
- geometry (geometry): Geometry of the tile.
- [occupancy_case]_number_data_units: Number of data units associated with this
quadtile and occupancy case.
quadtile and occupancy case for which there are buildings of any of the types
indicated in 'buildings_to_export' (data units associated with this quadtile and
occupancy case for which all buildings are zero are not counted).
- [occupancy_case]_[building_type]_buildings: Number of buildings in the tile of the
type 'building_type' (each of the elements of 'buildings_to_export') and of this
'occupancy_case'.
......
......@@ -114,6 +114,8 @@ VALUES ('122010321033023130', 2, 'residential', 'ABC', 'ABC_10269', 0.0, 0.0, 0.
('122010321033032301', 2, 'commercial', 'ABC', 'ABC_10278', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
('122010321033023130', 2, 'residential', 'DEF', 'DEF_00000', 0.0, 0.0, 0.0, 0.0, 8.6, 0, 8.6),
('122010321033023130', 2, 'commercial', 'DEF', 'DEF_00000', 0.0, 0.0, 0.0, 0.0, 8.6, 0, 8.6),
('122010321033023121', 2, 'residential', 'ABC', 'ABC_27726', 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0.0),
('122010321033023121', 2, 'residential', 'ABC', 'ABC_45933', 0.0, 0.0, 0.0, 0.0, 17.1, 0, 17.1),
('122010321033023120', 2, 'residential', 'GHI', 'GHI_22222', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
CREATE TABLE data_units_buildings
......@@ -166,7 +168,9 @@ VALUES ('A1/HBET:1-3', 'urban', 'all', 2, 'ABC', 'residential', 'ABC_10269', 0.2
('C6/HBET:3-5', 'urban', 'Offices', 2, 'ABC', 'commercial', 'ABC_10269', 0.30, 0.0, 0.0, 3, 5),
('D1/HBET:1-3', 'urban', 'all', 2, 'DEF', 'residential', 'DEF_00000', 0.55, 5.0, 1600000.0, 1, 3),
('D2/HBET:4-6', 'urban', 'all', 2, 'DEF', 'residential', 'DEF_00000', 0.40, 10.5, 2400000.0, 4, 6),
('D3/HBET:7-12', 'urban', 'all', 2, 'DEF', 'residential', 'DEF_00000', 0.05, 30.0, 4800000.0, 7, 12);
('D3/HBET:7-12', 'urban', 'all', 2, 'DEF', 'residential', 'DEF_00000', 0.05, 30.0, 4800000.0, 7, 12),
('B1/HBET:1-3', 'rural', 'all', 2, 'ABC', 'residential', 'ABC_45933', 0.85, 6.0, 1400000.0, 1, 3),
('B2/H:4', 'rural', 'all', 2, 'ABC', 'residential', 'ABC_45933', 0.15, 8.0, 1600000.0, 4, 4);
CREATE TABLE exposure_entities_costs_assumptions
(
......
......@@ -12,3 +12,5 @@ quadkeys_list_residential_remainder_10,15.04784,37.48085,B2/H:4,3.93,7467000,7.7
quadkeys_list_residential_remainder_11,15.04921,37.48085,D1/HBET:1-3,4.73,7568000,6.416245,22.51953,13.70281,residential,DEF_00000,122010321033023130
quadkeys_list_residential_remainder_12,15.04921,37.48085,D2/HBET:4-6,3.44,8256000,9.799356,34.393464,20.927928,residential,DEF_00000,122010321033023130
quadkeys_list_residential_remainder_13,15.04921,37.48085,D3/HBET:7-12,0.43,2064000,3.49977,12.28338,7.47426,residential,DEF_00000,122010321033023130
quadkeys_list_residential_remainder_14,15.04784,37.48085,B1/HBET:1-3,14.535,20349000,21.427497,83.904741,52.570188,residential,ABC_45933,122010321033023121
quadkeys_list_residential_remainder_15,15.04784,37.48085,B2/H:4,2.565,4104000,5.041764,19.742292,12.369456,residential,ABC_45933,122010321033023121
......@@ -44,12 +44,20 @@ def test_ExportHandler_process_quadkey_occupancy_group(test_db):
- 122010321033023130: associated with two data unit IDs, 'ABC_10269' of exposure entity
'ABC' and 'DEF_00000' of exposure entity 'DEF'. In the former it contains both OBM and
remainder buildings, while in the latter it only contains remainder buildings.
- 122010321033023121: associated with data unit ID 'ABC_10269' of exposure entity 'ABC',
which contains only remainder buildings.
- 122010321033023121: associated with data unit IDs 'ABC_10269', 'ABC_45933' and
'ABC_27726' of exposure entity 'ABC', the first two of which contain only remainder
buildings while the latter contains no buildings at all.
- 122010321033023132: associated with data unit ID 'ABC_10269' of exposure entity 'ABC',
which contains only OBM buildings.
- 122010321033023131: associated with no data unit ID (i.e. there is no entry for this
quadkey in the 'data_unit_tiles' table of the test database).
- By having three data unit IDs associated with quadkey 122010321033023121, the test
evaluates the tricks used to avoid re-retrieving parameters from the database that are
associated with the same exposure entity ('ABC'), as well as the use of "continue" to not
run the code for data-unit tiles with zero buildings. When sorting the list of data unit
IDs, 'ABC_27726' (no buildings) ends up in between 'ABC_10269' and 'ABC_45933', so that the
test tries out a retrieve - not retrieve - retrieve sequence, while in all cases the
exposure entity's parameters (those of 'ABC') are retrieved just once.
- In the smaller test, geographic selection is done via one data unit ID, 'DEF_00000', which
results in only one quadkey being processed (122010321033023130).
"""
......@@ -97,8 +105,8 @@ def test_ExportHandler_process_quadkey_occupancy_group(test_db):
# Check summary values
assert returned_summary_values["processed_quadkeys"] == 4
assert round(returned_summary_values["OBM_buildings"], 0) == 15
assert round(returned_summary_values["aggregated_buildings"], 1) == 84.9
assert round(returned_summary_values["remainder_buildings"], 1) == 47.5
assert round(returned_summary_values["aggregated_buildings"], 1) == 102.0
assert round(returned_summary_values["remainder_buildings"], 1) == 64.6
# Check that output files that need to be created have been created
assert os.path.exists(os.path.join(output_path, expected_name_remainder_buildings))
......@@ -120,10 +128,14 @@ def test_ExportHandler_process_quadkey_occupancy_group(test_db):
# Find the corresponding row in 'returned_remainder_buildings'
row_index_returned = numpy.where(
numpy.logical_and(
returned_remainder_buildings["taxonomy"].to_numpy()
== expected_remainder_buildings.loc[row_index_expected, "taxonomy"],
returned_remainder_buildings["quadkey"].to_numpy()
== expected_remainder_buildings.loc[row_index_expected, "quadkey"],
numpy.logical_and(
returned_remainder_buildings["taxonomy"].to_numpy()
== expected_remainder_buildings.loc[row_index_expected, "taxonomy"],
returned_remainder_buildings["quadkey"].to_numpy()
== expected_remainder_buildings.loc[row_index_expected, "quadkey"],
),
returned_remainder_buildings["data_unit_id"].to_numpy()
== expected_remainder_buildings.loc[row_index_expected, "data_unit_id"],
)
)[0][0]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment