Skip to content
Snippets Groups Projects
Commit d73beae9 authored by Laurens Oostwegel's avatar Laurens Oostwegel
Browse files

Use dictionary to detect if a boundary already exists

parent 9cad7080
No related branches found
No related tags found
No related merge requests found
Pipeline #65882 passed
......@@ -205,17 +205,17 @@ class ExposureInitializer:
asset_dict[taxonomy_id]["floorspace"] += asset["floorspace"]
return asset_dict
def multiprocess_districts(self, districts, num_processors):
def multiprocess_districts(self, country_iso_code, district_assets, num_processors):
"""
Initializes the queue and a pool of workers that run the `process_district` function on
one core each.
Args:
districts (list):
A list of all districts, containing the information below for each district:
Boundary ID
Country ISO code
Asset dictionary
country_iso_code (str):
ISO 3166-1 alpha-3 code of the country
district_assets (dict):
A dictionary of all districts, mapping each boundary ID (key) to the asset
dictionary of that district (value).
num_processors (int):
Number of processors that are used in the multiprocessing pool.
"""
......@@ -227,7 +227,7 @@ class ExposureInitializer:
pool = Pool(num_processors - 1, self.worker, (queue,))
# Fill the queue
for boundary_id, country_iso_code, asset_dict in districts:
for boundary_id, asset_dict in district_assets.items():
queue.put([boundary_id, country_iso_code, asset_dict])
# Wait until the queue is empty
......@@ -397,28 +397,16 @@ class ExposureInitializer:
"""
country_asset_dict = {}
districts = []
district_assets = {}
# Iterate through all given exposure files
for exposure_filepath in glob.glob(exposure_model_search_pattern):
logger.info(f"Processing {exposure_filepath}")
csv_reader = csv.DictReader(open(exposure_filepath), delimiter=",")
# Sort the exposure file by boundary ID to have all assets of one district being
# listed consecutively to avoid listing same taxonomies multiple times
sorted_exposure = sorted(csv_reader, key=lambda line: line["BOUNDARY_ID"])
# Prepare the control variables
last_boundary_id = None
location_count = 0
asset_dict = {}
boundary_id = None
for row in sorted_exposure:
# Check if the line starts the asset list of a new location
if not (last_boundary_id == row["BOUNDARY_ID"]):
if location_count > 0:
districts.append([boundary_id, country_iso_code, asset_dict])
location_count += 1
for row in csv_reader:
# Check if the asset dict of the boundary already exists
boundary_id = row["BOUNDARY_ID"]
asset_dict = {} # Reset the location-based asset dictionary
last_boundary_id = row["BOUNDARY_ID"]
if boundary_id not in district_assets:
district_assets[boundary_id] = {}
# Read in an asset
# Create the expanded taxonomy string and add the occupancy to it
......@@ -447,21 +435,21 @@ class ExposureInitializer:
area_per_dwelling = float(row["AREA_PER_DWELLING_SQM"])
asset["floorspace"] = dwellings * area_per_dwelling
# Store the asset in a location-based list and country-based list
asset_dict = self.add_asset_to_dict(asset_dict, taxonomy_id, asset)
# Store the asset in a location-based dictionary and country-based dictionary
district_assets[boundary_id] = self.add_asset_to_dict(
district_assets[boundary_id], taxonomy_id, asset
)
country_asset_dict = self.add_asset_to_dict(
country_asset_dict, taxonomy_id, asset
)
districts.append([boundary_id, country_iso_code, asset_dict])
# If there is more than one processor, process the districts with the
# `multiprocess_districts` function
if num_processors == 1:
for boundary_id, country_iso_code, asset_dict in districts:
for boundary_id, asset_dict in district_assets.items():
self.process_district(boundary_id, country_iso_code, asset_dict)
else:
self.multiprocess_districts(districts, num_processors)
self.multiprocess_districts(country_iso_code, district_assets, num_processors)
logger.info("Assign the country-average assets")
# Normalize the country-average asset distribution
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment