Commit e0348fbc authored by Cecilia Nievas's avatar Cecilia Nievas

Merge branch 'feature/ind06' into 'master'

Completed module and tools that create industrial cells around input points

See merge request !16
parents 80be8100 2b4eea99
......@@ -1222,6 +1222,7 @@ def generate_country_industrial_cells(
in_crs="EPSG:4326",
consistency_tol_dist=0.05,
consistency_tol_area=0.05,
verbose=False,
):
"""This function reads the input aggregated exposure model of the country (location defined
by aggr_model_pathname and country), identifies the unique points present in this input
......@@ -1316,6 +1317,8 @@ def generate_country_industrial_cells(
range, which is calculated as ([max-min]/mean) of all
cells. Default: 0.05. Only needed if consistency_checks is
True.
verbose (bool): if True, progress messages are printed to describe the status of the
calculations. Default: False.
Returns:
cells_adj_bound_gdf (GeoDataFrame): GeoPandas GeoDataFrame with the cells defined around
......@@ -1394,6 +1397,8 @@ def generate_country_industrial_cells(
# Retrieve unique points in the exposure file, determined with a specific precision
# (points_gdf is a GeoPandas GeoDataFrame, ids_aggr is an array of strings with length
# equal to the number of rows of aggr_mod_df):
if verbose:
print(" Building cells from input points...")
points_gdf, ids_aggr = retrieve_unique_points(
aggr_mod_df, col_lon, col_lat, id_str, precision=precision_points, in_crs=in_crs
)
......@@ -1423,6 +1428,8 @@ def generate_country_industrial_cells(
# Adjust all the coordinates of the corners of the cells (coords_dict) by taking the
# average value of all instances of that coordinate that "should be the same", as identified
# in coords_uq:
if verbose:
print(" Adjusting cells' geometries...")
coords_dict_adj = adjust_coords(coords_dict, coords_uq) # coords_dict_adj is a dictionary
# Generate final output with adjusted cell geometries (cells_adj_gdf is a
......@@ -1436,7 +1443,11 @@ def generate_country_industrial_cells(
big_dist_diff = "Not_Checked"
big_area_diff = "Not_Checked"
else:
if verbose:
print(" Running consistency checks...")
# Consistency check 1: the output geometries should not overlap
if verbose:
print(" Consistency check #1 of 4...")
num_overlaps = 999 # Initialise variable for the while loop to run at least once
while num_overlaps > 0:
intsect_gdf = overlap_by_full_geom_intersection(cells_adj_gdf, "id_1", "id_2")
......@@ -1461,6 +1472,8 @@ def generate_country_industrial_cells(
overlap_found = "False"
# Consistency check 2:
if verbose:
print(" Consistency check #2 of 4...")
gaps_found = True # Initialise variable for the while loop to run at least once
while gaps_found:
# Expand the cells by 25% of their dimensions in all directions:
......@@ -1499,6 +1512,8 @@ def generate_country_industrial_cells(
gap_found = str(gaps_found)
# Consistency check 3: maximum distance between original points and final centroids:
if verbose:
print(" Consistency check #3 of 4...")
max_dist_centr = get_distance_centroids(cells_adj_gdf, col_lon, col_lat)
# Compare the maximum distance against the tolerance:
if max_dist_centr > min(width_EW, width_NS) * consistency_tol_dist:
......@@ -1507,6 +1522,8 @@ def generate_country_industrial_cells(
big_dist_diff = "False"
# Consistency check 4: stability/variability of area of resulting cells:
if verbose:
print(" Consistency check #4 of 4...")
rel_area_range = get_relative_area_range(cells_gdf)
# Compare the relative area range ([max-min]/mean) against the tolerance:
if rel_area_range > consistency_tol_area:
......@@ -1515,6 +1532,8 @@ def generate_country_industrial_cells(
big_area_diff = "False"
# Intersect cells with administrative boundary of country:
if verbose:
print("          Intersecting cells with administrative boundary of country...")
cells_adj_bound_gdf = gpd.overlay(cells_adj_gdf, bounds_gdf, how="intersection")
# Eliminate columns that are not useful:
if "ID_0" in cells_adj_bound_gdf.columns:
......@@ -1536,6 +1555,9 @@ def generate_country_industrial_cells(
cells_adj_bound_gdf["lat_s"].values[row] = geometry_row.bounds[1]
cells_adj_bound_gdf["lat_n"].values[row] = geometry_row.bounds[3]
if verbose:
print(" generate_country_industrial_cells has finished generating the cells")
return (
cells_adj_bound_gdf,
aggr_mod_df,
......@@ -1545,3 +1567,147 @@ def generate_country_industrial_cells(
big_area_diff,
country_id,
)
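# Illustrative (commented-out) call sketch for generate_country_industrial_cells; the country
# name, column names and paths below are placeholders, and the actual configuration-driven call
# can be found in SERA_creating_industrial_cells.py. Note that the consistency flags are
# returned as the strings "True", "False" or "Not_Checked", not as booleans:
#
# (cells, aggr_model, overlap_found, gap_found, big_dist_diff, big_area_diff, country_id) = (
#     generate_country_industrial_cells(
#         "Greece", "LONGITUDE", "LATITUDE", 30.0 / 3600.0, 30.0 / 3600.0, "GR_IND",
#         4, precision_cells, "/path/to/sera_models", "/path/to/boundaries", verbose=True,
#     )
# )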
def export_modified_aggregated_model_file(aggr_mod_df, out_pathname, out_filename, separ=","):
"""This function writes the aggr_mod_df DataFrame to a CSV file with name out_filename,
under the path out_pathname. If the file already exists, it will overwrite it. If the path
does not exist, it will create it.
Args:
aggr_mod_df (DataFrame): Pandas DataFrame to be exported.
out_pathname (str): Path to where the output CSV will be written.
out_filename (str): File name of the output CSV file.
separ (str): Separator to use in the CSV file. Default: ",".
"""
# Create out_pathname if it does not exist:
if not os.path.exists(out_pathname):
os.makedirs(out_pathname)
aggr_mod_df.to_csv(os.path.join(out_pathname, out_filename), sep=separ)
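# Illustrative (commented-out) usage sketch; the DataFrame variable, output path and file name
# below are placeholders, not values used by the pipeline:
#
# export_modified_aggregated_model_file(
#     aggr_mod_df, "/path/to/output/Ind_Cells", "Exposure_Ind_Greece.csv", separ=","
# )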
def export_cells_to_geodatafile(
country_name, country_id, cells_gdf, out_pathname, out_filename, out_driver="ESRI Shapefile"
):
"""This function creates a geospatial data file of format given by out_driver, with the IDs
and geometries of cells_gdf. Additional fields are written with the country name and ID. If
the file already exists, it will overwrite it. If the path does not exist, it will create
it.
Args:
country_name (str): Name of country (as in the files of the aggregated model).
country_id (int): ID of the country as per the geospatial data files of the input
aggregated model.
cells_gdf (GeoDataFrame): GeoPandas GeoDataFrame with the cells geometry. This function
assumes that it contains at least the following columns:
- id: ID of the cell, given by id_str and an incremental
integer.
- geometry: (Shapely) polygons of the output cells.
out_pathname (str): Path to where the geospatial data file will be written.
out_filename (str): File name of the geospatial data file.
out_driver (str): Format of the output geospatial data file: "ESRI Shapefile",
"GeoJSON", "GPKG", etc. Default: "ESRI Shapefile".
"""
# Check that cells_gdf has the needed columns and terminate otherwise:
if ("id" not in cells_gdf.columns) or ("geometry" not in cells_gdf.columns):
print("ERROR!! One or more of id, geometry missing as columns of cells_gdf")
return
# Create out_pathname if it does not exist:
if not os.path.exists(out_pathname):
os.makedirs(out_pathname)
# Start a new GeoDataFrame with the desired columns/features of cells_gdf:
shp_gdf = gpd.GeoDataFrame({}, geometry=cells_gdf["geometry"])
shp_gdf["ID_99"] = deepcopy(cells_gdf["id"])
shp_gdf["NAME_99"] = deepcopy(cells_gdf["id"])
# Add values of admin unit level 0:
id_0 = np.array([country_id for i in range(shp_gdf.shape[0])])
name_0 = np.array([country_name for i in range(shp_gdf.shape[0])])
shp_gdf["ID_0"] = id_0
shp_gdf["NAME_0"] = name_0
# Write to ShapeFile or other format (as per out_driver):
shp_gdf.to_file(os.path.join(out_pathname, out_filename), driver=out_driver)
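# Illustrative (commented-out) usage sketch; the country ID, GeoDataFrame, output path and file
# name are placeholders, and out_driver must be a format supported by GeoPandas/Fiona:
#
# export_cells_to_geodatafile(
#     "Greece", 999, cells_gdf, "/path/to/output/Ind_Cells", "Adm99_Greece.gpkg",
#     out_driver="GPKG",
# )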
def which_countries_on_a_grid(
metadata_filepath,
sheet_name="IND",
col_name="Variables",
var_name="Resolution",
target="30 arc seconds",
export=False,
out_pathname="",
out_filename="",
):
"""This function retrieves the list of countries for which the industrial exposure is
defined on a 30-arcsec grid in the SERA exposure model. This information is provided in the
EFEHR repository within an Excel spreadsheet called
"European_Exposure_Model_Data_Assumptions.xlsx". The default values of all input parameters
refer to the structure of this file.
Args:
metadata_filepath (str): Full path to the source metadata file, including file name and
extension. It needs to be an Excel spreadsheet (.xls or .xlsx).
sheet_name (str): Name of the sheet of metadata_filepath to read. Default: "IND".
col_name (str): Name of the column of sheet_name that contains the parameter names.
Default: "Variables".
var_name (str): Name of the parameter to read. Default: "Resolution".
target (str): Target value of var_name (i.e. the code will search for cases in which
the value of var_name is target). Default: "30 arc seconds".
export (bool): If True, the resulting list of countries will be exported to a CSV file
under the path out_pathname and the file name out_filename.
Default: False.
out_pathname (str): Path where to export the output CSV to. Only needed if export is
True. Default: "".
out_filename (str): Name of the file where to write the list of countries. Only needed
if export is True. Default: "".
Returns:
countries (list of str): List of countries for which var_name takes the value given
by target (i.e. the countries for which the industrial exposure
is defined on a 30-arcsec grid in the SERA exposure model).
"""
# Check metadata_filepath exists and points at an Excel spreadsheet:
if not os.path.isfile(metadata_filepath):
print("ERROR in which_countries_on_a_grid: input file not found")
return []
if "xls" not in metadata_filepath.split(".")[-1]:
print("ERROR in which_countries_on_a_grid: input file is not an Excel spreadsheet")
return []
# Read the SERA metadata file:
metadata = pd.read_excel(metadata_filepath, sheet_name=sheet_name)
# Identify the row in which the variable var_name is given:
which_row = np.where(metadata[col_name].values == var_name)[0]
if len(which_row) != 1: # This should not occur
print("ERROR READING %s: ROW NOT FOUND." % (metadata_filepath))
return []
# Retrieve the row in which the variable var_name is given:
whole_row = metadata.iloc[which_row[0], :]
# Columns in which var_name takes the value target:
which_cols = np.where(whole_row == target)[0]
# The names of those columns are the countries for which var_name takes the value target:
countries = list(whole_row.index[which_cols])
# Write a csv file with the list of countries:
if export:
# Create out_pathname if it does not exist:
if not os.path.exists(out_pathname):
os.makedirs(out_pathname)
# Write to file:
out_filepath = os.path.join(out_pathname, out_filename)
with open(out_filepath, "w") as f:
f.write(",".join(countries))
return countries
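# Illustrative (commented-out) usage sketch; the metadata file path is a placeholder and the
# remaining parameters are left at their defaults, which match the structure of the
# "European_Exposure_Model_Data_Assumptions.xlsx" file:
#
# countries_on_grid = which_countries_on_a_grid(
#     "/path/to/European_Exposure_Model_Data_Assumptions.xlsx",
#     export=True, out_pathname="/path/to/output", out_filename="countries_on_grid.csv",
# )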
......@@ -23,6 +23,8 @@ sera_models_path = WRITE_PATH
sera_models_OQ_path = WRITE_PATH
# Path to the shapefiles of admin units used in SERA:
sera_boundaries_path = WRITE_PATH
# File type containing the boundaries ("shp"=Shapefile, "gpkg"=Geopackage):
boundaries_type = shp
# Path to the GHS layer:
ghs_path = WRITE_PATH
# Path to the GPW data:
......@@ -274,3 +276,36 @@ occupancy_cases = Res, Com, Ind, Oth
decimal_places_gral = 4
decimal_places_costs = 0
[SERA_creating_industrial_cells]
# Countries to process (if many, separate with comma and space):
countries = Albania, Austria, Belgium, Bosnia_and_Herzegovina, Bulgaria, Croatia, Cyprus, Czechia, Denmark, Estonia, Finland, France, Germany, Greece, Hungary, Iceland, Ireland, Italy, Latvia, Lithuania, Luxembourg, Malta, Moldova, Montenegro, Netherlands, North_Macedonia, Norway, Poland, Portugal, Romania, Serbia, Slovakia, Slovenia, Spain, Sweden, Switzerland, United_Kingdom
# Names of the columns in the SERA model that contain longitudes and latitudes:
col_lon = LONGITUDE
col_lat = LATITUDE
# Width of the cell in the East-West direction, in arcseconds, >0:
width_EW = 30
# Width of the cell in the North-south direction, in arcseconds, >0:
width_NS = 30
# First part of the string used to generate IDs of the individual points (e.g. "IND").
# Do not include country's ISO2 code:
id_str = IND
# Number of decimal places to be used to determine unique points present in the aggregated
# exposure model (4 is a reasonable value):
precision_points = 4
# Run consistency checks (True) or not (False):
consistency_checks = True
# Autoadjust potential leftover overlaps and gaps (True) or not (False):
autoadjust = True
# Print statements of progress while running:
verbose = False
# CRS of the SERA files:
in_crs = EPSG:4326
# Tolerance to assess how large the maximum distance between the original points and the
# centroids of the generated cells is with respect to the width of the cells. Only needed
# if consistency_checks is True (e.g. 0.05 implies a tolerance of 5% of the width):
consistency_tol_dist = 0.05
# Tolerance to assess how large the variability of the area of the generated cells is. Only
# needed if consistency_checks is True:
consistency_tol_area = 0.05
# File type to export the created cells to ("shp"=Shapefile, "gpkg"=Geopackage):
export_type = shp
"""
Copyright (C) 2021
Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Global Dynamic Exposure Model
Helmholtz Centre Potsdam
GFZ German Research Centre for Geosciences
Section 2.6: Seismic Hazard and Risk Dynamics
SERA_creating_industrial_cells
==============================
This code is used to handle the fact that industrial exposure may be defined in 30-arcsec cells
instead of administrative units in the SERA exposure model. The SERA model only provides the
centroids of those cells with a certain decimal precision. This code makes use of the tools
defined in GDE_TOOLS_create_industrial_cells.py, which create cells around the points given as
input and export these cells to a geodata file. The countries to process are defined in the
configuration file. The cells' North-South and East-West widths are given as input as well.
"""
import sys
import os
import GDE_TOOLS_create_industrial_cells as gdet_ind
import GDE_TOOLS_general as gdet_gral
import GDE_TOOLS_read_config_file as gdet_conf
import datetime
def run(config_dict):
####################################################
# READ CONFIGURATION PARAMETERS
####################################################
print("Processing configuration parameters...")
# Path for output:
out_path = config_dict["File Paths"]["out_path"]
# SERA models path:
sera_models_path = config_dict["File Paths"]["sera_models_path"]
# SERA boundaries shapefiles path:
sera_shp_path = config_dict["File Paths"]["sera_boundaries_path"]
bound_type = config_dict["File Paths"]["boundaries_type"]
# Countries to process:
countries = config_dict["SERA_creating_industrial_cells"]["countries"].split(", ")
# Name of columns with longitude and latitude in the SERA model:
col_lon = config_dict["SERA_creating_industrial_cells"]["col_lon"]
col_lat = config_dict["SERA_creating_industrial_cells"]["col_lat"]
# Widths of the cells to be generated:
width_EW = float(config_dict["SERA_creating_industrial_cells"]["width_EW"])
width_NS = float(config_dict["SERA_creating_industrial_cells"]["width_NS"])
# Input is given in arcseconds, transform to degrees:
width_EW = width_EW / (60.0 * 60.0)
width_NS = width_NS / (60.0 * 60.0)
# First part of the string used to generate IDs of the individual points (e.g. "IND"):
id_str = config_dict["SERA_creating_industrial_cells"]["id_str"]
# Number of decimal places to be used to determine unique points in the aggregated model:
precision_points = int(config_dict["SERA_creating_industrial_cells"]["precision_points"])
# Run consistency checks (True) or not (False):
consistency_checks_str = config_dict["SERA_creating_industrial_cells"]["consistency_checks"]
if consistency_checks_str.lower() == "true":
consist_checks = True
elif consistency_checks_str.lower() == "false":
consist_checks = False
else:
print(
"ERROR!! IT SHOULD NOT GET TO THIS POINT, "
"AS THE PARAMETER IS CHECKED IN THE CONFIG FILE CHECKS"
)
# Autoadjust potential leftover overlaps and gaps (True) or not (False):
autoadjust_str = config_dict["SERA_creating_industrial_cells"]["autoadjust"]
if autoadjust_str.lower() == "true":
autoadjust = True
elif autoadjust_str.lower() == "false":
autoadjust = False
else:
print(
"ERROR!! IT SHOULD NOT GET TO THIS POINT, "
"AS THE PARAMETER IS CHECKED IN THE CONFIG FILE CHECKS"
)
# Print statements of progress while running:
verbose_str = config_dict["SERA_creating_industrial_cells"]["verbose"]
if verbose_str.lower() == "true":
verbose = True
elif verbose_str.lower() == "false":
verbose = False
else:
print(
"ERROR!! IT SHOULD NOT GET TO THIS POINT, "
"AS THE PARAMETER IS CHECKED IN THE CONFIG FILE CHECKS"
)
# Tolerances for the consistency checks:
if consist_checks:
# Tolerance for maximum distance between the original points and the centroids of the
# generated cells:
tol_dist = float(config_dict["SERA_creating_industrial_cells"]["consistency_tol_dist"])
# Tolerance for variability of the area of the generated cells:
tol_area = float(config_dict["SERA_creating_industrial_cells"]["consistency_tol_area"])
else: # Dummy values, they will not be used
tol_dist = 0.05
tol_area = 0.05
# CRS of the SERA files:
in_crs = config_dict["SERA_creating_industrial_cells"]["in_crs"]
# File type to export the created cells to (e.g. "shp"=Shapefile, "gpkg"=Geopackage):
export_type = config_dict["SERA_creating_industrial_cells"]["export_type"]
####################################################
# START
####################################################
# Auxiliary dictionary defining the precision to use to convert coordinates into strings:
dec_precision_EW = int("{:E}".format(width_EW).split("-")[1])
dec_precision_NS = int("{:E}".format(width_NS).split("-")[1])
precision_cells = {}
precision_cells["lon_w"] = "{:.%sf}" % (dec_precision_EW)
precision_cells["lat_s"] = "{:.%sf}" % (dec_precision_NS)
precision_cells["lon_e"] = "{:.%sf}" % (dec_precision_EW)
precision_cells["lat_n"] = "{:.%sf}" % (dec_precision_NS)
# Run country by country
if consist_checks:
log = ["country,overlap_found,gap_found,big_distance_found,big_area_variability_found"]
processing_times = ["country,processing_time"]
for k, country_name in enumerate(countries):
start_time = datetime.datetime.now()
print("\n")
print(
"Working on %s, country %s of %s\n"
% (country_name, str(k + 1), str(len(countries)))
)
# Generate cells for this country:
country_iso2 = gdet_gral.get_ISO2_code_for_country(country_name)
id_str_country = "%s_%s" % (country_iso2, id_str)
results = gdet_ind.generate_country_industrial_cells(
country_name,
col_lon,
col_lat,
width_EW,
width_NS,
id_str_country,
precision_points,
precision_cells,
sera_models_path,
sera_shp_path,
boundaries_type=bound_type,
consistency_checks=consist_checks,
autoadjust_overlap_gap=autoadjust,
in_crs=in_crs,
consistency_tol_dist=tol_dist,
consistency_tol_area=tol_area,
verbose=verbose,
)
(
cells,
aggr_model,
overlap_found,
gap_found,
big_dist_diff,
big_area_diff,
country_id,
) = results
# Print to screen and a log file the results of the consistency check if requested:
if consist_checks:
print("\n")
print(" Results for consistency checks:")
if overlap_found == "True":
overlap_str = "Problem found with cells overlapping. ERROR."
else:
overlap_str = "Cells do not overlap. OK."
if gap_found == "True":
gap_str = "Problem found with gaps in between cells. ERROR."
else:
gap_str = "Cells do not have gaps in between. OK."
if big_dist_diff == "True":
big_dist_str = "Distances between centroids and points too large. ERROR."
else:
big_dist_str = "Distances between centroids and points are OK."
if big_area_diff == "True":
big_area_str = "Variability of resulting cell areas too large. ERROR."
else:
big_area_str = "Variability of resulting cell areas is OK."
print(" %s" % (overlap_str))
print(" %s" % (gap_str))
print(" %s" % (big_dist_str))
print(" %s" % (big_area_str))
out_str = [country_name, overlap_found, gap_found, big_dist_diff, big_area_diff]
log.append(",".join(out_str))
# Export SERA aggregated exposure model with additional columns referring to cells:
if verbose:
print("\n")
print(" Updating aggregated exposure file...")
aggr_filename = "Exposure_Ind_%s.csv" % (country_name)
gdet_ind.export_modified_aggregated_model_file(
aggr_model, os.path.join(out_path, "Ind_Cells"), aggr_filename, separ=","
)
# Export geodata file of cells:
if verbose:
print("\n")
print(" Exporting cells geometry to file...")
cells_filename = "Adm99_%s.%s" % (country_name, export_type)
gdet_ind.export_cells_to_geodatafile(
country_name, country_id, cells, os.path.join(out_path, "Ind_Cells"), cells_filename
)
# Time it took to process this country:
end_time = datetime.datetime.now()
duration = (end_time - start_time).total_seconds()
processing_times.append("%s,%s" % (country_name, str(duration)))
# Write output log of consistency checks:
if consist_checks:
gdet_gral.write_log_file(
log, os.path.join(out_path, "Ind_Cells", "log_consistency_checks.csv")
)
# Write output log of processing times:
gdet_gral.write_log_file(
processing_times, os.path.join(out_path, "Ind_Cells", "log_processing_times.csv")
)
print("\n")
print("Done!")
if __name__ == "__main__":
# This code needs to be run from the command line as python3 namefile.py configfile.ini.
# sys.argv contains the command-line arguments; position [0] is this script,
# position [1] is the config file name.
config_filename = sys.argv[1]
section_names_to_validate = ["File Paths", os.path.basename(__file__).split(".")[0]]
config_dict = gdet_conf.read_config_parameters(
os.path.join(os.getcwd(), config_filename), section_names_to_validate
)
run(config_dict)
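# Example invocation from the command line (the config file name is illustrative; it should
# point to a configuration file following GDE_config_file_TEMPLATE.ini):
#
#     python3 SERA_creating_industrial_cells.py GDE_config_file.ini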
......@@ -14,17 +14,18 @@ The scripts are run from the command line as:
The order in which the scripts in the present repository need to be run to produce the GDE model for a region of interest is:
1. Run `OBM_assign_cell_ids_and_adm_ids_to_footprints.py`
2. Run `SERA_create_HDF5_metadata.py`.
3. Run `SERA_mapping_admin_units_to_cells.py`
4. Run `SERA_mapping_admin_units_to_cells_add_GHS.py` (if GHS criterion desired)
5. Run `SERA_mapping_admin_units_to_cells_add_GPW.py` (if GPW criterion desired)
6. Run `SERA_mapping_admin_units_to_cells_add_Sat.py` (if Sat or Sat_mod criterion desired)
7. Run `SERA_distributing_exposure_to_cells.py` with the desired distribution method.
8. If the OpenQuake input files for the SERA model distributed onto a grid are desired (i.e. not GDE, just SERA), run `SERA_create_OQ_input_files.py` with the desired distribution method.
9. If a CSV summarising the number of buildings, dwellings, people and costs by cell according to the SERA model is desired (i.e. not GDE, just SERA), run `SERA_create_visual_output_of_grid_model_full_files.py` with the desired distribution method.
10. Run `OBM_buildings_per_cell.py` with the desired distribution method.
11. Run `GDE_gather_SERA_and_OBM.py` with the desired distribution method. The output is:
1. If the country/ies of interest have their industrial exposure defined on a 30-arcsec grid, run `SERA_creating_industrial_cells.py`
2. Run `OBM_assign_cell_ids_and_adm_ids_to_footprints.py`
3. Run `SERA_create_HDF5_metadata.py`
4. Run `SERA_mapping_admin_units_to_cells.py`
5. Run `SERA_mapping_admin_units_to_cells_add_GHS.py` (if GHS criterion desired)
6. Run `SERA_mapping_admin_units_to_cells_add_GPW.py` (if GPW criterion desired)
7. Run `SERA_mapping_admin_units_to_cells_add_Sat.py` (if Sat or Sat_mod criterion desired)
8. Run `SERA_distributing_exposure_to_cells.py` with the desired distribution method.
9. If the OpenQuake input files for the SERA model distributed onto a grid are desired (i.e. not GDE, just SERA), run `SERA_create_OQ_input_files.py` with the desired distribution method.
10. If a CSV summarising the number of buildings, dwellings, people and costs by cell according to the SERA model is desired (i.e. not GDE, just SERA), run `SERA_create_visual_output_of_grid_model_full_files.py` with the desired distribution method.
11. Run `OBM_buildings_per_cell.py` with the desired distribution method.
12. Run `GDE_gather_SERA_and_OBM.py` with the desired distribution method. The output is:
- a series of CSV files that serve as input for damage/risk calculations to be run in OpenQuake (https://github.com/gem/oq-engine);
- a CSV file that summarises results per cell and contains the geometry of the cells so that it can all be visualised with a GIS;
- a CSV file that summarises results per administrative unit and contains the geometry of the administrative boundaries so that it can all be visualised with a GIS;
......@@ -32,19 +33,19 @@ The order in which the scripts in the present repository need to be run to produ
## Testing Scripts
- The scripts `SERA_testing_rebuilding_exposure_from_cells_alternative_01.py`, `SERA_testing_rebuilding_exposure_from_cells_alternative_02.py` and `SERA_testing_rebuilding_exposure_from_cells_alternative_03.py` can be run after step 7 above. They compare the SERA-on-a-grid model against the original files of the SERA model.
- The scripts `SERA_testing_rebuilding_exposure_from_cells_alternative_01.py`, `SERA_testing_rebuilding_exposure_from_cells_alternative_02.py` and `SERA_testing_rebuilding_exposure_from_cells_alternative_03.py` can be run after step 8 above. They compare the SERA-on-a-grid model against the original files of the SERA model.
- The script `SERA_testing_compare_visual_output_vs_OQ_input_files.py` can be run after step 9 above to compare the number of buildings, people and cost per cell reported in the OpenQuake input file (generated from the grid) and the visual output CSV.
- The script `SERA_testing_compare_visual_output_vs_OQ_input_files.py` can be run after step 10 above to compare the number of buildings, people and cost per cell reported in the OpenQuake input file (generated from the grid) and the visual output CSV.
- The script `SERA_create_outputs_QGIS_for_checking.py` can be run after step 6 above to create a summary of the parameters mapped (GHS, GPW, Sat, etc) in CSV format to be read with QGIS, enabling a visual check of the results.
- The script `SERA_create_outputs_QGIS_for_checking.py` can be run after step 7 above to create a summary of the parameters mapped (GHS, GPW, Sat, etc) in CSV format to be read with QGIS, enabling a visual check of the results.
- The script `SERA_testing_mapping_admin_units_to_cells_qualitycontrol.py` can be run after step 3 above to check the areas of the cells mapped for the administrative units for which step 3 was run.
- The script `SERA_testing_mapping_admin_units_to_cells_qualitycontrol.py` can be run after step 4 above to check the areas of the cells mapped for the administrative units for which step 4 was run.
- The script `GDE_check_consistency.py` can be run after step 11 above. It carries out different consistency checks on the resulting GDE model (see detailed description of this script).
- The script `GDE_check_consistency.py` can be run after step 12 above. It carries out different consistency checks on the resulting GDE model (see detailed description of this script).
- The script `GDE_check_OQ_input_files.py` can be run after step 11 above. It prints to screen some summary values of the files and checks that the asset ID values are all unique.
- The script `GDE_check_OQ_input_files.py` can be run after step 12 above. It prints to screen some summary values of the files and checks that the asset ID values are all unique.
- The script `GDE_check_tiles_vs_visual_CSVs.py` can be run after step 11 above. It reads the visual CSV output by cell and the corresponding GDE tiles HDF5 files and compares the number of buildings, cost and number of people in each cell according to each of the two. An output CSV file collects the discrepancies found, if any.
- The script `GDE_check_tiles_vs_visual_CSVs.py` can be run after step 12 above. It reads the visual CSV output by cell and the corresponding GDE tiles HDF5 files and compares the number of buildings, cost and number of people in each cell according to each of the two. An output CSV file collects the discrepancies found, if any.
## Other Scripts
......
......@@ -3,6 +3,30 @@
For each core script, the enumerated configurable parameters are those that are specific to that script, i.e. defined in the configuration file under a subtitle that matches the name of the file. General parameters are not explained herein but in `03_Config_File.md` and `GDE_config_file_TEMPLATE.ini`.
# SERA_creating_industrial_cells.py
## Configurable parameters:
The parameters that need to be specified under the `SERA_creating_industrial_cells` section of the configuration file are:
- countries = Countries to process. If more than one, separate with comma and space.
- col_lon, col_lat = Names of the columns in the SERA model that contain longitudes and latitudes.
- width_EW, width_NS = Widths (arcseconds) of the cells in which the industrial exposure is defined, in the east-west and north-south directions, respectively.
- id_str = First part of the string used to generate IDs of the individual points (e.g. "IND"). Do not include the country's ISO2 code (it gets added by the script automatically).
- precision_points = Number of decimal places to be used to determine unique points present in the input aggregated exposure model.
- consistency_checks = True or False (run consistency checks or not).
- autoadjust = After a first adjustment of the cells' geometries to fix any overlaps and/or gaps, a check is carried out (if consistency_checks is True) to determine whether any overlaps and/or gaps remain. If autoadjust is True, the script adjusts the geometries again until no further overlaps/gaps are found; if False, it does not carry out this further adjustment.