Commit 1c052799 authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Merge branch 'feature/reduce_parameters_class' into 'master'

Reduced number of parameters used to define building classes and general new-ESRM20 update

See merge request !22
parents cb1bcdc9 0db151be
......@@ -77,7 +77,7 @@ def make_list_country_admin_units_to_retrieve(admin_unit_type, list_subgroups, w
def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5_filename, countries_shp_path, sera_meta_path, gral_output_path, gde_tile_filename, occu_case, admin_unit_type= 'all', disagg_method= 'area', which_country= '', which_admin_ID= ''):
def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5_filename, countries_shp_path, sera_meta_path, gral_output_path, gde_tile_filename, occu_case, cost_per_area_field, currency, admin_unit_type= 'all', disagg_method= 'area', which_country= '', which_admin_ID= ''):
"""
obm_hdf5_filename: full file path to the OBM HDF5 file for a particular cell and occupancy case
bdg_classes_hdf5_filename: full file path to the HDF5 file of the SERA building classes for a particular occupancy case
......@@ -88,6 +88,8 @@ def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5
gde_tile_filename will only be created/written to if obm_hdf5_filename exists, i.e. if there exist OBM buildings
in this cell
occu_case: Res, Com, Ind, Oth
cost_per_area_field: name of the field that contains the cost per area (e.g., 'cost_per_area_eur', 'cost_per_area_usd')
currency: string indicating the currency of the replacement costs (e.g. 'EUR', 'USD')
admin_unit_type:
"all" = all admin units within the cell
"country" = all admin units of a certain country (defined by setting which_country)
......@@ -182,7 +184,7 @@ def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5
all_sera_proportions= fle[country_adminID][osm_id]['SERA_vals'+'_'+disagg_method][:]
dwell_per_bdg= np.zeros([len(all_sera_classes)])
area_per_dwelling_sqm= np.zeros([len(all_sera_classes)])
cost_per_area_usd= np.zeros([len(all_sera_classes)])
cost_per_area= np.zeros([len(all_sera_classes)])
ppl_per_dwell= np.zeros([len(all_sera_classes)])
for k, bdg_class in enumerate(all_sera_classes):
osm_ids.append('OSM_'+str(osm_id))
......@@ -196,21 +198,21 @@ def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5
which_row_admin_id= np.where(country_adminid_locs==country_adminID)[0][0]
dwell_per_bdg[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='dwell_per_bdg')[0][0]].split('_')[-1])]
area_per_dwelling_sqm[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='area_per_dwelling_sqm')[0][0]].split('_')[-1])]
cost_per_area_usd[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='cost_per_area_usd')[0][0]].split('_')[-1])]
cost_per_area[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)==cost_per_area_field)[0][0]].split('_')[-1])]
ppl_per_dwell[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='ppl_per_dwell')[0][0]].split('_')[-1])]
# For adding values of people and cost per building to the GDE tile HDF5 file:
ppl_per_bdg= dwell_per_bdg * ppl_per_dwell # 1D array of len(all_sera_classes)
cost_per_bdg_usd= dwell_per_bdg * area_per_dwelling_sqm * cost_per_area_usd # 1D array of len(all_sera_classes)
cost_per_bdg= dwell_per_bdg * area_per_dwelling_sqm * cost_per_area # 1D array of len(all_sera_classes)
weighted_av_ppl= (ppl_per_bdg * all_sera_proportions[:,0]).sum()
weighted_av_cost= (cost_per_bdg_usd * all_sera_proportions[:,0]).sum()
error_writing_gde_tile= gdet_gral.write_OBM_to_GDE_tiles_add_params(gral_output_path, gde_tile_filename, occu_case, disagg_method, osm_id, country_adminID, all_sera_classes, ppl_per_bdg, cost_per_bdg_usd, weighted_av_ppl, weighted_av_cost, 'USD')
weighted_av_cost= (cost_per_bdg * all_sera_proportions[:,0]).sum()
error_writing_gde_tile= gdet_gral.write_OBM_to_GDE_tiles_add_params(gral_output_path, gde_tile_filename, occu_case, disagg_method, osm_id, country_adminID, all_sera_classes, ppl_per_bdg, cost_per_bdg, weighted_av_ppl, weighted_av_cost, currency)
if error_writing_gde_tile!='':
error_messages= error_messages+' '+error_writing_gde_tile
# For building the DataFrame to output:
num_bdgs= np.hstack([num_bdgs, all_sera_proportions[:,0]])
num_dwells_local= all_sera_proportions[:,0] * dwell_per_bdg
num_dwells= np.hstack([num_dwells, num_dwells_local])
cost_local= num_dwells_local * area_per_dwelling_sqm * cost_per_area_usd
cost_local= num_dwells_local * area_per_dwelling_sqm * cost_per_area
cost= np.hstack([cost, cost_local])
ppl_local= num_dwells_local * ppl_per_dwell
ppl= np.hstack([ppl, ppl_local])
......
......@@ -564,11 +564,13 @@ def get_down_to_one_building_DEPRECATED(sera_for_cell_and_admin_unit_dict, disag
return aux_sera_vals / sum_all_bdgs_sera
def compute_bdgs_dwells_ppl_cost_for_cell_total(cell_attrs_tot_num_bdgs, bdg_classes_names, bdg_classes_proportions, bdg_classes_proportions_per_admin, list_of_cols_of_country_adminIDs, list_of_country_adminIDs, list_of_country_adminIDs_levels, sera_bdg_classes_full_path, output_by_admin_unit=False):
def compute_bdgs_dwells_ppl_cost_for_cell_total(cell_attrs_tot_num_bdgs, bdg_classes_names, bdg_classes_proportions, bdg_classes_proportions_per_admin, list_of_cols_of_country_adminIDs, list_of_country_adminIDs, list_of_country_adminIDs_levels, sera_bdg_classes_full_path, cost_per_area_field, output_by_admin_unit=False):
"""
cell_attrs_tot_num_bdgs, bdg_classes_names, bdg_classes_proportions, bdg_classes_proportions_per_admin, list_of_cols_of_country_adminIDs, list_of_country_adminIDs: all retrieved using the
function "retrieve_sera_hdf5_classes_and_vals" for the total of a cell (i.e. in_admin_id='Total').
cost_per_area_field: name of the field that contains the cost per area (e.g., 'cost_per_area_eur', 'cost_per_area_usd')
No assessment of the admin units and/or countries involved in the cell is made.
Output:
......@@ -610,12 +612,12 @@ def compute_bdgs_dwells_ppl_cost_for_cell_total(cell_attrs_tot_num_bdgs, bdg_cla
if country_adminid in country_adminid_locs:
dwell_per_bdg= parameter_vals[np.where(country_adminid_locs==country_adminid)[0][0], int(col_names[np.where(np.array(col_contents)=='dwell_per_bdg')[0][0]].split('_')[-1])]
area_per_dwelling_sqm= parameter_vals[np.where(country_adminid_locs==country_adminid)[0][0], int(col_names[np.where(np.array(col_contents)=='area_per_dwelling_sqm')[0][0]].split('_')[-1])]
cost_per_area_usd= parameter_vals[np.where(country_adminid_locs==country_adminid)[0][0], int(col_names[np.where(np.array(col_contents)=='cost_per_area_usd')[0][0]].split('_')[-1])]
cost_per_area= parameter_vals[np.where(country_adminid_locs==country_adminid)[0][0], int(col_names[np.where(np.array(col_contents)==cost_per_area_field)[0][0]].split('_')[-1])]
ppl_per_dwell= parameter_vals[np.where(country_adminid_locs==country_adminid)[0][0], int(col_names[np.where(np.array(col_contents)=='ppl_per_dwell')[0][0]].split('_')[-1])]
# Calculate dwellings, people and cost for this building class in this country_admin_ID:
num_dwells_per_class_and_admin_unit[k,j]= num_bdgs_per_class_and_admin_unit[k,j] * dwell_per_bdg
num_ppl_per_class_and_admin_unit[k,j]= num_dwells_per_class_and_admin_unit[k,j] * ppl_per_dwell
repl_cost_per_class_and_admin_unit[k,j]= num_dwells_per_class_and_admin_unit[k,j] * area_per_dwelling_sqm * cost_per_area_usd
repl_cost_per_class_and_admin_unit[k,j]= num_dwells_per_class_and_admin_unit[k,j] * area_per_dwelling_sqm * cost_per_area
if output_by_admin_unit: # the output arrays are two-dimensional, with one row per bdg class and one column per country_adminID
out_num_bdgs= num_bdgs_per_class_and_admin_unit
out_num_dwells= num_dwells_per_class_and_admin_unit
......
......@@ -1379,11 +1379,14 @@ def generate_country_industrial_cells(
return
# Load exposure file of the aggregated exposure model:
aggr_model_filepath = os.path.join(aggr_model_pathname, "Exposure_Ind_%s.csv" % (country))
if not os.path.isfile(aggr_model_filepath):
print("ERROR in generate_country_industrial_cells: aggregated model file not found")
return
aggr_mod_df = pd.read_csv(aggr_model_filepath, sep=",") # aggr_mod_df is a Pandas DataFrame
try:
aggr_mod_df = pd.read_csv(os.path.join(aggr_model_pathname,
"Exposure_Ind_%s.csv" % (country)),
sep=",") # aggr_mod_df is a Pandas DataFrame
except:
aggr_mod_df = pd.read_csv(os.path.join(aggr_model_pathname,
"Exposure_Model_%s_Ind.csv" % (country)),
sep=",") # aggr_mod_df is a Pandas DataFrame
# Load country boundaries:
boundaries_filepath = os.path.join(
......
......@@ -1178,7 +1178,7 @@ def disaggregate_OBM_by_class_and_adm_unit(in_obm_DF, in_list_country_adminIDs,
return out_obm_num_bdgs_per_class_and_adm_unit, out_obm_num_dwells_per_class_and_adm_unit, out_obm_num_ppl_per_class_and_adm_unit, out_obm_cost_per_class_and_adm_unit
def calculate_leftover_per_class_and_admin_unit(sera_num_bdgs_per_class_and_adm_unit, sera_list_country_adminIDs, sera_classes, obm_with_classes_num_bdgs_per_class_and_adm_unit, obm_list_country_adminIDs_of_arrays, obm_list_bdg_classes_of_arrays, bdg_classes_hdf5_filename):
def calculate_leftover_per_class_and_admin_unit(sera_num_bdgs_per_class_and_adm_unit, sera_list_country_adminIDs, sera_classes, obm_with_classes_num_bdgs_per_class_and_adm_unit, obm_list_country_adminIDs_of_arrays, obm_list_bdg_classes_of_arrays, cost_per_area_field, bdg_classes_hdf5_filename):
"""
This function calculates the number of "leftover" buildings and their associated number of dwellings, people and costs.
The premise is N_leftover= MAX[0, N_sera - N_obm]. This calculation is carried out for a particular cell and adm unit,
......@@ -1192,6 +1192,7 @@ def calculate_leftover_per_class_and_admin_unit(sera_num_bdgs_per_class_and_adm_
and columns are country_adminIDs (as per obm_list_country_adminIDs_of_arrays)
obm_list_country_adminIDs_of_arrays: list of country_adminIDs associated with each column of obm_with_classes_num_bdgs_per_class_and_adm_unit
obm_list_bdg_classes_of_arrays: list of bdg classes associated with each row of obm_with_classes_num_bdgs_per_class_and_adm_unit
cost_per_area_field: name of the field that contains the cost per area (e.g., 'cost_per_area_eur', 'cost_per_area_usd')
In the most general case, the dimensions of sera_num_bdgs_per_class_and_adm_unit and
obm_with_classes_num_bdgs_per_class_and_adm_unit may or may not match. While SERA is the only source of building classes
......@@ -1219,7 +1220,7 @@ def calculate_leftover_per_class_and_admin_unit(sera_num_bdgs_per_class_and_adm_
# arrays to collect values for all bdg classes:
dwell_per_bdg= np.zeros([len(sera_classes)])
area_per_dwelling_sqm= np.zeros([len(sera_classes)])
cost_per_area_usd= np.zeros([len(sera_classes)])
cost_per_area= np.zeros([len(sera_classes)])
ppl_per_dwell= np.zeros([len(sera_classes)])
for k, bdg_class in enumerate(sera_classes):
country_adminid_locs, parameter_vals, col_names, col_contents= gdet_serah.retrieve_parameters_for_taxonomy(bdg_class, bdg_classes_hdf5_filename)
......@@ -1228,13 +1229,13 @@ def calculate_leftover_per_class_and_admin_unit(sera_num_bdgs_per_class_and_adm_
if len(which_row_admin_id)==1:
dwell_per_bdg[k]= parameter_vals[which_row_admin_id[0], int(col_names[np.where(np.array(col_contents)=='dwell_per_bdg')[0][0]].split('_')[-1])]
area_per_dwelling_sqm[k]= parameter_vals[which_row_admin_id[0], int(col_names[np.where(np.array(col_contents)=='area_per_dwelling_sqm')[0][0]].split('_')[-1])]
cost_per_area_usd[k]= parameter_vals[which_row_admin_id[0], int(col_names[np.where(np.array(col_contents)=='cost_per_area_usd')[0][0]].split('_')[-1])]
cost_per_area[k]= parameter_vals[which_row_admin_id[0], int(col_names[np.where(np.array(col_contents)==cost_per_area_field)[0][0]].split('_')[-1])]
ppl_per_dwell[k]= parameter_vals[which_row_admin_id[0], int(col_names[np.where(np.array(col_contents)=='ppl_per_dwell')[0][0]].split('_')[-1])]
else: # this can happen because the SERA array might contain bdg classes that are only in one of the adm units it has, and in this case the function retrieve_parameters_for_taxonomy will not be able to provide values for classes in adm units that do not occur together
pass
LO_num_dwells_per_class_and_adm_unit[:,j]= LO_num_bdgs_per_class_and_adm_unit[:,j] * dwell_per_bdg
LO_num_ppl_per_class_and_adm_unit[:,j]= LO_num_dwells_per_class_and_adm_unit[:,j] * ppl_per_dwell
LO_cost_per_class_and_adm_unit[:,j]= LO_num_dwells_per_class_and_adm_unit[:,j] * area_per_dwelling_sqm * cost_per_area_usd
LO_cost_per_class_and_adm_unit[:,j]= LO_num_dwells_per_class_and_adm_unit[:,j] * area_per_dwelling_sqm * cost_per_area
return LO_num_bdgs_per_class_and_adm_unit, LO_num_dwells_per_class_and_adm_unit, LO_num_ppl_per_class_and_adm_unit, LO_cost_per_class_and_adm_unit
......@@ -1441,7 +1442,7 @@ def write_OBM_to_GDE_tiles_add_params(gral_output_path, gde_tile_filename, occu_
building belonging to that class ('SERA_vals' proportions)
weighted_average_cost: float; the weighted average cost of this building, obtained as in the case of
weighted_average_ppl
cost_currency: string, e.g. "USD" or "EURO"
cost_currency: string, e.g. "USD" or "EUR"
"""
error_str= ''
gde_fle = h5py.File(os.path.join(gral_output_path, 'GDE_tiles', disagg_method, gde_tile_filename), "a")
......@@ -1493,7 +1494,7 @@ def write_LeftOver_to_GDE_tiles(gral_output_path, gde_tile_filename, occu_case,
just 1D because there is just one admin unit involved, hence the reshaping step
arr_costs: same dimensions as arr_num_bdgs, content is total cost for the total number of buildings in arr_num_bdgs
arr_ppl: same dimensions as arr_num_bdgs, content is total number of people for the total number of buildings in arr_num_bdgs
cost_currency: string, e.g. "USD" or "EURO"
cost_currency: string, e.g. "USD" or "EUR"
"""
gde_fle = h5py.File(os.path.join(gral_output_path, 'GDE_tiles', disagg_method, gde_tile_filename), "a")
if occu_case not in list(gde_fle):
......
......@@ -66,7 +66,15 @@ def get_admin_level_definition(country_name, path, output_dfs=False, full_files=
actual_levels_list= [0,0,0]
out_dataframes= {}
for k,case in enumerate(exposure_cases):
aux_df= pd.read_csv(os.path.join(path, 'Exposure_'+case+'_'+country_name+'.csv'), sep=',')
try:
aux_df= pd.read_csv(os.path.join(path,
'Exposure_%s_%s.csv' % (case, country_name)),
sep=',')
except:
aux_df= pd.read_csv(os.path.join(path,
'Exposure_Model_%s_%s.csv'
% (country_name, case)),
sep=',')
if full_files:
for col in aux_df.columns:
aux_df= aux_df.rename(columns={col: col.lower()})
......@@ -81,6 +89,8 @@ def get_admin_level_definition(country_name, path, output_dfs=False, full_files=
elif complete_col_list_types[j]== 'float':
if col=='dwellings':
aux_df[col]= aux_df['buildings'].values # one dwelling per building assumed when no dwellings column available (the assumption is consistent with the Com and Ind models, for Res the dwellings column exists)
elif col=='area_per_dwelling_sqm':
aux_df[col]= aux_df['area_per_building_sqm'].values # one dwelling per building assumed
else:
aux_df[col]= np.zeros([aux_df.shape[0]])
out_dataframes[case]= aux_df
......@@ -121,8 +131,8 @@ def get_complete_list_of_columns(full_files_flag):
id_X and name_X are omitted, and so are longitude and latitude (we cannot "fill in" these data).
"""
if full_files_flag:
out_list= ['settlement_type','occupancy_type','dwellings']
out_types= ['str','str','float']
out_list= ['settlement_type','occupancy_type','dwellings','area_per_dwelling_sqm']
out_types= ['str','str','float','float']
else: # I expect this function to be irrelevant for the case of the OpenQuake CSV files
out_list= []
out_types= []
......@@ -142,7 +152,15 @@ def check_admin_levels_unequivocally_defined(country_name, path, full_files=Fals
is_unequivocally_defined= [True, True, True]
other_levels_checked= []
for k,case in enumerate(exposure_cases):
aux_df= pd.read_csv(os.path.join(path, 'Exposure_'+case+'_'+country_name+'.csv'), sep=',')
try:
aux_df= pd.read_csv(os.path.join(path,
'Exposure_%s_%s.csv' % (case, country_name)),
sep=',')
except:
aux_df= pd.read_csv(os.path.join(path,
'Exposure_Model_%s_%s.csv'
% (country_name, case)),
sep=',')
if full_files:
for col in aux_df.columns:
aux_df= aux_df.rename(columns={col: col.lower()})
......@@ -155,7 +173,15 @@ def check_admin_levels_unequivocally_defined(country_name, path, full_files=Fals
available_levels_list.append(aux_list_levels)
for k,case in enumerate(exposure_cases):
print(' Checking '+case)
aux_df= pd.read_csv(os.path.join(path, 'Exposure_'+case+'_'+country_name+'.csv'), sep=',')
try:
aux_df= pd.read_csv(os.path.join(path,
'Exposure_%s_%s.csv' % (case, country_name)),
sep=',')
except:
aux_df= pd.read_csv(os.path.join(path,
'Exposure_Model_%s_%s.csv'
% (country_name, case)),
sep=',')
if full_files:
for col in aux_df.columns:
aux_df= aux_df.rename(columns={col: col.lower()})
......@@ -199,14 +225,30 @@ def check_number_points_per_admin_level(country_name, path):
actual_levels_list= [0,0,0]
have_one_point_per_ID= [True, True, True]
for k,case in enumerate(exposure_cases):
aux_df= pd.read_csv(os.path.join(path, 'Exposure_'+case+'_'+country_name+'.csv'), sep=',')
try:
aux_df= pd.read_csv(os.path.join(path,
'Exposure_%s_%s.csv' % (case, country_name)),
sep=',')
except:
aux_df= pd.read_csv(os.path.join(path,
'Exposure_Model_%s_%s.csv'
% (country_name, case)),
sep=',')
for i in range(0,max_possible_level+1):
if 'id_'+str(i) in aux_df.columns:
if np.all(aux_df['id_'+str(i)].values!='No_tag'): # if not('No_tag' in aux_df['id_'+str(i)].values):
actual_levels_list[k]= i
for k,case in enumerate(exposure_cases):
print(' Checking '+case)
aux_df= pd.read_csv(os.path.join(path, 'Exposure_'+case+'_'+country_name+'.csv'), sep=',')
try:
aux_df= pd.read_csv(os.path.join(path,
'Exposure_%s_%s.csv' % (case, country_name)),
sep=',')
except:
aux_df= pd.read_csv(os.path.join(path,
'Exposure_Model_%s_%s.csv'
% (country_name, case)),
sep=',')
try:
mixcase= False
unique_ids_actual= np.unique(aux_df['id_'+str(actual_levels_list[k])].values) # unique admin unit IDs of the level that defines the exposure
......@@ -316,7 +358,7 @@ def write_hdf5_of_building_classes_parameters(in_case, in_country_admID, in_df,
in_case= Res, Com, Ind
in_country_admID= e.g. 'BE_12', 'GB_32', etc (i.e. ISO2 of country with admin ID)
in_df= Pandas DataFrame containing only the columns of interest, i.e. taxonomy (which is taxonomy*),
dwell_per_bdg, area_per_dwelling_sqm, cost_per_area_usd, ppl_per_dwell, for in_country_admID.
dwell_per_bdg, area_per_dwelling_sqm, cost_per_area, ppl_per_dwell, for in_country_admID.
Each value of taxonomy appears only once (i.e. group_same_taxonomies has already been run).
Structure of the HDF5 file:
......@@ -397,20 +439,20 @@ def determine_unique_combinations_of_taxonomies_and_values_per_building(in_case,
For example, for residential exposure a "unique" class will be defined by:
taxonomy///settlement_type/occupancy_type/dwell_per_bdg/area_per_dwelling_sqm
This combination of parameters is referred to in brief as taxonomy*. The columns used for taxonomy* are not the same
This combination of parameters is referred to in brief as taxonomy*. The columns used for taxonomy* are not necessarily the same
for Res, Com and Ind (see function get_list_for_grouping).
This function will then determine whether there are repeated instances of taxonomy* within with_duplicates_df.
For example, if with_duplicates_df is:
taxonomy structural night
RC/H:2///RURAL//2.00/125.00 2000.5 3.51
RC/H:2///RURAL//2.00/125.00 2000.5 3.51
RC/H:2///RURAL//1.00/150.00 1050.2 3.51
RC/H:2///RURAL// 2000.5 3.51
RC/H:2///RURAL// 2000.5 3.51
RC/H:2///RURAL// 1050.2 3.51
The result will be:
there_are_duplicates= True
uniq_vals= array['RC/H:2///RURAL//2.00/125.00', 'RC/H:2///RURAL//1.00/150.00']
uniq_vals= array['RC/H:2///RURAL//']
uniq_inverse= array[0,0,0] (i.e. the positions of with_duplicates_df that correspond to each unique element in uniq_vals)
groupby_crit= ['taxonomy', 'settlement_type', 'occupancy_type']
......@@ -456,17 +498,17 @@ def get_list_for_grouping(occupancy_case):
"""
The columns enumerated in the out_list are those that will be used to identify the building class.
The decision regarding on which columns to include here was based on the analysis carried out
using SERA_which_countries_have_duplicate_classes_include_settlement.py (March 2020).
in March 2020, May/June 2020 and November 2021.
"""
if occupancy_case=='Res':
out_list= ['taxonomy', 'settlement_type', 'occupancy_type', 'dwell_per_bdg', 'area_per_dwelling_sqm']
round_by= ['', '', '', get_rounding_type('dwell_per_bdg'), get_rounding_type('area_per_dwelling_sqm')] # method for rounding floats
out_list= ['taxonomy', 'settlement_type', 'occupancy_type']
round_by= ['', '', ''] # method for rounding floats
elif occupancy_case=='Com':
out_list= ['taxonomy', 'settlement_type', 'occupancy_type', 'area_per_dwelling_sqm']
round_by= ['', '', '', get_rounding_type('area_per_dwelling_sqm')] # method for rounding floats
out_list= ['taxonomy', 'settlement_type', 'occupancy_type']
round_by= ['', '', ''] # method for rounding floats
elif occupancy_case=='Ind':
out_list= ['taxonomy', 'settlement_type', 'occupancy_type', 'cost_per_area_usd']
round_by= ['', '', '', get_rounding_type('cost_per_area_usd')] # method for rounding floats
out_list= ['taxonomy', 'settlement_type', 'occupancy_type']
round_by= ['', '', ''] # method for rounding floats
else:
out_list= []
round_by= []
......@@ -533,6 +575,8 @@ def get_rounding_type(param):
rounding_str= 'normal_2'
elif param=='cost_per_area_usd':
rounding_str= 'cost_100'
elif param=='cost_per_area_eur':
rounding_str= 'cost_100'
elif param=='ppl_per_dwell':
rounding_str= 'normal_2'
else:
......@@ -620,7 +664,7 @@ def group_same_taxonomies(in_df, in_array_unique_combis, in_position_of_unique,
in_array_unique_combis= numpy array of unique combinations of taxonomy* (i.e. taxonomy considering all parameters enumerated in get_list_for_grouping)
in_position_of_unique= its length is the same as the number of rows in in_df; for each row, it indicates the corresponding element of in_array_unique_taxonoms
in_country_adm_ids= an array of strings with length equal to the number of rows in in_df. It indicates the country_admin_ID each row in in_df is coming from.
out_df= Pandas DataFrame in which a certain combination of taxonomy* only exists once (the different rows of in_df for the same combination of taxonomy*
have been grouped together). Note that the number of buildings (i.e. "number" or "buildings" column), number of dwellings and number of people
("occupants_per_asset") are the only values that are added, all other values (e.g. "dwell_per_bdg") are values per building or dwelling and are
......@@ -655,9 +699,9 @@ def group_same_taxonomies(in_df, in_array_unique_combis, in_position_of_unique,
aux_num_bdgs_list= []
for j in range(0,len(which_t)):
aux_country_admin_ID.append(in_country_adm_ids[which_t[j]])
aux_num_bdgs_list.append(in_df['buildings'].values[which_t[j]])
aux_num_bdgs_list.append(in_df['buildings'].values[which_t[j]])
for k, col in enumerate(cols_to_distribute_effective):
if (col!='buildings') and (col!='occupants_per_asset') and (col!='dwellings') and (col!='total_repl_cost_usd'): # col=='occupants_per_asset' and col=='dwellings' should not happen, written this way just in case we change our minds later
if (col!='buildings') and (col!='occupants_per_asset') and (col!='dwellings') and (col!='total_repl_cost_usd') and (col!='total_repl_cost_eur'): # col=='occupants_per_asset' and col=='dwellings' should not happen, written this way just in case we change our minds later
arr_floats[t,k]= in_df[col].values[which_t[j]] # values will be overwritten but they should be the same
else:
arr_floats[t,k]= arr_floats[t,k] + in_df[col].values[which_t[j]]
......
......@@ -162,6 +162,19 @@ def check_parameters(config, section_name):
config["GDE_gather_SERA_and_OBM"]["print_screen_during_run"],
"print_screen_during_run",
)
if not config.has_option("GDE_gather_SERA_and_OBM", "currency"):
raise IOError(
"ERROR!! currency PARAMETER MISSING FROM CONFIG FILE!!"
)
else:
if config["GDE_gather_SERA_and_OBM"]["currency"] != "EUR" and config["GDE_gather_SERA_and_OBM"]["currency"] != "USD":
raise IOError(
"ERROR!! currency PARAMETER PROVIDED IN CONFIG FILE COULD NOT BE RECOGNISED!! VALID OPTIONS ARE 'EUR' AND 'USD'."
)
if not config.has_option("GDE_gather_SERA_and_OBM", "cost_per_area_field"):
raise IOError(
"ERROR!! cost_per_area_field PARAMETER MISSING FROM CONFIG FILE!!"
)
elif section_name == "GDE_plot_maps":
_ = check_of_occupancy_cases_parameter(config, section_name)
if not config.has_option("GDE_plot_maps", "location_var"):
......@@ -173,6 +186,8 @@ def check_parameters(config, section_name):
elif section_name == "GDE_check_consistency":
if not config.has_option("GDE_check_consistency", "location_var"):
raise IOError("ERROR!! location_var PARAMETER MISSING FROM CONFIG FILE!!")
if not config.has_option("GDE_check_consistency", "total_repl_cost_field"):
raise IOError("ERROR!! total_repl_cost_field PARAMETER MISSING FROM CONFIG FILE!!")
elif section_name == "OBM_assign_cell_ids_and_adm_ids_to_footprints":
if not config.has_option(
"OBM_assign_cell_ids_and_adm_ids_to_footprints", "method_to_use"
......@@ -231,6 +246,10 @@ def check_parameters(config, section_name):
_ = check_of_occupancy_cases_parameter(config, section_name)
if not config.has_option("SERA_create_OQ_input_files", "countries"):
raise IOError("ERROR!! countries PARAMETER MISSING FROM CONFIG FILE!!")
if not config.has_option("SERA_create_OQ_input_files", "cost_per_area_field"):
raise IOError(
"ERROR!! cost_per_area_field PARAMETER MISSING FROM CONFIG FILE!!"
)
elif section_name == "SERA_create_outputs_QGIS_for_checking":
if not config.has_option("SERA_create_outputs_QGIS_for_checking", "country"):
raise IOError("ERROR!! country PARAMETER MISSING FROM CONFIG FILE!!")
......@@ -248,6 +267,10 @@ def check_parameters(config, section_name):
raise IOError(
"ERROR!! max_num_cells_per_stage NEEDS TO BE A POSITIVE INTEGER!!"
)
if not config.has_option("SERA_create_visual_output_of_grid_model_full_files", "cost_per_area_field"):
raise IOError(
"ERROR!! cost_per_area_field PARAMETER MISSING FROM CONFIG FILE!!"
)
elif section_name == "SERA_distributing_exposure_to_cells":
check_of_sera_disaggregation_to_consider_parameter(config, section_name)
if not config.has_option("SERA_distributing_exposure_to_cells", "countries"):
......@@ -286,6 +309,26 @@ def check_parameters(config, section_name):
float(config[section_name]["tolerance_diff"])
except:
raise IOError("ERROR!! tolerance_diff PARAMETER NEEDS TO BE A REAL NUMBER")
if not config.has_option("SERA_exploration_investigate_full_CSV_files", "cost_per_area_field"):
raise IOError(
"ERROR!! cost_per_area_field PARAMETER MISSING FROM CONFIG FILE!!"
)
if not config.has_option("SERA_exploration_investigate_full_CSV_files", "total_repl_cost_field"):
raise IOError(
"ERROR!! total_repl_cost_field PARAMETER MISSING FROM CONFIG FILE!!"
)
if not config.has_option("SERA_exploration_investigate_full_CSV_files", "cost_structural_field"):
raise IOError(
"ERROR!! cost_structural_field PARAMETER MISSING FROM CONFIG FILE!!"
)
if not config.has_option("SERA_exploration_investigate_full_CSV_files", "cost_nonstructural_field"):
raise IOError(
"ERROR!! cost_nonstructural_field PARAMETER MISSING FROM CONFIG FILE!!"
)
if not config.has_option("SERA_exploration_investigate_full_CSV_files", "cost_contents_field"):
raise IOError(
"ERROR!! cost_contents_field PARAMETER MISSING FROM CONFIG FILE!!"
)
elif section_name == "SERA_exploration_investigate_macro_taxonomies":
if not config.has_option("SERA_exploration_investigate_macro_taxonomies", "country"):
raise IOError("ERROR!! country PARAMETER MISSING FROM CONFIG FILE!!")
......@@ -374,11 +417,19 @@ def check_parameters(config, section_name):
or section_name == "SERA_testing_rebuilding_exposure_from_cells_alternative_03"
):
if not config.has_option(
"SERA_testing_rebuilding_exposure_from_cells_alternative_01", "countries"
section_name, "countries"
):
raise IOError("ERROR!! countries PARAMETER MISSING FROM CONFIG FILE!!")
check_of_sera_disaggregation_to_consider_parameter(config, section_name)
_ = check_of_occupancy_cases_parameter(config, section_name)
_ = check_of_occupancy_cases_parameter(config, section_name)
if not config.has_option(section_name, "cost_per_area_field"):
raise IOError(
"ERROR!! cost_per_area_field PARAMETER MISSING FROM CONFIG FILE!!"
)
if not config.has_option(section_name, "total_repl_cost_field"):
raise IOError(
"ERROR!! total_repl_cost_field PARAMETER MISSING FROM CONFIG FILE!!"
)
elif section_name == "GDE_check_tiles_vs_visual_CSVs":
_ = check_of_occupancy_cases_parameter(config, section_name)
if not config.has_option("GDE_check_tiles_vs_visual_CSVs", "path_GDE_tiles"):
......
......@@ -50,6 +50,8 @@ def run_this_file(config_dict):
sera_models_path= config_dict['File Paths']['sera_models_path']
# The visual output files have names such as GDE_visual_'+crit+'_'+location_var+'.csv'
location_var= config_dict['GDE_check_consistency']['location_var']
# Total-replacement-cost field (e.g. 'total_repl_cost_eur', 'total_repl_cost_usd'):
total_repl_cost_field = config_dict['GDE_check_consistency']['total_repl_cost_field']
# Load dictionary of result paths to be used within the code:
results_available_for= config_dict['Available Results']['results_available_for'].split(', ') # eg: area, gpw_2015_pop, ghs, sat_27f, sat_27f_model
result_paths= config_dict['Available Results']['result_paths'].split(', ') # each associated with each element of results_available_for
......@@ -134,7 +136,7 @@ def run_this_file(config_dict):
sera_num_bdgs_j= dfs_dict[occu]['buildings'].values[which].sum()
sera_num_dwells_j= dfs_dict[occu]['dwellings'].values[which].sum()
sera_num_ppl_j= dfs_dict[occu]['occupants_per_asset'].values[which].sum()
sera_repl_cost_j= dfs_dict[occu]['total_repl_cost_usd'].values[which].sum()
sera_repl_cost_j= dfs_dict[occu][total_repl_cost_field].values[which].sum()
gde_num_bdgs_sera_j= dict_by_crit[crit]['visual_by_admin']['number_'+occu+'_SERA'].values[j]
gde_num_dwells_sera_j= dict_by_crit[crit]['visual_by_admin']['num_dwells_'+occu+'_SERA'].values[j]
gde_num_ppl_sera_j= dict_by_crit[crit]['visual_by_admin']['night_'+occu+'_SERA'].values[j]
......@@ -208,7 +210,7 @@ def run_this_file(config_dict):
out_list.append('{:.2f}'.format(dfs_dict[occu]['buildings'].values[which_attica].sum()))
out_list.append('{:.2f}'.format(dfs_dict[occu]['dwellings'].values[which_attica].sum()))
out_list.append('{:.2f}'.format(dfs_dict[occu]['occupants_per_asset'].values[which_attica].sum()))
out_list.append('{:.2f}'.format(dfs_dict[occu]['total_repl_cost_usd'].values[which_attica].sum()))
out_list.append('{:.2f}'.format(dfs_dict[occu][total_repl_cost_field].values[which_attica].sum()))
out_list.append(sera_models_path)
out_csv.write(','.join(out_list)+'\n')
for crit in list(dict_by_crit.keys()):
......
......@@ -118,6 +118,10 @@ read_completeness = obm_tiles
print_screen_during_run = False
# Occupancy cases in which the OBM buildings will be classified ('Oth' is implicit in the method):
occupancy_cases = Res, Com, Ind
# Currency (options: EUR, USD):
currency = EUR
# Cost-per-area field (options: cost_per_area_eur, cost_per_area_usd):
cost_per_area_field = cost_per_area_eur
[GDE_plot_maps]
# The visual output files have names such as GDE_visual_'+crit+'_'+location_var+'.csv'
......@@ -133,6 +137,8 @@ sera_disaggregation_to_consider = sat_27f
[GDE_check_consistency]
# The visual output files have names such as GDE_visual_'+crit+'_'+location_var+'.csv'
location_var = Greece
# Total-replacement-cost field (options: total_repl_cost_eur, total_repl_cost_usd):
total_repl_cost_field = total_repl_cost_eur
[OBM_assign_cell_ids_and_adm_ids_to_footprints]
# Method to use to assign a building footprint to a cell or admin unit.
......@@ -165,6 +171,8 @@ countries = Greece
# Within those countries, do not process admin units with the following IDs
# (useful for running only parts of countries; this setting may be left empty or omitted):
admin_ids_to_ignore = 9999999
# Cost-per-area field (options: cost_per_area_eur, cost_per_area_usd):
cost_per_area_field = cost_per_area_eur
[SERA_create_outputs_QGIS_for_checking]
country = Greece
......@@ -177,6 +185,8 @@ sera_disaggregation_to_consider = sat_27f
occupancy_cases = Res, Com, Ind
# Maximum number of cells per calculation stage (the code processes and appends output by blocks):
max_num_cells_per_stage = 1000
# Cost-per-area field (options: cost_per_area_eur, cost_per_area_usd):
cost_per_area_field = cost_per_area_eur
[SERA_distributing_exposure_to_cells]
# SELECT CASE OF SERA DISAGGREGATION TO CARRY OUT:
......@@ -190,7 +200,7 @@ countries = Greece
# (useful for running parts of countries only, it can be empty or ignored too):
admin_ids_to_ignore = 9999999
# Columns to distribute (TODO: provide a better description of this parameter):
columns_to_distribute = buildings, dwell_per_bdg, area_per_dwelling_sqm, cost_per_area_usd, ppl_per_dwell
columns_to_distribute = buildings, dwell_per_bdg, area_per_dwelling_sqm, cost_per_area_eur, ppl_per_dwell
# Write the HDF5 file of building classes parameters or not.
# (write_hdf5_bdg_classes parameter for gdet_sera.distribute_SERA_to_cells() function):
write_hdf5_bdg_classes_param = True
......@@ -200,6 +210,16 @@ write_hdf5_bdg_classes_param = True
tolerance_ratio = 1E-3
# Tolerance for the difference of parameters (e.g. X-Y):
tolerance_diff = 10.0
# Cost-per-area field (options: cost_per_area_eur, cost_per_area_usd):
cost_per_area_field = cost_per_area_eur
# Total-replacement-cost field (options: total_repl_cost_eur, total_repl_cost_usd):
total_repl_cost_field = total_repl_cost_eur
# Structural-replacement-cost field (options: cost_structural_eur, cost_structural_usd):
cost_structural_field = cost_structural_eur
# Non-structural-replacement-cost field (options: cost_nonstructural_eur, cost_nonstructural_usd):
cost_nonstructural_field = cost_nonstructural_eur
# Contents-replacement-cost field (options: cost_contents_eur, cost_contents_usd):
cost_contents_field = cost_contents_eur
[SERA_exploration_investigate_macro_taxonomies]
country = Greece
......@@ -253,6 +273,10 @@ admin_ids_to_ignore = 9999999
sera_disaggregation_to_consider = sat_27f
# Occupancy cases to consider:
occupancy_cases = Res, Com, Ind
# Cost-per-area field (options: cost_per_area_eur, cost_per_area_usd):
cost_per_area_field = cost_per_area_eur
# Total-replacement-cost field (options: total_repl_cost_eur, total_repl_cost_usd):
total_repl_cost_field = total_repl_cost_eur
[SERA_testing_rebuilding_exposure_from_cells_alternative_02]
# Countries to process (if many, separate with comma and space):
......@@ -264,6 +288,10 @@ min_grid_cell_id = 0
sera_disaggregation_to_consider = sat_27f
# Occupancy cases to consider:
occupancy_cases = Res, Com, Ind
# Cost-per-area field (options: cost_per_area_eur, cost_per_area_usd):
cost_per_area_field = cost_per_area_eur
# Total-replacement-cost field (options: total_repl_cost_eur, total_repl_cost_usd):
total_repl_cost_field = total_repl_cost_eur
[SERA_testing_rebuilding_exposure_from_cells_alternative_03]
# Countries to process (if many, separate with comma and space):
......@@ -276,6 +304,10 @@ admin_ids_to_ignore = 9999999
sera_disaggregation_to_consider = sat_27f
# Occupancy cases to consider:
occupancy_cases = Res, Com, Ind
# Cost-per-area field (options: cost_per_area_eur, cost_per_area_usd):
cost_per_area_field = cost_per_area_eur
# Total-replacement-cost field (options: total_repl_cost_eur, total_repl_cost_usd):
total_repl_cost_field = total_repl_cost_eur
[GDE_check_tiles_vs_visual_CSVs]
# Path to the GDE tiles HDF5 files to consider (directory):
......
......@@ -84,6 +84,10 @@ def run_this_file(config_dict):
occupancy_cases= config_dict['GDE_gather_SERA_and_OBM']['occupancy_cases'].split(', ')
if 'Oth' not in occupancy_cases: # i.e. all other cases (e.g. neither Res nor Com nor Ind)
occupancy_cases.append('Oth')
# Currency:
currency = config_dict['GDE_gather_SERA_and_OBM']['currency']
# Cost-per-area field:
cost_per_area_field = config_dict['GDE_gather_SERA_and_OBM']['cost_per_area_field']
# This code will go cell by cell. Define the list of cells from the config file:
grid_cells_list, grid_cells_def_method= gdet_gral.define_grid_cell_list_to_process(config_dict, 'Cells to Process')
# Cell IDs with numbers below this one will be ignored (useful while running pieces of regions):
......@@ -139,7 +143,7 @@ def run_this_file(config_dict):
bdg_classes_hdf5_filename= os.path.join(out_path,'Europe_SERA_bdg_classes_'+case+'.hdf5')