From 0b89ddaf6c99fc1b45230e08e4ff400ef5004b9d Mon Sep 17 00:00:00 2001 From: Cecilia Nievas Date: Fri, 11 Sep 2020 17:39:14 +0200 Subject: [PATCH 1/4] Round to 5 decimal places when creating the OQ input files, function write_OQ_input_file --- GDE_TOOLS_access_SERA_HDF.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/GDE_TOOLS_access_SERA_HDF.py b/GDE_TOOLS_access_SERA_HDF.py index b0d46c3..81ea591 100644 --- a/GDE_TOOLS_access_SERA_HDF.py +++ b/GDE_TOOLS_access_SERA_HDF.py @@ -31,6 +31,8 @@ import h5py import numpy as np import GDE_TOOLS_general as gdet_gral import GDE_TOOLS_world_grid as gdet_grid +import pdb + def verify_paths_are_given(gral_path, hdf5_filename, in_case, distrib_method): """ @@ -436,7 +438,10 @@ def write_OQ_input_file(in_df, filename, cols_order, writemode='w', sep=','): for i in range(0,in_df.shape[0]): out_str= [] for colname in cols_order: - out_str.append(str(in_df[colname].values[i])) + if colname.lower()=='lon' or colname.lower()=='lat' or colname.lower()=='longitude' or colname.lower()=='latitude' or colname.lower()=='number' or colname.lower()=='structural' or colname.lower()=='night': + out_str.append('{:.5f}'.format(in_df[colname].values[i])) + else: + out_str.append(str(in_df[colname].values[i])) out_csv.write(sep.join(out_str)+'\n') out_csv.close() -- GitLab From ac109b56f9f11c6260839226d52d4fec2ae120e8 Mon Sep 17 00:00:00 2001 From: Cecilia Nievas Date: Tue, 22 Sep 2020 12:06:04 +0200 Subject: [PATCH 2/4] New feature to generate GDE tiles as HDF5 files as output of GDE_gather_SERA_and_OBM, as before the output was just OpenQuake input files and two CSV files with a summary of the models to be loaded to QGIS --- GDE_TOOLS_access_OBM_HDF.py | 53 ++--- GDE_TOOLS_access_SERA_HDF.py | 1 - GDE_TOOLS_general.py | 374 ++++++++++++++++++++++++++++-- GDE_TOOLS_read_config_file.py | 19 ++ GDE_check_tiles_vs_visual_CSVs.py | 160 +++++++++++++ GDE_config_file_TEMPLATE.ini | 11 +- GDE_gather_SERA_and_OBM.py | 6 +- 7 files changed, 562 insertions(+), 62 deletions(-) create mode 100644 GDE_check_tiles_vs_visual_CSVs.py diff --git a/GDE_TOOLS_access_OBM_HDF.py b/GDE_TOOLS_access_OBM_HDF.py index c3effe7..bb5a13e 100644 --- a/GDE_TOOLS_access_OBM_HDF.py +++ b/GDE_TOOLS_access_OBM_HDF.py @@ -77,12 +77,16 @@ def make_list_country_admin_units_to_retrieve(admin_unit_type, list_subgroups, w -def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5_filename, countries_shp_path, sera_meta_path, occu_case, admin_unit_type= 'all', disagg_method= 'area', which_country= '', which_admin_ID= ''): +def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5_filename, countries_shp_path, sera_meta_path, gral_output_path, gde_tile_filename, occu_case, admin_unit_type= 'all', disagg_method= 'area', which_country= '', which_admin_ID= ''): """ obm_hdf5_filename: full file path to the OBM HDF5 file for a particular cell and occupancy case bdg_classes_hdf5_filename: full file path to the HDF5 file of the SERA building classes for a particular occupancy case countries_shp_path: full file path to the folder containing all shapefiles of admin units used in SERA sera_meta_path: full file path to the HDF5 file with the SERA metadata + gral_output_path: path to the general output directory (gde_tile_filename will be written in gral_output_path/GDE_tiles/gde_tile_filename + gde_tile_filename: filename of the output GDE tile HDF5 file (of the kind GDE_cell_XXXXX_disagg_method.hdf5). + gde_tile_filename will only be created/written to if obm_hdf5_filename exists, i.e. if there exist OBM buildings + in this cell occu_case: Res, Com, Ind, Oth admin_unit_type: "all" = all admin units within the cell @@ -92,7 +96,6 @@ def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5 (i.e. method used to distribute SERA to a grid) which_country: only needed if admin_unit_type is "country" or "admin_unit"; full country name which_admin_ID: only needed if admin_unit_type is "admin_unit" - Output: - out_dict: dictionary with three sub-keys: - DF: Pandas DataFrame with the OBM buildings in this cell (according to admin_unit_type). Its @@ -130,6 +133,7 @@ def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5 file_found= True except: file_found= False + gdet_gral.write_OBM_to_GDE_tiles_initialisation(obm_hdf5_filename, gral_output_path, gde_tile_filename, occu_case, disagg_method) if file_found: list_country_admin_units= make_list_country_admin_units_to_retrieve(admin_unit_type, list(fle), which_country=which_country, which_admin_ID=which_admin_ID) obm_without_classes_num_bdgs_per_adm_unit= np.zeros([len(list_country_admin_units)]) @@ -179,7 +183,7 @@ def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5 dwell_per_bdg= np.zeros([len(all_sera_classes)]) area_per_dwelling_sqm= np.zeros([len(all_sera_classes)]) cost_per_area_usd= np.zeros([len(all_sera_classes)]) - ppl_per_dwell= np.zeros([len(all_sera_classes)]) + ppl_per_dwell= np.zeros([len(all_sera_classes)]) for k, bdg_class in enumerate(all_sera_classes): osm_ids.append('OSM_'+str(osm_id)) taxonomy_stars.append(bdg_class) @@ -194,6 +198,15 @@ def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5 area_per_dwelling_sqm[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='area_per_dwelling_sqm')[0][0]].split('_')[-1])] cost_per_area_usd[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='cost_per_area_usd')[0][0]].split('_')[-1])] ppl_per_dwell[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='ppl_per_dwell')[0][0]].split('_')[-1])] + # For adding values of people and cost per building to the GDE tile HDF5 file: + ppl_per_bdg= dwell_per_bdg * ppl_per_dwell # 1D array of len(all_sera_classes) + cost_per_bdg_usd= dwell_per_bdg * area_per_dwelling_sqm * cost_per_area_usd # 1D array of len(all_sera_classes) + weighted_av_ppl= (ppl_per_bdg * all_sera_proportions[:,0]).sum() + weighted_av_cost= (cost_per_bdg_usd * all_sera_proportions[:,0]).sum() + error_writing_gde_tile= gdet_gral.write_OBM_to_GDE_tiles_add_params(gral_output_path, gde_tile_filename, occu_case, disagg_method, osm_id, country_adminID, all_sera_classes, ppl_per_bdg, cost_per_bdg_usd, weighted_av_ppl, weighted_av_cost, 'USD') + if error_writing_gde_tile!='': + error_messages= error_messages+' '+error_writing_gde_tile + # For building the DataFrame to output: num_bdgs= np.hstack([num_bdgs, all_sera_proportions[:,0]]) num_dwells_local= all_sera_proportions[:,0] * dwell_per_bdg num_dwells= np.hstack([num_dwells, num_dwells_local]) @@ -244,38 +257,6 @@ def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5 error_messages= error_messages+ ' ERROR IN retrieve_OBM_buildings_in_cell_from_hdf5: occu_case NOT RECOGNISED' fle.close() return out_dict, list_country_admin_units, obm_without_classes_num_bdgs_per_adm_unit, list_country_admin_units_levels, error_messages + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/GDE_TOOLS_access_SERA_HDF.py b/GDE_TOOLS_access_SERA_HDF.py index 81ea591..840e499 100644 --- a/GDE_TOOLS_access_SERA_HDF.py +++ b/GDE_TOOLS_access_SERA_HDF.py @@ -31,7 +31,6 @@ import h5py import numpy as np import GDE_TOOLS_general as gdet_gral import GDE_TOOLS_world_grid as gdet_grid -import pdb def verify_paths_are_given(gral_path, hdf5_filename, in_case, distrib_method): diff --git a/GDE_TOOLS_general.py b/GDE_TOOLS_general.py index 9eec3bc..1c74e16 100644 --- a/GDE_TOOLS_general.py +++ b/GDE_TOOLS_general.py @@ -40,6 +40,7 @@ import GDE_TOOLS_world_grid as gdet_grid import GDE_TOOLS_psql as gdet_psql from copy import deepcopy + def add_to_log_shapefiles_found(countryname, num_not_found): if num_not_found>0.5: aux_str= str(num_not_found)+' administrative boundaries not found in the Shapefiles. ERROR!!' @@ -225,44 +226,44 @@ def write_bdgs_to_cell_hdf(bdgs_df, in_cell_id, in_case, obm_hdf5_path, sera_dis if len(adm_level_unique_list)==1: country_admIDs= np.array([bdgs_df['country_iso2'].values[i]+'_'+bdgs_df[in_case.lower()+'_adm_id'].values[i] for i in range(0,bdgs_df.shape[0])]) country_admIDs_unique= np.unique(country_admIDs) - for country_admin_i in country_admIDs_unique: - if country_admin_i not in list(fle): - gr= fle.create_group(country_admin_i) - fle[country_admin_i].attrs['Adm_Level']= adm_level_unique_list[0] - which= np.where(country_admIDs==country_admin_i)[0] - sera_dict= gdet_serah.get_sera_for_cell_and_admin_unit(in_cell_id, in_case, country_admin_i, sera_disaggreg_cases, sera_disaggreg_path) + for country_admin_id in country_admIDs_unique: + if country_admin_id not in list(fle): + gr= fle.create_group(country_admin_id) + fle[country_admin_id].attrs['Adm_Level']= adm_level_unique_list[0] + which= np.where(country_admIDs==country_admin_id)[0] + sera_dict= gdet_serah.get_sera_for_cell_and_admin_unit(in_cell_id, in_case, country_admin_id, sera_disaggreg_cases, sera_disaggreg_path) # sera_dict[disag_case]= {} if the cell does not exist in the HDF5 file for this in_case and disag_case # keys of sera_dict will be all elements of sera_disaggreg_cases, irrespective of whether the cell exists or not for osm_id_j in bdgs_df['osm_id'].values[which]: - write_bdg(bdgs_df[bdgs_df.osm_id==osm_id_j], fle[country_admin_i]) - err_str= write_sera_to_bdg(osm_id_j, fle[country_admin_i], sera_dict, adm_level_unique_list) + write_bdg(bdgs_df[bdgs_df.osm_id==osm_id_j], fle[country_admin_id]) + err_str= write_sera_to_bdg(osm_id_j, fle[country_admin_id], sera_dict, adm_level_unique_list) if err_str!='': out_str= 'OBM ID ' + str(osm_id_j)+': '+err_str - fle[country_admin_i].attrs['Num_Bdgs']= len(which) + fle[country_admin_id].attrs['Num_Bdgs']= len(which) elif len(adm_level_unique_list)>1: # case of len(adm_level_unique_list)==0 does not need to be considered (it can happen if all buildings are in bdgs_99_df) out_str= out_str + ' ERROR!! INCONSISTENCY FOUND IN bdgs_df PASSED TO write_bdgs_to_cell_hdf FUNCTION: MORE THAN ONE ADMIN LEVEL ASSOCIATED WITH THESE BUILDINGS.' print(' '+out_str) if bdgs_99_df.shape[0]>0: # there are some buildings with no country assigned (i.e. -99 was in adm_level_unique_list) country_admIDs_unique= np.unique(bdgs_99_df['country_iso2'].values) - for country_admin_i in country_admIDs_unique: - if country_admin_i not in list(fle): - gr= fle.create_group(country_admin_i) - which= np.where(bdgs_99_df['country_iso2'].values==country_admin_i)[0] + for country_admin_id in country_admIDs_unique: + if country_admin_id not in list(fle): + gr= fle.create_group(country_admin_id) + which= np.where(bdgs_99_df['country_iso2'].values==country_admin_id)[0] for osm_id_j in bdgs_99_df['osm_id'].values[which]: - write_bdg(bdgs_99_df[bdgs_99_df.osm_id==osm_id_j], fle[country_admin_i]) - fle[country_admin_i].attrs['Num_Bdgs']= len(which) + write_bdg(bdgs_99_df[bdgs_99_df.osm_id==osm_id_j], fle[country_admin_id]) + fle[country_admin_id].attrs['Num_Bdgs']= len(which) fle.close() else: # buildings whose occupancy is neither Res nor Com nor Ind ---> cannot be associated to an admin unit because admin unit is occupancy-dependent hdf5_filename= os.path.join(obm_hdf5_path, 'OBM_cell_'+str(in_cell_id).zfill(10)+'_'+in_case+'.hdf5') fle = h5py.File(hdf5_filename, "a") # Read/write if exists, create otherwise (default) country_admIDs_unique= np.unique(bdgs_df['country_iso2'].values) # For "Other" we cannot define specific admin units because these are dependent on the definition of the SERA model - for country_admin_i in country_admIDs_unique: - if country_admin_i not in list(fle): - gr= fle.create_group(country_admin_i) - which= np.where(bdgs_df['country_iso2'].values==country_admin_i)[0] + for country_admin_id in country_admIDs_unique: + if country_admin_id not in list(fle): + gr= fle.create_group(country_admin_id) + which= np.where(bdgs_df['country_iso2'].values==country_admin_id)[0] for osm_id_j in bdgs_df['osm_id'].values[which]: - write_bdg(bdgs_df[bdgs_df.osm_id==osm_id_j], fle[country_admin_i]) - fle[country_admin_i].attrs['Num_Bdgs']= len(which) + write_bdg(bdgs_df[bdgs_df.osm_id==osm_id_j], fle[country_admin_id]) + fle[country_admin_id].attrs['Num_Bdgs']= len(which) fle.attrs['Num_Bdgs']= bdgs_df.shape[0] fle.close() return out_str @@ -270,7 +271,7 @@ def write_bdgs_to_cell_hdf(bdgs_df, in_cell_id, in_case, obm_hdf5_path, sera_dis def write_bdg(one_bdg_df, open_fle_part): """ one_bdg_df= Pandas DataFrame with the one and only building to write to file - open_fle_part= something like "fle" or "fle[country_admin_i]", where "fle" is the opened HDF5 file + open_fle_part= something like "fle" or "fle[country_admin_id]", where "fle" is the opened HDF5 file """ if one_bdg_df.shape[0]!=1: print(' ERROR!!! DataFrame HAS MORE THAN ONE BUILDING!! IT SHOULD ONLY HAVE ONE IN FUNCTION write_bdg!!!') @@ -290,7 +291,7 @@ def write_bdg(one_bdg_df, open_fle_part): def write_sera_to_bdg(in_osm_id, open_fle_part, sera_dict, adm_level_from_OBM): """ in_osm_id= OBM ID of the building - open_fle_part= something like "fle" or "fle[country_admin_i]", where "fle" is the opened HDF5 file of the OBM buildings in cells + open_fle_part= something like "fle" or "fle[country_admin_id]", where "fle" is the opened HDF5 file of the OBM buildings in cells sera_dict= dictionary of SERA distributions of building classes for a particular cell and occupancy case, retrieved using gdet_serah.get_sera_for_cell_and_admin_unit(). Its keys are disaggregation cases (e.g. area, GPW, etc). These keys exist in sera_dict @@ -1282,4 +1283,331 @@ def define_grid_cell_list_to_process(configuration_dict, script_name): out_grid_cells_list= [] out_grid_cells_def_method= 'UNRECOGNISED' return out_grid_cells_list, out_grid_cells_def_method + + + +def write_OBM_to_GDE_tiles_initialisation(obm_hdf5_filename, gral_output_path, gde_tile_filename, occu_case, disagg_method): + """ + This function writes the info on all OBM buildings present in obm_hdf5_filename to gde_tile_filename (the GDE tile + HDF5 file). It is assumed that obm_hdf5_filename will direct to the specific cell of interest and that the name + given to gde_tile_filename will correspond to that same cell. All this is done for a particular occupancy (occu_case). + + This function is called at the beginning of gdet_obmh.retrieve_OBM_buildings_in_cell_from_hdf5, which is called + by GDE_gather_SERA_and_OBM.py to retrieve data on the OBM buildings from the HDF5 files. + + The function creates the sub-group for the occupancy occu_case. Within it, it creates sub-groups for each + country_adminID present in obm_hdf5_filename. Within each, it creates the GDE_bdgs sub-group. Within it, it + creates a sub-group for each OSM ID. Within each OSM ID, it creates a dataset containing the array of building classes + associated with the building (SERA_classes) and another dataset containing the corresponding probabilities. For + each OSM ID it creates attribues (lat, lon, occupancy, storeys...) that it retrieves from obm_hdf5_filename. For + each GDE_bdgs sub-group of each country_adminID sub-group, it creates the Num_Bdgs_Total and Num_Bdgs_with_Class + attributes, that add up the number of OSM buildings present ("with class" meaning that SERA clases have been + assigned to it). After having gone through all the country_adminIDs, it creates a "Total" sub-group within the + occupancy sub-group, and a "GDE_bdgs" sub-group within it, to which it assigns the attributes Num_Bdgs_Total and + Num_Bdgs_with_Class by adding the numbers of buildings from all the country_adminIDs. + + The weighted average cost and number of people of each building are not calculated in this function because they + need the cost/building and people/building that are retrieved at a later stage by + gdet_obmh.retrieve_OBM_buildings_in_cell_from_hdf5. Herein only the weighted average cost and number of people + equal to zero are written for the buildings to which no SERA building classes can be assigned (e.g. when they are + classified as occupancy="Other" or when they are located in a district for which SERA is not defined). + + For the admin unit "Total" only the total number of buildings are written at this stage, because information on + the cost and number of people of buildings has not been added yet. + + obm_hdf5_filename: path to the OBM buildings HDF5 file + gral_output_path: path to general outputs (where the SERA HDF5 files are, for example) + gde_tile_filename: of the kind "GDE_cell_XXXXXXX.hdf5' + occu_case: Res, Com, Ind, Oth + disagg_method: area, gpw_2015_pop, ghs, sat_27f, sat_27f_model + """ + if not os.path.exists(os.path.join(gral_output_path, 'GDE_tiles')): + os.makedirs(os.path.join(gral_output_path, 'GDE_tiles')) + if not os.path.exists(os.path.join(gral_output_path, 'GDE_tiles', disagg_method)): + os.makedirs(os.path.join(gral_output_path, 'GDE_tiles', disagg_method)) + gde_fle = h5py.File(os.path.join(gral_output_path, 'GDE_tiles', disagg_method, gde_tile_filename), "a") + if occu_case in list(gde_fle): + del gde_fle[occu_case] + occu_gr= gde_fle.create_group(occu_case) + try: + obm_fle= h5py.File(obm_hdf5_filename, "r") + file_found= True + except: + file_found= False + if file_found: + for country_admin_id in list(obm_fle): + adm_gr= occu_gr.create_group(country_admin_id) + if 'Adm_Level' in list(obm_fle[country_admin_id].attrs): + adm_gr.attrs['Adm_Level']= obm_fle[country_admin_id].attrs['Adm_Level'] + adm_obm_gr= adm_gr.create_group('GDE_bdgs') + adm_obm_gr.attrs['Num_Bdgs_Total']= obm_fle[country_admin_id].attrs['Num_Bdgs'] + num_bdgs_with_class= 0 + for osm_id in list(obm_fle[country_admin_id]): + obm_gr= adm_obm_gr.create_group(osm_id) + obm_gr.attrs['Area']= obm_fle[country_admin_id][osm_id].attrs['Area'] + obm_gr.attrs['Lat']= obm_fle[country_admin_id][osm_id].attrs['Lat'] + obm_gr.attrs['Lon']= obm_fle[country_admin_id][osm_id].attrs['Lon'] + obm_gr.attrs['Occup']= obm_fle[country_admin_id][osm_id].attrs['Occup'] + obm_gr.attrs['Storeys']= obm_fle[country_admin_id][osm_id].attrs['Storeys'] + if ('SERA_classes_'+disagg_method) in list(obm_fle[country_admin_id][osm_id]):# if the building can be assigned SERA building classes + obm_gr.attrs['SERA_classes']= 'True' + num_bdgs_with_class= num_bdgs_with_class + 1 + obm_gr.attrs['Sto_SERA_inconsist']= obm_fle[country_admin_id][osm_id].attrs['Sto_SERA_inconsist_'+disagg_method] + dt=h5py.special_dtype(vlen=str) # examples on using special_dtype: https://www.programcreek.com/python/example/105243/h5py.special_dtype + classes_data= obm_gr.create_dataset('SERA_classes', obm_fle[country_admin_id][osm_id]['SERA_classes_'+disagg_method][:].shape,dtype=dt) + classes_data[:]= obm_fle[country_admin_id][osm_id]['SERA_classes_'+disagg_method][:] + vals_data= obm_gr.create_dataset('SERA_vals', obm_fle[country_admin_id][osm_id]['SERA_vals_'+disagg_method][:].shape,dtype='float64') + vals_data[:]= obm_fle[country_admin_id][osm_id]['SERA_vals_'+disagg_method][:] + else: # e.g. when occu_case is "Oth", or when an industrial building is located in a district undefined in SERA + obm_gr.attrs['SERA_classes']= 'False' + obm_gr.attrs['Weighted_Av_Ppl']= 0.0 + obm_gr.attrs['Weighted_Av_Cost']= 0.0 + adm_obm_gr.attrs['Num_Bdgs_with_Class']= num_bdgs_with_class + obm_fle.close() + all_num_bdgs_tot= 0 + all_num_bdgs_with_class= 0 + for country_admin_id in list(gde_fle[occu_case]): + all_num_bdgs_tot= all_num_bdgs_tot + gde_fle[occu_case][country_admin_id]['GDE_bdgs'].attrs['Num_Bdgs_Total'] + all_num_bdgs_with_class= all_num_bdgs_with_class + gde_fle[occu_case][country_admin_id]['GDE_bdgs'].attrs['Num_Bdgs_with_Class'] + gde_fle[occu_case].create_group('Total') + gde_fle[occu_case]['Total'].create_group('GDE_bdgs') + gde_fle[occu_case]['Total']['GDE_bdgs'].attrs['Num_Bdgs_Total']= all_num_bdgs_tot + gde_fle[occu_case]['Total']['GDE_bdgs'].attrs['Num_Bdgs_with_Class']= all_num_bdgs_with_class + gde_fle.close() + + +def write_OBM_to_GDE_tiles_add_params(gral_output_path, gde_tile_filename, occu_case, disagg_method, osm_id, country_adminID, all_sera_classes, arr_ppl_per_bdg, arr_cost_per_bdg, weighted_average_ppl, weighted_average_cost, cost_currency): + """ + This function writes the values of people per building and cost per building associated with each + building class that may correspond to the OBM building with ID osm_id, within the corresponding + GDE tile (gral_output_path/GDE_tiles/disagg_method/gde_tile_filename). It also writes the weighted + average of number of people and cost, which are calculated by multiplying the cost/building or + people/building of each class by the probability of the class being associated with these building. + The arrays and attributes associated with costs and people written by this function correspond to a + particular OSM_ID, and not to the totals of a country_adminID or a cell. + + The path gral_output_path/GDE_tiles/disagg_method/gde_tile_filename already exists because this + function is run after write_OBM_to_GDE_tiles_initialisation(), which will have created the path if + it didn't exist from before. + + This function is called mid-way of gdet_obmh.retrieve_OBM_buildings_in_cell_from_hdf5, which is called + by GDE_gather_SERA_and_OBM.py to retrieve data on the OBM buildings from the HDF5 files. It is called + once gdet_obmh.retrieve_OBM_buildings_in_cell_from_hdf5 has retrieved data on cost/building and + people/building for each SERA building class that can be assigned to the building. + + gral_output_path: path to general outputs (where the SERA HDF5 files are, for example) + gde_tile_filename: of the kind "GDE_cell_XXXXXXX.hdf5' + occu_case: Res, Com, Ind, Oth + disagg_method: area, gpw_2015_pop, ghs, sat_27f, sat_27f_model + osm_id: OSM ID of the building whose parameters will be written + country_adminID: in which the building is located + arr_ppl_per_bdg: array containing the values of people per building associated with each building class. + The order of the elements needs to be the same as that of SERA_classes, which is already + part of the GDE tile HDF5 file. + arr_cost_per_bdg: array containing the values of cost per building associated with each building class. + The order of the elements needs to be the same as that of SERA_classes, which is already + part of the GDE tile HDF5 file. + weighted_average_ppl: float; the weighted average number of people in this building, obtained multiplying + the number of people expected for each building class by the probability of this + building belonging to that class ('SERA_vals' proportions) + weighted_average_cost: float; the weighted average cost of this building, obtained as in the case of + weighted_average_ppl + cost_currency: string, e.g. "USD" or "EURO" + """ + error_str= '' + gde_fle = h5py.File(os.path.join(gral_output_path, 'GDE_tiles', disagg_method, gde_tile_filename), "a") + if np.any(gde_fle[occu_case][country_adminID]['GDE_bdgs'][osm_id]['SERA_classes'][:]!=all_sera_classes): + error_str= 'ERROR IN write_OBM_to_GDE_tiles_add_params, '+os.path.join(gral_output_path, 'GDE_tiles', disagg_method, gde_tile_filename)+': The order of SERA classes is incorrect.' + print(error_str) + ppl_dataset= gde_fle[occu_case][country_adminID]['GDE_bdgs'][osm_id].create_dataset('SERA_Ppl_per_Bdg', arr_ppl_per_bdg.shape, dtype='float64') + ppl_dataset[:]= arr_ppl_per_bdg + cost_dataset= gde_fle[occu_case][country_adminID]['GDE_bdgs'][osm_id].create_dataset('SERA_Cost_per_Bdg', arr_cost_per_bdg.shape, dtype='float64') + cost_dataset[:]= arr_cost_per_bdg + cost_dataset.attrs['Currency']= cost_currency + gde_fle[occu_case][country_adminID]['GDE_bdgs'][osm_id].attrs['Weighted_Av_Ppl']= weighted_average_ppl + gde_fle[occu_case][country_adminID]['GDE_bdgs'][osm_id].attrs['Weighted_Av_Cost']= weighted_average_cost + gde_fle.close() + return error_str + + +def write_LeftOver_to_GDE_tiles(gral_output_path, gde_tile_filename, occu_case, disagg_method, arr_sera_classes, + arr_country_adminIDs, arr_num_bdgs, arr_costs, arr_ppl, cost_currency): + """ + This function is called by GDE_gather_SERA_and_OBM.py to write the summary data on the LeftOver buildings + to the GDE tiles. + + It takes as input the list of country_adminIDs retrieved from SERA (that thus define the LeftOver buildings). + It loops through each of them, creating the sub-group "LeftOver" within them. The country_adminID may exist + from before if there are OBM buildings in it, or may be created by the present function otherwise. Within + the "LeftOver" subgroup it creates four datasets: SERA_classes (array of building classes), SERA_vals (array + of numbers of buildings by building class), SERA_Ppl_per_Bdg (array of number of people per building, for + each of the building classes in SERA_classes), SERA_Cost_per_Bdg (array of cost per building, for each of + the building classes in SERA_classes. If the "Total" subgroup of the occu_case does not exist from before + (i.e. there are no OBM buildings), it creates it. Within the "Total" subgroup of the occu_case it creates + the "LeftOver" sub-group, to which it assigns four attributes: Num_Bdgs_Total, Cost_Total, Num_Ppl_Total + and SERA_classes. + + The code also checks whether there were other country_adminIDs in the file from before that had OBM buildings + but are not named in arr_country_adminIDs (e.g. 'GR'). If there are, it creates the sub-group "LeftOver" in + them and fills it in with zeros (number of buildings, cost, etc). + + gral_output_path/GDE_tiles/disagg_method/gde_tile_filename path assumed. + + gral_output_path: path to general outputs (where the SERA HDF5 files are, for example) + gde_tile_filename: of the kind "GDE_cell_XXXXXXX.hdf5' + occu_case: Res, Com, Ind, Oth + disagg_method: area, gpw_2015_pop, ghs, sat_27f, sat_27f_model + arr_sera_classes: 1D array with list of SERA building classes + arr_country_adminIDs: 1D array with list of country_adminIDs associated with this cell + arr_num_bdgs: 2D array, each row corresponds to an element of arr_sera_classes, + each column to an element of arr_country_adminIDs. It can happen that arr_num_bdgs is + just 1D because there is just one admin unit involved, hence the reshaping step + arr_costs: same dimensions as arr_num_bdgs, content is total cost for the total number of buildings in arr_num_bdgs + arr_ppl: same dimensions as arr_num_bdgs, content is total number of people for the total number of buildings in arr_num_bdgs + cost_currency: string, e.g. "USD" or "EURO" + """ + gde_fle = h5py.File(os.path.join(gral_output_path, 'GDE_tiles', disagg_method, gde_tile_filename), "a") + if occu_case not in list(gde_fle): + gde_fle.create_group(occu_case) + # arr_num_bdgs might be 1D if len(arr_country_adminIDs)==1, so re-shape: + arr_num_bdgs_reshaped= np.reshape(arr_num_bdgs, [arr_num_bdgs.shape[0], len(arr_country_adminIDs)]) + arr_costs_reshaped= np.reshape(arr_costs, [arr_costs.shape[0], len(arr_country_adminIDs)]) + arr_ppl_reshaped= np.reshape(arr_ppl, [arr_ppl.shape[0], len(arr_country_adminIDs)]) + if len(arr_country_adminIDs)>0.5: # it will be zero if occu_case=='Oth' + for j, country_adminID in enumerate(arr_country_adminIDs): + if country_adminID not in list(gde_fle[occu_case]): + gde_fle[occu_case].create_group(country_adminID) + lo_gr= gde_fle[occu_case][country_adminID].create_group('LeftOver') + if arr_num_bdgs_reshaped[:,j].sum()>1E-20: # if there are any leftover buildings in country_adminID + lo_gr.attrs['Num_Bdgs_Total']= arr_num_bdgs_reshaped[:,j].sum() + which= np.where(arr_num_bdgs_reshaped[:,j]>1E-20)[0] # consider only the classes that have buildings + lo_gr.attrs['SERA_classes']= 'True' + dt=h5py.special_dtype(vlen=str) # examples on using special_dtype: https://www.programcreek.com/python/example/105243/h5py.special_dtype + classes_data= lo_gr.create_dataset('SERA_classes', arr_num_bdgs_reshaped[which,j].shape,dtype=dt) + classes_data[:]= arr_sera_classes[which] + vals_data= lo_gr.create_dataset('SERA_vals', arr_num_bdgs_reshaped[which,j].shape,dtype='float64') + vals_data[:]= arr_num_bdgs_reshaped[which,j] + ppl_data= lo_gr.create_dataset('SERA_Ppl_per_Bdg', arr_num_bdgs_reshaped[which,j].shape,dtype='float64') + ppl_data[:]= arr_ppl_reshaped[which,j] / arr_num_bdgs_reshaped[which,j] + cost_data= lo_gr.create_dataset('SERA_Cost_per_Bdg', arr_num_bdgs_reshaped[which,j].shape,dtype='float64') + cost_data[:]= arr_costs_reshaped[which,j] / arr_num_bdgs_reshaped[which,j] + cost_data.attrs['Currency']= cost_currency + lo_gr.attrs['Num_Ppl_Total']= (ppl_data[:] * vals_data[:]).sum() + lo_gr.attrs['Cost_Total']= (cost_data[:] * vals_data[:]).sum() + else: + lo_gr.attrs['Num_Bdgs_Total']= 0.0 + lo_gr.attrs['SERA_classes']= 'False' + lo_gr.attrs['Num_Ppl_Total']= 0.0 + lo_gr.attrs['Cost_Total']= 0.0 + else: # i.e. arr_country_adminIDs is an empty list, as in the case of occu_case=='Oth' + for j, country_adminID in enumerate(list(gde_fle[occu_case])): + if country_adminID!='Total': + lo_gr= gde_fle[occu_case][country_adminID].create_group('LeftOver') + lo_gr.attrs['Num_Bdgs_Total']= 0.0 + lo_gr.attrs['SERA_classes']= 'False' + lo_gr.attrs['Num_Ppl_Total']= 0.0 + lo_gr.attrs['Cost_Total']= 0.0 + # Need to check if there were other country_adminIDs in the file from before that had OBM buildings but not named in arr_country_adminIDs (e.g. 'GR'): + for j, country_adminID in enumerate(list(gde_fle[occu_case])): + if 'LeftOver' not in list(gde_fle[occu_case][country_adminID]) and country_adminID!='Total': + lo_gr= gde_fle[occu_case][country_adminID].create_group('LeftOver') + lo_gr.attrs['Num_Bdgs_Total']= 0.0 + lo_gr.attrs['SERA_classes']= 'False' + lo_gr.attrs['Num_Ppl_Total']= 0.0 + lo_gr.attrs['Cost_Total']= 0.0 + if 'Total' not in list(gde_fle[occu_case]): + gde_fle[occu_case].create_group('Total') + lo_gr= gde_fle[occu_case]['Total'].create_group('LeftOver') + if arr_num_bdgs_reshaped.sum()>1E-20: # if there are any leftover buildings in the cell + lo_gr.attrs['Num_Bdgs_Total']= arr_num_bdgs_reshaped.sum() + lo_gr.attrs['SERA_classes']= 'True' + lo_gr.attrs['Num_Ppl_Total']= arr_ppl_reshaped.sum() + lo_gr.attrs['Cost_Total']= arr_costs_reshaped.sum() + dt=h5py.special_dtype(vlen=str) # examples on using special_dtype: https://www.programcreek.com/python/example/105243/h5py.special_dtype + classes_data= lo_gr.create_dataset('SERA_classes', arr_num_bdgs_reshaped.sum(axis=1).shape,dtype=dt) + classes_data[:]= arr_sera_classes + vals_data= lo_gr.create_dataset('SERA_vals', arr_num_bdgs_reshaped.sum(axis=1).shape,dtype='float64') + vals_data[:]= arr_num_bdgs_reshaped.sum(axis=1) + else: + lo_gr.attrs['Num_Bdgs_Total']= 0.0 + lo_gr.attrs['SERA_classes']= 'False' + lo_gr.attrs['Num_Ppl_Total']= 0.0 + lo_gr.attrs['Cost_Total']= 0.0 + gde_fle.close() + + +def wrap_up_totals_in_GDE_tiles(gral_output_path, gde_tile_filename, disagg_method): + """ + This function is called by GDE_gather_SERA_and_OBM.py for a specific cell, and loops through all occupancies + present in gde_tile_filename. Then it loops through all country_adminIDs within each occupancy case. If + the country_adminID does not have a "GDE_bdgs" sub-group, it creates it and assigns zero to all its attributes + (number of buildings, people, cost). Otherwise it goes building by building, calculating the cost and total + number of people as a weighted average of the cost/building and people/building of each building class + associated with the OBM building, multiplied by the probability of the building belonging to that class. These + values become the Weighted_Av_Cost and Weighted_Av_People attributes of the OSM_ID. It then adds up these costs + and number of people for all buildings in the country_adminID and writes them as attributes (Weighted_Av_Cost, + Weighted_Av_People) of "GDE_bdgs". Once it is done, it loops again through all country_adminIDs calculating + the total number of buildings, cost and number of people for the country_adminID, i.e. it sums up the values + from GDE_buildings and LeftOver. This is done for the "Total" (i.e. all country_adminIDs together) too. + + gral_output_path/GDE_tiles/disagg_method/gde_tile_filename path assumed. + + gral_output_path: path to general outputs (where the SERA HDF5 files are, for example) + gde_tile_filename: of the kind "GDE_cell_XXXXXXX.hdf5' + disagg_method: area, gpw_2015_pop, ghs, sat_27f, sat_27f_model + """ + gde_fle= h5py.File(os.path.join(gral_output_path, 'GDE_tiles', disagg_method, gde_tile_filename), "a") + for occupancy in list(gde_fle): + sum_weighted_av_ppl_whole_cell= 0.0 + sum_weighted_av_cost_whole_cell= 0.0 + for country_adminID in list(gde_fle[occupancy]): + # Totals associated with the GDE buildings for each country_adminID, excluding the "Total" of the cell (i.e. all admin IDs summed up): + if country_adminID!='Total': + if 'GDE_bdgs' not in list(gde_fle[occupancy][country_adminID]): + gde_fle[occupancy][country_adminID].create_group('GDE_bdgs') + gde_fle[occupancy][country_adminID]['GDE_bdgs'].attrs['Num_Bdgs_Total']= 0 + gde_fle[occupancy][country_adminID]['GDE_bdgs'].attrs['Num_Bdgs_with_Class']= 0 + gde_fle[occupancy][country_adminID]['GDE_bdgs'].attrs['Weighted_Av_Ppl']= 0.0 + gde_fle[occupancy][country_adminID]['GDE_bdgs'].attrs['Weighted_Av_Cost']= 0.0 + else: + sum_weighted_av_ppl= 0.0 + sum_weighted_av_cost= 0.0 + for osm_id in list(gde_fle[occupancy][country_adminID]['GDE_bdgs']): + if gde_fle[occupancy][country_adminID]['GDE_bdgs'][osm_id].attrs['SERA_classes']=='True': # only buildings that can be assigned SERA classes have cost and people estimates + sum_weighted_av_ppl= sum_weighted_av_ppl + gde_fle[occupancy][country_adminID]['GDE_bdgs'][osm_id].attrs['Weighted_Av_Ppl'] + sum_weighted_av_cost= sum_weighted_av_cost + gde_fle[occupancy][country_adminID]['GDE_bdgs'][osm_id].attrs['Weighted_Av_Cost'] + gde_fle[occupancy][country_adminID]['GDE_bdgs'].attrs['Weighted_Av_Ppl']= sum_weighted_av_ppl + gde_fle[occupancy][country_adminID]['GDE_bdgs'].attrs['Weighted_Av_Cost']= sum_weighted_av_cost + # Note: If there are GDE_bdgs then the total number of buildings have already been calculated by write_OBM_to_GDE_tiles_initialisation() + sum_weighted_av_ppl_whole_cell= sum_weighted_av_ppl_whole_cell + gde_fle[occupancy][country_adminID]['GDE_bdgs'].attrs['Weighted_Av_Ppl'] + sum_weighted_av_cost_whole_cell= sum_weighted_av_cost_whole_cell + gde_fle[occupancy][country_adminID]['GDE_bdgs'].attrs['Weighted_Av_Cost'] + # Totals associated with the GDE buildings for the "Total" of the cell (i.e. all admin IDs summed up): + gde_fle[occupancy]['Total']['GDE_bdgs'].attrs['Weighted_Av_Ppl']= sum_weighted_av_ppl_whole_cell + gde_fle[occupancy]['Total']['GDE_bdgs'].attrs['Weighted_Av_Cost']= sum_weighted_av_cost_whole_cell + # Totals associated with the GDE and LeftOver buildings all together, for each country_adminID including the total of the cell: + for country_adminID in list(gde_fle[occupancy]): + gde_fle[occupancy][country_adminID].attrs['Num_Bdgs_Total']= float(gde_fle[occupancy][country_adminID]['GDE_bdgs'].attrs['Num_Bdgs_Total']) + gde_fle[occupancy][country_adminID]['LeftOver'].attrs['Num_Bdgs_Total'] + gde_fle[occupancy][country_adminID].attrs['Num_Bdgs_with_Class']= float(gde_fle[occupancy][country_adminID]['GDE_bdgs'].attrs['Num_Bdgs_with_Class']) + gde_fle[occupancy][country_adminID]['LeftOver'].attrs['Num_Bdgs_Total'] + gde_fle[occupancy][country_adminID].attrs['Num_Ppl_Total']= gde_fle[occupancy][country_adminID]['GDE_bdgs'].attrs['Weighted_Av_Ppl'] + gde_fle[occupancy][country_adminID]['LeftOver'].attrs['Num_Ppl_Total'] + gde_fle[occupancy][country_adminID].attrs['Cost_Total']= gde_fle[occupancy][country_adminID]['GDE_bdgs'].attrs['Weighted_Av_Cost'] + gde_fle[occupancy][country_adminID]['LeftOver'].attrs['Cost_Total'] + gde_fle.close() + + +def write_completness_to_GDE_tiles(gral_output_path, gde_tile_filename, disagg_method, completeness_value): + """ + This code simply writes the value of completeness indicated in completeness_value as an attribute of the cell. + The specific cell is implicit in gde_tile_filename. + + gral_output_path: path to general outputs (where the SERA HDF5 files are, for example) + gde_tile_filename: of the kind "GDE_cell_XXXXXXX.hdf5' + disagg_method: area, gpw_2015_pop, ghs, sat_27f, sat_27f_model + completeness_value: integer + """ + gde_fle= h5py.File(os.path.join(gral_output_path, 'GDE_tiles', disagg_method, gde_tile_filename), "a") + gde_fle.attrs['Completeness']= completeness_value + gde_fle.close() + + + + \ No newline at end of file diff --git a/GDE_TOOLS_read_config_file.py b/GDE_TOOLS_read_config_file.py index dee3b92..d36902a 100644 --- a/GDE_TOOLS_read_config_file.py +++ b/GDE_TOOLS_read_config_file.py @@ -242,6 +242,25 @@ def check_parameters(config, section_name): raise IOError('ERROR!! countries PARAMETER MISSING FROM CONFIG FILE!!') check_of_sera_disaggregation_to_consider_parameter(config, section_name) _= check_of_occupancy_cases_parameter(config, section_name) + elif section_name=='GDE_check_tiles_vs_visual_CSVs': + _= check_of_occupancy_cases_parameter(config, section_name) + if not config.has_option('GDE_check_tiles_vs_visual_CSVs', 'path_GDE_tiles'): + raise IOError('ERROR!! path_GDE_tiles PARAMETER MISSING FROM CONFIG FILE!!') + if not config.has_option('GDE_check_tiles_vs_visual_CSVs', 'path_visual_csv'): + raise IOError('ERROR!! path_visual_csv PARAMETER MISSING FROM CONFIG FILE!!') + else: + if config['GDE_check_tiles_vs_visual_CSVs']['path_visual_csv'].split('.')[-1]!='csv': + raise IOError('ERROR!! path_visual_csv IN CONFIG FILE IS MEANT TO BE A .csv FILE!!') + if not config.has_option('GDE_check_tiles_vs_visual_CSVs', 'decimal_places_gral'): + raise IOError('ERROR!! decimal_places_gral PARAMETER MISSING FROM CONFIG FILE!!') + else: + if not config[section_name]['decimal_places_gral'].isnumeric(): + raise IOError('ERROR!! decimal_places_gral NEEDS TO BE A POSITIVE INTEGER!!') + if not config.has_option('GDE_check_tiles_vs_visual_CSVs', 'decimal_places_costs'): + raise IOError('ERROR!! decimal_places_costs PARAMETER MISSING FROM CONFIG FILE!!') + else: + if not config[section_name]['decimal_places_costs'].isnumeric(): + raise IOError('ERROR!! decimal_places_costs NEEDS TO BE A POSITIVE INTEGER!!') else: raise IOError('ERROR IN check_parameters(): '+section_name+' NOT RECOGNISED!!') diff --git a/GDE_check_tiles_vs_visual_CSVs.py b/GDE_check_tiles_vs_visual_CSVs.py new file mode 100644 index 0000000..a150edb --- /dev/null +++ b/GDE_check_tiles_vs_visual_CSVs.py @@ -0,0 +1,160 @@ +""" +Copyright (C) 2020 +Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . + + +Global Dynamic Exposure Model +Helmholtz Centre Potsdam +GFZ German Research Centre for Geosciences +Section 2.6: Seismic Hazard and Risk Dynamics + +GDE_check_tiles_vs_visual_CSVs +============================== +This code reads the visual CSV output by cell and the corresponding GDE tiles HDF5 files +and compares the number of buildings, cost and number of people in each cell according +to each of the two. An output CSV file collects the discrepancies found, if any. + +Note that the code goes through each cell ID in the CSV output and attemps to open the +corresponding HDF5 file. No check is carried out to see if there is any HDF5 file wi +""" + +import datetime +import os +import sys +import h5py +import pandas as pd +import numpy as np +import GDE_TOOLS_read_config_file as gdet_conf + + +def run_this_file(config_dict): + #################################################### + # READ CONFIGURATION PARAMETERS + #################################################### + print('Processing configuration parameters...') + # Path for output: + out_path= config_dict['File Paths']['out_path'] + # Path to the GDE tiles HDF5 files to consider: + path_GDE_tiles = config_dict['GDE_check_tiles_vs_visual_CSVs']['path_GDE_tiles'] + # Path to the by-cell visual output CSV file to consider: + path_visual_csv = config_dict['GDE_check_tiles_vs_visual_CSVs']['path_visual_csv'] + # Occupancy cases and subclassifications to consider: + occupancy_cases= config_dict['GDE_check_tiles_vs_visual_CSVs']['occupancy_cases'].split(', ') + # Decimal places to use for the comparison + decimal_places_gral= int(config_dict['GDE_check_tiles_vs_visual_CSVs']['decimal_places_gral']) + decimal_places_costs= int(config_dict['GDE_check_tiles_vs_visual_CSVs']['decimal_places_costs']) + #################################################### + # START + #################################################### + # String with present time to be used for naming the output TXT file: + now= datetime.datetime.now() + now_str= str(now.year)+'_'+str(now.month).zfill(2)+'_'+str(now.day).zfill(2)+'_'+str(now.hour).zfill(2)+'_'+str(now.minute).zfill(2)+'_'+str(now.second).zfill(2) + # Read IDs of all cells for which there are HDF5 files in the directory path_GDE_tiles: + list_GDE_tiles= [] + for file in os.listdir(path_GDE_tiles): + if file.endswith(".hdf5"): + list_GDE_tiles.append(file.split('.')[0].split('_')[-1]) + problems_found= [] + # Load the visual CSV file: + visual_cells= pd.read_csv(path_visual_csv, sep=';') + for i, cell_id in enumerate(visual_cells['cell_id'].values): + print('\r Working on cell ID '+str(cell_id)+'. Cell '+str(i+1)+' of '+str(visual_cells.shape[0])+'.', end='') + # Open the GDE tile HDF5 file associated with this cell: + try: + fle= h5py.File(os.path.join(path_GDE_tiles, 'GDE_cell_'+str(cell_id).zfill(10)+'.hdf5'), "r") + file_found= True + except: + file_found= False + if file_found: + # Remove this ID from the list of GDE tiles: + list_GDE_tiles.remove(str(cell_id)) + # Check completeness value: + if fle.attrs['Completeness']!=visual_cells['completeness'].values[i]: + print('PROBLEM!!!') + # Check totals of GDE buildings, LeftOver buildings and total buildings: + for occupancy in occupancy_cases: + # Check numbers of buildings (GDE buildings compared as integers because they are): + if round(visual_cells['number_'+occupancy+'_OBM_with_classes'].values[i], 0)!=round(fle[occupancy]['Total']['GDE_bdgs'].attrs['Num_Bdgs_with_Class'], 0): + problems_found.append('Cell '+str(cell_id)+', '+occupancy+': numbers of GDE buildings with classes do not match.') + if round(visual_cells['number_'+occupancy+'_OBM'].values[i], 0)!=round(fle[occupancy]['Total']['GDE_bdgs'].attrs['Num_Bdgs_Total'], 0): + problems_found.append('Cell '+str(cell_id)+', '+occupancy+': numbers of total GDE buildings do not match.') + if round(visual_cells['number_'+occupancy+'_LeftOver'].values[i], decimal_places_gral)!=round(fle[occupancy]['Total']['LeftOver'].attrs['Num_Bdgs_Total'], decimal_places_gral): + problems_found.append('Cell '+str(cell_id)+', '+occupancy+': numbers of LeftOver buildings do not match.') + if round(visual_cells['number_'+occupancy+'_Total'].values[i], decimal_places_gral)!=round(fle[occupancy]['Total'].attrs['Num_Bdgs_Total'], decimal_places_gral): + problems_found.append('Cell '+str(cell_id)+', '+occupancy+': numbers of total (GDE+LeftOver) buildings do not match.') + if occupancy!='Oth': + # Check cost: + if round(visual_cells['structural_'+occupancy+'_OBM_with_classes'].values[i], decimal_places_costs)!=round(fle[occupancy]['Total']['GDE_bdgs'].attrs['Weighted_Av_Cost'], decimal_places_costs): + problems_found.append('Cell '+str(cell_id)+', '+occupancy+': costs of GDE buildings with classes do not match.') + if round(visual_cells['structural_'+occupancy+'_LeftOver'].values[i], decimal_places_costs)!=round(fle[occupancy]['Total']['LeftOver'].attrs['Cost_Total'], decimal_places_costs): + problems_found.append('Cell '+str(cell_id)+', '+occupancy+': costs of LeftOver buildings do not match.') + if round(visual_cells['structural_'+occupancy+'_Total'].values[i], decimal_places_costs)!=round(fle[occupancy]['Total'].attrs['Cost_Total'], decimal_places_costs): + problems_found.append('Cell '+str(cell_id)+', '+occupancy+': costs of total (GDE+LeftOver) buildings do not match.') + # Check people: + if round(visual_cells['night_'+occupancy+'_OBM_with_classes'].values[i], decimal_places_gral)!=round(fle[occupancy]['Total']['GDE_bdgs'].attrs['Weighted_Av_Ppl'], decimal_places_gral): + problems_found.append('Cell '+str(cell_id)+', '+occupancy+': numbers of people of GDE buildings with classes do not match.') + if round(visual_cells['night_'+occupancy+'_LeftOver'].values[i], decimal_places_gral)!=round(fle[occupancy]['Total']['LeftOver'].attrs['Num_Ppl_Total'], decimal_places_gral): + problems_found.append('Cell '+str(cell_id)+', '+occupancy+': number of people of LeftOver buildings do not match.') + if round(visual_cells['night_'+occupancy+'_Total'].values[i], decimal_places_gral)!=round(fle[occupancy]['Total'].attrs['Num_Ppl_Total'], decimal_places_gral): + problems_found.append('Cell '+str(cell_id)+', '+occupancy+': numbers of people of total (GDE+LeftOver) buildings do not match.') + fle.close() + else: + print('\n') + print(' ERROR: GDE tile HDF5 file for cell ID '+str(cell_id)+' not found!! Moving on to next cell...') + problems_found.append('Cell '+str(cell_id)+': GDE tile HDF5 file NOT FOUND.') + if len(list_GDE_tiles)>0: + print('\n') + print(' WARNING: There are GDE tile HDF5 files in the provided path that are not in the CSV file!!') + problems_found.append('WARNING: There are GDE tile HDF5 files in the provided path that are not in the CSV file!! These are: '+', '.join(list_GDE_tiles)) + print('\n') + out_txt= open(os.path.join(out_path, 'GDE_check_tiles_vs_visual_CSVs_'+now_str+'.txt'), 'w') + out_txt.write('Path to the GDE tiles analysed:\n') + out_txt.write(path_GDE_tiles+'\n') + out_txt.write('\n') + out_txt.write('Path to the by-cell visual output CSV file analysed:\n') + out_txt.write(path_visual_csv+'\n') + out_txt.write('\n') + out_txt.write('Number of decimal places used for the comparison (number of buildings and people):\n') + out_txt.write(str(decimal_places_gral)+'\n') + out_txt.write('\n') + out_txt.write('Number of decimal places used for the comparison (costs):\n') + out_txt.write(str(decimal_places_costs)+'\n') + out_txt.write('\n') + out_txt.write('================================================================\n') + out_txt.write('\n') + if len(problems_found)==0: + out_txt.write('No discrepancies found between the two kinds of output. All OK according to this check!\n') + print('No discrepancies found between the two kinds of output. All OK according to this check!') + else: + print('DISCREPANCIES FOUND BETWEEN THE TWO KINDS OF OUTPUT!! ERROR!!') + print(' Find the errors found listed in '+os.path.join(out_path, 'GDE_check_tiles_vs_visual_CSVs_'+now_str+'.txt (being stored now...)')) + out_txt.write('The following discrepancies were found between the two kinds of output. ERRORS FOUND!!:\n') + for error_message in problems_found: + out_txt.write(error_message+'\n') + out_txt.close() + print('\n') + print('Done!') + + + +if __name__=='__main__': + # This code needs to be run from the command line as python3 namefile.py configfile.ini + config_filename= sys.argv[1] # sys.argv retrieves all the commands entered in the command line; position [0] is this code, position [1] will be the config file name + section_names_to_validate= ['File Paths', os.path.basename(__file__).split('.')[0]] + config_dict= gdet_conf.read_config_parameters(os.path.join(os.getcwd(), config_filename), section_names_to_validate) + run_this_file(config_dict) + + \ No newline at end of file diff --git a/GDE_config_file_TEMPLATE.ini b/GDE_config_file_TEMPLATE.ini index d699fab..bb5a4c2 100644 --- a/GDE_config_file_TEMPLATE.ini +++ b/GDE_config_file_TEMPLATE.ini @@ -263,5 +263,14 @@ sera_disaggregation_to_consider = sat_27f # Occupancy cases to consider: occupancy_cases = Res, Com, Ind - +[GDE_check_tiles_vs_visual_CSVs] +# Path to the GDE tiles HDF5 files to consider (directory): +path_GDE_tiles = WRITE_PATH +# Path to the by-cell visual output CSV file to consider (file): +path_visual_csv = WRITE_PATH_INCLUDING_FILENAME +# Occupancy cases to consider: +occupancy_cases = Res, Com, Ind, Oth +# Decimal places tolerance (costs tend to have discrepancies, use less decimal places): +decimal_places_gral = 4 +decimal_places_costs = 0 diff --git a/GDE_gather_SERA_and_OBM.py b/GDE_gather_SERA_and_OBM.py index fce7a75..9fb2ceb 100644 --- a/GDE_gather_SERA_and_OBM.py +++ b/GDE_gather_SERA_and_OBM.py @@ -127,7 +127,7 @@ def run_this_file(config_dict): bdg_classes_hdf5_filename= os.path.join(out_path,'Europe_SERA_bdg_classes_'+case+'.hdf5') else: bdg_classes_hdf5_filename= '' - obm_dict, obm_country_adminIDs, obm_without_classes_num_bdgs_per_adm_unit, obm_country_adminIDs_levels, error_message= gdet_obmh.retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5_filename, sera_shp_path, os.path.join(out_path, 'Europe_SERA_metadata_v_'+version_of_SERA_metadata+'.hdf5'), case, admin_unit_type= 'all', disagg_method= sera_disaggregation_to_consider) + obm_dict, obm_country_adminIDs, obm_without_classes_num_bdgs_per_adm_unit, obm_country_adminIDs_levels, error_message= gdet_obmh.retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5_filename, sera_shp_path, os.path.join(out_path, 'Europe_SERA_metadata_v_'+version_of_SERA_metadata+'.hdf5'), out_path, 'GDE_cell_'+str(grid_cell_id).zfill(10)+'.hdf5', case, admin_unit_type= 'all', disagg_method= sera_disaggregation_to_consider) if error_message!='': print(error_message) log.append(error_message) @@ -257,8 +257,12 @@ def run_this_file(config_dict): cell_summary_dict= gdet_gral.fill_in_cell_summary_dictionary_with_SERA_LeftOver_or_Total_bdgs_dwells_ppl(cell_summary_dict, case, 'Total', total_num_bdgs, total_num_dwells, total_num_ppl, total_cost) admin_summary_dict= gdet_gral.fill_in_admin_summary_dictionary_with_SERA_LeftOver_or_Total_bdgs_dwells_ppl(admin_summary_dict, case, 'LeftOver', sera_country_adminIDs, sera_country_adminIDs_levels, leftover_num_bdgs_per_class_and_adm_unit, leftover_num_dwells_per_class_and_adm_unit, leftover_num_ppl_per_class_and_adm_unit, leftover_cost_per_class_and_adm_unit, sera_shp_path, os.path.join(out_path, 'Europe_SERA_metadata_v_'+version_of_SERA_metadata+'.hdf5'), input_kind='per_class_and_adm_unit') admin_summary_dict= gdet_gral.fill_in_admin_summary_dictionary_with_SERA_LeftOver_or_Total_bdgs_dwells_ppl(admin_summary_dict, case, 'Total', total_list_country_adminIDs, total_list_country_adminIDs_levels, total_num_bdgs_all_per_adm_unit, total_num_dwells_per_adm_unit, total_num_ppl_per_adm_unit, total_cost_per_adm_unit, sera_shp_path, os.path.join(out_path, 'Europe_SERA_metadata_v_'+version_of_SERA_metadata+'.hdf5'), input_kind='per_adm_unit') + gdet_gral.write_LeftOver_to_GDE_tiles(out_path, 'GDE_cell_'+str(grid_cell_id).zfill(10)+'.hdf5', case, sera_disaggregation_to_consider, sera_classes, sera_country_adminIDs, leftover_num_bdgs_per_class_and_adm_unit, leftover_cost_per_class_and_adm_unit, leftover_num_ppl_per_class_and_adm_unit, 'USD') + gdet_gral.wrap_up_totals_in_GDE_tiles(out_path, 'GDE_cell_'+str(grid_cell_id).zfill(10)+'.hdf5', sera_disaggregation_to_consider) # Generate visual output: gdet_gral.write_visual_output_GDE_by_cell(grid_cell_id, cell_summary_dict, os.path.join(out_path, 'export_grids_QGIS', 'GDE_visual_'+out_name+'.csv')) + # Store completeness value of cell in GDE tile: + gdet_gral.write_completness_to_GDE_tiles(out_path, 'GDE_cell_'+str(grid_cell_id).zfill(10)+'.hdf5', sera_disaggregation_to_consider, compl_cells[i]) gdet_gral.write_visual_output_GDE_by_admin_unit(admin_summary_dict, os.path.join(out_path, 'export_grids_QGIS', 'GDE_visual_'+out_name+'_by_admin_units.csv')) print('\n') print('Done!') -- GitLab From 26bd7923acca981af4738a77665333c4ea785e4d Mon Sep 17 00:00:00 2001 From: Cecilia Nievas Date: Tue, 22 Sep 2020 13:57:44 +0200 Subject: [PATCH 3/4] New GDE feature: updated documentation --- GDE_check_tiles_vs_visual_CSVs.py | 7 +++++-- docs/02_Execution.md | 8 ++++++-- docs/guide_to_use_of_config_file.csv | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/GDE_check_tiles_vs_visual_CSVs.py b/GDE_check_tiles_vs_visual_CSVs.py index a150edb..bed0450 100644 --- a/GDE_check_tiles_vs_visual_CSVs.py +++ b/GDE_check_tiles_vs_visual_CSVs.py @@ -27,8 +27,11 @@ This code reads the visual CSV output by cell and the corresponding GDE tiles HD and compares the number of buildings, cost and number of people in each cell according to each of the two. An output CSV file collects the discrepancies found, if any. -Note that the code goes through each cell ID in the CSV output and attemps to open the -corresponding HDF5 file. No check is carried out to see if there is any HDF5 file wi +The code goes through each cell ID in the CSV output and attemps to open the +corresponding HDF5 file. It outputs an error if the HDF5 file is not found. The code also +reads the list of all HDF5 files in the indicated directory and checks if there are any +HDF5 files that are not in the CSV file, writing a warning to the output file if that +is the case. """ import datetime diff --git a/docs/02_Execution.md b/docs/02_Execution.md index 7995666..702849f 100644 --- a/docs/02_Execution.md +++ b/docs/02_Execution.md @@ -24,8 +24,11 @@ The order in which the scripts in the present repository need to be run to produ 8. If the OpenQuake input files for the SERA model distributed onto a grid are desired (i.e. not GDE, just SERA), run `SERA_create_OQ_input_files.py` with the desired distribution method. 9. If a CSV summarising the number of buildings, dwellings, people and costs by cell according to the SERA model is desired (i.e. not GDE, just SERA), run `SERA_create_visual_output_of_grid_model_full_files.py` with the desired distribution method. 10. Run `OBM_buildings_per_cell.py` with the desired distribution method. -11. Run `GDE_gather_SERA_and_OBM.py` with the desired distribution method. The output is a series of CSV files that serve as input for damage/risk calculations to be run in OpenQuake (https://github.com/gem/oq-engine), a CSV file that summarises results per cell and contains the geometry of the cells so that it can all be visualised with a GIS, and a CSV file that summarises results per adminstrative unit and contains the geometry of the administrative boundaries so that it can all be visualised with a GIS. - +11. Run `GDE_gather_SERA_and_OBM.py` with the desired distribution method. The output is: + a. a series of CSV files that serve as input for damage/risk calculations to be run in OpenQuake (https://github.com/gem/oq-engine); + b. a CSV file that summarises results per cell and contains the geometry of the cells so that it can all be visualised with a GIS; + c. a CSV file that summarises results per adminstrative unit and contains the geometry of the administrative boundaries so that it can all be visualised with a GIS; + d. a series of HDF5 files (one per cell) that contain the final GDE model. ## Testing Scripts @@ -41,6 +44,7 @@ The order in which the scripts in the present repository need to be run to produ - The script `GDE_check_OQ_input_files.py` can be run after step 11 above. It prints to screen some summary values of the files and checks that the asset ID values are all unique. +- The script `GDE_check_tiles_vs_visual_CSVs.py` can be run after step 11 above. It reads the visual CSV output by cell and the corresponding GDE tiles HDF5 files and compares the number of buildings, cost and number of people in each cell according to each of the two. An output CSV file collects the discrepancies found, if any. ## Other Scripts diff --git a/docs/guide_to_use_of_config_file.csv b/docs/guide_to_use_of_config_file.csv index d5e2036..e079455 100644 --- a/docs/guide_to_use_of_config_file.csv +++ b/docs/guide_to_use_of_config_file.csv @@ -26,3 +26,4 @@ SERA_testing_mapping_admin_units_to_cells_qualitycontrol,Y,Y,N,Y,N,N,N,N SERA_testing_rebuilding_exposure_from_cells_alternative_01,Y,Y,N,Y,N,N,N,N SERA_testing_rebuilding_exposure_from_cells_alternative_02,Y,Y,N,N,N,N,N,N SERA_testing_rebuilding_exposure_from_cells_alternative_03,Y,Y,N,N,N,N,N,N +GDE_check_tiles_vs_visual_CSVs,Y,Y,N,N,N,N,N,N -- GitLab From 58034bff16e0600a2ab16e9131784100f9568a7d Mon Sep 17 00:00:00 2001 From: Cecilia Nievas Date: Tue, 22 Sep 2020 16:38:46 +0200 Subject: [PATCH 4/4] Change format of sublist in docs/02_Execution.md --- docs/02_Execution.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/02_Execution.md b/docs/02_Execution.md index 702849f..dd670dd 100644 --- a/docs/02_Execution.md +++ b/docs/02_Execution.md @@ -25,10 +25,10 @@ The order in which the scripts in the present repository need to be run to produ 9. If a CSV summarising the number of buildings, dwellings, people and costs by cell according to the SERA model is desired (i.e. not GDE, just SERA), run `SERA_create_visual_output_of_grid_model_full_files.py` with the desired distribution method. 10. Run `OBM_buildings_per_cell.py` with the desired distribution method. 11. Run `GDE_gather_SERA_and_OBM.py` with the desired distribution method. The output is: - a. a series of CSV files that serve as input for damage/risk calculations to be run in OpenQuake (https://github.com/gem/oq-engine); - b. a CSV file that summarises results per cell and contains the geometry of the cells so that it can all be visualised with a GIS; - c. a CSV file that summarises results per adminstrative unit and contains the geometry of the administrative boundaries so that it can all be visualised with a GIS; - d. a series of HDF5 files (one per cell) that contain the final GDE model. + - a series of CSV files that serve as input for damage/risk calculations to be run in OpenQuake (https://github.com/gem/oq-engine); + - a CSV file that summarises results per cell and contains the geometry of the cells so that it can all be visualised with a GIS; + - a CSV file that summarises results per adminstrative unit and contains the geometry of the administrative boundaries so that it can all be visualised with a GIS; + - a series of HDF5 files (one per cell) that contain the final GDE model. ## Testing Scripts -- GitLab