# GDE_check_consistency.py
"""
Copyright (C) 2020
Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.


Global Dynamic Exposure Model
Helmholtz Centre Potsdam
GFZ German Research Centre for Geosciences
Section 2.6: Seismic Hazard and Risk Dynamics

GDE_check_consistency
=====================

See docs/05_Testing_Scripts.md.
"""
import sys
import os
import numpy as np
import pandas as pd
import GDE_TOOLS_psql as gdet_psql
import GDE_TOOLS_read_SERA as gdet_sera
import GDE_TOOLS_read_config_file as gdet_conf


def _fmt2(value):
    """Return ``value`` formatted with two decimals (the format used in the CSV outputs)."""
    return '{:.2f}'.format(value)


def _report(line, out_file):
    """Print ``line`` to stdout and write it, newline-terminated, to ``out_file``."""
    print(line)
    out_file.write(line+'\n')


def _results_folder_name(criterion, dictionary_by_crit):
    """Return the last '/'-separated component of the results path stored for ``criterion``.

    Returns 'UNKNOWN' when ``criterion`` is not a key of ``dictionary_by_crit``.
    """
    if criterion in dictionary_by_crit:
        return dictionary_by_crit[criterion]['path'].split('/')[-1]
    return 'UNKNOWN'


def _row_as_floats(data_frame, row_position):
    """Return row ``row_position`` of ``data_frame`` as a float array, skipping the
    first two columns and the last column (labels and file path in the summary CSVs).

    Replaces the former ``Series.as_matrix()`` calls, which no longer exist in
    pandas >= 1.0.
    """
    return data_frame.iloc[row_position, 2:-1].values.astype(float)


def run_this_file(config_dict):
    """Run the consistency checks on the GDE outputs and write the check reports.

    Parameters
    ----------
    config_dict : dict-like
        Configuration parameters. The sections read here are 'File Paths'
        (out_path, sera_models_path), 'OBM Database' (db_obm_username,
        db_obm_name, db_obm_schema_name, db_obm_table_name),
        'GDE_check_consistency' (location_var), 'Available Results'
        (results_available_for, result_paths; both ', '-separated lists) and
        'Ocuppancy String Groups' (occupancy_cases, occupancy_classifications).

    Outputs
    -------
    All files are written to <out_path>/examining_outputs_GDE:
    check_obm_bdgs.csv, check_sera_admin_<crit>.csv,
    check_sera_<crit>_differences.txt, check_sera_cells.csv,
    check_leftover_total_cells.txt, check_leftover_total.csv,
    check_leftover_total_differences.txt, check_obm_leftover_visual_vs_OQ.csv
    and check_obm_leftover_visual_vs_OQ_differences.txt. The same findings are
    printed to stdout. Nothing is returned.

    Notes
    -----
    Several of the SERA checks are hardcoded for Attica (rows of the SERA
    model whose 'name_2' equals 'Attica').
    """
    ####################################################
    # READ CONFIGURATION PARAMETERS
    ####################################################
    print('Processing configuration parameters...')
    # Path for input and output:
    gde_path = os.path.join(config_dict['File Paths']['out_path'], 'examining_outputs_GDE')
    # OBM database:
    DB_username_bdgs = config_dict['OBM Database']['db_obm_username']
    DB_name_bdgs = config_dict['OBM Database']['db_obm_name']
    DB_schema_name_bdgs = config_dict['OBM Database']['db_obm_schema_name']
    DB_table_name_bdgs = config_dict['OBM Database']['db_obm_table_name']
    # SERA models path:
    sera_models_path = config_dict['File Paths']['sera_models_path']
    # The visual output files have names such as GDE_visual_'+crit+'_'+location_var+'.csv'
    location_var = config_dict['GDE_check_consistency']['location_var']
    # Load dictionary of result paths to be used within the code:
    results_available_for = config_dict['Available Results']['results_available_for'].split(', ')  # eg: area, gpw_2015_pop, ghs, sat_27f, sat_27f_model
    result_paths = config_dict['Available Results']['result_paths'].split(', ')  # each associated with each element of results_available_for
    dict_by_crit = {}
    for m, crit in enumerate(results_available_for):
        dict_by_crit[crit] = {'path': result_paths[m]}
    # Occupancy cases and subclassifications to consider:
    occupancy_cases = config_dict['Ocuppancy String Groups']['occupancy_cases'].split(', ')
    aux_list_classifs = config_dict['Ocuppancy String Groups']['occupancy_classifications'].split(', ')
    occup_keys = {}
    for k, occu_case in enumerate(occupancy_cases):
        # [1:-1] drops the first and last character of each group before splitting it
        occup_keys[occu_case] = aux_list_classifs[k][1:-1].split('; ')
    # Fixed lists used by the checks below:
    criteria = list(dict_by_crit.keys())
    obm_occupancies = ['Res', 'Com', 'Ind', 'Oth']
    main_occupancies = ['Res', 'Com', 'Ind']
    visual_outputs = ['visual_by_admin', 'visual_by_cells']
    # SERA model columns and the matching column prefixes of the GDE visual outputs
    # (same order: buildings, dwellings, people, cost):
    sera_columns = ['buildings', 'dwellings', 'occupants_per_asset', 'total_repl_cost_usd']
    gde_prefixes = ['number_', 'num_dwells_', 'night_', 'structural_']
    ####################################################
    # START
    ####################################################
    # SERA
    admin_levels, _, _, dfs_dict = gdet_sera.get_admin_level_definition(location_var, sera_models_path, output_dfs=True, full_files=True, complete_missing_cols=True)
    sera_occupancies = list(dfs_dict.keys())
    print('\n')
    print('=============')
    print('OBM buildings')
    print('=============')
    obm_num_bdgs = gdet_psql.get_number_entries_in_database_table(DB_name_bdgs, DB_username_bdgs, DB_schema_name_bdgs, DB_table_name_bdgs)
    broad_occ, nums_broad, _, _ = gdet_psql.get_obm_buildings_by_broad_occupancy(DB_name_bdgs, DB_username_bdgs, DB_schema_name_bdgs, DB_table_name_bdgs, occup_keys)
    if obm_num_bdgs!=nums_broad.sum():
        print('\n')
        print('ERROR!! The total number of buildings in the PSQL database does not match the number of buildings per broad occupancy type. CHECK!!')
    print('\n')
    print('Number of OBM buildings in PSQL database (from number of entries): '+'{:,.2f}'.format(obm_num_bdgs))
    print('Number of OBM buildings in PSQL database (from summation of broad occupancy types): '+'{:,.2f}'.format(nums_broad.sum()))
    for i, occ in enumerate(broad_occ):
        print('Number of OBM buildings in PSQL database of broad occupancy '+occ+': '+'{:,.2f}'.format(nums_broad[i]))
    with open(os.path.join(gde_path, 'check_obm_bdgs.csv'), 'w') as out_csv:
        out_csv.write('Criterion,Output,Res,Com,Ind,Oth,Total,File_Path\n')
        # First row: numbers retrieved from the PSQL database
        broad_occ_array = np.array(broad_occ)
        out_list = ['PSQL', 'PSQL']
        for occu in obm_occupancies:
            out_list.append(_fmt2(nums_broad[np.where(broad_occ_array==occu)[0][0]]))
        out_list.append(_fmt2(nums_broad.sum()))
        out_list.append(DB_name_bdgs)
        out_csv.write(','.join(out_list)+'\n')
        # Two rows per criterion: totals from the visual outputs by admin unit and by cell
        for crit in criteria:
            crit_path = dict_by_crit[crit]['path']
            visual_by_admin = pd.read_csv(os.path.join(crit_path, 'GDE_visual_'+crit+'_'+location_var+'_by_admin_units.csv'), sep=';')
            visual_by_cells = pd.read_csv(os.path.join(crit_path, 'GDE_visual_'+crit+'_'+location_var+'.csv'), sep=';')
            # Keep the loaded outputs: all later checks reuse them
            dict_by_crit[crit]['visual_by_admin'] = visual_by_admin
            dict_by_crit[crit]['visual_by_cells'] = visual_by_cells
            sums_by_admin = [visual_by_admin['number_'+occu+'_OBM'].values.sum() for occu in obm_occupancies]
            sums_by_cells = [visual_by_cells['number_'+occu+'_OBM'].values.sum() for occu in obm_occupancies]
            total_obm_from_visual_by_admin = sum(sums_by_admin)
            total_obm_from_visual_by_cell = sum(sums_by_cells)
            print('\n')
            print('Number of OBM buildings in output visual CSV files by admin unit for criterion='+crit+': '+'{:,.2f}'.format(total_obm_from_visual_by_admin))
            print('Number of OBM buildings in output visual CSV files by cell for criterion='+crit+': '+'{:,.2f}'.format(total_obm_from_visual_by_cell))
            out_list = [crit, 'Admin']+[_fmt2(value) for value in sums_by_admin]
            out_list.append(_fmt2(total_obm_from_visual_by_admin))
            out_list.append(crit_path)
            out_csv.write(','.join(out_list)+'\n')
            out_list = [crit, 'Cell']+[_fmt2(value) for value in sums_by_cells]
            out_list.append(_fmt2(total_obm_from_visual_by_cell))
            out_list.append(crit_path)
            out_csv.write(','.join(out_list)+'\n')
    print('\n')
    print('==============')
    print('SERA buildings')
    print('==============')
    print('\n')
    print('Checking SERA buildings in per admin unit output...')
    for crit in criteria:
        print('\n')
        print('Criterion: '+crit)
        crit_path = dict_by_crit[crit]['path']
        visual_by_admin = dict_by_crit[crit]['visual_by_admin']
        sera_admin_csv_path = os.path.join(gde_path, 'check_sera_admin_'+crit+'.csv')
        with open(sera_admin_csv_path, 'w') as out_csv:
            header_list = ['AdminID']
            for occu in sera_occupancies:
                for quantity in ['bdgs', 'dwells', 'ppl', 'cost']:
                    header_list.append(occu+'_SERA_'+quantity)
                    header_list.append(occu+'_GDE_'+quantity)
            header_list.append('File_Path')
            out_csv.write(','.join(header_list)+'\n')
            adm_units = visual_by_admin['country_admin_id'].values
            for j, country_admin_id in enumerate(adm_units):
                country_admin_id_split = country_admin_id.split('_')
                out_list = [country_admin_id]
                for occu in sera_occupancies:
                    if len(country_admin_id_split)>1:
                        sera_ids = dfs_dict[occu]['id_'+str(admin_levels[occu])].values
                        if len(country_admin_id_split)==2:
                            # Only one part after the first '_': it is compared as an integer ID
                            which = np.where(sera_ids==int(country_admin_id_split[1]))[0]
                        else:  # e.g. industrial 30-arcsec cells
                            country_admin_id_split_reconstruct = '_'.join(country_admin_id_split[1:])
                            which = np.where(sera_ids==country_admin_id_split_reconstruct)[0]
                        sera_values = [_fmt2(dfs_dict[occu][column].values[which].sum()) for column in sera_columns]
                    else:
                        # IDs without '_' (e.g. "GR") are not looked up in SERA: SERA values reported as zero
                        sera_values = ['0.00']*len(sera_columns)
                    for sera_value, gde_prefix in zip(sera_values, gde_prefixes):
                        out_list.append(sera_value)
                        out_list.append(_fmt2(visual_by_admin[gde_prefix+occu+'_SERA'].values[j]))
                out_list.append(crit_path)
                out_csv.write(','.join(out_list)+'\n')
        # Open the CSV file as a Pandas DF to check its contents:
        out_csv_as_df = pd.read_csv(sera_admin_csv_path, sep=',')
        with open(os.path.join(gde_path, 'check_sera_'+crit+'_differences.txt'), 'w') as out_txt:
            for occu in sera_occupancies:
                diff_bdgs = np.abs(out_csv_as_df[occu+'_SERA_bdgs'].values - out_csv_as_df[occu+'_GDE_bdgs'].values)
                diff_dwells = np.abs(out_csv_as_df[occu+'_SERA_dwells'].values - out_csv_as_df[occu+'_GDE_dwells'].values)
                diff_ppl = np.abs(out_csv_as_df[occu+'_SERA_ppl'].values - out_csv_as_df[occu+'_GDE_ppl'].values)
                diff_cost = np.abs(out_csv_as_df[occu+'_SERA_cost'].values - out_csv_as_df[occu+'_GDE_cost'].values)
                print('\n')
                _report('      Occupancy: '+occu, out_txt)
                _report('      Maximum difference in number of buildings '+_fmt2(np.max(diff_bdgs)), out_txt)
                _report('      Maximum difference in number of dwellings '+_fmt2(np.max(diff_dwells)), out_txt)
                _report('      Maximum difference in number of people '+_fmt2(np.max(diff_ppl)), out_txt)
                _report('      Maximum difference in cost '+_fmt2(np.max(diff_cost)), out_txt)
                # Number of distinct SERA admin IDs in Attica vs number of rows with non-zero GDE buildings
                which_attica = np.where(dfs_dict[occu]['name_2'].values=='Attica')[0]
                how_many_sera_adm_3s_in_attica = len(np.unique(dfs_dict[occu]['id_'+str(admin_levels[occu])].values[which_attica]))
                how_many_non_zero_gde = len(np.where(out_csv_as_df[occu+'_GDE_bdgs'].values>1E-20)[0])
                if how_many_sera_adm_3s_in_attica==how_many_non_zero_gde:
                    outstr = '      Total number of administrative units is correct ('+_fmt2(how_many_sera_adm_3s_in_attica)+')! (WARNING: CHECK HARDCODED FOR ATTICA)'
                else:
                    outstr = '      The total number of administrative units does not match ('+_fmt2(how_many_sera_adm_3s_in_attica)+')! (WARNING: CHECK HARDCODED FOR ATTICA)'
                _report(outstr, out_txt)
    print('\n')
    print('Checking SERA buildings in per cell output...')
    print('NOTE: THIS CHECK IS HARDCODED FOR ATTICA')
    with open(os.path.join(gde_path, 'check_sera_cells.csv'), 'w') as out_csv:
        out_csv.write('Criterion,Output,Res_bdgs,Res_dwells,Res_ppl,Res_cost,Com_bdgs,Com_dwells,Com_ppl,Com_cost,Ind_bdgs,Ind_dwells,Ind_ppl,Ind_cost,File_Path\n')
        # First row: totals of the SERA model for Attica
        out_list = ['SERA', 'SERA']
        for occu in main_occupancies:
            which_attica = np.where(dfs_dict[occu]['name_2'].values=='Attica')[0]
            for column in sera_columns:
                out_list.append(_fmt2(dfs_dict[occu][column].values[which_attica].sum()))
        out_list.append(sera_models_path)
        out_csv.write(','.join(out_list)+'\n')
        # Two rows per criterion: totals of the SERA columns in the visual outputs
        for crit in criteria:
            for output_label, by_what in [('Admin', 'visual_by_admin'), ('Cell', 'visual_by_cells')]:
                out_list = [crit, output_label]
                for occu in main_occupancies:
                    for gde_prefix in gde_prefixes:
                        out_list.append(_fmt2(dict_by_crit[crit][by_what][gde_prefix+occu+'_SERA'].values.sum()))
                out_list.append(dict_by_crit[crit]['path'])
                out_csv.write(','.join(out_list)+'\n')
    print('\n')
    print('============================')
    print('LeftOver and Total buildings')
    print('============================')
    with open(os.path.join(gde_path, 'check_leftover_total_cells.txt'), 'w') as out_txt:
        for crit in criteria:
            print('\n')
            _report('Criterion: '+crit, out_txt)
            visual_by_cells = dict_by_crit[crit]['visual_by_cells']
            which_complete = np.where(visual_by_cells['completeness'].values==1)[0]
            for occu in main_occupancies:
                _report('   Occupancy: '+occu, out_txt)
                num_obm = visual_by_cells['number_'+occu+'_OBM'].values
                num_leftover = visual_by_cells['number_'+occu+'_LeftOver'].values
                # For all cells, Total = OBM + LeftOver
                diff_total = np.abs(num_obm + num_leftover - visual_by_cells['number_'+occu+'_Total'].values)
                if np.any(diff_total>1E-3):
                    outstr = '      The difference in Total buildings is larger than 1E-3 at least once. PROBLEM!! Maximum difference: '+'{:.8f}'.format(np.max(diff_total))
                else:
                    outstr = '      The difference in Total buildings is never larger than 1E-3. OK. Maximum difference: '+'{:.8f}'.format(np.max(diff_total))
                _report(outstr, out_txt)
                # For all cells, LeftOver <= SERA:
                leftover_minus_sera = num_leftover - visual_by_cells['number_'+occu+'_SERA'].values
                if np.any(leftover_minus_sera>1E-10):
                    outstr = '      The number of LeftOver buildings is larger than that of SERA buildings at least once. PROBLEM!! Maximum LeftOver-SERA difference: '+'{:.8f}'.format(np.max(leftover_minus_sera))
                else:
                    outstr = '      The number of LeftOver buildings is always equal to or smaller than that of SERA buildings. OK.'
                _report(outstr, out_txt)
                # For all cells, OBM with classes <= OBM:
                classes_minus_obm = visual_by_cells['number_'+occu+'_OBM_with_classes'].values - num_obm
                if np.any(classes_minus_obm>1E-10):
                    outstr = '      The number of OBM_with_classes buildings is larger than that of total OBM buildings at least once. PROBLEM!! Maximum _OBM_with_classes - OBM difference: '+'{:.8f}'.format(np.max(classes_minus_obm))
                else:
                    outstr = '      The number of OBM_with_classes buildings is always equal to or smaller than that of total OBM buildings. OK.'
                _report(outstr, out_txt)
                # For complete cells, LeftOver = 0:
                leftover_complete = num_leftover[which_complete]
                if np.any(leftover_complete>1E-10):
                    outstr = '      There are complete cells with number of LeftOver buildings different from zero. PROBLEM!! Maximum value of LeftOver in the complete cells: '+'{:.8f}'.format(np.max(leftover_complete))
                else:
                    outstr = '      All complete cells have number of LeftOver buildings equal to zero. OK.'
                _report(outstr, out_txt)
    leftover_total_params = ['number', 'num_dwells', 'structural', 'night']
    leftover_total_kinds = ['LeftOver', 'Total']
    with open(os.path.join(gde_path, 'check_leftover_total.csv'), 'w') as out_csv:
        out_list = ['Criterion,Output']
        for occu in main_occupancies:
            for param in leftover_total_params:
                for bds in leftover_total_kinds:
                    out_list.append(occu+'_'+param+'_'+bds)
        out_list.append('File_Path')
        out_csv.write(','.join(out_list)+'\n')
        for crit in criteria:
            for by_what in visual_outputs:
                out_list = [crit, by_what.split('_')[-1]]
                for occu in main_occupancies:
                    for param in leftover_total_params:
                        for bds in leftover_total_kinds:
                            out_list.append(_fmt2(dict_by_crit[crit][by_what][param+'_'+occu+'_'+bds].values.sum()))
                out_list.append(dict_by_crit[crit]['path'])
                out_csv.write(','.join(out_list)+'\n')
    # Open the CSV file as a Pandas DF to check its contents:
    out_csv_as_df = pd.read_csv(os.path.join(gde_path, 'check_leftover_total.csv'), sep=',')
    with open(os.path.join(gde_path, 'check_leftover_total_differences.txt'), 'w') as out_txt:
        for crit in criteria:
            print('\n')
            print('Checking LeftOver and Total in by cells vs by admin unit')
            print('\n')
            _report('Criterion: '+crit, out_txt)
            which_crit = np.where(out_csv_as_df['Criterion'].values==crit)[0]
            if len(which_crit)==2:
                # Exactly two rows (admin and cells) are expected per criterion
                diff = np.abs(_row_as_floats(out_csv_as_df, which_crit[0]) - _row_as_floats(out_csv_as_df, which_crit[1]))
                _report('      Maximum difference between the two outputs (by cell vs by admin unit), from all parameters (i.e. buildings, dwellings, people, cost): '+_fmt2(np.max(diff)), out_txt)
            else:
                print('ERROR WHEN CHECKING check_leftover_total.csv!!!')
    print('\n')
    print('=================================')
    print('OQ input files and Visual Outputs')
    print('=================================')
    oq_params = ['number', 'structural', 'night']
    with open(os.path.join(gde_path, 'check_obm_leftover_visual_vs_OQ.csv'), 'w') as out_csv:
        out_list = ['Criterion,Output']
        for occu in main_occupancies:
            for bds in ['OBM_with_classes', 'LeftOver']:
                for param in oq_params:
                    out_list.append(occu+'_'+param+'_'+bds)
        out_list.append('File_Path')
        out_csv.write(','.join(out_list)+'\n')
        print('\n')
        for crit in criteria:
            print('Working on '+crit)
            for by_what in visual_outputs:
                print('   Working on '+by_what)
                out_list = [crit, by_what.split('_')[-1]]
                for occu in main_occupancies:
                    print('      Working on '+occu)
                    for bds in ['OBM_with_classes', 'LeftOver']:
                        for param in oq_params:
                            out_list.append(_fmt2(dict_by_crit[crit][by_what][param+'_'+occu+'_'+bds].values.sum()))
                out_list.append(dict_by_crit[crit]['path'])
                out_csv.write(','.join(out_list)+'\n')
            out_list = [crit, 'OQ_input']
            print('   Working on OQ_input')
            # The exposure files are read from a sub-folder of gde_path named after the results folder
            oq_folder = _results_folder_name(crit, dict_by_crit)
            for occu in main_occupancies:
                print('      Working on '+occu)
                # 'OBMpart' then 'cellspart', in the same column order as OBM_with_classes then LeftOver
                for bdgs_part in ['OBMpart', 'cellspart']:
                    oq_input_df = pd.read_csv(os.path.join(gde_path, oq_folder, 'Exposure_GDE_'+bdgs_part+'_'+crit+'_Greece_'+occu+'.csv'), sep=',')
                    for param in oq_params:
                        out_list.append(_fmt2(oq_input_df[param].values.sum()))
            out_list.append(dict_by_crit[crit]['path'])
            out_csv.write(','.join(out_list)+'\n')
    # Open the CSV file as a Pandas DF to check its contents:
    out_csv_as_df = pd.read_csv(os.path.join(gde_path, 'check_obm_leftover_visual_vs_OQ.csv'), sep=',')
    with open(os.path.join(gde_path, 'check_obm_leftover_visual_vs_OQ_differences.txt'), 'w') as out_txt:
        for crit in criteria:
            print('\n')
            print('Checking LeftOver and OBM in visual vs OQ input')
            print('\n')
            _report('Criterion: '+crit, out_txt)
            which_crit = np.where(out_csv_as_df['Criterion'].values==crit)[0]
            if len(which_crit)==3:
                # Exactly three rows (admin, cells and OQ_input) are expected per criterion
                diff_1 = np.abs(_row_as_floats(out_csv_as_df, which_crit[0]) - _row_as_floats(out_csv_as_df, which_crit[1]))
                diff_2 = np.abs(_row_as_floats(out_csv_as_df, which_crit[1]) - _row_as_floats(out_csv_as_df, which_crit[2]))
                _report('      Maximum difference between the three outputs (visual vs OQ input), from all parameters (i.e. buildings, people, cost): '+_fmt2(max(np.max(diff_1), np.max(diff_2))), out_txt)
            else:
                print('ERROR WHEN CHECKING check_obm_leftover_visual_vs_OQ.csv!!!')
    print('\n')
    print('Done!')

                    
if __name__=='__main__':
    # This code needs to be run from the command line as python3 namefile.py configfile.ini
    if len(sys.argv)<2:
        # Exit with a usage hint instead of an IndexError traceback when no config file is given
        sys.exit('Usage: python3 '+os.path.basename(__file__)+' configfile.ini')
    # sys.argv holds the command-line arguments: position [0] is this script, position [1] is the config file name
    config_filename= sys.argv[1]
    # The last section to validate is named after this script (file name without extension)
    section_names_to_validate= ['File Paths', 'OBM Database', 'Available Results', 'Ocuppancy String Groups', os.path.basename(__file__).split('.')[0]]
    # The config file name is resolved relative to the current working directory
    config_dict= gdet_conf.read_config_parameters(os.path.join(os.getcwd(), config_filename), section_names_to_validate)
    run_this_file(config_dict)