Commit 456aea3d authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Restyling of SERA_mapping_admin_units_to_cells

parent 2ac7d6ce
......@@ -23,4 +23,5 @@ var/
*.egg
# Ignore configuration file in use:
GDE_config_file.ini
\ No newline at end of file
GDE_config_file.ini
GDE_config_file_T1.ini
\ No newline at end of file
......@@ -34,6 +34,7 @@ import os
import numpy as np
import geopandas as gpd
import datetime
import logging
import GDE_TOOLS_world_grid as gdet_grid
import GDE_TOOLS_read_SERA as gdet_sera
import GDE_TOOLS_general as gdet_gral
......@@ -42,10 +43,17 @@ import GDE_TOOLS_read_config_file as gdet_conf
def run_this_file(config_dict):
####################################################
# SET UP LOGGER
####################################################
now= datetime.datetime.now()
now_str= str(now.year)+'_'+str(now.month)+'_'+str(now.day)+'_'+str(now.hour)+'_'+str(now.minute)+'_'+str(now.second)
fh = logging.FileHandler(os.path.join(out_path, 'log_file_mapping_adm_units_'+now_str+'.log'))
logger = logging.getLogger(__name__)
####################################################
# READ CONFIGURATION PARAMETERS
####################################################
print('Processing configuration parameters...')
logger.info('Processing configuration parameters...')
# Path for output:
out_path= config_dict['File Paths']['out_path']
# SERA models path:
......@@ -62,58 +70,119 @@ def run_this_file(config_dict):
####################################################
# START
####################################################
log= [] # list that I fill in for logging purposes and then dump to a txt file
for k,country_name in enumerate(countries):
print('\n')
print('Working on '+country_name+', country '+str(k+1)+' of '+str(len(countries))+'\n')
# There is repetition of admin levels across Res/Com/Ind. I pre-analyse the case so as to carry out spatial intersections only once.
admin_levels, level_groups, level_groups_levels, dfs_dict= gdet_sera.get_admin_level_definition(country_name, sera_models_path, output_dfs=True)
for j in range(0,len(level_groups)): # Group of any combination of Res/Com/Ind with the same admin level definition (given by level_groups_levels)
try:
adm_boundary = gpd.GeoDataFrame.from_file(os.path.join(sera_shp_path,'Adm'+str(level_groups_levels[j])+'_'+country_name+'.shp'))
except:
print(' Shapefile not found for Admin Level '+str(level_groups_levels[j]))
shp_not_found= 0
admin_units_investigated= [] # keep track of admin units that I check, just in case not all units are mentioned in all exposure cases (Res,Com,Ind)
for case in level_groups[j]: # Res, Com or Ind
unique_admins= np.unique(dfs_dict[case]['id_'+str(level_groups_levels[j])].values)
for m, admin_id in enumerate(unique_admins): # Go one by one the admin units named in the exposure CSV file
if admin_id not in admin_units_investigated:
admin_units_investigated.append(admin_id)
if admin_id not in adm_boundary['ID_'+str(level_groups_levels[j])].values: # if the admin id cannot be found in the corresponding shapefile
shp_not_found= shp_not_found + 1
else:
# Determine the grid cells associated with this administrative unit
print(' '+country_name+', level group '+str(j+1)+' of '+str(len(level_groups))+', '+case+' ('+str(len(unique_admins))+' associated admin IDs)'+': Working on admin ID '+str(admin_id)+' (position '+str(m+1)+')')
which_row= np.where(adm_boundary['ID_'+str(level_groups_levels[j])].values==admin_id)[0]
if len(which_row)==1:
bbox= adm_boundary['geometry'].values[which_row[0]].bounds # bounding box of the admin unit
gdf_cells= gdet_grid.get_cells_in_bbox_as_geopandas(bbox[0], bbox[1], bbox[2], bbox[3]) # GeoPandas DF with the cells associated with this admin unit
gdf_intersect= gpd.sjoin(gdf_cells, adm_boundary.iloc[[which_row[0]],:], how='right') # gdf_intersect has as many rows as cells do intersect this admin unit
for i in range(0,gdf_intersect.shape[0]): #for i in range(0,10): #
print('\r Intersection of cell '+str(gdf_intersect['cell_id'].values[i]), end='')
geom_cell= gdf_cells['geometry'].values[gdf_intersect['index_left'].values[i]]
geom_adm_unit= adm_boundary['geometry'].values[which_row[0]]
geom_intersection= geom_adm_unit.intersection(geom_cell)
area_intersection= gdet_grid.get_area_polygon_on_Earth(geom_intersection,units='km2')
# Write entry to database:
error_message= gdet_psql.insert_data_in_cell_admin_unit_mapping(DB_name_grid, DB_username_grid, DB_schema_name_grid, DB_table_name_grid, gdf_intersect['cell_id'].values[i], country_name, level_groups_levels[j], admin_id, area_intersection, geom_intersection, level_groups[j])
if len(error_message)>0:
log.append(country_name+': '+error_message)
print('\n')
else:
log.append(gdet_gral.add_to_log_too_many_boundaries_found(country_name, len(which_row), admin_id))
log.append(gdet_gral.add_to_log_shapefiles_found(country_name, shp_not_found))
# Dump the log list to a file:
now= datetime.datetime.now()
now_str= str(now.year)+'_'+str(now.month)+'_'+str(now.day)+'_'+str(now.hour)+'_'+str(now.minute)+'_'+str(now.second)
gdet_gral.write_log_file(log, os.path.join(out_path, 'log_file_'+now_str+'.txt'))
print('\n')
print('Done!')
for result in process_country(country_name, sera_models_path, sera_shp_path):
# Unpack results:
cell_id, adm_level, admin_id,
area_intersect, geom_intersect, cases_in_lev_grs, shp_not_found = result
# Write entry to database:
err_msg = gdet_psql.insert_data_in_cell_admin_unit_mapping(DB_name_grid,
DB_username_grid,
DB_schema_name_grid,
DB_table_name_grid,
cell_id,
country_name,
adm_level,
admin_id,
area_intersect,
geom_intersect,
cases_in_lev_grs)
if len(err_msg)>0:
logger.info(country_name+': '+err_msg)
logger.info(gdet_gral.add_to_log_shapefiles_found(country_name, shp_not_found))
def process_country(country_str, sera_mods_path, sera_shape_path):
"""
country_str = country name
sera_mods_path = path to the SERA model files
sera_shape_path = path to the shapefiles used by SERA
"""
logger.info('\nWorking on %s, country %i of %i\n'
% (country_name, k+1, len(countries)))
# There is repetition of admin levels across Res/Com/Ind.
# Pre-analyse the case so as to carry out spatial intersections once.
packed_res = gdet_sera.get_admin_level_definition(country_name,
sera_mods_path,
output_dfs=True)
admin_levels, level_groups, level_groups_levels, dfs_dict = packed_res
# Go by group of any combination of Res/Com/Ind with the same admin level
# definition (given by level_groups_levels):
for j in range(len(level_groups)):
try:
adm_boundary = gpd.GeoDataFrame.from_file(os.path.join(sera_shape_path,'Adm'+str(level_groups_levels[j])+'_'+country_name+'.shp'))
except:
logger.info(' Shapefile not found for Admin Level %i'
% (level_groups_levels[j]))
# Keep track of admin IDs not found in the shapefile:
shp_not_found= 0
# Keep track of admin units that checked, because not all units might
# exist in each case (Res, Com, Ind):
admin_units_investigated= []
# Go by case (Res, Com or Ind):
for case in level_groups[j]:
unique_admins= np.unique(dfs_dict[case]['id_'+str(level_groups_levels[j])].values)
# Go one by one the admin units named in the exposure CSV file:
for m, admin_id in enumerate(unique_admins):
if admin_id in admin_units_investigated:
continue
admin_units_investigated.append(admin_id)
if admin_id not in adm_boundary['ID_'+str(level_groups_levels[j])].values: # if the admin id cannot be found in the corresponding shapefile
shp_not_found= shp_not_found + 1
continue
# Identify row of this administrative unit in the adm_boundary DF:
logger.info(' %s, level group %i of %i, %s (%i associated admin IDs): Working on admin ID %i (position %i)'
% (country_name, j+1, len(level_groups), case, len(unique_admins), admin_id, m+1))
which_row= np.where(adm_boundary['ID_'+str(level_groups_levels[j])].values==admin_id)[0]
if len(which_row) != 1:
logger.info(gdet_gral.add_to_log_too_many_boundaries_found(country_name, len(which_row), admin_id))
continue
# Pass on adm_boundary and the row
# to process all cells associated with this admin unit:
for results_adm in process_admin_unit(adm_boundary, which_row):
cell_id, area_intersect, geom_intersect = results_adm
yield cell_id, level_groups_levels[j], admin_id, area_intersect, geom_intersect, level_groups[j], shp_not_found
def process_admin_unit(adm_boundary_gdf, which_row_of_gdf):
"""
adm_boundary_gdf = GeoPandas DataFrame with admin boundaries
which_row_of_gdf = row of adm_boundary_gdf to be processed
"""
# Bounding box of the admin unit:
bbox= adm_boundary_gdf['geometry'].values[which_row_of_gdf[0]].bounds
# GeoPandas DF with the cells associated with this admin unit:
gdf_cells= gdet_grid.get_cells_in_bbox_as_geopandas(bbox[0], bbox[1],
bbox[2], bbox[3])
# gdf_intersect has as many rows as cells do intersect this admin unit:
gdf_intersect= gpd.sjoin(gdf_cells,
adm_boundary_gdf.iloc[[which_row_of_gdf[0]],:],
how='right')
# Loop through all cells that intersect the admin boundary:
for i in range(gdf_intersect.shape[0]):
#print('\r Intersection of cell '+str(gdf_intersect['cell_id'].values[i]), end='')
logger.info('\r Intersection of cell %i'
% (gdf_intersect['cell_id'].values[i]), end='')
geom_cell= gdf_cells['geometry'].values[gdf_intersect['index_left'].values[i]]
geom_adm_unit= adm_boundary_gdf['geometry'].values[which_row_of_gdf[0]]
geom_intersection= geom_adm_unit.intersection(geom_cell)
area_intersection= gdet_grid.get_area_polygon_on_Earth(geom_intersection,units='km2')
yield gdf_intersect['cell_id'].values[i], area_intersection, geom_intersection
if __name__=='__main__':
# This code needs to be run from the command line as python3 namefile.py configfile.ini
logger.basicConfig(level='INFO')
config_filename= sys.argv[1] # sys.argv retrieves all the commands entered in the command line; position [0] is this code, position [1] will be the config file name
section_names_to_validate= ['File Paths', 'Tiles Database', os.path.basename(__file__).split('.')[0]]
config_dict= gdet_conf.read_config_parameters(os.path.join(os.getcwd(), config_filename), section_names_to_validate)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment