Commit 08395980 authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Adjustments to refactoring of SERA_mapping_admin_units_to_cells.py

parent 456aea3d
......@@ -23,10 +23,11 @@ Section 2.6: Seismic Hazard and Risk Dynamics
SERA_mapping_admin_units_to_cells
=================================
This code determines the mapping between the administrative units used in SERA and the grid cells
we are defining with a 10 arcsec resolution. It does not carry out the assignment of SERA buildings
to each cell but only creates this mapping, as there can be different ways of doing the assignment
of SERA buildings that do not need this mapping to be re-done each time.
This code determines the mapping between the administrative units used in SERA
and the grid cells we are defining with a 10 arcsec resolution. It does not
carry out the assignment of SERA buildings to each cell but only creates this
mapping, as there can be different ways of doing the assignment of SERA
buildings that do not need this mapping to be re-done each time.
"""
import sys
......@@ -43,38 +44,46 @@ import GDE_TOOLS_read_config_file as gdet_conf
def run_this_file(config_dict):
####################################################
# SET UP LOGGER
####################################################
now= datetime.datetime.now()
now_str= str(now.year)+'_'+str(now.month)+'_'+str(now.day)+'_'+str(now.hour)+'_'+str(now.minute)+'_'+str(now.second)
fh = logging.FileHandler(os.path.join(out_path, 'log_file_mapping_adm_units_'+now_str+'.log'))
logger = logging.getLogger(__name__)
####################################################
# READ CONFIGURATION PARAMETERS
####################################################
logger.info('Processing configuration parameters...')
print('Processing configuration parameters...')
# Path for output:
out_path= config_dict['File Paths']['out_path']
out_path = config_dict['File Paths']['out_path']
# SERA models path:
sera_models_path= config_dict['File Paths']['sera_models_path']
sera_models_path = config_dict['File Paths']['sera_models_path']
# SERA boundaries shapefiles path:
sera_shp_path= config_dict['File Paths']['sera_boundaries_path']
sera_shp_path = config_dict['File Paths']['sera_boundaries_path']
# Tiles Database:
DB_username_grid= config_dict['Tiles Database']['db_tiles_username']
DB_name_grid= config_dict['Tiles Database']['db_tiles_name']
DB_schema_name_grid= config_dict['Tiles Database']['db_tiles_schema_name']
DB_table_name_grid= config_dict['Tiles Database']['db_tiles_table_name']
DB_username_grid = config_dict['Tiles Database']['db_tiles_username']
DB_name_grid = config_dict['Tiles Database']['db_tiles_name']
DB_schema_name_grid = config_dict['Tiles Database']['db_tiles_schema_name']
DB_table_name_grid = config_dict['Tiles Database']['db_tiles_table_name']
# Countries to process:
countries= config_dict['SERA_mapping_admin_units_to_cells']['countries'].split(', ')
countries = config_dict['SERA_mapping_admin_units_to_cells']['countries'].split(', ')
####################################################
# SET UP LOGGER
####################################################
now = datetime.datetime.now()
now_str = '{:s}_{:s}_{:s}_{:s}_{:s}_{:s}'.format(str(now.year),
str(now.month).zfill(2),
str(now.day).zfill(2),
str(now.hour).zfill(2),
str(now.minute).zfill(2),
str(now.second).zfill(2))
log_fn = 'log_file_mapping_adm_units_%s.log' % (now_str)
fh = logging.FileHandler(os.path.join(out_path, log_fn))
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
####################################################
# START
####################################################
num_countries = len(countries)
for k,country_name in enumerate(countries):
for result in process_country(country_name, sera_models_path, sera_shp_path):
for result in process_country(country_name, k, num_countries,
sera_models_path, sera_shp_path, logger):
# Unpack results:
cell_id, adm_level, admin_id,
area_intersect, geom_intersect, cases_in_lev_grs, shp_not_found = result
cell_id, adm_level, admin_id, area_intersect, geom_intersect, cases_in_lev_grs, shp_not_found = result
# Write entry to database:
err_msg = gdet_psql.insert_data_in_cell_admin_unit_mapping(DB_name_grid,
......@@ -90,22 +99,25 @@ def run_this_file(config_dict):
cases_in_lev_grs)
if len(err_msg)>0:
logger.info(country_name+': '+err_msg)
logger.info(gdet_gral.add_to_log_shapefiles_found(country_name, shp_not_found))
logger.debug('\r %s' % (gdet_gral.add_to_log_shapefiles_found(country_name,
shp_not_found)))
def process_country(country_str, sera_mods_path, sera_shape_path):
def process_country(country_str, country_pos, number_countries,
sera_mods_path, sera_shape_path, logger):
"""
country_str = country name
sera_mods_path = path to the SERA model files
sera_shape_path = path to the shapefiles used by SERA
"""
logger.info('\nWorking on %s, country %i of %i\n'
% (country_name, k+1, len(countries)))
% (country_str, country_pos+1, number_countries))
# There is repetition of admin levels across Res/Com/Ind.
# Pre-analyse the case so as to carry out spatial intersections once.
packed_res = gdet_sera.get_admin_level_definition(country_name,
sera_mods_path,
packed_res = gdet_sera.get_admin_level_definition(country_str,
sera_mods_path,
full_files=True,
output_dfs=True)
admin_levels, level_groups, level_groups_levels, dfs_dict = packed_res
......@@ -113,7 +125,9 @@ def process_country(country_str, sera_mods_path, sera_shape_path):
# definition (given by level_groups_levels):
for j in range(len(level_groups)):
try:
adm_boundary = gpd.GeoDataFrame.from_file(os.path.join(sera_shape_path,'Adm'+str(level_groups_levels[j])+'_'+country_name+'.shp'))
adm_boundary = gpd.GeoDataFrame.from_file(os.path.join(sera_shape_path,
'Adm%i_%s.shp'
% (level_groups_levels[j], country_str)))
except:
logger.info(' Shapefile not found for Admin Level %i'
% (level_groups_levels[j]))
......@@ -126,65 +140,75 @@ def process_country(country_str, sera_mods_path, sera_shape_path):
# Go by case (Res, Com or Ind):
for case in level_groups[j]:
unique_admins= np.unique(dfs_dict[case]['id_'+str(level_groups_levels[j])].values)
#unique_admins= np.unique(dfs_dict[case]['id_%i' % (level_groups_levels[j])].values)
unique_admins= np.unique(dfs_dict[case]['id_%i' % (level_groups_levels[j])].values[np.where(dfs_dict[case]['id_2']==4)[0]])
# Go one by one the admin units named in the exposure CSV file:
for m, admin_id in enumerate(unique_admins):
if admin_id in admin_units_investigated:
continue
admin_units_investigated.append(admin_id)
if admin_id not in adm_boundary['ID_'+str(level_groups_levels[j])].values: # if the admin id cannot be found in the corresponding shapefile
# If the admin id cannot be found in the corresponding shapefile:
if admin_id not in adm_boundary['ID_%i' % (level_groups_levels[j])].values:
shp_not_found= shp_not_found + 1
continue
# Identify row of this administrative unit in the adm_boundary DF:
logger.info(' %s, level group %i of %i, %s (%i associated admin IDs): Working on admin ID %i (position %i)'
% (country_name, j+1, len(level_groups), case, len(unique_admins), admin_id, m+1))
which_row= np.where(adm_boundary['ID_'+str(level_groups_levels[j])].values==admin_id)[0]
print(' %s, level group %i of %i, %s (%i associated admin IDs): Working on admin ID %i (position %i)'
% (country_str, j+1, len(level_groups), case, len(unique_admins), admin_id, m+1))
#logger.info(' %s, level group %i of %i, %s (%i associated admin IDs): Working on admin ID %i (position %i)'
# % (country_str, j+1, len(level_groups), case, len(unique_admins), admin_id, m+1))
which_row= np.where(adm_boundary['ID_%i' % (level_groups_levels[j])].values==admin_id)[0]
if len(which_row) != 1:
logger.info(gdet_gral.add_to_log_too_many_boundaries_found(country_name, len(which_row), admin_id))
logger.info(gdet_gral.add_to_log_too_many_boundaries_found(country_str, len(which_row), admin_id))
continue
# Pass on adm_boundary and the row
# to process all cells associated with this admin unit:
for results_adm in process_admin_unit(adm_boundary, which_row):
for results_adm in process_admin_unit(adm_boundary, which_row, logger):
cell_id, area_intersect, geom_intersect = results_adm
yield cell_id, level_groups_levels[j], admin_id, area_intersect, geom_intersect, level_groups[j], shp_not_found
def process_admin_unit(adm_boundary_gdf, which_row_of_gdf):
def process_admin_unit(adm_boundary_gdf, which_row_of_gdf, logger):
"""
adm_boundary_gdf = GeoPandas DataFrame with admin boundaries
which_row_of_gdf = row of adm_boundary_gdf to be processed
"""
# Bounding box of the admin unit:
bbox= adm_boundary_gdf['geometry'].values[which_row_of_gdf[0]].bounds
bbox = adm_boundary_gdf['geometry'].values[which_row_of_gdf[0]].bounds
# GeoPandas DF with the cells associated with this admin unit:
gdf_cells= gdet_grid.get_cells_in_bbox_as_geopandas(bbox[0], bbox[1],
bbox[2], bbox[3])
gdf_cells = gdet_grid.get_cells_in_bbox_as_geopandas(bbox[0], bbox[1],
bbox[2], bbox[3])
# gdf_intersect has as many rows as cells do intersect this admin unit:
gdf_intersect= gpd.sjoin(gdf_cells,
adm_boundary_gdf.iloc[[which_row_of_gdf[0]],:],
how='right')
gdf_intersect = gpd.sjoin(gdf_cells,
adm_boundary_gdf.iloc[[which_row_of_gdf[0]],:],
how='right')
# Loop through all cells that intersect the admin boundary:
for i in range(gdf_intersect.shape[0]):
#print('\r Intersection of cell '+str(gdf_intersect['cell_id'].values[i]), end='')
logger.info('\r Intersection of cell %i'
% (gdf_intersect['cell_id'].values[i]), end='')
geom_cell= gdf_cells['geometry'].values[gdf_intersect['index_left'].values[i]]
geom_adm_unit= adm_boundary_gdf['geometry'].values[which_row_of_gdf[0]]
geom_intersection= geom_adm_unit.intersection(geom_cell)
area_intersection= gdet_grid.get_area_polygon_on_Earth(geom_intersection,units='km2')
print('\r Intersection of cell '+str(gdf_intersect['cell_id'].values[i]), end='')
#logger.info('\r Intersection of cell %s'
# % (gdf_intersect['cell_id'].values[i]))
geom_cell = gdf_cells['geometry'].values[gdf_intersect['index_left'].values[i]]
geom_adm_unit = adm_boundary_gdf['geometry'].values[which_row_of_gdf[0]]
geom_intersection = geom_adm_unit.intersection(geom_cell)
area_intersection = gdet_grid.get_area_polygon_on_Earth(geom_intersection,
units='km2')
yield gdf_intersect['cell_id'].values[i], area_intersection, geom_intersection
if __name__=='__main__':
# This code needs to be run from the command line as python3 namefile.py configfile.ini
logger.basicConfig(level='INFO')
config_filename= sys.argv[1] # sys.argv retrieves all the commands entered in the command line; position [0] is this code, position [1] will be the config file name
section_names_to_validate= ['File Paths', 'Tiles Database', os.path.basename(__file__).split('.')[0]]
config_dict= gdet_conf.read_config_parameters(os.path.join(os.getcwd(), config_filename), section_names_to_validate)
# sys.argv retrieves all the commands entered in the command line;
# position [0] is this code, position [1] will be the config file name
config_filename = sys.argv[1]
section_names_to_validate = ['File Paths',
'Tiles Database',
os.path.basename(__file__).split('.')[0]]
config_dict = gdet_conf.read_config_parameters(os.path.join(os.getcwd(),
config_filename),
section_names_to_validate)
run_this_file(config_dict)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment