Commit 8951537e authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Initial commit of GDE prototype code

parents
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# Ignore configuration file in use:
GDE_config_file.ini
\ No newline at end of file
"""
Copyright (C) 2020
Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Global Dynamic Exposure Model
Helmholtz Centre Potsdam
GFZ German Research Centre for Geosciences
Section 2.6: Seismic Hazard and Risk Dynamics
GDE_TOOLS_GPW
=============
Tools to load the population and density grids of Gridded Population of the World (GPW) v4.0.
The input HDF5 files have been parsed from the original GPW files.
The Gridded Population of the World (GPW) v4.0 dataset:
Center for International Earth Science Information Network-CIESIN-Columbia University (2016)
Gridded Population of the World, Version 4 (GPWv4). NASA Socioeconomic Data and Applications
Center, Palisades. http://dx.doi.org/10.7927/H4NP22DQ
"""
import os
import h5py
import numpy as np
def get_gpw_grids(gpw_path):
    """
    Load the GPW v4.0 population-count and population-density grids from the
    pre-parsed HDF5 files located under gpw_path.

    Args:
        gpw_path (str): path to the folder containing the 'Count_2015_HDF5'
            and 'Density_2015_HDF5' subfolders with the parsed GPW HDF5 files.

    Returns:
        population_values (np.ndarray): population counts per cell.
        density_values (np.ndarray): population density (people/km2) per cell.
        gpw_pop_x_coord_edges (np.ndarray): longitudes of the cell edges.
        gpw_pop_y_coord_edges (np.ndarray): latitudes of the cell edges,
            ordered from north (top) to south (bottom).
        gpw_pop_x_coord_midpoints (np.ndarray): longitudes of cell midpoints.
        gpw_pop_y_coord_midpoints (np.ndarray): latitudes of cell midpoints.
        gpw_pop_nrows (int): number of rows of the population grid.
        error_with_grids (bool): True if the population and density grids do
            not coincide (the rest of the code assumes that they match).
        top_relevant_row (int): hard-coded first row of interest (from a
            separate analysis of the dataset).
        bottom_relevant_row (int): hard-coded last row of interest (from a
            separate analysis of the dataset).
    """
    # Population counts. Open read-only and via a context manager so the file
    # handle is always released (the original code left both files open).
    with h5py.File(os.path.join(gpw_path, 'Count_2015_HDF5', 'gpw-v4-population-count_2015.hdf5'), 'r') as gpw_pop_hdf5_file:
        population_values= gpw_pop_hdf5_file['Population'][:]
        gpw_pop_low_corner_x= round(gpw_pop_hdf5_file.attrs['xll'],6)
        gpw_pop_low_corner_y= round(gpw_pop_hdf5_file.attrs['yll'],6)
        gpw_pop_cellsize= round(gpw_pop_hdf5_file.attrs['Cell Size'],12) #degrees
    gpw_pop_ncols= np.size(population_values, axis=1)
    gpw_pop_nrows= np.size(population_values, axis=0)
    # Edge coordinates built from the lower-left corner and the cell size:
    gpw_pop_x_coord_edges= gpw_pop_low_corner_x * np.ones([gpw_pop_ncols+1]) + np.linspace(0.0, float(gpw_pop_ncols), num= gpw_pop_ncols+1) * gpw_pop_cellsize
    gpw_pop_y_coord_edges= gpw_pop_low_corner_y * np.ones([gpw_pop_nrows+1]) + np.linspace(0.0, float(gpw_pop_nrows), num= gpw_pop_nrows+1) * gpw_pop_cellsize
    gpw_pop_y_coord_edges= gpw_pop_y_coord_edges[::-1] # y_coord_edges goes from up to down
    # Correct float precision issue at the outermost edges:
    gpw_pop_x_coord_edges[-1]= round(gpw_pop_x_coord_edges[-1], 6)
    gpw_pop_y_coord_edges[0]= round(gpw_pop_y_coord_edges[0], 6)
    gpw_pop_x_coord_midpoints= (gpw_pop_x_coord_edges[:-1] + gpw_pop_x_coord_edges[1:]) / 2.0
    gpw_pop_y_coord_midpoints= (gpw_pop_y_coord_edges[:-1] + gpw_pop_y_coord_edges[1:]) / 2.0
    # Population density:
    with h5py.File(os.path.join(gpw_path, 'Density_2015_HDF5', 'gpw-v4-population-dens_2015.hdf5'), 'r') as gpw_dens_hdf5_file:
        density_values= gpw_dens_hdf5_file['Density'][:]
        gpw_dens_low_corner_x= round(gpw_dens_hdf5_file.attrs['xll'],6)
        gpw_dens_low_corner_y= round(gpw_dens_hdf5_file.attrs['yll'],6)
        gpw_dens_cellsize= round(gpw_dens_hdf5_file.attrs['Cell Size'],12) #degrees
    gpw_dens_ncols= np.size(density_values, axis=1)
    gpw_dens_nrows= np.size(density_values, axis=0)
    # Sanity check: both grids must share origin, cell size and dimensions,
    # because the rest of the code indexes them with the same coordinates.
    error_with_grids= (abs(gpw_dens_low_corner_x - gpw_pop_low_corner_x) > 1E-4
                       or abs(gpw_dens_low_corner_y - gpw_pop_low_corner_y) > 1E-4
                       or abs(gpw_dens_cellsize - gpw_pop_cellsize) > 1E-6
                       or gpw_dens_ncols != gpw_pop_ncols
                       or gpw_dens_nrows != gpw_pop_nrows)
    if error_with_grids:
        print('ERROR WITH GRIDS OF GPW!!!!')
        print(' THE GRIDS FOR POPULATION AND DENSITY DO NOT COINCIDE!!!')
        print(' THE CODE WAS WRITTEN UNDER THE ASSUMPTION THAT THEY MATCH!!!')
    top_relevant_row= 865 # From separate analysis
    bottom_relevant_row= 16806 # From separate analysis
    return population_values, density_values, gpw_pop_x_coord_edges, gpw_pop_y_coord_edges, gpw_pop_x_coord_midpoints, gpw_pop_y_coord_midpoints, gpw_pop_nrows, error_with_grids, top_relevant_row, bottom_relevant_row
def get_population_value_for_point(target_lon, target_lat, input_matrix, input_edges_x, input_edges_y, grid_cell_adjustment= 1.0):
    """
    Return the grid value (population count or density) of the cell that
    contains the point (target_lon, target_lat).

    Call as:
    get_population_value_for_point(lon_val, lat_val, population_values, gpw_pop_x_coord_edges, gpw_pop_y_coord_edges)

    input_edges_x runs west to east; input_edges_y runs north to south (as
    produced by get_gpw_grids).
    grid_cell_adjustment: default 1.0. Scale factor to adjust the GPW value
    when the target cell is smaller than the GPW cell.
    """
    # Column of the grid cell that contains the target longitude:
    col_idx= np.searchsorted(input_edges_x, target_lon) - 1
    # The y edges are stored north-to-south; search the ascending (reversed)
    # copy and flip the index back into the original row order:
    ascending_y= input_edges_y[::-1]
    row_idx= (len(ascending_y) - 1) - np.searchsorted(ascending_y, target_lat)
    cell_value= input_matrix[row_idx, col_idx]
    return cell_value * grid_cell_adjustment
\ No newline at end of file
"""
Copyright (C) 2020
Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Global Dynamic Exposure Model
Helmholtz Centre Potsdam
GFZ German Research Centre for Geosciences
Section 2.6: Seismic Hazard and Risk Dynamics
GDE_TOOLS_access_OBM_HDF
========================
Tools for accessing the HDF5 files of OBM buildings.
"""
import os
import h5py
import pandas as pd
import numpy as np
import GDE_TOOLS_general as gdet_gral
import GDE_TOOLS_access_SERA_HDF as gdet_serah
from copy import deepcopy
def make_list_country_admin_units_to_retrieve(admin_unit_type, list_subgroups, which_country='', which_admin_ID=''):
    """
    Select which HDF5 subgroups (country_adminID strings) should be retrieved.
    Used by "retrieve_OBM_buildings_in_cell_from_hdf5".

    admin_unit_type:
    "all" = all admin units within the cell
    "country" = all admin units of a certain country (defined by setting which_country)
    "admin_unit" = the admin unit defined by setting which_country together with which_admin_ID
    list_subgroups: list of subgroups of an HDF5 file
    which_country: only needed if admin_unit_type is "country" or "admin_unit"; full country name
    which_admin_ID: only needed if admin_unit_type is "admin_unit"
    Out:
    list of elements from list_subgroups that comply with the criteria set by
    admin_unit_type, which_country and which_admin_ID; empty on any error.
    """
    selected= []
    if admin_unit_type=='all':
        # Everything in the file qualifies.
        selected= deepcopy(list_subgroups)
    elif admin_unit_type=='country':
        if which_country=='':
            print('ERROR IN make_list_country_admin_units_to_retrieve: admin_unit_type IS "country" BUT NO COUNTRY WAS GIVEN AS INPUT')
        else:
            # Keep the subgroups whose name contains the country's ISO2 code.
            country_iso2= gdet_gral.get_ISO2_code_for_country(which_country)
            selected= [entry for entry in list_subgroups if country_iso2 in entry]
    elif admin_unit_type=='admin_unit':
        if which_country=='':
            print('ERROR IN make_list_country_admin_units_to_retrieve: admin_unit_type IS "admin_unit" BUT NO COUNTRY WAS GIVEN AS INPUT')
        elif which_admin_ID=='':
            print('ERROR IN make_list_country_admin_units_to_retrieve: admin_unit_type IS "admin_unit" BUT NO ADMIN ID WAS GIVEN AS INPUT')
        else:
            country_iso2= gdet_gral.get_ISO2_code_for_country(which_country)
            candidate= country_iso2+'_'+str(which_admin_ID)
            if candidate in list_subgroups:
                selected= [candidate]
            else:
                print('ERROR IN retrieve_OBM_buildings_in_cell_from_hdf5: COUNTRY_ADMIN_ID NOT FOUND IN FILE!!!')
    return selected
def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5_filename, countries_shp_path, sera_meta_path, occu_case, admin_unit_type= 'all', disagg_method= 'area', which_country= '', which_admin_ID= ''):
    """
    Retrieve the OBM buildings stored in the HDF5 file of one grid cell, with
    their SERA building-class breakdown and derived exposure quantities.

    obm_hdf5_filename: full file path to the OBM HDF5 file for a particular cell and occupancy case
    bdg_classes_hdf5_filename: full file path to the HDF5 file of the SERA building classes for a particular occupancy case
    countries_shp_path: full file path to the folder containing all shapefiles of admin units used in SERA
    sera_meta_path: full file path to the HDF5 file with the SERA metadata
    occu_case: Res, Com, Ind, Oth
    admin_unit_type:
    "all" = all admin units within the cell
    "country" = all admin units of a certain country (defined by setting which_country)
    "admin_unit" = the admin unit defined by setting which_country together with which_admin_ID
    disagg_method: disaggregation method for SERA 'area', 'gpw_2015_pop', 'ghs', or 'sat'
    (i.e. method used to distribute SERA to a grid)
    which_country: only needed if admin_unit_type is "country" or "admin_unit"; full country name
    which_admin_ID: only needed if admin_unit_type is "admin_unit"
    Output:
    - out_dict: dictionary with three sub-keys:
        - DF: Pandas DataFrame with the OBM buildings in this cell (according to admin_unit_type). Its
          columns are almost the same as those in the OQ input files, except for the row ID and the
          occupancy column, and the fact that the admin IDs and names can belong to different countries
          and, thus, different admin levels.
        - Number_with_Classes: the number of OBM buildings with assigned distributions of building classes
          (i.e. the total number of buildings in DF)
        - Number_Tot: the total number of OBM buildings of occu_case in this cell. By definition, it
          is equal to or larger than Number_with_Classes. It is larger when there are OBM
          buildings located within a country_adminID for which SERA is not defined for this
          occu_case.
    - list_country_admin_units: the country_adminID subgroups that were processed
    - obm_without_classes_num_bdgs_per_adm_unit: per admin unit, the count of OBM
      buildings that lack SERA classes for this disagg_method
    - list_country_admin_units_levels: per admin unit, its 'Adm_Level' attribute
      (-99 when the subgroup is not a real admin unit)
    - error_messages: '' if no errors have arisen
    Interpretation (file aux_tables.xlsx helps):
    - If occu_case=="Other", out_dict['DF'] is empty, out_dict['Number_with_Classes'] is zero, and
      out_dict['Number_Tot'] is the total number of OBM buildings in this cell of occupancy 'Oth'.
    - If occu_case!="Other", out_dict['DF'] is the dataframe with all the details of the OBM buildings
      classified as per the corresponding SERA building classes, out_dict['Number_with_Classes'] is the
      total number of buildings in DF and out_dict['Number_Tot'] is the total number of OBM buildings
      of occu_case in this cell.
    - If out_dict is empty, the obm_hdf5_filename does not exist. This means that there are no OBM buildings
      in this occu_case, irrespective of whether occu_case is Res, Com, Ind or Oth and of whether SERA is
      defined for this particular cell and occu_case. I.e. out_dict is empty and num_other is -999 is a
      synonym for zero OBM buildings.
    - If out_dict is not empty, THERE ARE OBM buildings.
    """
    error_messages= ''
    out_dict= {}
    list_country_admin_units= []
    obm_without_classes_num_bdgs_per_adm_unit= []
    list_country_admin_units_levels= []
    # A file that cannot be opened is treated as "no OBM buildings for this
    # cell and occupancy case" (see docstring).
    # NOTE(review): the bare except also swallows unrelated errors
    # (permissions, corruption) — consider catching OSError explicitly.
    try:
        fle= h5py.File(obm_hdf5_filename, "r")
        file_found= True
    except:
        file_found= False
    if file_found:
        # Subgroups (country_adminID strings) to process, per admin_unit_type:
        list_country_admin_units= make_list_country_admin_units_to_retrieve(admin_unit_type, list(fle), which_country=which_country, which_admin_ID=which_admin_ID)
        obm_without_classes_num_bdgs_per_adm_unit= np.zeros([len(list_country_admin_units)])
        list_country_admin_units_levels= np.zeros([len(list_country_admin_units)], dtype='int64')
        if occu_case!='Oth':
            num_bdgs_tot= 0
            any_bdgs_without_SERA_classes= False
            # Initialise arrays that will collect the info of buildings
            # (one entry per building x SERA class combination):
            osm_ids= []
            taxonomies= []
            taxonomy_stars= []
            admin_names= []
            admin_ids= []
            lon= np.empty([0], dtype='float64')
            lat= np.empty([0], dtype='float64')
            num_bdgs= np.empty([0], dtype='float64')
            num_dwells= np.empty([0], dtype='float64')
            cost= np.empty([0], dtype='float64')
            ppl= np.empty([0], dtype='float64')
            for j, country_adminID in enumerate(list_country_admin_units):
                if 'Adm_Level' in list(fle[country_adminID].attrs):
                    list_country_admin_units_levels[j]= fle[country_adminID].attrs['Adm_Level']
                else: # if the attribute does not exist it is because country_adminID is not really an adm unit (e.g. "99" or "GR")
                    list_country_admin_units_levels[j]= -99
                # Subgroup names look like "<ISO2>_<adminID>"; a bare ISO2 code
                # (no underscore-separated ID) yields an empty admin ID.
                country_adminID_split= country_adminID.split('_')
                if len(country_adminID_split)>1:
                    iso2_code= country_adminID.split('_')[0]
                    admin_ID_alone= country_adminID.split('_')[1]
                else:
                    iso2_code= country_adminID.split('_')[0]
                    admin_ID_alone= ''
                country_name= gdet_serah.get_country_name_from_iso2_and_metadata_HDF5(iso2_code, sera_meta_path)
                if country_name!='UNKNOWN':
                    # Resolve the human-readable admin-unit name from the
                    # SERA shapefiles at the admin level used for occu_case.
                    admin_levels, _, _= gdet_serah.get_admin_level_definition_from_HDF5(country_name, sera_meta_path)
                    adm_name, _= gdet_gral.get_admin_name_from_ID(country_name, admin_levels[occu_case], admin_ID_alone, countries_shp_path)
                else:
                    error_messages= error_messages+ ' ERROR: Country name could not be retrieved from ISO2 code by get_country_name_from_iso2_and_metadata_HDF5 function.'
                    adm_name= 'Unk'
                for osm_id in list(fle[country_adminID]):
                    num_bdgs_tot= num_bdgs_tot + 1
                    if ('SERA_classes'+'_'+disagg_method) in list(fle[country_adminID][osm_id]):
                        # Retrieve lat, lon, SERA_classes and SERA_vals for this particular disagg_method
                        lon_val= fle[country_adminID][osm_id].attrs['Lon']
                        lat_val= fle[country_adminID][osm_id].attrs['Lat']
                        all_sera_classes= fle[country_adminID][osm_id]['SERA_classes'+'_'+disagg_method][:]
                        all_sera_proportions= fle[country_adminID][osm_id]['SERA_vals'+'_'+disagg_method][:]
                        # Per-class SERA parameters for this building:
                        dwell_per_bdg= np.zeros([len(all_sera_classes)])
                        area_per_dwelling_sqm= np.zeros([len(all_sera_classes)])
                        cost_per_area_usd= np.zeros([len(all_sera_classes)])
                        ppl_per_dwell= np.zeros([len(all_sera_classes)])
                        for k, bdg_class in enumerate(all_sera_classes):
                            osm_ids.append('OSM_'+str(osm_id))
                            taxonomy_stars.append(bdg_class)
                            # The taxonomy string before '///' is the plain taxonomy:
                            taxonomies.append(bdg_class.split('///')[0])
                            admin_names.append(adm_name)
                            admin_ids.append(country_adminID)
                            # Retrieve parameters from SERA for this building class:
                            country_adminid_locs, parameter_vals, col_names, col_contents= gdet_serah.retrieve_parameters_for_taxonomy(bdg_class, bdg_classes_hdf5_filename)
                            # Calculate people and cost. The column index is
                            # recovered from the column name whose content
                            # label matches the parameter being sought:
                            which_row_admin_id= np.where(country_adminid_locs==country_adminID)[0][0]
                            dwell_per_bdg[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='dwell_per_bdg')[0][0]].split('_')[-1])]
                            area_per_dwelling_sqm[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='area_per_dwelling_sqm')[0][0]].split('_')[-1])]
                            cost_per_area_usd[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='cost_per_area_usd')[0][0]].split('_')[-1])]
                            ppl_per_dwell[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='ppl_per_dwell')[0][0]].split('_')[-1])]
                        # Accumulate derived quantities for all classes of
                        # this building (column 0 of SERA_vals is assumed to
                        # be the building-count proportion — TODO confirm):
                        num_bdgs= np.hstack([num_bdgs, all_sera_proportions[:,0]])
                        num_dwells_local= all_sera_proportions[:,0] * dwell_per_bdg
                        num_dwells= np.hstack([num_dwells, num_dwells_local])
                        cost_local= num_dwells_local * area_per_dwelling_sqm * cost_per_area_usd
                        cost= np.hstack([cost, cost_local])
                        ppl_local= num_dwells_local * ppl_per_dwell
                        ppl= np.hstack([ppl, ppl_local])
                        lon= np.hstack([lon, np.array([lon_val for i in range(0,len(all_sera_classes))])])
                        lat= np.hstack([lat, np.array([lat_val for i in range(0,len(all_sera_classes))])])
                    else:
                        # Building counted in the total but has no SERA class
                        # distribution for this disagg_method:
                        any_bdgs_without_SERA_classes= True
                        obm_without_classes_num_bdgs_per_adm_unit[j]= obm_without_classes_num_bdgs_per_adm_unit[j] + 1.0
            if len(osm_ids)>0:
                data= {'osm_id': osm_ids,
                       'lon': lon,
                       'lat': lat,
                       'taxonomy': taxonomies,
                       'taxonomy_star': taxonomy_stars,
                       'number': num_bdgs,
                       'num_dwells': num_dwells,
                       'structural': cost,
                       'night': ppl,
                       'admin_name': admin_names,
                       'admin_ID': admin_ids}
                df= pd.DataFrame(data, columns=data.keys())
                out_dict['DF']= df
                out_dict['Number_Tot']= num_bdgs_tot # it is, by definition, >=['Number_with_Classes']
                if any_bdgs_without_SERA_classes:
                    out_dict['Number_with_Classes']= df['number'].values.sum()
                else:
                    out_dict['Number_with_Classes']= int(round(num_bdgs_tot, 0)) # done like this to avoid differences due to floating point issues in the sum(); it is a round number because it is the same as num_bdgs_tot
            else:
                out_dict['DF']= np.empty([0], dtype='int') # I need it to have a "shape" method
                out_dict['Number_with_Classes']= 0
                out_dict['Number_Tot']= num_bdgs_tot
        elif occu_case=='Oth': # there is no SERA because occupancy=Other
            # gather number of buildings only; no class breakdown exists
            num_other= 0
            for j, country_adminID in enumerate(list_country_admin_units):
                num_other= num_other + fle[country_adminID].attrs['Num_Bdgs']
                obm_without_classes_num_bdgs_per_adm_unit[j]= fle[country_adminID].attrs['Num_Bdgs']
                list_country_admin_units_levels[j]= -99
            out_dict['DF']= np.empty([0], dtype='int') # I need it to have a "shape" method
            out_dict['Number_with_Classes']= 0
            out_dict['Number_Tot']= num_other
        else:
            # NOTE(review): unreachable — occu_case!='Oth' and occu_case=='Oth'
            # together cover every value; validation of occu_case against the
            # known set (Res, Com, Ind, Oth) would have to happen earlier.
            print('    ERROR IN retrieve_OBM_buildings_in_cell_from_hdf5: occu_case NOT RECOGNISED')
            error_messages= error_messages+ '  ERROR IN retrieve_OBM_buildings_in_cell_from_hdf5: occu_case NOT RECOGNISED'
        fle.close()
    return out_dict, list_country_admin_units, obm_without_classes_num_bdgs_per_adm_unit, list_country_admin_units_levels, error_messages
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
"""
Copyright (C) 2020
Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Global Dynamic Exposure Model
Helmholtz Centre Potsdam
GFZ German Research Centre for Geosciences
Section 2.6: Seismic Hazard and Risk Dynamics
GDE_TOOLS_world_grid
====================
THIS CODE ALLOWS TO GENERATE THE WORLD GRID THAT WE
USE FOR OUR DYNAMIC EXPOSURE MODEL
HARD-CODED ASSUMPTIONS ARE:
- The grid spacing is 10 arc-seconds.
- The grid runs from -180.0 through +180.0 in longitude
- The grid runs from -90.0 through +90.0 in latitude
- The top-left-most cell (NW) is cell number 0
- The cell id increases from this first cell to the east, by "row"
- At the end of each row, the cell id "jumps" to the first (westmost)
cell of the next row.
"""
import numpy as np
import geopandas as gpd
import pandas as pd
import shapely
import pyproj
from functools import partial
def adjust_lon(coord_lon):
    """
    Clamp a longitude into the grid so that get_cell_id returns a valid cell
    at the extremes of the grid. Based on the assumption of a 10 arcsec grid:
    values are pulled half a cell (5 arcsec) inside the [-180, 180] range.
    """
    half_cell= (5.0/60.0)/60.0
    lower_limit= -180.0+half_cell
    upper_limit= 180.0-half_cell
    if coord_lon>upper_limit:
        return upper_limit
    if coord_lon<lower_limit:
        return lower_limit
    return coord_lon
def adjust_lat(coord_lat):
    """
    Clamp a latitude into the grid so that get_cell_id returns a valid cell
    at the extremes of the grid. Based on the assumption of a 10 arcsec grid:
    values are pulled half a cell (5 arcsec) inside the [-90, 90] range.
    """
    half_cell= (5.0/60.0)/60.0
    lower_limit= -90.0+half_cell
    upper_limit= 90.0-half_cell
    if coord_lat>upper_limit:
        return upper_limit
    if coord_lat<lower_limit:
        return lower_limit
    return coord_lat
def get_cell_id(coord_lon, coord_lat):
    """
    Return the ID of the 10 arcsec grid cell containing (coord_lon, coord_lat).

    coord_lon= [-180.0,+180.0]
    coord_lat= [-90.0,+90.0]

    IDs are row-major starting from 0 at the north-west cell and increasing
    eastwards; at the end of each row the ID jumps to the westmost cell of the
    row to the south (see the module header). adjust_lon/adjust_lat are used so
    that points exactly on the outer edges fall into the outermost cells.
    """
    n_cols= 129600  # 360 deg * 360 cells per degree (10 arcsec)
    lon_edges= np.linspace(-180.0, 180.0, num=n_cols+1)
    lat_edges_ascending= np.linspace(-90.0, 90.0, num=64801)
    # Column index (from the west) and row index (from the south):
    col_from_west= np.searchsorted(lon_edges, adjust_lon(coord_lon)) - 1
    row_from_south= np.searchsorted(lat_edges_ascending, adjust_lat(coord_lat)) - 1
    # 8397950400 = n_cols * 64799, i.e. the ID of the westmost cell of the
    # southernmost row; subtracting n_cols per row moving north keeps the
    # north-west cell at ID 0.
    return col_from_west + 8397950400 - n_cols * row_from_south
def get_corner_cells_of_bbox(sw_lon, sw_lat, ne_lon, ne_lat):
    """
    Return the IDs of the four cells at the corners of the bounding box
    defined by the input coordinates.

    sw_lon= longitude of the South-West corner
    sw_lat= latitude of the South-West corner
    ne_lon= longitude of the North-East corner
    ne_lat= latitude of the North-East corner

    The output order is counter-clockwise starting from the South-West corner:
    South-West, South-East, North-East, North-West.
    """
    south_west= get_cell_id(sw_lon, sw_lat)
    south_east= get_cell_id(ne_lon, sw_lat)
    north_east= get_cell_id(ne_lon, ne_lat)
    north_west= get_cell_id(sw_lon, ne_lat)
    return south_west, south_east, north_east, north_west
def get_cells_in_bbox(sw_lon, sw_lat, ne_lon, ne_lat):
    """
    Return the IDs of all the cells contained within the bounding box
    defined by the input coordinates.

    sw_lon= longitude of the South-West corner
    sw_lat= latitude of the South-West corner
    ne_lon= longitude of the North-East corner
    ne_lat= latitude of the North-East corner

    The output list starts with the North-West corner and moves first to the
    East, then jumps to the row to the South, again going from West to East.
    A progress message is printed every 50000 cells for large boxes.
    """
    cell_sw_id, cell_se_id, cell_ne_id, cell_nw_id= get_corner_cells_of_bbox(sw_lon, sw_lat, ne_lon, ne_lat)
    ew_width= cell_ne_id - (cell_nw_id - 1) # number of cells in the width of the bbox
    ns_height= int((cell_sw_id - cell_nw_id) / 129600 + 1) # number of cells in the height of the bbox
    # Hoisted loop invariants: the original recomputed ew_width*ns_height and
    # the >50000 comparison on every inner-loop iteration.
    total_cells= ew_width * ns_height
    report_progress= total_cells > 50000
    cells_ids= []
    count= 0
    for i in range(0, ns_height):
        row_start= cell_nw_id + i * 129600 # westmost cell of this row
        for j in range(0, ew_width):
            if report_progress and count % 50000 == 0:
                print(' Working on cell '+str(count+1)+' of '+str(total_cells))
            count= count + 1
            cells_ids.append(row_start + j)
    return cells_ids
def get_coordinates_from_cell_id(cell_id):
    """
    Given a cell ID, return the coordinates defining the four corners of the
    cell, in the order (lon_w, lat_s, lon_e, lat_n).

    For an invalid ID (outside [0, 8398080000)), an error is printed and
    (-999.9, -999.9, -999.9, -999.9) is returned.
    """
    if not (-1 < cell_id < 8398080000):
        print('ERROR!! CELL ID '+str(cell_id)+' DOES NOT EXIST!!!')
        return -999.9, -999.9, -999.9, -999.9
    lon_edges= np.linspace(-180.0, 180.0, num=129601)
    lat_edges_ascending= np.linspace(-90.0, 90.0, num=64801)
    # Invert the row-major ID scheme (ID 0 = north-west cell): row index
    # counted from the south, then column index within that row.
    row_from_south= 64799 - (cell_id // 129600)
    col_from_west= cell_id - 8397950400 + 129600 * row_from_south
    west= lon_edges[col_from_west]
    east= lon_edges[col_from_west+1]
    south= lat_edges_ascending[row_from_south]
    north= lat_edges_ascending[row_from_south+1]
    return west, south, east, north
def get_cells_geometry_as_geopandas(cell_id_list, filename=None):
"""
This function returns a GeoPandas DataFrame of all cells in cell_id_list.
If a filename is given, it will export it as a ShapeFile.
cell_id_list= list of cell IDs
"""
geom_list= []
for cell_id in cell_id_list:
lon_w, lat_s, lon_e, lat_n= get_coordinates_from_cell_id(cell_id)
geom_list.append(shapely.geometry.Polygon([(lon_w,lat_s), (lon_e,lat_s), (lon_e,lat_n), (lon_w,lat_n)]))
geometry = gpd.GeoSeries(geom_list)
data= {'cell_id': cell_id_list,
'geometry': geometry}
out_gdf= gpd.GeoDataFrame(data, columns = ['cell_id','geometry'])
out_gdf.crs = {'init' :'epsg:4326'}
if filename: