Commit 8c9e451d authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

Added draft functionality to create district tiles

parent a58b2bbf
Pipeline #26152 failed with stage
in 54 seconds
......@@ -32,11 +32,13 @@ class DataUnit:
ID of the DataUnit (e.g. ID of the administrative unit it represents).
self.geometry (Shapely polygon):
Geometry of the data unit.
self.district_tiles
"""
def __init__(self, dataunit_id, geometries_table, target_column_name):
self.id = dataunit_id
self.geometry = self.get_data_unit_geometry(geometries_table, target_column_name)
self.district_tiles = None
def get_data_unit_geometry(self, geometries_table, target_column_name):
"""This function retrieves the geometry of the data unit, by reading it from
......
......@@ -17,6 +17,12 @@
# along with this program. If not, see http://www.gnu.org/licenses/.
import logging
import geopandas
from pyproj import CRS
from multiprocessing import Pool
from babelgrid import Babel
from gdeimporter.tools_tiles import define_district_tiles
import pdb
logger = logging.getLogger()
......@@ -76,3 +82,27 @@ class ExposureEntity:
def __init__(self, name):
self.name = name
self.occupancy_cases = {}
def create_district_tiles(self, occupancy_case, number_cores=4):
data_units_ids = list(self.occupancy_cases[occupancy_case]["data_units"].keys())
data_units_geoms = [
dataunit.geometry
for dataunit in self.occupancy_cases[occupancy_case]["data_units"].values()
]
p = Pool(processes=number_cores)
all_district_tiles = p.map(define_district_tiles, data_units_geoms)
p.close()
p.join()
"""
all_district_tiles = []
for i, data_unit_geometry in enumerate(data_units_geoms):
all_district_tiles.append(define_district_tiles(data_unit_geometry)) #, data_units_ids[i]))
"""
for i, data_unit_id in enumerate(data_units_ids):
self.occupancy_cases[occupancy_case]["data_units"][
data_unit_id
].district_tiles = all_district_tiles[i]
return
......@@ -20,6 +20,7 @@ import logging
import sys
from gdeimporter.configuration import Configuration
from gdeimporter.aggregatedexposuremodel import ExposureModelESRM20
import pdb
# Add a logger printing error, warning, info and debug messages to the screen
logger = logging.getLogger()
......@@ -54,7 +55,8 @@ def main():
for exposure_entity_name in config.exposure_entities_to_run:
for occupancy_case in config.occupancies_to_run:
aem.get_data_units(config, exposure_entity_name, occupancy_case)
aem.exposure_entities[exposure_entity_name].create_district_tiles(occupancy_case)
# pdb.set_trace()
print("Name of the model: %s" % (aem.model_name))
print("Format: %s" % (aem.exposure_format))
if aem.occupancy_cases is not None:
......
#!/usr/bin/env python3
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import logging
import geopandas
import numpy
import pyproj
import shapely
from functools import partial
from babelgrid import Babel
import pdb
logger = logging.getLogger()
def define_district_tiles(in_geometry):
"""This function defines the district tiles associated with 'in_geometry'. District tiles
are defined as the intersection between zoom level 18 quadtiles and 'in_geometry'.
Args:
in_geometry (Shapely Polygon or MultiPolygon):
Geometry for which the associated district tiles will be defined.
Returns:
district_tiles (GeoPandas GeoDataFrame without geometry):
GeoDataFrame containing the definition of the district tiles in terms of:
quadkey (str):
Quadkey of the associated zoom level 18 quadtile.
size_district_tile_area (float):
Surface area of the district tile, in m2.
size_district_tile_built_up_area (float):
Built-up area of the district tile, in m2.
fraction_district_area (float):
Fraction (0.0, 1.0] that the surface area of the district tile represents
with respect to the surface area of 'in_geometry' (i.e. the surface area of
the summation of all defined district tiles). It cannot be zero.
fraction_district_built_up_area (float):
Fraction [0.0, 1.0] that the built-up area of the district tile represents
with respect to the built-up area contained by 'in_geometry' (i.e. the
summation of built-up areas of all defined district tiles). It can be zero.
"""
# Retrieve all quadtiles associated with in_geometry
full_quadtiles = get_quadtiles_from_polygon(in_geometry)
# Create a GeoPandas GeoDataFrame with the quadtiles
full_ids = [tile.tile_id for tile in full_quadtiles]
full_geometries = [tile.geometry.shapely for tile in full_quadtiles]
grid = geopandas.GeoDataFrame({"quadkey": full_ids, "geometry": full_geometries})
grid.crs = pyproj.CRS("epsg:4326")
# Create a GeoPandas GeoDataFrame with the original geometry
in_geometry_df = geopandas.GeoDataFrame({"geometry": [in_geometry]})
in_geometry_df.crs = pyproj.CRS("epsg:4326")
# Intersect quadtiles with the original geometry
district_tiles = geopandas.overlay(grid, in_geometry_df, how="intersection")
# Check that the order of the quadkeys in 'grid' and 'district_tiles' is the same
if not numpy.all(grid["quadkey"].values == district_tiles["quadkey"].values):
return # See what to do in this case
# Determine which tiles are fully contained in the original geometry 'in_geometry'
district_tiles["contained"] = determine_if_areas_are_the_same(district_tiles, grid)
# Calculate district tile areas and fractions of tiles
all_areas = numpy.zeros([district_tiles.shape[0]])
all_fractions = numpy.zeros([district_tiles.shape[0]])
# Tiles that are fully contained in the original geometry 'in_geometry'
all_areas[district_tiles.contained] = get_areas_rectangles(
district_tiles[district_tiles.contained]["geometry"].values
)
all_fractions[district_tiles.contained] = numpy.ones(
[len(all_areas[district_tiles.contained])]
)
# Tiles that are partially contained in the original geometry 'in_geometry'
results = get_areas_and_ratios_arbitrary_shapes(
district_tiles[numpy.logical_not(district_tiles.contained)]["geometry"].values,
grid[numpy.logical_not(district_tiles.contained)]["geometry"].values,
)
(
all_areas[numpy.logical_not(district_tiles.contained)],
_,
all_fractions[numpy.logical_not(district_tiles.contained)],
) = results
# Write values to 'district_tiles'
district_tiles["size_district_tile_area"] = all_areas
district_tiles["size_district_tile_built_up_area"] = (
retrieve_built_up_area(district_tiles["quadkey"].values) * all_fractions
)
district_tiles["fraction_district_area"] = all_areas / all_areas.sum()
district_tiles["fraction_district_built_up_area"] = (
district_tiles["size_district_tile_built_up_area"].values
/ district_tiles["size_district_tile_built_up_area"].values.sum()
) # NEED TO DEFINE WHAT TO DO WHEN THE DENOMINATOR IS ZERO!!!
# Discard unnecessary columns of 'district_tiles' (free up memory)
district_tiles = district_tiles.drop(columns=["geometry", "contained"])
return district_tiles
def retrieve_built_up_area(quadkeys):
"""This function retrieves the built-up area associated with the input 'quadkeys' from the
OBM Tiles database.
================== TO BE IMPLEMENTED ==================
Returns:
Array of floats with the built-up area values associated with the 'quadkeys'.
"""
return numpy.ones([len(quadkeys)])
def get_quadtiles_from_polygon(in_geometry, zoomlevel=18):
"""This function retrieves the quadtiles of zoom level 'zoomlevel' that fully contain
'in_geometry'. The quadtiles are defined in EPSG:3857 projection, but their geometry is
given in the WGS84 coordinate system (ESPG:4326).
Args:
in_geometry (Shapely Polygon or MultiPolygon):
Geometry for which the associated quadtiles will be retrieved.
zoomlevel (int):
Zoom level of the quadtiles to be retrieved (>0).
Returns:
tiles (list of babelgrid Tile objects):
List of retrieved tiles with the following relevant attributes:
Tile.tile_id (str):
Quadkey of the tile.
Tile.geometry.shapely:
Shapely polygon of the tile geometry.
"""
# ADD CHECK FOR ZOOM LEVEL NOT TO BE TOO LARGE DUE TO PROBLEM ON EDGES OF THE WORLD?
tiles = []
if in_geometry.geom_type == "Polygon":
geometries = [in_geometry]
elif in_geometry.geom_type == "MultiPolygon":
geometries = list(in_geometry)
else:
raise IOError("get_quadtiles_from_polygon: argument is not a valid geometry")
for geometry in geometries:
try:
tiles.extend(Babel("bing").polyfill(geometry, resolution=zoomlevel))
except ZeroDivisionError:
# Quadkeys with this zoomlevel are bigger than the geometry, use the centroid
centroid = geometry.centroid
tiles.append(
Babel("bing").geo_to_tile(lat=centroid.y, lon=centroid.x, resolution=zoomlevel)
)
return tiles
def get_area_Albers_equal_area(input_polygon):
"""This function returns the area (m2) of a polygon on Earth calculated using the Albers
equal area projection. The polygon's geometry is assumed to be defined in ESPG:4326.
Args:
in_geometry (Shapely polygon):
Polygon defined in ESPG:4326.
Returns:
projected_area: Area of the polygon in m2 (in Albers equal area projection).
"""
# Get the bounding box of the polygon
lon_w = input_polygon.bounds[0]
lat_s = input_polygon.bounds[1]
lon_e = input_polygon.bounds[2]
lat_n = input_polygon.bounds[3]
# Use the resulting coordinates to carry out the transformation
project_albers = pyproj.Proj(
"+proj=aea +lat_1={} +lat_2={} +lat_0={} +lon_0={}".format(
lat_s, lat_n, (lat_s + lat_n) / 2.0, (lon_w + lon_e) / 2.0
)
)
geometry = shapely.ops.transform(project_albers, input_polygon)
return geometry.area
def get_area_on_sphere(lon_w, lat_s, lon_e, lat_n, earth_sphere_radius=6371000.0):
"""This function returns the area (m2) of a rectangle on Earth calculated assuming the Earth
is a sphere with radius 'earth_sphere_radius'. The rectangle is defined by its four bounding
coordinates 'lon_w', 'lat_s', 'lon_e' and 'lat_n', all given in degrees.
When used to calculate the area of zoom level 18 quadtiles, this approximate function yields
area values that differ in less than 1% with respect to those calculated using the Albers
equal area projection.
References:
Kelly, K., Šavrič, B. (2021). Area and volume computation of longitude–latitude grids
and three-dimensional meshes. Transactions in GIS, 25(1):6–24.
Args:
lon_w (float or array of floats):
West bound of the tile(s), in degrees.
lat_s (float or array of floats):
South bound of the tile(s), in degrees.
lon_e (float or array of floats):
East bound of the tile(s), in degrees.
lat_n (float or array of floats):
North bound of the tile(s), in degrees.
earth_sphere_radius (float):
Radius of the Earth assuming a sphere, in meters. Default: 6,371,000 m.
Returns:
area (float or array of floats):
Area of the rectangles defined by the input coordinates, in meters.
"""
area = (
(numpy.pi / 180.0)
* numpy.power(earth_sphere_radius, 2)
* numpy.abs(numpy.sin(numpy.deg2rad(lat_n)) - numpy.sin(numpy.deg2rad(lat_s)))
* numpy.abs(lon_e - lon_w)
)
return area
def determine_if_areas_are_the_same(district_tiles, tiles):
"""This function compares the areas of the geometries of 'district_tiles' and 'tiles' and
determines whether they are the same value or not. Each element of .district_tiles' needs
to be equal to or smaller than its corresponding element in 'tiles'. Both GeoDataFrames
neeed to have the same length and be defined in the same CRS.
The areas are determined using the built-in function of GeoPandas GeoDataFrames. Their
actual value is of no consequence (i.e. the CRS can be ESPG:4326) because it is the ratio
that is relevant.
Args:
district_tiles, tiles (GeoPandas GeoDataFrames):
GeoDataFrames with at least a "geometry" field, both with the same CRS (any).
Returns:
are_the_same (array of bool):
Array with the same length as 'district_tiles' and 'tiles', indicating whether the
areas are the same (True) or not (False).
"""
unprojected_area_district_tiles = district_tiles["geometry"].area
unprojected_area_tiles = tiles["geometry"].area
unprojected_area_ratio = (
unprojected_area_district_tiles.values / unprojected_area_tiles.values
)
are_the_same = numpy.array([False for i in range(district_tiles.shape[0])])
are_the_same[numpy.where(unprojected_area_ratio > 0.99999)[0]] = True
return are_the_same
def get_areas_rectangles(geometries):
"""This function uses get_area_on_sphere() to calculate the area of each polygon of
'geometries', which is assumed to be of rectangular shape (only the four bounding
coordinates are considered for the calculation). See get_area_on_sphere() for details on
relevant assumptions.
Args:
geometries (array of Shapely Polygons):
Array of polygons, each of which is assumed to be of rectangular shape and defined
in ESPG:4326.
Returns:
areas (array of floats):
Areas of the rectangles defined in 'geometries', in meters.
"""
lons_w = numpy.array([geometries[i].bounds[0] for i in range(len(geometries))])
lats_s = numpy.array([geometries[i].bounds[1] for i in range(len(geometries))])
lons_e = numpy.array([geometries[i].bounds[2] for i in range(len(geometries))])
lats_n = numpy.array([geometries[i].bounds[3] for i in range(len(geometries))])
areas = get_area_on_sphere(lons_w, lats_s, lons_e, lats_n)
return areas
def get_areas_and_ratios_arbitrary_shapes(geometries_1, geometries_2):
"""This function uses get_area_Albers_equal_area() to calculate the area of each polygon of
'geometries_1' and 'geometries_2' and calculates the ratio of the former with respect to the
latter. Both 'geometries_1' and 'geometries_2' need to have the same length.
Args:
geometries_1, geometries_2 (arrays of Shapely Polygons):
Array of polygons of arbitrary shapes, defined in ESPG:4326.
Returns:
areas_1, areas_2 (arrays of floats):
Areas in meters of the rectangles defined in 'geometries_1' and 'geometries_2',
respectively.
ratios (array of floats):
Ratios of areas_1 to areas_2.
"""
areas_1 = numpy.zeros([len(geometries_1)])
areas_2 = numpy.zeros([len(geometries_1)])
ratios = numpy.zeros([len(geometries_1)])
for i in range(len(geometries_1)):
areas_1[i] = get_area_Albers_equal_area(geometries_1[i])
areas_2[i] = get_area_Albers_equal_area(geometries_2[i])
ratios[i] = areas_1[i] / areas_2[i]
return areas_1, areas_2, ratios
......@@ -28,7 +28,15 @@ setup(
keywords="Global Dynamic Exposure, GDE, buildings, exposure model",
author="Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ",
license="AGPLv3+",
install_requires=["numpy", "pyyaml", "pandas", "openpyxl", "geopandas"],
install_requires=[
"numpy",
"pyyaml",
"pandas",
"openpyxl",
"geopandas",
"babelgrid",
"rtree",
],
extras_require={
"tests": tests_require,
"linters": linters_require,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment