From 6d5199c454c670cf0285205f8469290e7a435b42 Mon Sep 17 00:00:00 2001
From: Cecilia Nievas <cnievas@gfz-potsdam.de>
Date: Tue, 27 Oct 2020 17:58:37 +0100
Subject: [PATCH] Test and test data created for
 SERA_mapping_admin_units_to_cells.py

---
 SERA_mapping_admin_units_to_cells.py      |   8 +-
 test_SERA_mapping_admin_units_to_cells.py |  96 ++++++++++++++++++++++
 tests_datasets/mapping_cells.csv          |   7 ++
 tests_datasets/square_adm_units.cpg       |   1 +
 tests_datasets/square_adm_units.dbf       | Bin 0 -> 390 bytes
 tests_datasets/square_adm_units.prj       |   1 +
 tests_datasets/square_adm_units.shp       | Bin 0 -> 644 bytes
 tests_datasets/square_adm_units.shx       | Bin 0 -> 132 bytes
 8 files changed, 109 insertions(+), 4 deletions(-)
 create mode 100644 test_SERA_mapping_admin_units_to_cells.py
 create mode 100644 tests_datasets/mapping_cells.csv
 create mode 100644 tests_datasets/square_adm_units.cpg
 create mode 100644 tests_datasets/square_adm_units.dbf
 create mode 100644 tests_datasets/square_adm_units.prj
 create mode 100644 tests_datasets/square_adm_units.shp
 create mode 100644 tests_datasets/square_adm_units.shx

diff --git a/SERA_mapping_admin_units_to_cells.py b/SERA_mapping_admin_units_to_cells.py
index 2a4bda1..24cd1de 100644
--- a/SERA_mapping_admin_units_to_cells.py
+++ b/SERA_mapping_admin_units_to_cells.py
@@ -176,7 +176,7 @@ def process_country(country_str, country_pos, number_countries,
                 # Pass on adm_boundary and the row
                 # to process all cells associated with this admin unit:
                 for results_adm in process_admin_unit(adm_boundary,
-                                                      which_row):
+                                                      which_row[0]):
                     cell_id, area_intersect, geom_intersect = results_adm
                     yield cell_id, level_groups_levels[j], admin_id, \
                         area_intersect, geom_intersect, level_groups[j], \
@@ -190,13 +190,13 @@ def process_admin_unit(adm_boundary_gdf, which_row_of_gdf):
     which_row_of_gdf = row of adm_boundary_gdf to be processed
     """
     # Bounding box of the admin unit:
-    bbox = adm_boundary_gdf['geometry'].values[which_row_of_gdf[0]].bounds
+    bbox = adm_boundary_gdf['geometry'].values[which_row_of_gdf].bounds
     # GeoPandas DF with the cells associated with this admin unit:
     gdf_cells = gdet_grid.get_cells_in_bbox_as_geopandas(bbox[0], bbox[1],
                                                          bbox[2], bbox[3])
     # gdf_intersect has as many rows as cells do intersect this admin unit:
     gdf_intersect = gpd.sjoin(gdf_cells,
-                              adm_boundary_gdf.iloc[[which_row_of_gdf[0]], :],
+                              adm_boundary_gdf.iloc[[which_row_of_gdf], :],
                               how='right')
 
     # Loop through all cells that intersect the admin boundary:
@@ -204,7 +204,7 @@ def process_admin_unit(adm_boundary_gdf, which_row_of_gdf):
         print('\r      Intersection of cell '+str(gdf_intersect['cell_id'].values[i]),
               end='')
         geom_cell = gdf_cells['geometry'].values[gdf_intersect['index_left'].values[i]]
-        geom_adm_unit = adm_boundary_gdf['geometry'].values[which_row_of_gdf[0]]
+        geom_adm_unit = adm_boundary_gdf['geometry'].values[which_row_of_gdf]
         geom_intersect = geom_adm_unit.intersection(geom_cell)
         area_intersect = gdet_grid.get_area_polygon_on_Earth(geom_intersect,
                                                              units='km2')
diff --git a/test_SERA_mapping_admin_units_to_cells.py b/test_SERA_mapping_admin_units_to_cells.py
new file mode 100644
index 0000000..748d5a4
--- /dev/null
+++ b/test_SERA_mapping_admin_units_to_cells.py
@@ -0,0 +1,96 @@
+import os
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+from shapely.wkt import loads
+import SERA_mapping_admin_units_to_cells as sera_mapping
+
+
+PATH_DATA = os.path.join(os.getcwd(), 'tests_datasets')
+
+FLOAT_TOL = 11
+
+
+def get_geometry_for_cell_id(target_id, target_adm_id, df):
+    """
+    Read the results for this target cell ID from the df DataFrame.
+    """
+    which = np.where(np.logical_and(df['cell_id'].values == target_id,
+                                    df['adm_ID'].values == target_adm_id))[0]
+    if len(which) != 1:
+        return 'NOT A POLYGON'
+    else:
+        geometry = df['geometry'].apply(loads)  # transform into shapely polyg
+        polygon = geometry.values[which][0]
+        return polygon
+
+
+def test_process_admin_unit():
+    """
+    Inputs of process_admin_unit():
+    - GeoPandas DataFrame containing administrative boundaries
+    - a row of interest in the GeoPandas DataFrame (integer); this points at
+    a particular admin unit
+
+    Outputs of process_admin_unit() in each loop (yield):
+    - the cell ID of a cell that intersects the admin unit (string)
+    - the area of intersection between the cell and the admin unit (float)
+    - the geometry of the intersection (Shapely Polygon)
+
+    As the area of the intersection is calculated from the geometry of the
+    intersection, this function tests that the geometry of the intersections
+    are correct. The datasets used to test were generated with
+    create_data_test_SERA_mapping.ipynb. and are stored in the folder
+    tests_datasets as mapping_cells.csv and square_adm_units.shp. The
+    comparison of the intersection polygons is carried out by calculating
+    the area of the intersection (of the intersections) and comparing it to
+    the areas of the original intersection polygons. If the polygons being
+    compared are the same, the area of their intersection should be the same
+    as the area of each of them. In other words, if A is the geometry of the
+    intersection as calculated within process_admin_unit() and B is the "true"
+    geometry of the intersection as stored in mapping_cells.csv, this function
+    checks that:
+    (A intersect B).area == A.area
+    AND
+    (A intersect B).area == B.area
+    """
+    # Open test results:
+    res_df = pd.read_csv(os.path.join(PATH_DATA, 'mapping_cells.csv'),
+                         sep=';',
+                         dtype={'cell_id': str,
+                                'adm_ID': str,
+                                'area_ratio': float,
+                                'geometry': str})
+    cell_ids_to_check = np.unique(res_df['cell_id'].values)
+
+    # Open test shapefile:
+    shp_gdf = gpd.GeoDataFrame.from_file(os.path.join(PATH_DATA,
+                                                      'square_adm_units.shp'))
+    for row in range(shp_gdf.shape[0]):
+        for results in sera_mapping.process_admin_unit(shp_gdf, row):
+            # Results of function being tested:
+            cell_id, _, intersect_geom = results
+
+            if cell_id not in cell_ids_to_check:
+                continue
+
+            # True results against which to compare:
+            true_geom = get_geometry_for_cell_id(cell_id,
+                                                 shp_gdf['adm_ID'].values[row],
+                                                 res_df)
+
+            intersection = true_geom.intersection(intersect_geom)
+
+            # Compare:
+            print('\n         Comparing %s against %s'
+                  % (("{:.%gf}"
+                      % (FLOAT_TOL)).format(intersection.area),
+                     ("{:.%gf}"
+                      % (FLOAT_TOL)).format(intersect_geom.area)))
+            print('         Comparing %s against %s'
+                  % (("{:.%gf}"
+                      % (FLOAT_TOL)).format(intersection.area),
+                     ("{:.%gf}"
+                      % (FLOAT_TOL)).format(true_geom.area)))
+            assert round(intersection.area, FLOAT_TOL) == round(intersect_geom.area, FLOAT_TOL)
+            assert round(intersection.area, FLOAT_TOL) == round(true_geom.area, FLOAT_TOL)
diff --git a/tests_datasets/mapping_cells.csv b/tests_datasets/mapping_cells.csv
new file mode 100644
index 0000000..8ebbf72
--- /dev/null
+++ b/tests_datasets/mapping_cells.csv
@@ -0,0 +1,7 @@
+cell_id;adm_ID;area_ratio;geometry
+2427610968;ID_1;1.00;POLYGON ((23.8 37.9666666666, 23.8027777777 37.9666666666, 23.8027777777 37.9694444444, 23.8 37.9694444444, 23.8 37.9666666666))
+2427610968;ID_2;1.00;POLYGON ((23.8 37.9666666666, 23.8027777777 37.9666666666, 23.8027777777 37.9694444444, 23.8 37.9694444444, 23.8 37.9666666666))
+2427610968;ID_3;0.50;POLYGON ((23.8 37.9666666666, 23.8013888888 37.9666666666, 23.8013888888 37.9694444444, 23.8 37.9694444444, 23.8 37.9666666666))
+2427610968;ID_4;0.25;POLYGON ((23.8 37.9680555555, 23.8013888888 37.9680555555, 23.8013888888 37.9694444444, 23.8 37.9694444444, 23.8 37.9680555555))
+2427610969;ID_1;1.00;POLYGON ((23.8027777777 37.9666666666, 23.8055555555 37.9666666666, 23.8055555555 37.9694444444, 23.8027777777 37.9694444444, 23.8027777777 37.9666666666))
+2427610969;ID_2;0.50;POLYGON ((23.8027777777 37.9666666666, 23.8041666666 37.9666666666, 23.8041666666 37.9694444444, 23.8027777777 37.9694444444, 23.8027777777 37.9666666666))
diff --git a/tests_datasets/square_adm_units.cpg b/tests_datasets/square_adm_units.cpg
new file mode 100644
index 0000000..cd89cb9
--- /dev/null
+++ b/tests_datasets/square_adm_units.cpg
@@ -0,0 +1 @@
+ISO-8859-1
\ No newline at end of file
diff --git a/tests_datasets/square_adm_units.dbf b/tests_datasets/square_adm_units.dbf
new file mode 100644
index 0000000000000000000000000000000000000000..73fb14e3710f761da93d4fb61b792baca4b5a946
GIT binary patch
literal 390
zcmZRs;F4xxU|?`$2xPzp5>s;HJzc;u&LA=XL?B^a1y7fFLj`IAPnUQjYMKXfp)s`$
JG@+(}QUGgOF9`qu

literal 0
HcmV?d00001

diff --git a/tests_datasets/square_adm_units.prj b/tests_datasets/square_adm_units.prj
new file mode 100644
index 0000000..a30c00a
--- /dev/null
+++ b/tests_datasets/square_adm_units.prj
@@ -0,0 +1 @@
+GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
\ No newline at end of file
diff --git a/tests_datasets/square_adm_units.shp b/tests_datasets/square_adm_units.shp
new file mode 100644
index 0000000000000000000000000000000000000000..25f1a6980b1e33794b75daaadf3fe2db3da43ff2
GIT binary patch
literal 644
zcmZQzQ0HR63K*SUFf%Z)GB7YqNR>HWc*fkJJo&Lj-)|=e?qs`{lh2tud^B`%arxuq
zfK@xHAR_|<g99;UffRz&fwZIB3**D=MRx~`4|69>J&e!5z`%sd9TTVAy?gg8!cU~Q
z16?nS53?7GJD72~L+n`1C3Q00fvy+E$Kwtbba!kyHQDavZzl(sURYRT3lErDnEzmE
M(e=Xk=;~p70G_JkX#fBK

literal 0
HcmV?d00001

diff --git a/tests_datasets/square_adm_units.shx b/tests_datasets/square_adm_units.shx
new file mode 100644
index 0000000000000000000000000000000000000000..46747912864642fd5cff8ca07ec1ecbc519d45f4
GIT binary patch
literal 132
zcmZQzQ0HR64xC;vGcd3+FfdF=l{sE`#@wMi`LRXcZzl)tWV@G>&zU=XG<0!s`Qzk(
WT|0`X5d#AQNL?9}-UX%qfoK3t7ZWJ}

literal 0
HcmV?d00001

-- 
GitLab