From 6d5199c454c670cf0285205f8469290e7a435b42 Mon Sep 17 00:00:00 2001 From: Cecilia Nievas <cnievas@gfz-potsdam.de> Date: Tue, 27 Oct 2020 17:58:37 +0100 Subject: [PATCH] Test and test data created for SERA_mapping_admin_units_to_cells.py --- SERA_mapping_admin_units_to_cells.py | 8 +- test_SERA_mapping_admin_units_to_cells.py | 96 ++++++++++++++++++++++ tests_datasets/mapping_cells.csv | 7 ++ tests_datasets/square_adm_units.cpg | 1 + tests_datasets/square_adm_units.dbf | Bin 0 -> 390 bytes tests_datasets/square_adm_units.prj | 1 + tests_datasets/square_adm_units.shp | Bin 0 -> 644 bytes tests_datasets/square_adm_units.shx | Bin 0 -> 132 bytes 8 files changed, 109 insertions(+), 4 deletions(-) create mode 100644 test_SERA_mapping_admin_units_to_cells.py create mode 100644 tests_datasets/mapping_cells.csv create mode 100644 tests_datasets/square_adm_units.cpg create mode 100644 tests_datasets/square_adm_units.dbf create mode 100644 tests_datasets/square_adm_units.prj create mode 100644 tests_datasets/square_adm_units.shp create mode 100644 tests_datasets/square_adm_units.shx diff --git a/SERA_mapping_admin_units_to_cells.py b/SERA_mapping_admin_units_to_cells.py index 2a4bda1..24cd1de 100644 --- a/SERA_mapping_admin_units_to_cells.py +++ b/SERA_mapping_admin_units_to_cells.py @@ -176,7 +176,7 @@ def process_country(country_str, country_pos, number_countries, # Pass on adm_boundary and the row # to process all cells associated with this admin unit: for results_adm in process_admin_unit(adm_boundary, - which_row): + which_row[0]): cell_id, area_intersect, geom_intersect = results_adm yield cell_id, level_groups_levels[j], admin_id, \ area_intersect, geom_intersect, level_groups[j], \ @@ -190,13 +190,13 @@ def process_admin_unit(adm_boundary_gdf, which_row_of_gdf): which_row_of_gdf = row of adm_boundary_gdf to be processed """ # Bounding box of the admin unit: - bbox = adm_boundary_gdf['geometry'].values[which_row_of_gdf[0]].bounds + bbox = adm_boundary_gdf['geometry'].values[which_row_of_gdf].bounds # GeoPandas DF with the cells associated with this admin unit: gdf_cells = gdet_grid.get_cells_in_bbox_as_geopandas(bbox[0], bbox[1], bbox[2], bbox[3]) # gdf_intersect has as many rows as cells do intersect this admin unit: gdf_intersect = gpd.sjoin(gdf_cells, - adm_boundary_gdf.iloc[[which_row_of_gdf[0]], :], + adm_boundary_gdf.iloc[[which_row_of_gdf], :], how='right') # Loop through all cells that intersect the admin boundary: @@ -204,7 +204,7 @@ def process_admin_unit(adm_boundary_gdf, which_row_of_gdf): print('\r Intersection of cell '+str(gdf_intersect['cell_id'].values[i]), end='') geom_cell = gdf_cells['geometry'].values[gdf_intersect['index_left'].values[i]] - geom_adm_unit = adm_boundary_gdf['geometry'].values[which_row_of_gdf[0]] + geom_adm_unit = adm_boundary_gdf['geometry'].values[which_row_of_gdf] geom_intersect = geom_adm_unit.intersection(geom_cell) area_intersect = gdet_grid.get_area_polygon_on_Earth(geom_intersect, units='km2') diff --git a/test_SERA_mapping_admin_units_to_cells.py b/test_SERA_mapping_admin_units_to_cells.py new file mode 100644 index 0000000..748d5a4 --- /dev/null +++ b/test_SERA_mapping_admin_units_to_cells.py @@ -0,0 +1,96 @@ +import os +import pandas as pd +import geopandas as gpd +import numpy as np +from shapely.wkt import loads +import SERA_mapping_admin_units_to_cells as sera_mapping + + +PATH_DATA = os.path.join(os.getcwd(), 'tests_datasets') + +FLOAT_TOL = 11 + + +def get_geometry_for_cell_id(target_id, target_adm_id, df): + """ + Read the results for this target cell ID from the df DataFrame. + """ + which = np.where(np.logical_and(df['cell_id'].values == target_id, + df['adm_ID'].values == target_adm_id))[0] + if len(which) != 1: + return 'NOT A POLYGON' + else: + geometry = df['geometry'].apply(loads) # transform into shapely polyg + polygon = geometry.values[which][0] + return polygon + + +def test_process_admin_unit(): + """ + Inputs of process_admin_unit(): + - GeoPandas DataFrame containing administrative boundaries + - a row of interest in the GeoPandas DataFrame (integer); this points at + a particular admin unit + + Outputs of process_admin_unit() in each loop (yield): + - the cell ID of a cell that intersects the admin unit (string) + - the area of intersection between the cell and the admin unit (float) + - the geometry of the intersection (Shapely Polygon) + + As the area of the intersection is calculated from the geometry of the + intersection, this function tests that the geometry of the intersections + are correct. The datasets used to test were generated with + create_data_test_SERA_mapping.ipynb. and are stored in the folder + tests_datasets as mapping_cells.csv and square_adm_units.shp. The + comparison of the intersection polygons is carried out by calculating + the area of the intersection (of the intersections) and comparing it to + the areas of the original intersection polygons. If the polygons being + compared are the same, the area of their intersection should be the same + as the area of each of them. In other words, if A is the geometry of the + intersection as calculated within process_admin_unit() and B is the "true" + geometry of the intersection as stored in mapping_cells.csv, this function + checks that: + (A intersect B).area == A.area + AND + (A intersect B).area == B.area + """ + # Open test results: + res_df = pd.read_csv(os.path.join(PATH_DATA, 'mapping_cells.csv'), + sep=';', + dtype={'cell_id': str, + 'adm_ID': str, + 'area_ratio': float, + 'geometry': str}) + cell_ids_to_check = np.unique(res_df['cell_id'].values) + + # Open test shapefile: + shp_gdf = gpd.GeoDataFrame.from_file(os.path.join(PATH_DATA, + 'square_adm_units.shp')) + for row in range(shp_gdf.shape[0]): + for results in sera_mapping.process_admin_unit(shp_gdf, row): + # Results of function being tested: + cell_id, _, intersect_geom = results + + if cell_id not in cell_ids_to_check: + continue + + # True results against which to compare: + true_geom = get_geometry_for_cell_id(cell_id, + shp_gdf['adm_ID'].values[row], + res_df) + + intersection = true_geom.intersection(intersect_geom) + + # Compare: + print('\n Comparing %s against %s' + % (("{:.%gf}" + % (FLOAT_TOL)).format(intersection.area), + ("{:.%gf}" + % (FLOAT_TOL)).format(intersect_geom.area))) + print(' Comparing %s against %s' + % (("{:.%gf}" + % (FLOAT_TOL)).format(intersection.area), + ("{:.%gf}" + % (FLOAT_TOL)).format(true_geom.area))) + assert round(intersection.area, FLOAT_TOL) == round(intersect_geom.area, FLOAT_TOL) + assert round(intersection.area, FLOAT_TOL) == round(true_geom.area, FLOAT_TOL) diff --git a/tests_datasets/mapping_cells.csv b/tests_datasets/mapping_cells.csv new file mode 100644 index 0000000..8ebbf72 --- /dev/null +++ b/tests_datasets/mapping_cells.csv @@ -0,0 +1,7 @@ +cell_id;adm_ID;area_ratio;geometry +2427610968;ID_1;1.00;POLYGON ((23.8 37.9666666666, 23.8027777777 37.9666666666, 23.8027777777 37.9694444444, 23.8 37.9694444444, 23.8 37.9666666666)) +2427610968;ID_2;1.00;POLYGON ((23.8 37.9666666666, 23.8027777777 37.9666666666, 23.8027777777 37.9694444444, 23.8 37.9694444444, 23.8 37.9666666666)) +2427610968;ID_3;0.50;POLYGON ((23.8 37.9666666666, 23.8013888888 37.9666666666, 23.8013888888 37.9694444444, 23.8 37.9694444444, 23.8 37.9666666666)) +2427610968;ID_4;0.25;POLYGON ((23.8 37.9680555555, 23.8013888888 37.9680555555, 23.8013888888 37.9694444444, 23.8 37.9694444444, 23.8 37.9680555555)) +2427610969;ID_1;1.00;POLYGON ((23.8027777777 37.9666666666, 23.8055555555 37.9666666666, 23.8055555555 37.9694444444, 23.8027777777 37.9694444444, 23.8027777777 37.9666666666)) +2427610969;ID_2;0.50;POLYGON ((23.8027777777 37.9666666666, 23.8041666666 37.9666666666, 23.8041666666 37.9694444444, 23.8027777777 37.9694444444, 23.8027777777 37.9666666666)) diff --git a/tests_datasets/square_adm_units.cpg b/tests_datasets/square_adm_units.cpg new file mode 100644 index 0000000..cd89cb9 --- /dev/null +++ b/tests_datasets/square_adm_units.cpg @@ -0,0 +1 @@ +ISO-8859-1 \ No newline at end of file diff --git a/tests_datasets/square_adm_units.dbf b/tests_datasets/square_adm_units.dbf new file mode 100644 index 0000000000000000000000000000000000000000..73fb14e3710f761da93d4fb61b792baca4b5a946 GIT binary patch literal 390 zcmZRs;F4xxU|?`$2xPzp5>s;HJzc;u&LA=XL?B^a1y7fFLj`IAPnUQjYMKXfp)s`$ JG@+(}QUGgOF9`qu literal 0 HcmV?d00001 diff --git a/tests_datasets/square_adm_units.prj b/tests_datasets/square_adm_units.prj new file mode 100644 index 0000000..a30c00a --- /dev/null +++ b/tests_datasets/square_adm_units.prj @@ -0,0 +1 @@ +GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] \ No newline at end of file diff --git a/tests_datasets/square_adm_units.shp b/tests_datasets/square_adm_units.shp new file mode 100644 index 0000000000000000000000000000000000000000..25f1a6980b1e33794b75daaadf3fe2db3da43ff2 GIT binary patch literal 644 zcmZQzQ0HR63K*SUFf%Z)GB7YqNR>HWc*fkJJo&Lj-)|=e?qs`{lh2tud^B`%arxuq zfK@xHAR_|<g99;UffRz&fwZIB3**D=MRx~`4|69>J&e!5z`%sd9TTVAy?gg8!cU~Q z16?nS53?7GJD72~L+n`1C3Q00fvy+E$Kwtbba!kyHQDavZzl(sURYRT3lErDnEzmE M(e=Xk=;~p70G_JkX#fBK literal 0 HcmV?d00001 diff --git a/tests_datasets/square_adm_units.shx b/tests_datasets/square_adm_units.shx new file mode 100644 index 0000000000000000000000000000000000000000..46747912864642fd5cff8ca07ec1ecbc519d45f4 GIT binary patch literal 132 zcmZQzQ0HR64xC;vGcd3+FfdF=l{sE`#@wMi`LRXcZzl)tWV@G>&zU=XG<0!s`Qzk( WT|0`X5d#AQNL?9}-UX%qfoK3t7ZWJ} literal 0 HcmV?d00001 -- GitLab