From b769d799a9b5f43996ad99f54d128c0a4f803f9f Mon Sep 17 00:00:00 2001 From: Cecilia Nievas Date: Thu, 6 May 2021 16:56:18 +0200 Subject: [PATCH 1/5] Added function get_distance_centroids and its test --- GDE_TOOLS_create_industrial_cells.py | 46 +++++++++++++++++++ .../test_GDE_TOOLS_create_industrial_cells.py | 33 +++++++++++++ 2 files changed, 79 insertions(+) diff --git a/GDE_TOOLS_create_industrial_cells.py b/GDE_TOOLS_create_industrial_cells.py index 5891b84..e3c235c 100644 --- a/GDE_TOOLS_create_industrial_cells.py +++ b/GDE_TOOLS_create_industrial_cells.py @@ -1129,3 +1129,49 @@ def auto_adjust_overlaps_gaps( out_gdf["lat_n"].values[which_2[0]] = newpolygon2.bounds[3] return out_gdf, gaps_found + + +def get_distance_centroids(cells_gdf, col_lon, col_lat): + """This function calculates the distance between the points defined by (col_lon, col_lat) + and the centroids of the polygons defined in the geometry column of cells_gdf. No projection + is input. It is assumed that col_lon, col_lat and the geometry are all defined in a common + coordinate system and the distance is calculated in that coordinate system, which is treated + as Cartesian. The output is the largest of all these calculated distances. + + Args: + cells_gdf (GeoDataFrame): GeoPandas GeoDataFrame with at least three columns: + - col_lon: longitude in degrees (float) + - col_lat: latitude in degrees (float) + - geometry: Shapely polygons + col_lon (str): Name of the column of cells_gdf that contains longitudes. + col_lat (str): Name of the column of cells_gdf that contains latitudes. + + Returns: + max_dist (float): Maximum distance between the points defined by (col_lon, col_lat) and + the centroids of the polygons defined in the geometry column of + cells_gdf. No projection assumed. Distance calculated treating the + values of col_lon, col_lat and the definition of geometry as + Cartesian. 
+ """ + + # Check that in_gdf has the needed columns and terminate otherwise: + if ( + (col_lon not in cells_gdf.columns) + or (col_lat not in cells_gdf.columns) + or ("geometry" not in cells_gdf.columns) + ): + print( + "ERROR!! One or more of col_lon, col_lat, geometry " + "missing as columns of cells_gdf" + ) + return np.inf + + centroids = [geom.centroid for geom in cells_gdf["geometry"].values] + orig_points = [ + Point(lon_val, lat_val) + for lon_val, lat_val in zip(cells_gdf[col_lon].values, cells_gdf[col_lat].values) + ] + dist = [p1.distance(p2) for p1, p2 in zip(centroids, orig_points)] + max_dist = np.array(dist).max() + + return max_dist diff --git a/tests/test_GDE_TOOLS_create_industrial_cells.py b/tests/test_GDE_TOOLS_create_industrial_cells.py index 08b82c2..84b6ce0 100644 --- a/tests/test_GDE_TOOLS_create_industrial_cells.py +++ b/tests/test_GDE_TOOLS_create_industrial_cells.py @@ -1229,3 +1229,36 @@ def test_auto_adjust_overlaps_gaps(): np.testing.assert_allclose( expected_out_gdf["lat_n"].values, function_out_gdf["lat_n"].values, rtol=0.0, atol=1e-08 ) + + +def test_get_distance_centroids(): + """ + The original function is very simple. The test case is built within this test function. 
+ + """ + # Build a GeoPandas DataFrame for testing: + col_lon = "LON" + col_lat = "LAT" + d = { + col_lon: [23.0, -17.5, 23.0, -17.5], + col_lat: [38.3, 38.3, -41.4, -41.4], + "geometry": [ + Polygon([(22.0, 37.8), (23.5, 37.8), (23.5, 38.8), (22.0, 38.8)]), + Polygon([(-18.0, 36.8), (-17.0, 36.8), (-17.0, 38.8), (-18.0, 38.8)]), + Polygon([(22.5, -41.8), (23.5, -41.8), (23.5, -40.9), (22.5, -40.9)]), + Polygon([(-18.5, -41.9), (-16.5, -41.9), (-16.5, -40.9), (-18.5, -40.9)]), + ], + } # Distances are: 0.25, 0.5, 0.05, 0.0 + dummy_gdf = gpd.GeoDataFrame(d) + + # Call function to test: + function_out = gdet_cr_ind.get_distance_centroids(dummy_gdf, col_lon, col_lat) + + assert "{:.3f}".format(function_out) == "0.500" + + # Test that the result is inf when the strings given as col_lon and col_lat are not found + # as column names in the input GeoDataFrame: + # Call function to test: + function_out = gdet_cr_ind.get_distance_centroids(dummy_gdf, "LONGITUDE", "LATITUDE") + + assert np.isinf(function_out) -- GitLab From a6ab783a2a9e22f7d46a36a6c1c0ecb59eb30b0f Mon Sep 17 00:00:00 2001 From: Cecilia Nievas Date: Thu, 6 May 2021 17:01:30 +0200 Subject: [PATCH 2/5] Added function get_relative_area_range and its test --- GDE_TOOLS_create_industrial_cells.py | 28 ++++++++++++++++++ .../test_GDE_TOOLS_create_industrial_cells.py | 29 +++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/GDE_TOOLS_create_industrial_cells.py b/GDE_TOOLS_create_industrial_cells.py index e3c235c..937e83a 100644 --- a/GDE_TOOLS_create_industrial_cells.py +++ b/GDE_TOOLS_create_industrial_cells.py @@ -1175,3 +1175,31 @@ def get_distance_centroids(cells_gdf, col_lon, col_lat): max_dist = np.array(dist).max() return max_dist + + +def get_relative_area_range(cells_gdf): + """This function calculates a measure of the variability of the area of the geometries + contained in cells_gdf, by subtracting the minimum area value from the maximum one and + normalising it by the mean. 
+ + Args: + cells_gdf (GeoDataFrame): GeoPandas GeoDataFrame with at least a geometry + column. + + Returns: + areas_range_relative (float): [max(areas) - min(areas)] / mean(areas), where + areas is the area of the polygons in the geometry + column of cells_gdf. + """ + + # Check that in_gdf has the needed columns and terminate otherwise: + if "geometry" not in cells_gdf.columns: + print("ERROR!! geometry missing as column of cells_gdf") + return np.inf + + areas = [geom.area for geom in cells_gdf["geometry"].values] + areas = np.array(areas) + areas_range = areas.max() - areas.min() + areas_range_relative = areas_range / areas.mean() + + return areas_range_relative diff --git a/tests/test_GDE_TOOLS_create_industrial_cells.py b/tests/test_GDE_TOOLS_create_industrial_cells.py index 84b6ce0..1388315 100644 --- a/tests/test_GDE_TOOLS_create_industrial_cells.py +++ b/tests/test_GDE_TOOLS_create_industrial_cells.py @@ -1262,3 +1262,32 @@ def test_get_distance_centroids(): function_out = gdet_cr_ind.get_distance_centroids(dummy_gdf, "LONGITUDE", "LATITUDE") assert np.isinf(function_out) + + +def test_get_relative_area_range(): + """ + The original function is very simple. The test case is built within this test function. 
+ + """ + # Build a GeoPandas DataFrame for testing: + d = { + "geometry": [ + Polygon([(22.0, 37.8), (23.5, 37.8), (23.5, 38.8), (22.0, 38.8)]), + Polygon([(-18.0, 36.8), (-17.0, 36.8), (-17.0, 38.8), (-18.0, 38.8)]), + Polygon([(22.5, -41.8), (23.5, -41.8), (23.5, -40.9), (22.5, -40.9)]), + Polygon([(-18.5, -41.9), (-16.5, -41.9), (-16.5, -40.9), (-18.5, -40.9)]), + ] + } # Areas are: 1.5, 2.0, 0.9, 2.0 + dummy_gdf = gpd.GeoDataFrame(d) + + # Call function to test: + function_out = gdet_cr_ind.get_relative_area_range(dummy_gdf) + + assert "{:.4f}".format(function_out) == "0.6875" + + # Rename the columns and test again that the output is inf if there is no geometry column + # int he input GeoDataFrame: + dummy_gdf = dummy_gdf.rename(columns={"geometry": "geom"}) + function_out = gdet_cr_ind.get_relative_area_range(dummy_gdf) + + assert np.isinf(function_out) -- GitLab From 8b25d67346f9f03212f0808cfe90ac1e80e6dfbd Mon Sep 17 00:00:00 2001 From: Cecilia Nievas Date: Thu, 6 May 2021 17:22:49 +0200 Subject: [PATCH 3/5] Added function generate_country_industrial_cells, its tests, test files --- GDE_TOOLS_create_industrial_cells.py | 342 ++++++++++++++++++ .../test_GDE_TOOLS_create_industrial_cells.py | 187 ++++++++++ 2 files changed, 529 insertions(+) diff --git a/GDE_TOOLS_create_industrial_cells.py b/GDE_TOOLS_create_industrial_cells.py index 937e83a..bf5aaf5 100644 --- a/GDE_TOOLS_create_industrial_cells.py +++ b/GDE_TOOLS_create_industrial_cells.py @@ -1203,3 +1203,345 @@ def get_relative_area_range(cells_gdf): areas_range_relative = areas_range / areas.mean() return areas_range_relative + + +def generate_country_industrial_cells( + country, + col_lon, + col_lat, + width_EW, + width_NS, + id_str, + precision_points, + precision_cells, + aggr_model_pathname, + boundaries_pathname, + boundaries_type="shp", + consistency_checks=True, + autoadjust_overlap_gap=True, + in_crs="EPSG:4326", + consistency_tol_dist=0.05, + consistency_tol_area=0.05, +): + """This function 
reads the input aggregated exposure model of the country (location defined + by aggr_model_pathname and country), identifies the unique points present in this input + model, and generates cell geometries around these points of widths width_EW and width_NS + (in the east-west and north-south directions, respectively). It then adjusts these + geometries so that the cells do not overlap and that consecutive cells do not have small + gaps in between them. This adjustment is necessary because it cannot be guaranteed that + the input points are spaced with sufficient accuracy. The identification of unique points + and the adjustment of the geometries is carried out according to precision levels specified + by precision_points and precision_cells. + + If consistency_checks is True, four consistency checks are run: + + 1) Function overlap_by_full_geom_intersection() is used to verify that the created cells do + not overlap with one another. The cells pass this check if no overlaps are found (output + parameter overlap_found is "False"). + + 2) Functions swell_cells_with_buffer(), overlap_by_full_geom_intersection() and + auto_adjust_overlaps_gaps() are used to verify that the created cells do not have gaps in + between them where there should not be any. The cells pass this check if no gaps are found + (output parameter gap_found is "False"). + + 3) Function get_distance_centroids() is used to calculate the maximum distance between the + original points and the final centroids of the generated cells (max_dist_centr). This + maximum distance is compared against the tolerance, defined as: + + max_dist_centr <= min(width_EW, width_NS)*consistency_tol_dist + + The cells pass this check if max_dist_centr is smaller than the tolerance (output parameter + big_dist_diff is "False"). + + 4) Function get_relative_area_range() is used to calcuate the variability of the areas of + the resulting cells, as [max-min]/mean. 
This value is compared against consistency_tol_area: + if the former is smaller, the cells pass this check (output parameter big_area_diff is + "False"). + + If consistency_checks is False, the consistency checks are not run and all associated output + parameters take the value "Not_Checked". + + If both consistency_checks and autoadjust_overlap_gap are True, the function automatically + adjusts the geometry of the cells if overlaps or gaps are identified. Gaps between cells + that are located diagonally with one another and that have other neighbouring cells in other + directions are ignored. + + If autoadjust_overlap_gap is True, overlap_found and gap_found will be False, because the + function keeps on adjusting the geometries of the cells until no overlaps or gaps are found. + + The adjusted cells are then cropped as per the country's boundaries (input file defined by + boundaries_pathname). The consistency checks are run before this step because the cropping + will affect the location of the centroids of the cells and their areas. + + Args: + country (str): Name of country (as in the files of the aggregated model). + col_lon (str): Name of the column in the file of the aggregated exposure model that + contains longitudes. + col_lat (str): Name of the column in the file of the aggregated exposure model that + contains latitudes. + width_EW (float): Width of the cell in the east-west direction, in degrees, >0. + width_NS (float): Width of the cell in the north-south direction, in degrees, >0. + id_str (str): First part of the string used to generate IDs of the inidividual points. + in which the aggregated exposure model is defined. E.g. "IND_". + precision_points (int): Number of decimal places to be used to determine unique points + present in the aggregated exposure model (the aggregated model + will most likely have several entries for the "same" point). + precision_cells (dictionary): Dictionary with four keys, "lon_w", "lat_s", "lon_e", and + "lat_n". 
Each key contains a formatting string such as + '{:.3f}', for example. It is used to round the coordinates + of the cells' corners to a certain precision to identify + those that need to be adjusted to be the exact same value. + aggr_model_pathname (str): Path where the files of the aggregated model are located. + boundaries_pathname (str): Path where the boundaries of the country are located. + boundaries_type (str): File type containing the boundaries ("shp"=Shapefile, "gpkg"= + Geopackage). Default: "shp". + consistency_checks (bool): If True, consistency checks will be run to check the quality + of the output produced. If False, they will not. + Default: True. + autoadjust_overlap_gap (bool): If True, the code will automatically adjust cells for + which an overlap or gap was found (consistency_checks + needs to be True for this check to be run). If False, no + automatic adjustment will be carried out. + in_crs (str): CRS of the data (default="EPSG:4326"). + consistency_tol_dist (float): Tolerance to assess how large the maximum distance between + the original points and the centroids of the generated + cells is with respect to the width of the cells. The value + of consistency_tol_dist is multiplied by the smallest of + width_EW and width_NS to define the tolerance in degrees. + Default: 0.05 (i.e. 5% of the smallest width). Only needed + if consistency_checks is True. + consistency_tol_area (float): Tolerance to assess how large the variability of the area + of the generated cells is. The value of + consistency_tol_area is compared against the relative area + range, which is calculated as ([max-min]/mean) of all + cells. Default: 0.05. Only needed if consistency_checks is + True. + + Returns: + cells_adj_bound_gdf (GeoDataFrame): GeoPandas GeoDataFrame with the cells defined around + the points at which the aggregated model is defined + for the country. The cells have already been + "cropped" around the country boundaries. + Each row is one of these points. 
It contains the + following columns: + - id: ID of the point, given by id_str and an + incremental integer. + - coordinates: the names of these columns are given + by col_lon and col_lat. They contain + the original coordinates as given in + the input aggregated exposure model. + - lon_w, lat_s, lon_e, lat_n: coordinates that + define the corners of + the cells. + - geometry: (Shapely) polygons of the output cells. + aggr_mod_df (DataFrame): Pandas DataFrame with the aggregated exposure model of the + country. The content is exactly the same as in the original + input file, plus two additional columns "ID_99" and "NAME_99", + both of which contain the IDs of the cells of cells_adj_gdf + associated with each entry of aggr_mod_df. + overlap_found (str): "True" if consistency_checks=True and an overlap between the + calculated cell geometries was found, "False" if consistency_checks + =True and an overlap was not found (which means that the cells pass + this consistency check), "Not_Checked" if consistency_checks=False, + i.e. the consistency checks were not run. + gap_found (str): "True" if consistency_checks=True and a gap between the calculated cell + geometries was found, "False" if consistency_checks=True and a gap was + not found (which means that the cells pass this consistency check), + "Not_Checked" if consistency_checks=False, i.e. the consistency checks + were not run. + big_dist_diff (str): "True" if consistency_checks=True and the maximum distance between + the original input points and the centroids of the calculated cells + (before the cropping as per country boundaries) is larger than the + tolerance, "False" if consistency_checks=True and the maximum + distance is smaller than the tolerance (which means that the cells + pass this consistency check), "Not_Checked" if consistency_checks= + False, i.e. the consistency checks were not run. 
+ big_area_diff (str): "True" if consistency_checks=True and the variability of the areas + of the resulting cells before the cropping as per country + boundaries) is larger than the tolerance, "False" if + consistency_checks=True and the variability is smaller than the + tolerance (which means that the cells pass this consistency check), + "Not_Checked" if consistency_checks=False, i.e. the consistency + checks were not run. + country_id (str): ID of the country as specified in the country boundaries file given + as input. + """ + + # Return the input in_df if width_EW or width_NS are not valid: + if (width_EW <= 0.0) or (width_NS <= 0): + print( + "ERROR in generate_country_industrial_cells: " + "with_EW and width_NS need to be positive numbers >=0" + ) + return + + # Load exposure file of the aggregated exposure model: + aggr_model_filepath = os.path.join(aggr_model_pathname, "Exposure_Ind_%s.csv" % (country)) + if not os.path.isfile(aggr_model_filepath): + print("ERROR in generate_country_industrial_cells: aggregated model file not found") + return + aggr_mod_df = pd.read_csv(aggr_model_filepath, sep=",") # aggr_mod_df is a Pandas DataFrame + + # Load country boundaries: + boundaries_filepath = os.path.join( + boundaries_pathname, "Adm0_%s.%s" % (country, boundaries_type) + ) + if not os.path.isfile(boundaries_filepath): + print("ERROR in generate_country_industrial_cells: country boundaries file not found") + return + bounds_gdf = gpd.read_file(boundaries_filepath) + + # Retrieve unique points in the exposure file, determined with a specific precision + # (points_gdf is a GeoPandas GeoDataFrame, ids_aggr is an array of strings with length + # equal to the number of rows of aggr_mod_df): + points_gdf, ids_aggr = retrieve_unique_points( + aggr_mod_df, col_lon, col_lat, id_str, precision=precision_points, in_crs=in_crs + ) + + # Add the IDs of the unique points to the DataFrame of the input aggregated model: + aggr_mod_df["ID_99"] = ids_aggr + 
aggr_mod_df["NAME_99"] = ids_aggr + + # Define cells around the unique points (cells_gdf is a GeoPandas GeoDataFrame): + cells_gdf = define_cells_in_dataframe( + points_gdf, col_lon, col_lat, width_EW, width_NS, in_crs=in_crs + ) + + # Create dictionaries with all coordinates of the corners of the cells: + coords_dict = create_dict_all_coordinates(cells_gdf, precision_cells) + if len(coords_dict.keys()) < 1: + print( + "ERROR in generate_country_industrial_cells: create_dict_all_coordinates " + "has returned an empty dictionary" + ) + return + + # Create dictionary with unique values of the coordinates of the corners of the cells, to + # a certain precision (given by precision_cells when generating coords_dict): + coords_uq = create_dict_unique_coordinates(coords_dict) + + # Adjust all the coordinates of the corners of the cells (coords_dict) by taking the + # average value of all instances of that coordinate that "should be the same", as identified + # in coords_uq: + coords_dict_adj = adjust_coords(coords_dict, coords_uq) # coords_dict_adj is a dictionary + + # Generate final output with adjusted cell geometries (cells_adj_gdf is a + # GeoPandas GeoDataFrame): + cells_adj_gdf = build_adjusted_cells_dataframe(cells_gdf, coords_dict_adj) + + # Run consistency checks if requested: + if not consistency_checks: + overlap_found = "Not_Checked" + gap_found = "Not_Checked" + big_dist_diff = "Not_Checked" + big_area_diff = "Not_Checked" + else: + # Consistency check 1: the output geometries should not overlap + num_overlaps = 999 # Initialise variable for the while loop to run at least once + while num_overlaps > 0: + intsect_gdf = overlap_by_full_geom_intersection(cells_adj_gdf, "id_1", "id_2") + num_overlaps = intsect_gdf.shape[0] + if num_overlaps > 0: + if not autoadjust_overlap_gap: # The user specified not to automatically adjust + overlap_found = "True" + break + # Automatically adjust the overlaps + cells_adj_gdf, _ = auto_adjust_overlaps_gaps( + cells_adj_gdf, + 
intsect_gdf, + col_lon, + col_lat, + width_EW, + width_NS, + "overlap", + "id_1", + "id_2", + ) + else: # No overlaps found, the while loop will end + overlap_found = "False" + + # Consistency check 2: + gaps_found = True # Initialise variable for the while loop to run at least once + while gaps_found: + # Expand the cells by 25% of their dimensions in all directions: + cells_adj_offset_gdf = swell_cells_with_buffer( + cells_adj_gdf, 0.25 * width_EW, 0.25 * width_NS + ) + + # Identify intersections in the expanded version: + intsect_gdf = overlap_by_full_geom_intersection( + cells_adj_offset_gdf, "id_1", "id_2" + ) + + # Automatically adjust the potential gaps and store it in auxiliary variable to + # determine if there are gaps or not: + cells_adj_aux_gdf, gaps_found = auto_adjust_overlaps_gaps( + cells_adj_gdf, + intsect_gdf, + col_lon, + col_lat, + width_EW, + width_NS, + "gap", + "id_1", + "id_2", + ) + + if not autoadjust_overlap_gap: # The user specified not to automatically adjust + if gaps_found: + gap_found = "True" + else: + gap_found = "False" + break # This will be the result of the check, the while loop will not run again + + # Adopt the auxiliary adjusted version: + cells_adj_gdf = deepcopy(cells_adj_aux_gdf) + gap_found = str(gaps_found) + + # Consistency check 3: maximum distance between original points and final centroids: + max_dist_centr = get_distance_centroids(cells_adj_gdf, col_lon, col_lat) + # Compare the maximum distance against the tolerance: + if max_dist_centr > min(width_EW, width_NS) * consistency_tol_dist: + big_dist_diff = "True" + else: + big_dist_diff = "False" + + # Consistency check 4: stability/variability of area of resulting cells: + rel_area_range = get_relative_area_range(cells_gdf) + # Compare the relative area range ([max-min]/mean) against the tolerance: + if rel_area_range > consistency_tol_area: + big_area_diff = "True" + else: + big_area_diff = "False" + + # Intersect cells with admnistrative boundary of country: + 
cells_adj_bound_gdf = gpd.overlay(cells_adj_gdf, bounds_gdf, how="intersection") + # Eliminate columns that are not useful: + if "ID_0" in cells_adj_bound_gdf.columns: + country_id = str(cells_adj_bound_gdf["ID_0"].values[0]) + del cells_adj_bound_gdf["ID_0"] + else: + country_id = "UNK" + if "NAME_0" in cells_adj_bound_gdf.columns: + del cells_adj_bound_gdf["NAME_0"] + if "id_2" in cells_adj_bound_gdf.columns: + del cells_adj_bound_gdf["id_2"] + if "id_1" in cells_adj_bound_gdf.columns: + cells_adj_bound_gdf = cells_adj_bound_gdf.rename(columns={"id_1": "id"}) + # Update columns "lon_w", "lat_s", "lon_e", and "lat_n" of cells_adj_bound_gdf: + for row in range(0, cells_adj_bound_gdf.shape[0]): + geometry_row = cells_adj_bound_gdf["geometry"].values[row] + cells_adj_bound_gdf["lon_w"].values[row] = geometry_row.bounds[0] + cells_adj_bound_gdf["lon_e"].values[row] = geometry_row.bounds[2] + cells_adj_bound_gdf["lat_s"].values[row] = geometry_row.bounds[1] + cells_adj_bound_gdf["lat_n"].values[row] = geometry_row.bounds[3] + + return ( + cells_adj_bound_gdf, + aggr_mod_df, + overlap_found, + gap_found, + big_dist_diff, + big_area_diff, + country_id, + ) diff --git a/tests/test_GDE_TOOLS_create_industrial_cells.py b/tests/test_GDE_TOOLS_create_industrial_cells.py index 1388315..406b9b5 100644 --- a/tests/test_GDE_TOOLS_create_industrial_cells.py +++ b/tests/test_GDE_TOOLS_create_industrial_cells.py @@ -1291,3 +1291,190 @@ def test_get_relative_area_range(): function_out = gdet_cr_ind.get_relative_area_range(dummy_gdf) assert np.isinf(function_out) + + +def test_generate_country_industrial_cells(): + """ + The test reads both inputs and expected outputs from a series of data files. + Three ficticious input files are considered. 
+ """ + # Path to data files: + pathname = os.path.join( + os.path.dirname(__file__), "data", "GDE_TOOLS_create_industrial_cells" + ) + + # Parameters common to all three ficticious cases: + col_lon = "LONGITUDE" + col_lat = "LATITUDE" + width_EW = 30.0 / (60.0 * 60.0) # 30 arcsec + width_NS = 30.0 / (60.0 * 60.0) # 30 arcsec + precision_points = 4 + boundaries_type = "shp" + in_crs = "EPSG:4326" + consistency_tol_dist = 0.05 + consistency_tol_area = 0.05 + + # Auxiliary dictionary definining the precision to use to convert coordinates into strings: + dec_precision_EW = int("{:E}".format(width_EW).split("-")[1]) + dec_precision_NS = int("{:E}".format(width_NS).split("-")[1]) + precision_cells = {} + precision_cells["lon_w"] = "{:.%sf}" % (dec_precision_EW) + precision_cells["lat_s"] = "{:.%sf}" % (dec_precision_NS) + precision_cells["lon_e"] = "{:.%sf}" % (dec_precision_EW) + precision_cells["lat_n"] = "{:.%sf}" % (dec_precision_NS) + + # The three ficticious countries to test: + countries = ["Country_A", "Country_B", "Country_C"] + id_strings = ["CA_IND", "CB_IND", "CC_IND"] + + # For each country, the following combinations of input parameters are considered: + consistency_options = [True, True, False] + autoadjust_options = [True, False, False] + + # Expected output for simple variables for which the output is not defined by a data file: + expected_country_id = "278" + expected_overlap_found = np.array( + [ + ["False", "False", "Not_Checked"], # Country_A + ["False", "True", "Not_Checked"], # Country_B + ["False", "False", "Not_Checked"], # Country_C + ] + ) + expected_gap_found = np.array( + [ + ["False", "False", "Not_Checked"], + ["False", "False", "Not_Checked"], + ["False", "True", "Not_Checked"], + ] + ) + expected_big_dist_diff = np.array( + [ + ["False", "False", "Not_Checked"], + ["False", "False", "Not_Checked"], + ["False", "False", "Not_Checked"], + ] + ) + expected_big_area_diff = np.array( + [ + ["False", "False", "Not_Checked"], + ["False", 
"False", "Not_Checked"], + ["False", "False", "Not_Checked"], + ] + ) + + # Run the tests: + for k, country in enumerate(countries): + expected_aggr_filepath = os.path.join( + pathname, "Exposure_Ind_%s_output.csv" % (country) + ) + expected_aggr_mod_df = pd.read_csv(expected_aggr_filepath, sep=",") + for i in range(0, len(consistency_options)): + consistency_checks = consistency_options[i] + autoadjust_overlap_gap = autoadjust_options[i] + # Run the function: + res = gdet_cr_ind.generate_country_industrial_cells( + country, + col_lon, + col_lat, + width_EW, + width_NS, + id_strings[k], + precision_points, + precision_cells, + pathname, + pathname, + boundaries_type, + consistency_checks, + autoadjust_overlap_gap, + in_crs, + consistency_tol_dist, + consistency_tol_area, + ) + ( + function_cells_gdf, + function_aggr_mod_df, + function_overlap_found, + function_gap_found, + function_big_dist_diff, + function_big_area_diff, + function_country_id, + ) = res + + # Compare against expected results (simple variables): + assert function_overlap_found == expected_overlap_found[k, i] + assert function_gap_found == expected_gap_found[k, i] + assert function_big_dist_diff == expected_big_dist_diff[k, i] + assert function_big_area_diff == expected_big_area_diff[k, i] + assert function_country_id == expected_country_id + + # Compare against expected updated aggregated model file: + numerical_columns = [ + "LONGITUDE", + "LATITUDE", + "BUILDINGS", + "DWELLINGS", + "OCCUPANTS_PER_ASSET", + "COST_PER_AREA_EUR", + ] + non_numerical_columns = [ + "TAXONOMY", + "ID_1", + "NAME_1", + "ID_2", + "NAME_2", + "ID_99", + "NAME_99", + ] + for col in numerical_columns: + np.testing.assert_allclose( + function_aggr_mod_df[col].values, + expected_aggr_mod_df[col].values, + rtol=0.0, + atol=1e-08, + ) + for col in non_numerical_columns: + assert np.all( + function_aggr_mod_df[col].values == expected_aggr_mod_df[col].values + ) + + # Compare against expected final cell geometries and IDs: + 
expected_cells_filepath = os.path.join( + pathname, "expected_cells_%s_%s.csv" % (country, str(i)) + ) + aux_df = pd.read_csv(expected_cells_filepath, sep=",") + geoms = [] + for poly in aux_df["geometry"].values: + geoms.append(shapely.wkt.loads(poly)) + expected_cells_gdf = gpd.GeoDataFrame(aux_df, geometry=geoms, crs=in_crs) + assert np.all(function_cells_gdf["id"].values == expected_cells_gdf["id"].values) + for row in range(0, expected_cells_gdf.shape[0]): + np.testing.assert_allclose( + function_cells_gdf["geometry"].values[row].bounds, + expected_cells_gdf["geometry"].values[row].bounds, + rtol=0.0, + atol=1e-08, + ) + np.testing.assert_allclose( + function_cells_gdf["lon_w"].values, + expected_cells_gdf["lon_w"].values, + rtol=0.0, + atol=1e-08, + ) + np.testing.assert_allclose( + function_cells_gdf["lon_e"].values, + expected_cells_gdf["lon_e"].values, + rtol=0.0, + atol=1e-08, + ) + np.testing.assert_allclose( + function_cells_gdf["lat_s"].values, + expected_cells_gdf["lat_s"].values, + rtol=0.0, + atol=1e-08, + ) + np.testing.assert_allclose( + function_cells_gdf["lat_n"].values, + expected_cells_gdf["lat_n"].values, + rtol=0.0, + atol=1e-08, + ) -- GitLab From 0e9f4756d392df1dc174a9d85a14f5b24d15e358 Mon Sep 17 00:00:00 2001 From: Cecilia Nievas Date: Thu, 6 May 2021 17:26:47 +0200 Subject: [PATCH 4/5] Revert "Added function generate_country_industrial_cells, its tests, test files" This reverts commit 8b25d67346f9f03212f0808cfe90ac1e80e6dfbd --- GDE_TOOLS_create_industrial_cells.py | 342 ------------------ .../test_GDE_TOOLS_create_industrial_cells.py | 187 ---------- 2 files changed, 529 deletions(-) diff --git a/GDE_TOOLS_create_industrial_cells.py b/GDE_TOOLS_create_industrial_cells.py index bf5aaf5..937e83a 100644 --- a/GDE_TOOLS_create_industrial_cells.py +++ b/GDE_TOOLS_create_industrial_cells.py @@ -1203,345 +1203,3 @@ def get_relative_area_range(cells_gdf): areas_range_relative = areas_range / areas.mean() return areas_range_relative - - -def 
generate_country_industrial_cells( - country, - col_lon, - col_lat, - width_EW, - width_NS, - id_str, - precision_points, - precision_cells, - aggr_model_pathname, - boundaries_pathname, - boundaries_type="shp", - consistency_checks=True, - autoadjust_overlap_gap=True, - in_crs="EPSG:4326", - consistency_tol_dist=0.05, - consistency_tol_area=0.05, -): - """This function reads the input aggregated exposure model of the country (location defined - by aggr_model_pathname and country), identifies the unique points present in this input - model, and generates cell geometries around these points of widths width_EW and width_NS - (in the east-west and north-south directions, respectively). It then adjusts these - geometries so that the cells do not overlap and that consecutive cells do not have small - gaps in between them. This adjustment is necessary because it cannot be guaranteed that - the input points are spaced with sufficient accuracy. The identification of unique points - and the adjustment of the geometries is carried out according to precision levels specified - by precision_points and precision_cells. - - If consistency_checks is True, four consistency checks are run: - - 1) Function overlap_by_full_geom_intersection() is used to verify that the created cells do - not overlap with one another. The cells pass this check if no overlaps are found (output - parameter overlap_found is "False"). - - 2) Functions swell_cells_with_buffer(), overlap_by_full_geom_intersection() and - auto_adjust_overlaps_gaps() are used to verify that the created cells do not have gaps in - between them where there should not be any. The cells pass this check if no gaps are found - (output parameter gap_found is "False"). - - 3) Function get_distance_centroids() is used to calculate the maximum distance between the - original points and the final centroids of the generated cells (max_dist_centr). 
This - maximum distance is compared against the tolerance, defined as: - - max_dist_centr <= min(width_EW, width_NS)*consistency_tol_dist - - The cells pass this check if max_dist_centr is smaller than the tolerance (output parameter - big_dist_diff is "False"). - - 4) Function get_relative_area_range() is used to calcuate the variability of the areas of - the resulting cells, as [max-min]/mean. This value is compared against consistency_tol_area: - if the former is smaller, the cells pass this check (output parameter big_area_diff is - "False"). - - If consistency_checks is False, the consistency checks are not run and all associated output - parameters take the value "Not_Checked". - - If both consistency_checks and autoadjust_overlap_gap are True, the function automatically - adjusts the geometry of the cells if overlaps or gaps are identified. Gaps between cells - that are located diagonally with one another and that have other neighbouring cells in other - directions are ignored. - - If autoadjust_overlap_gap is True, overlap_found and gap_found will be False, because the - function keeps on adjusting the geometries of the cells until no overlaps or gaps are found. - - The adjusted cells are then cropped as per the country's boundaries (input file defined by - boundaries_pathname). The consistency checks are run before this step because the cropping - will affect the location of the centroids of the cells and their areas. - - Args: - country (str): Name of country (as in the files of the aggregated model). - col_lon (str): Name of the column in the file of the aggregated exposure model that - contains longitudes. - col_lat (str): Name of the column in the file of the aggregated exposure model that - contains latitudes. - width_EW (float): Width of the cell in the east-west direction, in degrees, >0. - width_NS (float): Width of the cell in the north-south direction, in degrees, >0. 
- id_str (str): First part of the string used to generate IDs of the inidividual points. - in which the aggregated exposure model is defined. E.g. "IND_". - precision_points (int): Number of decimal places to be used to determine unique points - present in the aggregated exposure model (the aggregated model - will most likely have several entries for the "same" point). - precision_cells (dictionary): Dictionary with four keys, "lon_w", "lat_s", "lon_e", and - "lat_n". Each key contains a formatting string such as - '{:.3f}', for example. It is used to round the coordinates - of the cells' corners to a certain precision to identify - those that need to be adjusted to be the exact same value. - aggr_model_pathname (str): Path where the files of the aggregated model are located. - boundaries_pathname (str): Path where the boundaries of the country are located. - boundaries_type (str): File type containing the boundaries ("shp"=Shapefile, "gpkg"= - Geopackage). Default: "shp". - consistency_checks (bool): If True, consistency checks will be run to check the quality - of the output produced. If False, they will not. - Default: True. - autoadjust_overlap_gap (bool): If True, the code will automatically adjust cells for - which an overlap or gap was found (consistency_checks - needs to be True for this check to be run). If False, no - automatic adjustment will be carried out. - in_crs (str): CRS of the data (default="EPSG:4326"). - consistency_tol_dist (float): Tolerance to assess how large the maximum distance between - the original points and the centroids of the generated - cells is with respect to the width of the cells. The value - of consistency_tol_dist is multiplied by the smallest of - width_EW and width_NS to define the tolerance in degrees. - Default: 0.05 (i.e. 5% of the smallest width). Only needed - if consistency_checks is True. - consistency_tol_area (float): Tolerance to assess how large the variability of the area - of the generated cells is. 
The value of - consistency_tol_area is compared against the relative area - range, which is calculated as ([max-min]/mean) of all - cells. Default: 0.05. Only needed if consistency_checks is - True. - - Returns: - cells_adj_bound_gdf (GeoDataFrame): GeoPandas GeoDataFrame with the cells defined around - the points at which the aggregated model is defined - for the country. The cells have already been - "cropped" around the country boundaries. - Each row is one of these points. It contains the - following columns: - - id: ID of the point, given by id_str and an - incremental integer. - - coordinates: the names of these columns are given - by col_lon and col_lat. They contain - the original coordinates as given in - the input aggregated exposure model. - - lon_w, lat_s, lon_e, lat_n: coordinates that - define the corners of - the cells. - - geometry: (Shapely) polygons of the output cells. - aggr_mod_df (DataFrame): Pandas DataFrame with the aggregated exposure model of the - country. The content is exactly the same as in the original - input file, plus two additional columns "ID_99" and "NAME_99", - both of which contain the IDs of the cells of cells_adj_gdf - associated with each entry of aggr_mod_df. - overlap_found (str): "True" if consistency_checks=True and an overlap between the - calculated cell geometries was found, "False" if consistency_checks - =True and an overlap was not found (which means that the cells pass - this consistency check), "Not_Checked" if consistency_checks=False, - i.e. the consistency checks were not run. - gap_found (str): "True" if consistency_checks=True and a gap between the calculated cell - geometries was found, "False" if consistency_checks=True and a gap was - not found (which means that the cells pass this consistency check), - "Not_Checked" if consistency_checks=False, i.e. the consistency checks - were not run. 
- big_dist_diff (str): "True" if consistency_checks=True and the maximum distance between - the original input points and the centroids of the calculated cells - (before the cropping as per country boundaries) is larger than the - tolerance, "False" if consistency_checks=True and the maximum - distance is smaller than the tolerance (which means that the cells - pass this consistency check), "Not_Checked" if consistency_checks= - False, i.e. the consistency checks were not run. - big_area_diff (str): "True" if consistency_checks=True and the variability of the areas - of the resulting cells before the cropping as per country - boundaries) is larger than the tolerance, "False" if - consistency_checks=True and the variability is smaller than the - tolerance (which means that the cells pass this consistency check), - "Not_Checked" if consistency_checks=False, i.e. the consistency - checks were not run. - country_id (str): ID of the country as specified in the country boundaries file given - as input. 
- """ - - # Return the input in_df if width_EW or width_NS are not valid: - if (width_EW <= 0.0) or (width_NS <= 0): - print( - "ERROR in generate_country_industrial_cells: " - "with_EW and width_NS need to be positive numbers >=0" - ) - return - - # Load exposure file of the aggregated exposure model: - aggr_model_filepath = os.path.join(aggr_model_pathname, "Exposure_Ind_%s.csv" % (country)) - if not os.path.isfile(aggr_model_filepath): - print("ERROR in generate_country_industrial_cells: aggregated model file not found") - return - aggr_mod_df = pd.read_csv(aggr_model_filepath, sep=",") # aggr_mod_df is a Pandas DataFrame - - # Load country boundaries: - boundaries_filepath = os.path.join( - boundaries_pathname, "Adm0_%s.%s" % (country, boundaries_type) - ) - if not os.path.isfile(boundaries_filepath): - print("ERROR in generate_country_industrial_cells: country boundaries file not found") - return - bounds_gdf = gpd.read_file(boundaries_filepath) - - # Retrieve unique points in the exposure file, determined with a specific precision - # (points_gdf is a GeoPandas GeoDataFrame, ids_aggr is an array of strings with length - # equal to the number of rows of aggr_mod_df): - points_gdf, ids_aggr = retrieve_unique_points( - aggr_mod_df, col_lon, col_lat, id_str, precision=precision_points, in_crs=in_crs - ) - - # Add the IDs of the unique points to the DataFrame of the input aggregated model: - aggr_mod_df["ID_99"] = ids_aggr - aggr_mod_df["NAME_99"] = ids_aggr - - # Define cells around the unique points (cells_gdf is a GeoPandas GeoDataFrame): - cells_gdf = define_cells_in_dataframe( - points_gdf, col_lon, col_lat, width_EW, width_NS, in_crs=in_crs - ) - - # Create dictionaries with all coordinates of the corners of the cells: - coords_dict = create_dict_all_coordinates(cells_gdf, precision_cells) - if len(coords_dict.keys()) < 1: - print( - "ERROR in generate_country_industrial_cells: create_dict_all_coordinates " - "has returned an empty dictionary" - ) - return - 
- # Create dictionary with unique values of the coordinates of the corners of the cells, to - # a certain precision (given by precision_cells when generating coords_dict): - coords_uq = create_dict_unique_coordinates(coords_dict) - - # Adjust all the coordinates of the corners of the cells (coords_dict) by taking the - # average value of all instances of that coordinate that "should be the same", as identified - # in coords_uq: - coords_dict_adj = adjust_coords(coords_dict, coords_uq) # coords_dict_adj is a dictionary - - # Generate final output with adjusted cell geometries (cells_adj_gdf is a - # GeoPandas GeoDataFrame): - cells_adj_gdf = build_adjusted_cells_dataframe(cells_gdf, coords_dict_adj) - - # Run consistency checks if requested: - if not consistency_checks: - overlap_found = "Not_Checked" - gap_found = "Not_Checked" - big_dist_diff = "Not_Checked" - big_area_diff = "Not_Checked" - else: - # Consistency check 1: the output geometries should not overlap - num_overlaps = 999 # Initialise variable for the while loop to run at least once - while num_overlaps > 0: - intsect_gdf = overlap_by_full_geom_intersection(cells_adj_gdf, "id_1", "id_2") - num_overlaps = intsect_gdf.shape[0] - if num_overlaps > 0: - if not autoadjust_overlap_gap: # The user specified not to automatically adjust - overlap_found = "True" - break - # Automatically adjust the overlaps - cells_adj_gdf, _ = auto_adjust_overlaps_gaps( - cells_adj_gdf, - intsect_gdf, - col_lon, - col_lat, - width_EW, - width_NS, - "overlap", - "id_1", - "id_2", - ) - else: # No overlaps found, the while loop will end - overlap_found = "False" - - # Consistency check 2: - gaps_found = True # Initialise variable for the while loop to run at least once - while gaps_found: - # Expand the cells by 25% of their dimensions in all directions: - cells_adj_offset_gdf = swell_cells_with_buffer( - cells_adj_gdf, 0.25 * width_EW, 0.25 * width_NS - ) - - # Identify intersections in the expanded version: - intsect_gdf = 
overlap_by_full_geom_intersection( - cells_adj_offset_gdf, "id_1", "id_2" - ) - - # Automatically adjust the potential gaps and store it in auxiliary variable to - # determine if there are gaps or not: - cells_adj_aux_gdf, gaps_found = auto_adjust_overlaps_gaps( - cells_adj_gdf, - intsect_gdf, - col_lon, - col_lat, - width_EW, - width_NS, - "gap", - "id_1", - "id_2", - ) - - if not autoadjust_overlap_gap: # The user specified not to automatically adjust - if gaps_found: - gap_found = "True" - else: - gap_found = "False" - break # This will be the result of the check, the while loop will not run again - - # Adopt the auxiliary adjusted version: - cells_adj_gdf = deepcopy(cells_adj_aux_gdf) - gap_found = str(gaps_found) - - # Consistency check 3: maximum distance between original points and final centroids: - max_dist_centr = get_distance_centroids(cells_adj_gdf, col_lon, col_lat) - # Compare the maximum distance against the tolerance: - if max_dist_centr > min(width_EW, width_NS) * consistency_tol_dist: - big_dist_diff = "True" - else: - big_dist_diff = "False" - - # Consistency check 4: stability/variability of area of resulting cells: - rel_area_range = get_relative_area_range(cells_gdf) - # Compare the relative area range ([max-min]/mean) against the tolerance: - if rel_area_range > consistency_tol_area: - big_area_diff = "True" - else: - big_area_diff = "False" - - # Intersect cells with admnistrative boundary of country: - cells_adj_bound_gdf = gpd.overlay(cells_adj_gdf, bounds_gdf, how="intersection") - # Eliminate columns that are not useful: - if "ID_0" in cells_adj_bound_gdf.columns: - country_id = str(cells_adj_bound_gdf["ID_0"].values[0]) - del cells_adj_bound_gdf["ID_0"] - else: - country_id = "UNK" - if "NAME_0" in cells_adj_bound_gdf.columns: - del cells_adj_bound_gdf["NAME_0"] - if "id_2" in cells_adj_bound_gdf.columns: - del cells_adj_bound_gdf["id_2"] - if "id_1" in cells_adj_bound_gdf.columns: - cells_adj_bound_gdf = 
cells_adj_bound_gdf.rename(columns={"id_1": "id"}) - # Update columns "lon_w", "lat_s", "lon_e", and "lat_n" of cells_adj_bound_gdf: - for row in range(0, cells_adj_bound_gdf.shape[0]): - geometry_row = cells_adj_bound_gdf["geometry"].values[row] - cells_adj_bound_gdf["lon_w"].values[row] = geometry_row.bounds[0] - cells_adj_bound_gdf["lon_e"].values[row] = geometry_row.bounds[2] - cells_adj_bound_gdf["lat_s"].values[row] = geometry_row.bounds[1] - cells_adj_bound_gdf["lat_n"].values[row] = geometry_row.bounds[3] - - return ( - cells_adj_bound_gdf, - aggr_mod_df, - overlap_found, - gap_found, - big_dist_diff, - big_area_diff, - country_id, - ) diff --git a/tests/test_GDE_TOOLS_create_industrial_cells.py b/tests/test_GDE_TOOLS_create_industrial_cells.py index 406b9b5..1388315 100644 --- a/tests/test_GDE_TOOLS_create_industrial_cells.py +++ b/tests/test_GDE_TOOLS_create_industrial_cells.py @@ -1291,190 +1291,3 @@ def test_get_relative_area_range(): function_out = gdet_cr_ind.get_relative_area_range(dummy_gdf) assert np.isinf(function_out) - - -def test_generate_country_industrial_cells(): - """ - The test reads both inputs and expected outputs from a series of data files. - Three ficticious input files are considered. 
- """ - # Path to data files: - pathname = os.path.join( - os.path.dirname(__file__), "data", "GDE_TOOLS_create_industrial_cells" - ) - - # Parameters common to all three ficticious cases: - col_lon = "LONGITUDE" - col_lat = "LATITUDE" - width_EW = 30.0 / (60.0 * 60.0) # 30 arcsec - width_NS = 30.0 / (60.0 * 60.0) # 30 arcsec - precision_points = 4 - boundaries_type = "shp" - in_crs = "EPSG:4326" - consistency_tol_dist = 0.05 - consistency_tol_area = 0.05 - - # Auxiliary dictionary definining the precision to use to convert coordinates into strings: - dec_precision_EW = int("{:E}".format(width_EW).split("-")[1]) - dec_precision_NS = int("{:E}".format(width_NS).split("-")[1]) - precision_cells = {} - precision_cells["lon_w"] = "{:.%sf}" % (dec_precision_EW) - precision_cells["lat_s"] = "{:.%sf}" % (dec_precision_NS) - precision_cells["lon_e"] = "{:.%sf}" % (dec_precision_EW) - precision_cells["lat_n"] = "{:.%sf}" % (dec_precision_NS) - - # The three ficticious countries to test: - countries = ["Country_A", "Country_B", "Country_C"] - id_strings = ["CA_IND", "CB_IND", "CC_IND"] - - # For each country, the following combinations of input parameters are considered: - consistency_options = [True, True, False] - autoadjust_options = [True, False, False] - - # Expected output for simple variables for which the output is not defined by a data file: - expected_country_id = "278" - expected_overlap_found = np.array( - [ - ["False", "False", "Not_Checked"], # Country_A - ["False", "True", "Not_Checked"], # Country_B - ["False", "False", "Not_Checked"], # Country_C - ] - ) - expected_gap_found = np.array( - [ - ["False", "False", "Not_Checked"], - ["False", "False", "Not_Checked"], - ["False", "True", "Not_Checked"], - ] - ) - expected_big_dist_diff = np.array( - [ - ["False", "False", "Not_Checked"], - ["False", "False", "Not_Checked"], - ["False", "False", "Not_Checked"], - ] - ) - expected_big_area_diff = np.array( - [ - ["False", "False", "Not_Checked"], - ["False", 
"False", "Not_Checked"], - ["False", "False", "Not_Checked"], - ] - ) - - # Run the tests: - for k, country in enumerate(countries): - expected_aggr_filepath = os.path.join( - pathname, "Exposure_Ind_%s_output.csv" % (country) - ) - expected_aggr_mod_df = pd.read_csv(expected_aggr_filepath, sep=",") - for i in range(0, len(consistency_options)): - consistency_checks = consistency_options[i] - autoadjust_overlap_gap = autoadjust_options[i] - # Run the function: - res = gdet_cr_ind.generate_country_industrial_cells( - country, - col_lon, - col_lat, - width_EW, - width_NS, - id_strings[k], - precision_points, - precision_cells, - pathname, - pathname, - boundaries_type, - consistency_checks, - autoadjust_overlap_gap, - in_crs, - consistency_tol_dist, - consistency_tol_area, - ) - ( - function_cells_gdf, - function_aggr_mod_df, - function_overlap_found, - function_gap_found, - function_big_dist_diff, - function_big_area_diff, - function_country_id, - ) = res - - # Compare against expected results (simple variables): - assert function_overlap_found == expected_overlap_found[k, i] - assert function_gap_found == expected_gap_found[k, i] - assert function_big_dist_diff == expected_big_dist_diff[k, i] - assert function_big_area_diff == expected_big_area_diff[k, i] - assert function_country_id == expected_country_id - - # Compare against expected updated aggregated model file: - numerical_columns = [ - "LONGITUDE", - "LATITUDE", - "BUILDINGS", - "DWELLINGS", - "OCCUPANTS_PER_ASSET", - "COST_PER_AREA_EUR", - ] - non_numerical_columns = [ - "TAXONOMY", - "ID_1", - "NAME_1", - "ID_2", - "NAME_2", - "ID_99", - "NAME_99", - ] - for col in numerical_columns: - np.testing.assert_allclose( - function_aggr_mod_df[col].values, - expected_aggr_mod_df[col].values, - rtol=0.0, - atol=1e-08, - ) - for col in non_numerical_columns: - assert np.all( - function_aggr_mod_df[col].values == expected_aggr_mod_df[col].values - ) - - # Compare against expected final cell geometries and IDs: - 
expected_cells_filepath = os.path.join( - pathname, "expected_cells_%s_%s.csv" % (country, str(i)) - ) - aux_df = pd.read_csv(expected_cells_filepath, sep=",") - geoms = [] - for poly in aux_df["geometry"].values: - geoms.append(shapely.wkt.loads(poly)) - expected_cells_gdf = gpd.GeoDataFrame(aux_df, geometry=geoms, crs=in_crs) - assert np.all(function_cells_gdf["id"].values == expected_cells_gdf["id"].values) - for row in range(0, expected_cells_gdf.shape[0]): - np.testing.assert_allclose( - function_cells_gdf["geometry"].values[row].bounds, - expected_cells_gdf["geometry"].values[row].bounds, - rtol=0.0, - atol=1e-08, - ) - np.testing.assert_allclose( - function_cells_gdf["lon_w"].values, - expected_cells_gdf["lon_w"].values, - rtol=0.0, - atol=1e-08, - ) - np.testing.assert_allclose( - function_cells_gdf["lon_e"].values, - expected_cells_gdf["lon_e"].values, - rtol=0.0, - atol=1e-08, - ) - np.testing.assert_allclose( - function_cells_gdf["lat_s"].values, - expected_cells_gdf["lat_s"].values, - rtol=0.0, - atol=1e-08, - ) - np.testing.assert_allclose( - function_cells_gdf["lat_n"].values, - expected_cells_gdf["lat_n"].values, - rtol=0.0, - atol=1e-08, - ) -- GitLab From d82cd8e9d9f579e13bdd646b1ad7a35c91e46191 Mon Sep 17 00:00:00 2001 From: Cecilia Nievas Date: Thu, 6 May 2021 17:33:15 +0200 Subject: [PATCH 5/5] Added function generate_country_industrial_cells, its tests, test files --- GDE_TOOLS_create_industrial_cells.py | 344 +++++++++++++++++- .../Adm0_Country_A.dbf | Bin 0 -> 119 bytes .../Adm0_Country_A.prj | 1 + .../Adm0_Country_A.qpj | 1 + .../Adm0_Country_A.shp | Bin 0 -> 236 bytes .../Adm0_Country_A.shx | Bin 0 -> 108 bytes .../Adm0_Country_B.dbf | Bin 0 -> 119 bytes .../Adm0_Country_B.prj | 1 + .../Adm0_Country_B.qpj | 1 + .../Adm0_Country_B.shp | Bin 0 -> 236 bytes .../Adm0_Country_B.shx | Bin 0 -> 108 bytes .../Adm0_Country_C.dbf | Bin 0 -> 119 bytes .../Adm0_Country_C.prj | 1 + .../Adm0_Country_C.qpj | 1 + .../Adm0_Country_C.shp | Bin 0 -> 236 bytes 
.../Adm0_Country_C.shx | Bin 0 -> 108 bytes .../Exposure_Ind_Country_A.csv | 15 + .../Exposure_Ind_Country_A_output.csv | 15 + .../Exposure_Ind_Country_B.csv | 15 + .../Exposure_Ind_Country_B_output.csv | 15 + .../Exposure_Ind_Country_C.csv | 15 + .../Exposure_Ind_Country_C_output.csv | 15 + .../expected_cells_Country_A_0.csv | 6 + .../expected_cells_Country_A_1.csv | 6 + .../expected_cells_Country_A_2.csv | 6 + .../expected_cells_Country_B_0.csv | 6 + .../expected_cells_Country_B_1.csv | 6 + .../expected_cells_Country_B_2.csv | 6 + .../expected_cells_Country_C_0.csv | 6 + .../expected_cells_Country_C_1.csv | 6 + .../expected_cells_Country_C_2.csv | 6 + .../test_GDE_TOOLS_create_industrial_cells.py | 188 ++++++++++ 32 files changed, 681 insertions(+), 1 deletion(-) create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.dbf create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.prj create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.qpj create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.shp create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.shx create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.dbf create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.prj create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.qpj create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.shp create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.shx create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.dbf create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.prj create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.qpj create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.shp create mode 100644 
tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.shx create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_A.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_A_output.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_B.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_B_output.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_C.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_C_output.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_A_0.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_A_1.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_A_2.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_B_0.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_B_1.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_B_2.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_C_0.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_C_1.csv create mode 100644 tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_C_2.csv diff --git a/GDE_TOOLS_create_industrial_cells.py b/GDE_TOOLS_create_industrial_cells.py index 937e83a..405cbb6 100644 --- a/GDE_TOOLS_create_industrial_cells.py +++ b/GDE_TOOLS_create_industrial_cells.py @@ -36,7 +36,7 @@ from copy import deepcopy import numpy as np import pandas as pd import geopandas as gpd -from shapely.geometry import Polygon +from shapely.geometry import Point, Polygon def define_corners(centr_lon, centr_lat, width_EW, width_NS): @@ -1203,3 +1203,345 @@ def 
get_relative_area_range(cells_gdf): areas_range_relative = areas_range / areas.mean() return areas_range_relative + + +def generate_country_industrial_cells( + country, + col_lon, + col_lat, + width_EW, + width_NS, + id_str, + precision_points, + precision_cells, + aggr_model_pathname, + boundaries_pathname, + boundaries_type="shp", + consistency_checks=True, + autoadjust_overlap_gap=True, + in_crs="EPSG:4326", + consistency_tol_dist=0.05, + consistency_tol_area=0.05, +): + """This function reads the input aggregated exposure model of the country (location defined + by aggr_model_pathname and country), identifies the unique points present in this input + model, and generates cell geometries around these points of widths width_EW and width_NS + (in the east-west and north-south directions, respectively). It then adjusts these + geometries so that the cells do not overlap and that consecutive cells do not have small + gaps in between them. This adjustment is necessary because it cannot be guaranteed that + the input points are spaced with sufficient accuracy. The identification of unique points + and the adjustment of the geometries is carried out according to precision levels specified + by precision_points and precision_cells. + + If consistency_checks is True, four consistency checks are run: + + 1) Function overlap_by_full_geom_intersection() is used to verify that the created cells do + not overlap with one another. The cells pass this check if no overlaps are found (output + parameter overlap_found is "False"). + + 2) Functions swell_cells_with_buffer(), overlap_by_full_geom_intersection() and + auto_adjust_overlaps_gaps() are used to verify that the created cells do not have gaps in + between them where there should not be any. The cells pass this check if no gaps are found + (output parameter gap_found is "False"). 
+ + 3) Function get_distance_centroids() is used to calculate the maximum distance between the + original points and the final centroids of the generated cells (max_dist_centr). This + maximum distance is compared against the tolerance, defined as: + + max_dist_centr <= min(width_EW, width_NS)*consistency_tol_dist + + The cells pass this check if max_dist_centr is smaller than the tolerance (output parameter + big_dist_diff is "False"). + + 4) Function get_relative_area_range() is used to calcuate the variability of the areas of + the resulting cells, as [max-min]/mean. This value is compared against consistency_tol_area: + if the former is smaller, the cells pass this check (output parameter big_area_diff is + "False"). + + If consistency_checks is False, the consistency checks are not run and all associated output + parameters take the value "Not_Checked". + + If both consistency_checks and autoadjust_overlap_gap are True, the function automatically + adjusts the geometry of the cells if overlaps or gaps are identified. Gaps between cells + that are located diagonally with one another and that have other neighbouring cells in other + directions are ignored. + + If autoadjust_overlap_gap is True, overlap_found and gap_found will be False, because the + function keeps on adjusting the geometries of the cells until no overlaps or gaps are found. + + The adjusted cells are then cropped as per the country's boundaries (input file defined by + boundaries_pathname). The consistency checks are run before this step because the cropping + will affect the location of the centroids of the cells and their areas. + + Args: + country (str): Name of country (as in the files of the aggregated model). + col_lon (str): Name of the column in the file of the aggregated exposure model that + contains longitudes. + col_lat (str): Name of the column in the file of the aggregated exposure model that + contains latitudes. 
+ width_EW (float): Width of the cell in the east-west direction, in degrees, >0. + width_NS (float): Width of the cell in the north-south direction, in degrees, >0. + id_str (str): First part of the string used to generate IDs of the inidividual points. + in which the aggregated exposure model is defined. E.g. "IND_". + precision_points (int): Number of decimal places to be used to determine unique points + present in the aggregated exposure model (the aggregated model + will most likely have several entries for the "same" point). + precision_cells (dictionary): Dictionary with four keys, "lon_w", "lat_s", "lon_e", and + "lat_n". Each key contains a formatting string such as + '{:.3f}', for example. It is used to round the coordinates + of the cells' corners to a certain precision to identify + those that need to be adjusted to be the exact same value. + aggr_model_pathname (str): Path where the files of the aggregated model are located. + boundaries_pathname (str): Path where the boundaries of the country are located. + boundaries_type (str): File type containing the boundaries ("shp"=Shapefile, "gpkg"= + Geopackage). Default: "shp". + consistency_checks (bool): If True, consistency checks will be run to check the quality + of the output produced. If False, they will not. + Default: True. + autoadjust_overlap_gap (bool): If True, the code will automatically adjust cells for + which an overlap or gap was found (consistency_checks + needs to be True for this check to be run). If False, no + automatic adjustment will be carried out. + in_crs (str): CRS of the data (default="EPSG:4326"). + consistency_tol_dist (float): Tolerance to assess how large the maximum distance between + the original points and the centroids of the generated + cells is with respect to the width of the cells. The value + of consistency_tol_dist is multiplied by the smallest of + width_EW and width_NS to define the tolerance in degrees. + Default: 0.05 (i.e. 5% of the smallest width). 
Only needed + if consistency_checks is True. + consistency_tol_area (float): Tolerance to assess how large the variability of the area + of the generated cells is. The value of + consistency_tol_area is compared against the relative area + range, which is calculated as ([max-min]/mean) of all + cells. Default: 0.05. Only needed if consistency_checks is + True. + + Returns: + cells_adj_bound_gdf (GeoDataFrame): GeoPandas GeoDataFrame with the cells defined around + the points at which the aggregated model is defined + for the country. The cells have already been + "cropped" around the country boundaries. + Each row is one of these points. It contains the + following columns: + - id: ID of the point, given by id_str and an + incremental integer. + - coordinates: the names of these columns are given + by col_lon and col_lat. They contain + the original coordinates as given in + the input aggregated exposure model. + - lon_w, lat_s, lon_e, lat_n: coordinates that + define the corners of + the cells. + - geometry: (Shapely) polygons of the output cells. + aggr_mod_df (DataFrame): Pandas DataFrame with the aggregated exposure model of the + country. The content is exactly the same as in the original + input file, plus two additional columns "ID_99" and "NAME_99", + both of which contain the IDs of the cells of cells_adj_gdf + associated with each entry of aggr_mod_df. + overlap_found (str): "True" if consistency_checks=True and an overlap between the + calculated cell geometries was found, "False" if consistency_checks + =True and an overlap was not found (which means that the cells pass + this consistency check), "Not_Checked" if consistency_checks=False, + i.e. the consistency checks were not run. 
+ gap_found (str): "True" if consistency_checks=True and a gap between the calculated cell + geometries was found, "False" if consistency_checks=True and a gap was + not found (which means that the cells pass this consistency check), + "Not_Checked" if consistency_checks=False, i.e. the consistency checks + were not run. + big_dist_diff (str): "True" if consistency_checks=True and the maximum distance between + the original input points and the centroids of the calculated cells + (before the cropping as per country boundaries) is larger than the + tolerance, "False" if consistency_checks=True and the maximum + distance is smaller than the tolerance (which means that the cells + pass this consistency check), "Not_Checked" if consistency_checks= + False, i.e. the consistency checks were not run. + big_area_diff (str): "True" if consistency_checks=True and the variability of the areas + of the resulting cells before the cropping as per country + boundaries) is larger than the tolerance, "False" if + consistency_checks=True and the variability is smaller than the + tolerance (which means that the cells pass this consistency check), + "Not_Checked" if consistency_checks=False, i.e. the consistency + checks were not run. + country_id (str): ID of the country as specified in the country boundaries file given + as input. 
+ """ + + # Return the input in_df if width_EW or width_NS are not valid: + if (width_EW <= 0.0) or (width_NS <= 0): + print( + "ERROR in generate_country_industrial_cells: " + "with_EW and width_NS need to be positive numbers >=0" + ) + return + + # Load exposure file of the aggregated exposure model: + aggr_model_filepath = os.path.join(aggr_model_pathname, "Exposure_Ind_%s.csv" % (country)) + if not os.path.isfile(aggr_model_filepath): + print("ERROR in generate_country_industrial_cells: aggregated model file not found") + return + aggr_mod_df = pd.read_csv(aggr_model_filepath, sep=",") # aggr_mod_df is a Pandas DataFrame + + # Load country boundaries: + boundaries_filepath = os.path.join( + boundaries_pathname, "Adm0_%s.%s" % (country, boundaries_type) + ) + if not os.path.isfile(boundaries_filepath): + print("ERROR in generate_country_industrial_cells: country boundaries file not found") + return + bounds_gdf = gpd.read_file(boundaries_filepath) + + # Retrieve unique points in the exposure file, determined with a specific precision + # (points_gdf is a GeoPandas GeoDataFrame, ids_aggr is an array of strings with length + # equal to the number of rows of aggr_mod_df): + points_gdf, ids_aggr = retrieve_unique_points( + aggr_mod_df, col_lon, col_lat, id_str, precision=precision_points, in_crs=in_crs + ) + + # Add the IDs of the unique points to the DataFrame of the input aggregated model: + aggr_mod_df["ID_99"] = ids_aggr + aggr_mod_df["NAME_99"] = ids_aggr + + # Define cells around the unique points (cells_gdf is a GeoPandas GeoDataFrame): + cells_gdf = define_cells_in_dataframe( + points_gdf, col_lon, col_lat, width_EW, width_NS, in_crs=in_crs + ) + + # Create dictionaries with all coordinates of the corners of the cells: + coords_dict = create_dict_all_coordinates(cells_gdf, precision_cells) + if len(coords_dict.keys()) < 1: + print( + "ERROR in generate_country_industrial_cells: create_dict_all_coordinates " + "has returned an empty dictionary" + ) + return + 
+ # Create dictionary with unique values of the coordinates of the corners of the cells, to + # a certain precision (given by precision_cells when generating coords_dict): + coords_uq = create_dict_unique_coordinates(coords_dict) + + # Adjust all the coordinates of the corners of the cells (coords_dict) by taking the + # average value of all instances of that coordinate that "should be the same", as identified + # in coords_uq: + coords_dict_adj = adjust_coords(coords_dict, coords_uq) # coords_dict_adj is a dictionary + + # Generate final output with adjusted cell geometries (cells_adj_gdf is a + # GeoPandas GeoDataFrame): + cells_adj_gdf = build_adjusted_cells_dataframe(cells_gdf, coords_dict_adj) + + # Run consistency checks if requested: + if not consistency_checks: + overlap_found = "Not_Checked" + gap_found = "Not_Checked" + big_dist_diff = "Not_Checked" + big_area_diff = "Not_Checked" + else: + # Consistency check 1: the output geometries should not overlap + num_overlaps = 999 # Initialise variable for the while loop to run at least once + while num_overlaps > 0: + intsect_gdf = overlap_by_full_geom_intersection(cells_adj_gdf, "id_1", "id_2") + num_overlaps = intsect_gdf.shape[0] + if num_overlaps > 0: + if not autoadjust_overlap_gap: # The user specified not to automatically adjust + overlap_found = "True" + break + # Automatically adjust the overlaps + cells_adj_gdf, _ = auto_adjust_overlaps_gaps( + cells_adj_gdf, + intsect_gdf, + col_lon, + col_lat, + width_EW, + width_NS, + "overlap", + "id_1", + "id_2", + ) + else: # No overlaps found, the while loop will end + overlap_found = "False" + + # Consistency check 2: + gaps_found = True # Initialise variable for the while loop to run at least once + while gaps_found: + # Expand the cells by 25% of their dimensions in all directions: + cells_adj_offset_gdf = swell_cells_with_buffer( + cells_adj_gdf, 0.25 * width_EW, 0.25 * width_NS + ) + + # Identify intersections in the expanded version: + intsect_gdf = 
overlap_by_full_geom_intersection( + cells_adj_offset_gdf, "id_1", "id_2" + ) + + # Automatically adjust the potential gaps and store it in auxiliary variable to + # determine if there are gaps or not: + cells_adj_aux_gdf, gaps_found = auto_adjust_overlaps_gaps( + cells_adj_gdf, + intsect_gdf, + col_lon, + col_lat, + width_EW, + width_NS, + "gap", + "id_1", + "id_2", + ) + + if not autoadjust_overlap_gap: # The user specified not to automatically adjust + if gaps_found: + gap_found = "True" + else: + gap_found = "False" + break # This will be the result of the check, the while loop will not run again + + # Adopt the auxiliary adjusted version: + cells_adj_gdf = deepcopy(cells_adj_aux_gdf) + gap_found = str(gaps_found) + + # Consistency check 3: maximum distance between original points and final centroids: + max_dist_centr = get_distance_centroids(cells_adj_gdf, col_lon, col_lat) + # Compare the maximum distance against the tolerance: + if max_dist_centr > min(width_EW, width_NS) * consistency_tol_dist: + big_dist_diff = "True" + else: + big_dist_diff = "False" + + # Consistency check 4: stability/variability of area of resulting cells: + rel_area_range = get_relative_area_range(cells_gdf) + # Compare the relative area range ([max-min]/mean) against the tolerance: + if rel_area_range > consistency_tol_area: + big_area_diff = "True" + else: + big_area_diff = "False" + + # Intersect cells with admnistrative boundary of country: + cells_adj_bound_gdf = gpd.overlay(cells_adj_gdf, bounds_gdf, how="intersection") + # Eliminate columns that are not useful: + if "ID_0" in cells_adj_bound_gdf.columns: + country_id = str(cells_adj_bound_gdf["ID_0"].values[0]) + del cells_adj_bound_gdf["ID_0"] + else: + country_id = "UNK" + if "NAME_0" in cells_adj_bound_gdf.columns: + del cells_adj_bound_gdf["NAME_0"] + if "id_2" in cells_adj_bound_gdf.columns: + del cells_adj_bound_gdf["id_2"] + if "id_1" in cells_adj_bound_gdf.columns: + cells_adj_bound_gdf = 
cells_adj_bound_gdf.rename(columns={"id_1": "id"}) + # Update columns "lon_w", "lat_s", "lon_e", and "lat_n" of cells_adj_bound_gdf: + for row in range(0, cells_adj_bound_gdf.shape[0]): + geometry_row = cells_adj_bound_gdf["geometry"].values[row] + cells_adj_bound_gdf["lon_w"].values[row] = geometry_row.bounds[0] + cells_adj_bound_gdf["lon_e"].values[row] = geometry_row.bounds[2] + cells_adj_bound_gdf["lat_s"].values[row] = geometry_row.bounds[1] + cells_adj_bound_gdf["lat_n"].values[row] = geometry_row.bounds[3] + + return ( + cells_adj_bound_gdf, + aggr_mod_df, + overlap_found, + gap_found, + big_dist_diff, + big_area_diff, + country_id, + ) diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.dbf b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.dbf new file mode 100644 index 0000000000000000000000000000000000000000..248f50c16ba5b92f2635de2bbe31d738f5384e48 GIT binary patch literal 119 ycmZRsWMyV#U|>jO5CxK$ATtFn<_BVN!MPBIr%Sv6R0yj)uL2wxKv_oS7E%B_pa!h~ literal 0 HcmV?d00001 diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.prj b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.prj new file mode 100644 index 0000000..a30c00a --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.prj @@ -0,0 +1 @@ +GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.qpj b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.qpj new file mode 100644 index 0000000..5fbc831 --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.qpj @@ -0,0 +1 @@ +GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 
84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]] diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.shp b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.shp new file mode 100644 index 0000000000000000000000000000000000000000..41d1fb30878beec10e1201947203192e79545cc1 GIT binary patch literal 236 zcmZQzQ0HR64$59IGcd3M<>Xsm2k!W%?hwYOyJ_x!4~G|@L~cZdXgJiXRsLYs_H@9m z9YvH8$aBDE)`#WCyv;c^9R95Qc*&Q^(*dLqqz(ko?S=7SYSHz=_%L_E)Wi4y6M#f9 literal 0 HcmV?d00001 diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.shx b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_A.shx new file mode 100644 index 0000000000000000000000000000000000000000..ccd1687abbfda09ebd318a8507d1f92fbba9f7ba GIT binary patch literal 108 zcmZQzQ0HR64$NLKGcd3M<>Xsm2k!W%?hwYOyJ_x!4~G|@L~cZdXgJiXRsLYs_H@9m L9Yxd#$a4SyoTv}i literal 0 HcmV?d00001 diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.dbf b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.dbf new file mode 100644 index 0000000000000000000000000000000000000000..248f50c16ba5b92f2635de2bbe31d738f5384e48 GIT binary patch literal 119 ycmZRsWMyV#U|>jO5CxK$ATtFn<_BVN!MPBIr%Sv6R0yj)uL2wxKv_oS7E%B_pa!h~ literal 0 HcmV?d00001 diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.prj b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.prj new file mode 100644 index 0000000..a30c00a --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.prj @@ -0,0 +1 @@ +GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.qpj 
b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.qpj new file mode 100644 index 0000000..5fbc831 --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.qpj @@ -0,0 +1 @@ +GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]] diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.shp b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.shp new file mode 100644 index 0000000000000000000000000000000000000000..41d1fb30878beec10e1201947203192e79545cc1 GIT binary patch literal 236 zcmZQzQ0HR64$59IGcd3M<>Xsm2k!W%?hwYOyJ_x!4~G|@L~cZdXgJiXRsLYs_H@9m z9YvH8$aBDE)`#WCyv;c^9R95Qc*&Q^(*dLqqz(ko?S=7SYSHz=_%L_E)Wi4y6M#f9 literal 0 HcmV?d00001 diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.shx b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_B.shx new file mode 100644 index 0000000000000000000000000000000000000000..ccd1687abbfda09ebd318a8507d1f92fbba9f7ba GIT binary patch literal 108 zcmZQzQ0HR64$NLKGcd3M<>Xsm2k!W%?hwYOyJ_x!4~G|@L~cZdXgJiXRsLYs_H@9m L9Yxd#$a4SyoTv}i literal 0 HcmV?d00001 diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.dbf b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.dbf new file mode 100644 index 0000000000000000000000000000000000000000..248f50c16ba5b92f2635de2bbe31d738f5384e48 GIT binary patch literal 119 ycmZRsWMyV#U|>jO5CxK$ATtFn<_BVN!MPBIr%Sv6R0yj)uL2wxKv_oS7E%B_pa!h~ literal 0 HcmV?d00001 diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.prj b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.prj new file mode 100644 index 0000000..a30c00a --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.prj @@ -0,0 +1 @@ 
+GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.qpj b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.qpj new file mode 100644 index 0000000..5fbc831 --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.qpj @@ -0,0 +1 @@ +GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]] diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.shp b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.shp new file mode 100644 index 0000000000000000000000000000000000000000..41d1fb30878beec10e1201947203192e79545cc1 GIT binary patch literal 236 zcmZQzQ0HR64$59IGcd3M<>Xsm2k!W%?hwYOyJ_x!4~G|@L~cZdXgJiXRsLYs_H@9m z9YvH8$aBDE)`#WCyv;c^9R95Qc*&Q^(*dLqqz(ko?S=7SYSHz=_%L_E)Wi4y6M#f9 literal 0 HcmV?d00001 diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.shx b/tests/data/GDE_TOOLS_create_industrial_cells/Adm0_Country_C.shx new file mode 100644 index 0000000000000000000000000000000000000000..ccd1687abbfda09ebd318a8507d1f92fbba9f7ba GIT binary patch literal 108 zcmZQzQ0HR64$NLKGcd3M<>Xsm2k!W%?hwYOyJ_x!4~G|@L~cZdXgJiXRsLYs_H@9m L9Yxd#$a4SyoTv}i literal 0 HcmV?d00001 diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_A.csv b/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_A.csv new file mode 100644 index 0000000..a11ae4d --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_A.csv @@ -0,0 +1,15 @@ +LONGITUDE,LATITUDE,TAXONOMY,BUILDINGS,DWELLINGS,OCCUPANTS_PER_ASSET,COST_PER_AREA_EUR,ID_1,NAME_1,ID_2,NAME_2 
+12,50,BdgClassA,50,50,500,2000,1,State 1,11,Municipality 1 +12,50,BdgClassB,30,30,400,2000,1,State 1,11,Municipality 1 +12,50,BdgClassC,20,20,300,2000,1,State 1,11,Municipality 1 +12,50.0082,BdgClassA,40,40,400,2000,1,State 1,11,Municipality 1 +12,50.0082,BdgClassB,40,40,533,2000,1,State 1,11,Municipality 1 +12,50.0082,BdgClassC,30,30,450,2000,1,State 1,11,Municipality 1 +12.0082,50,BdgClassA,15,15,150,2000,1,State 1,11,Municipality 1 +12.0082,50,BdgClassD,30,30,500,2000,1,State 1,11,Municipality 1 +12.0081,50.0082,BdgClassA,20,20,200,2000,2,State 2,25,Municipality 5 +12.0081,50.0082,BdgClassB,15,15,200,2000,2,State 2,25,Municipality 5 +12.0081,50.0082,BdgClassC,25,25,375,2000,2,State 2,25,Municipality 5 +12.0081,50.0082,BdgClassD,30,30,500,2000,2,State 2,25,Municipality 5 +12.0162,50.0162,BdgClassB,45,45,600,2000,2,State 2,27,Municipality 7 +12.0162,50.0162,BdgClassD,60,60,1000,2000,2,State 2,27,Municipality 7 \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_A_output.csv b/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_A_output.csv new file mode 100644 index 0000000..5b39230 --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_A_output.csv @@ -0,0 +1,15 @@ +LONGITUDE,LATITUDE,TAXONOMY,BUILDINGS,DWELLINGS,OCCUPANTS_PER_ASSET,COST_PER_AREA_EUR,ID_1,NAME_1,ID_2,NAME_2,ID_99,NAME_99 +12,50,BdgClassA,50,50,500,2000,1,State 1,11,Municipality 1,CA_IND_0,CA_IND_0 +12,50,BdgClassB,30,30,400,2000,1,State 1,11,Municipality 1,CA_IND_0,CA_IND_0 +12,50,BdgClassC,20,20,300,2000,1,State 1,11,Municipality 1,CA_IND_0,CA_IND_0 +12,50.0082,BdgClassA,40,40,400,2000,1,State 1,11,Municipality 1,CA_IND_1,CA_IND_1 +12,50.0082,BdgClassB,40,40,533,2000,1,State 1,11,Municipality 1,CA_IND_1,CA_IND_1 +12,50.0082,BdgClassC,30,30,450,2000,1,State 1,11,Municipality 1,CA_IND_1,CA_IND_1 +12.0082,50,BdgClassA,15,15,150,2000,1,State 1,11,Municipality 1,CA_IND_3,CA_IND_3 
+12.0082,50,BdgClassD,30,30,500,2000,1,State 1,11,Municipality 1,CA_IND_3,CA_IND_3 +12.0081,50.0082,BdgClassA,20,20,200,2000,2,State 2,25,Municipality 5,CA_IND_2,CA_IND_2 +12.0081,50.0082,BdgClassB,15,15,200,2000,2,State 2,25,Municipality 5,CA_IND_2,CA_IND_2 +12.0081,50.0082,BdgClassC,25,25,375,2000,2,State 2,25,Municipality 5,CA_IND_2,CA_IND_2 +12.0081,50.0082,BdgClassD,30,30,500,2000,2,State 2,25,Municipality 5,CA_IND_2,CA_IND_2 +12.0162,50.0162,BdgClassB,45,45,600,2000,2,State 2,27,Municipality 7,CA_IND_4,CA_IND_4 +12.0162,50.0162,BdgClassD,60,60,1000,2000,2,State 2,27,Municipality 7,CA_IND_4,CA_IND_4 \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_B.csv b/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_B.csv new file mode 100644 index 0000000..d6e559d --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_B.csv @@ -0,0 +1,15 @@ +LONGITUDE,LATITUDE,TAXONOMY,BUILDINGS,DWELLINGS,OCCUPANTS_PER_ASSET,COST_PER_AREA_EUR,ID_1,NAME_1,ID_2,NAME_2 +12,50,BdgClassA,50,50,500,2000,1,State 1,11,Municipality 1 +12,50,BdgClassB,30,30,400,2000,1,State 1,11,Municipality 1 +12,50,BdgClassC,20,20,300,2000,1,State 1,11,Municipality 1 +12.001,50.0082,BdgClassA,40,40,400,2000,1,State 1,11,Municipality 1 +12.001,50.0082,BdgClassB,40,40,533,2000,1,State 1,11,Municipality 1 +12.001,50.0082,BdgClassC,30,30,450,2000,1,State 1,11,Municipality 1 +12.0082,50,BdgClassA,15,15,150,2000,1,State 1,11,Municipality 1 +12.0082,50,BdgClassD,30,30,500,2000,1,State 1,11,Municipality 1 +12.0081,50.0082,BdgClassA,20,20,200,2000,2,State 2,25,Municipality 5 +12.0081,50.0082,BdgClassB,15,15,200,2000,2,State 2,25,Municipality 5 +12.0081,50.0082,BdgClassC,25,25,375,2000,2,State 2,25,Municipality 5 +12.0081,50.0082,BdgClassD,30,30,500,2000,2,State 2,25,Municipality 5 +12.0162,50.0162,BdgClassB,45,45,600,2000,2,State 2,27,Municipality 7 +12.0162,50.0162,BdgClassD,60,60,1000,2000,2,State 2,27,Municipality 
7 \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_B_output.csv b/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_B_output.csv new file mode 100644 index 0000000..a91dfd9 --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_B_output.csv @@ -0,0 +1,15 @@ +LONGITUDE,LATITUDE,TAXONOMY,BUILDINGS,DWELLINGS,OCCUPANTS_PER_ASSET,COST_PER_AREA_EUR,ID_1,NAME_1,ID_2,NAME_2,ID_99,NAME_99 +12,50,BdgClassA,50,50,500,2000,1,State 1,11,Municipality 1,CB_IND_0,CB_IND_0 +12,50,BdgClassB,30,30,400,2000,1,State 1,11,Municipality 1,CB_IND_0,CB_IND_0 +12,50,BdgClassC,20,20,300,2000,1,State 1,11,Municipality 1,CB_IND_0,CB_IND_0 +12.001,50.0082,BdgClassA,40,40,400,2000,1,State 1,11,Municipality 1,CB_IND_1,CB_IND_1 +12.001,50.0082,BdgClassB,40,40,533,2000,1,State 1,11,Municipality 1,CB_IND_1,CB_IND_1 +12.001,50.0082,BdgClassC,30,30,450,2000,1,State 1,11,Municipality 1,CB_IND_1,CB_IND_1 +12.0082,50,BdgClassA,15,15,150,2000,1,State 1,11,Municipality 1,CB_IND_3,CB_IND_3 +12.0082,50,BdgClassD,30,30,500,2000,1,State 1,11,Municipality 1,CB_IND_3,CB_IND_3 +12.0081,50.0082,BdgClassA,20,20,200,2000,2,State 2,25,Municipality 5,CB_IND_2,CB_IND_2 +12.0081,50.0082,BdgClassB,15,15,200,2000,2,State 2,25,Municipality 5,CB_IND_2,CB_IND_2 +12.0081,50.0082,BdgClassC,25,25,375,2000,2,State 2,25,Municipality 5,CB_IND_2,CB_IND_2 +12.0081,50.0082,BdgClassD,30,30,500,2000,2,State 2,25,Municipality 5,CB_IND_2,CB_IND_2 +12.0162,50.0162,BdgClassB,45,45,600,2000,2,State 2,27,Municipality 7,CB_IND_4,CB_IND_4 +12.0162,50.0162,BdgClassD,60,60,1000,2000,2,State 2,27,Municipality 7,CB_IND_4,CB_IND_4 \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_C.csv b/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_C.csv new file mode 100644 index 0000000..4f41905 --- /dev/null +++ 
b/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_C.csv @@ -0,0 +1,15 @@ +LONGITUDE,LATITUDE,TAXONOMY,BUILDINGS,DWELLINGS,OCCUPANTS_PER_ASSET,COST_PER_AREA_EUR,ID_1,NAME_1,ID_2,NAME_2 +12,50,BdgClassA,50,50,500,2000,1,State 1,11,Municipality 1 +12,50,BdgClassB,30,30,400,2000,1,State 1,11,Municipality 1 +12,50,BdgClassC,20,20,300,2000,1,State 1,11,Municipality 1 +11.999,50.0082,BdgClassA,40,40,400,2000,1,State 1,11,Municipality 1 +11.999,50.0082,BdgClassB,40,40,533,2000,1,State 1,11,Municipality 1 +11.999,50.0082,BdgClassC,30,30,450,2000,1,State 1,11,Municipality 1 +12.0082,50,BdgClassA,15,15,150,2000,1,State 1,11,Municipality 1 +12.0082,50,BdgClassD,30,30,500,2000,1,State 1,11,Municipality 1 +12.0084,50.0082,BdgClassA,20,20,200,2000,2,State 2,25,Municipality 5 +12.0084,50.0082,BdgClassB,15,15,200,2000,2,State 2,25,Municipality 5 +12.0084,50.0082,BdgClassC,25,25,375,2000,2,State 2,25,Municipality 5 +12.0084,50.0082,BdgClassD,30,30,500,2000,2,State 2,25,Municipality 5 +12.0162,50.0162,BdgClassB,45,45,600,2000,2,State 2,27,Municipality 7 +12.0162,50.0162,BdgClassD,60,60,1000,2000,2,State 2,27,Municipality 7 \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_C_output.csv b/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_C_output.csv new file mode 100644 index 0000000..8bd9083 --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/Exposure_Ind_Country_C_output.csv @@ -0,0 +1,15 @@ +LONGITUDE,LATITUDE,TAXONOMY,BUILDINGS,DWELLINGS,OCCUPANTS_PER_ASSET,COST_PER_AREA_EUR,ID_1,NAME_1,ID_2,NAME_2,ID_99,NAME_99 +12,50,BdgClassA,50,50,500,2000,1,State 1,11,Municipality 1,CC_IND_1,CC_IND_1 +12,50,BdgClassB,30,30,400,2000,1,State 1,11,Municipality 1,CC_IND_1,CC_IND_1 +12,50,BdgClassC,20,20,300,2000,1,State 1,11,Municipality 1,CC_IND_1,CC_IND_1 +11.999,50.0082,BdgClassA,40,40,400,2000,1,State 1,11,Municipality 1,CC_IND_0,CC_IND_0 +11.999,50.0082,BdgClassB,40,40,533,2000,1,State 
1,11,Municipality 1,CC_IND_0,CC_IND_0 +11.999,50.0082,BdgClassC,30,30,450,2000,1,State 1,11,Municipality 1,CC_IND_0,CC_IND_0 +12.0082,50,BdgClassA,15,15,150,2000,1,State 1,11,Municipality 1,CC_IND_2,CC_IND_2 +12.0082,50,BdgClassD,30,30,500,2000,1,State 1,11,Municipality 1,CC_IND_2,CC_IND_2 +12.0084,50.0082,BdgClassA,20,20,200,2000,2,State 2,25,Municipality 5,CC_IND_3,CC_IND_3 +12.0084,50.0082,BdgClassB,15,15,200,2000,2,State 2,25,Municipality 5,CC_IND_3,CC_IND_3 +12.0084,50.0082,BdgClassC,25,25,375,2000,2,State 2,25,Municipality 5,CC_IND_3,CC_IND_3 +12.0084,50.0082,BdgClassD,30,30,500,2000,2,State 2,25,Municipality 5,CC_IND_3,CC_IND_3 +12.0162,50.0162,BdgClassB,45,45,600,2000,2,State 2,27,Municipality 7,CC_IND_4,CC_IND_4 +12.0162,50.0162,BdgClassD,60,60,1000,2000,2,State 2,27,Municipality 7,CC_IND_4,CC_IND_4 \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_A_0.csv b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_A_0.csv new file mode 100644 index 0000000..7a522f8 --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_A_0.csv @@ -0,0 +1,6 @@ +id,LONGITUDE,LATITUDE,lon_w,lat_s,lon_e,lat_n,geometry +CA_IND_0,12,50,11.9975,49.997,12.004075,50.0041,"POLYGON ((11.9975 50.0041, 12.004075 50.0041, 12.004075 49.997, 11.9975 49.997, 11.9975 50.0041))" +CA_IND_1,12,50.0082,11.9975,50.0041,12.004075,50.0122555555556,"POLYGON ((11.9975 50.0122555555556, 12.004075 50.0122555555556, 12.004075 50.0041, 11.9975 50.0041, 11.9975 50.0122555555556))" +CA_IND_2,12.0081,50.0082,12.004075,50.0041,12.0122222222222,50.0122555555556,"POLYGON ((12.004075 50.0122555555556, 12.0122222222222 50.0122555555556, 12.0122222222222 50.0041, 12.004075 50.0041, 12.004075 50.0122555555556))" +CA_IND_3,12.0082,50,12.004075,49.997,12.0122222222222,50.0041,"POLYGON ((12.004075 50.0041, 12.0122222222222 50.0041, 12.0122222222222 49.997, 12.004075 49.997, 12.004075 50.0041))" 
+CA_IND_4,12.0162,50.0162,12.0122222222222,50.0122555555556,12.018,50.018,"POLYGON ((12.0122222222222 50.018, 12.018 50.018, 12.018 50.0122555555556, 12.0122222222222 50.0122555555556, 12.0122222222222 50.018))" \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_A_1.csv b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_A_1.csv new file mode 100644 index 0000000..7a522f8 --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_A_1.csv @@ -0,0 +1,6 @@ +id,LONGITUDE,LATITUDE,lon_w,lat_s,lon_e,lat_n,geometry +CA_IND_0,12,50,11.9975,49.997,12.004075,50.0041,"POLYGON ((11.9975 50.0041, 12.004075 50.0041, 12.004075 49.997, 11.9975 49.997, 11.9975 50.0041))" +CA_IND_1,12,50.0082,11.9975,50.0041,12.004075,50.0122555555556,"POLYGON ((11.9975 50.0122555555556, 12.004075 50.0122555555556, 12.004075 50.0041, 11.9975 50.0041, 11.9975 50.0122555555556))" +CA_IND_2,12.0081,50.0082,12.004075,50.0041,12.0122222222222,50.0122555555556,"POLYGON ((12.004075 50.0122555555556, 12.0122222222222 50.0122555555556, 12.0122222222222 50.0041, 12.004075 50.0041, 12.004075 50.0122555555556))" +CA_IND_3,12.0082,50,12.004075,49.997,12.0122222222222,50.0041,"POLYGON ((12.004075 50.0041, 12.0122222222222 50.0041, 12.0122222222222 49.997, 12.004075 49.997, 12.004075 50.0041))" +CA_IND_4,12.0162,50.0162,12.0122222222222,50.0122555555556,12.018,50.018,"POLYGON ((12.0122222222222 50.018, 12.018 50.018, 12.018 50.0122555555556, 12.0122222222222 50.0122555555556, 12.0122222222222 50.018))" \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_A_2.csv b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_A_2.csv new file mode 100644 index 0000000..7a522f8 --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_A_2.csv @@ -0,0 +1,6 @@ +id,LONGITUDE,LATITUDE,lon_w,lat_s,lon_e,lat_n,geometry 
+CA_IND_0,12,50,11.9975,49.997,12.004075,50.0041,"POLYGON ((11.9975 50.0041, 12.004075 50.0041, 12.004075 49.997, 11.9975 49.997, 11.9975 50.0041))" +CA_IND_1,12,50.0082,11.9975,50.0041,12.004075,50.0122555555556,"POLYGON ((11.9975 50.0122555555556, 12.004075 50.0122555555556, 12.004075 50.0041, 11.9975 50.0041, 11.9975 50.0122555555556))" +CA_IND_2,12.0081,50.0082,12.004075,50.0041,12.0122222222222,50.0122555555556,"POLYGON ((12.004075 50.0122555555556, 12.0122222222222 50.0122555555556, 12.0122222222222 50.0041, 12.004075 50.0041, 12.004075 50.0122555555556))" +CA_IND_3,12.0082,50,12.004075,49.997,12.0122222222222,50.0041,"POLYGON ((12.004075 50.0041, 12.0122222222222 50.0041, 12.0122222222222 49.997, 12.004075 49.997, 12.004075 50.0041))" +CA_IND_4,12.0162,50.0162,12.0122222222222,50.0122555555556,12.018,50.018,"POLYGON ((12.0122222222222 50.018, 12.018 50.018, 12.018 50.0122555555556, 12.0122222222222 50.0122555555556, 12.0122222222222 50.018))" \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_B_0.csv b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_B_0.csv new file mode 100644 index 0000000..4946fb5 --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_B_0.csv @@ -0,0 +1,6 @@ +id,LONGITUDE,LATITUDE,lon_w,lat_s,lon_e,lat_n,geometry +CB_IND_0,12,50,11.9975,49.997,12.0040444444444,50.0041,"POLYGON ((11.9975 50.0041, 12.0040444444444 50.0041, 12.0040444444444 49.997, 11.9975 49.997, 11.9975 50.0041))" +CB_IND_1,12,50.0082,11.9975,50.0041,12.0046055555556,50.0122555555556,"POLYGON ((11.9975 50.0122555555556, 12.0046055555556 50.0122555555556, 12.0046055555556 50.0041, 11.9975 50.0041, 11.9975 50.0122555555556))" +CB_IND_2,12.0081,50.0082,12.0046055555556,50.0041,12.0122222222222,50.0122555555556,"POLYGON ((12.0046055555556 50.0122555555556, 12.0122222222222 50.0122555555556, 12.0122222222222 50.0041, 12.0046055555556 50.0041, 12.0046055555556 
50.0122555555556))" +CB_IND_3,12.0082,50,12.0040444444444,49.997,12.0122222222222,50.0041,"POLYGON ((12.0040444444444 50.0041, 12.0122222222222 50.0041, 12.0122222222222 49.997, 12.0040444444444 49.997, 12.0040444444444 50.0041))" +CB_IND_4,12.0162,50.0162,12.0122222222222,50.0122555555556,12.018,50.018,"POLYGON ((12.0122222222222 50.018, 12.018 50.018, 12.018 50.0122555555556, 12.0122222222222 50.0122555555556, 12.0122222222222 50.018))" \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_B_1.csv b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_B_1.csv new file mode 100644 index 0000000..ec73e55 --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_B_1.csv @@ -0,0 +1,6 @@ +id,LONGITUDE,LATITUDE,lon_w,lat_s,lon_e,lat_n,geometry +CB_IND_0,12,50,11.9975,49.997,12.0040444444444,50.0041,"POLYGON ((11.9975 50.0041, 12.0040444444444 50.0041, 12.0040444444444 49.997, 11.9975 49.997, 11.9975 50.0041))" +CB_IND_1,12,50.0082,11.9975,50.0041,12.0051666666667,50.0122555555556,"POLYGON ((11.9975 50.0122555555556, 12.0051666666667 50.0122555555556, 12.0051666666667 50.0041, 11.9975 50.0041, 11.9975 50.0122555555556))" +CB_IND_2,12.0081,50.0082,12.0040444444444,50.0041,12.0122222222222,50.0122555555556,"POLYGON ((12.0040444444444 50.0122555555556, 12.0122222222222 50.0122555555556, 12.0122222222222 50.0041, 12.0040444444444 50.0041, 12.0040444444444 50.0122555555556))" +CB_IND_3,12.0082,50,12.0040444444444,49.997,12.0122222222222,50.0041,"POLYGON ((12.0040444444444 50.0041, 12.0122222222222 50.0041, 12.0122222222222 49.997, 12.0040444444444 49.997, 12.0040444444444 50.0041))" +CB_IND_4,12.0162,50.0162,12.0122222222222,50.0122555555556,12.018,50.018,"POLYGON ((12.0122222222222 50.018, 12.018 50.018, 12.018 50.0122555555556, 12.0122222222222 50.0122555555556, 12.0122222222222 50.018))" \ No newline at end of file diff --git 
a/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_B_2.csv b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_B_2.csv new file mode 100644 index 0000000..ec73e55 --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_B_2.csv @@ -0,0 +1,6 @@ +id,LONGITUDE,LATITUDE,lon_w,lat_s,lon_e,lat_n,geometry +CB_IND_0,12,50,11.9975,49.997,12.0040444444444,50.0041,"POLYGON ((11.9975 50.0041, 12.0040444444444 50.0041, 12.0040444444444 49.997, 11.9975 49.997, 11.9975 50.0041))" +CB_IND_1,12,50.0082,11.9975,50.0041,12.0051666666667,50.0122555555556,"POLYGON ((11.9975 50.0122555555556, 12.0051666666667 50.0122555555556, 12.0051666666667 50.0041, 11.9975 50.0041, 11.9975 50.0122555555556))" +CB_IND_2,12.0081,50.0082,12.0040444444444,50.0041,12.0122222222222,50.0122555555556,"POLYGON ((12.0040444444444 50.0122555555556, 12.0122222222222 50.0122555555556, 12.0122222222222 50.0041, 12.0040444444444 50.0041, 12.0040444444444 50.0122555555556))" +CB_IND_3,12.0082,50,12.0040444444444,49.997,12.0122222222222,50.0041,"POLYGON ((12.0040444444444 50.0041, 12.0122222222222 50.0041, 12.0122222222222 49.997, 12.0040444444444 49.997, 12.0040444444444 50.0041))" +CB_IND_4,12.0162,50.0162,12.0122222222222,50.0122555555556,12.018,50.018,"POLYGON ((12.0122222222222 50.018, 12.018 50.018, 12.018 50.0122555555556, 12.0122222222222 50.0122555555556, 12.0122222222222 50.018))" \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_C_0.csv b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_C_0.csv new file mode 100644 index 0000000..d311a2b --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_C_0.csv @@ -0,0 +1,6 @@ +id,LONGITUDE,LATITUDE,lon_w,lat_s,lon_e,lat_n,geometry +CC_IND_0,11.999,50.0082,11.9975,50.0041,12.0036555555556,50.0122555555556,"POLYGON ((11.9975 50.0122555555556, 12.0036555555556 50.0122555555556, 
12.0036555555556 50.0041, 11.9975 50.0041, 11.9975 50.0122555555556))" +CC_IND_1,12,50,11.9975,49.997,12.0041444444444,50.0041,"POLYGON ((11.9975 50.0041, 12.0041444444444 50.0041, 12.0041444444444 49.997, 11.9975 49.997, 11.9975 50.0041))" +CC_IND_2,12.0082,50,12.0041444444444,49.997,12.0122,50.0041,"POLYGON ((12.0041444444444 50.0041, 12.0122 50.0041, 12.0122 49.997, 12.0041444444444 49.997, 12.0041444444444 50.0041))" +CC_IND_3,12.0084,50.0082,12.0036555555556,50.0041,12.0125666666667,50.0122555555556,"POLYGON ((12.0036555555556 50.0122555555556, 12.0125666666667 50.0122555555556, 12.0125666666667 50.0041, 12.0036555555556 50.0041, 12.0036555555556 50.0122555555556))" +CC_IND_4,12.0162,50.0162,12.0122,50.0122555555556,12.018,50.018,"POLYGON ((12.0122 50.018, 12.018 50.018, 12.018 50.0122555555556, 12.0122 50.0122555555556, 12.0122 50.018))" \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_C_1.csv b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_C_1.csv new file mode 100644 index 0000000..7fb340a --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_C_1.csv @@ -0,0 +1,6 @@ +id,LONGITUDE,LATITUDE,lon_w,lat_s,lon_e,lat_n,geometry +CC_IND_0,11.999,50.0082,11.9975,50.0041,12.0031666666667,50.0122555555556,"POLYGON ((11.9975 50.0122555555556, 12.0031666666667 50.0122555555556, 12.0031666666667 50.0041, 11.9975 50.0041, 11.9975 50.0122555555556))" +CC_IND_1,12,50,11.9975,49.997,12.0041444444444,50.0041,"POLYGON ((11.9975 50.0041, 12.0041444444444 50.0041, 12.0041444444444 49.997, 11.9975 49.997, 11.9975 50.0041))" +CC_IND_2,12.0082,50,12.0041444444444,49.997,12.0122,50.0041,"POLYGON ((12.0041444444444 50.0041, 12.0122 50.0041, 12.0122 49.997, 12.0041444444444 49.997, 12.0041444444444 50.0041))" +CC_IND_3,12.0084,50.0082,12.0041444444444,50.0041,12.0125666666667,50.0122555555556,"POLYGON ((12.0041444444444 50.0122555555556, 12.0125666666667 
50.0122555555556, 12.0125666666667 50.0041, 12.0041444444444 50.0041, 12.0041444444444 50.0122555555556))" +CC_IND_4,12.0162,50.0162,12.0122,50.0122555555556,12.018,50.018,"POLYGON ((12.0122 50.018, 12.018 50.018, 12.018 50.0122555555556, 12.0122 50.0122555555556, 12.0122 50.018))" \ No newline at end of file diff --git a/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_C_2.csv b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_C_2.csv new file mode 100644 index 0000000..7fb340a --- /dev/null +++ b/tests/data/GDE_TOOLS_create_industrial_cells/expected_cells_Country_C_2.csv @@ -0,0 +1,6 @@ +id,LONGITUDE,LATITUDE,lon_w,lat_s,lon_e,lat_n,geometry +CC_IND_0,11.999,50.0082,11.9975,50.0041,12.0031666666667,50.0122555555556,"POLYGON ((11.9975 50.0122555555556, 12.0031666666667 50.0122555555556, 12.0031666666667 50.0041, 11.9975 50.0041, 11.9975 50.0122555555556))" +CC_IND_1,12,50,11.9975,49.997,12.0041444444444,50.0041,"POLYGON ((11.9975 50.0041, 12.0041444444444 50.0041, 12.0041444444444 49.997, 11.9975 49.997, 11.9975 50.0041))" +CC_IND_2,12.0082,50,12.0041444444444,49.997,12.0122,50.0041,"POLYGON ((12.0041444444444 50.0041, 12.0122 50.0041, 12.0122 49.997, 12.0041444444444 49.997, 12.0041444444444 50.0041))" +CC_IND_3,12.0084,50.0082,12.0041444444444,50.0041,12.0125666666667,50.0122555555556,"POLYGON ((12.0041444444444 50.0122555555556, 12.0125666666667 50.0122555555556, 12.0125666666667 50.0041, 12.0041444444444 50.0041, 12.0041444444444 50.0122555555556))" +CC_IND_4,12.0162,50.0162,12.0122,50.0122555555556,12.018,50.018,"POLYGON ((12.0122 50.018, 12.018 50.018, 12.018 50.0122555555556, 12.0122 50.0122555555556, 12.0122 50.018))" \ No newline at end of file diff --git a/tests/test_GDE_TOOLS_create_industrial_cells.py b/tests/test_GDE_TOOLS_create_industrial_cells.py index 1388315..e8780f8 100644 --- a/tests/test_GDE_TOOLS_create_industrial_cells.py +++ b/tests/test_GDE_TOOLS_create_industrial_cells.py @@ -33,6 +33,7 @@ import 
def test_generate_country_industrial_cells():
    """
    End-to-end check of generate_country_industrial_cells() against reference data files.

    Three fictitious countries (Country_A, Country_B, Country_C) are processed, each one
    under three combinations of the consistency_checks / autoadjust_overlap_gap flags. For
    every run the test compares:
        - the overlap / gap / distance / area flags and the country ID returned by the
          function against values hard-coded below,
        - the returned aggregated model against "Exposure_Ind_<country>_output.csv",
        - the returned cells (IDs, geometry bounds and lon/lat limits) against
          "expected_cells_<country>_<i>.csv", where <i> is the index of the combination of
          flags.
    All data files live in tests/data/GDE_TOOLS_create_industrial_cells, and that same
    directory is passed to the function for both of its path arguments.
    """
    # Directory holding the data files of this test:
    data_dir = os.path.join(
        os.path.dirname(__file__), "data", "GDE_TOOLS_create_industrial_cells"
    )

    # Parameters shared by all three fictitious cases:
    col_lon = "LONGITUDE"
    col_lat = "LATITUDE"
    width_EW = 30.0 / (60.0 * 60.0)  # 30 arcsec
    width_NS = 30.0 / (60.0 * 60.0)  # 30 arcsec
    precision_points = 4
    boundaries_type = "shp"
    in_crs = "EPSG:4326"
    consistency_tol_dist = 0.05
    consistency_tol_area = 0.05

    # Format strings used to convert cell coordinates into strings. The number of decimal
    # places is taken from the exponent of the cell width in scientific notation:
    format_EW = "{:.%sf}" % (int("{:E}".format(width_EW).split("-")[1]))
    format_NS = "{:.%sf}" % (int("{:E}".format(width_NS).split("-")[1]))
    precision_cells = {
        "lon_w": format_EW,
        "lat_s": format_NS,
        "lon_e": format_EW,
        "lat_n": format_NS,
    }

    # The three fictitious countries to test, each with its ID prefix:
    cases = [
        ("Country_A", "CA_IND"),
        ("Country_B", "CB_IND"),
        ("Country_C", "CC_IND"),
    ]

    # Combinations of (consistency_checks, autoadjust_overlap_gap) run for each country:
    flag_combinations = [(True, True), (True, False), (False, False)]

    # Expected values of the simple outputs (not stored in data files). In the arrays, rows
    # correspond to countries (A, B, C) and columns to the combinations of flags above:
    expected_country_id = "278"
    nothing_found = ["False", "False", "Not_Checked"]
    expected_overlap_found = np.array(
        [nothing_found, ["False", "True", "Not_Checked"], nothing_found]
    )
    expected_gap_found = np.array(
        [nothing_found, nothing_found, ["False", "True", "Not_Checked"]]
    )
    expected_big_dist_diff = np.array([nothing_found] * 3)
    expected_big_area_diff = np.array([nothing_found] * 3)

    # Columns of the aggregated model to compare, split by type of comparison:
    numerical_columns = [
        "LONGITUDE",
        "LATITUDE",
        "BUILDINGS",
        "DWELLINGS",
        "OCCUPANTS_PER_ASSET",
        "COST_PER_AREA_EUR",
    ]
    non_numerical_columns = [
        "TAXONOMY",
        "ID_1",
        "NAME_1",
        "ID_2",
        "NAME_2",
        "ID_99",
        "NAME_99",
    ]

    # Columns of the cells GeoDataFrame holding the limits of each cell:
    cell_limit_columns = ["lon_w", "lon_e", "lat_s", "lat_n"]

    # Run the tests:
    for k, (country, id_string) in enumerate(cases):
        # The expected aggregated model is the same for all combinations of flags:
        expected_aggr_mod_df = pd.read_csv(
            os.path.join(data_dir, "Exposure_Ind_%s_output.csv" % (country)), sep=","
        )

        for i, (consistency_checks, autoadjust_overlap_gap) in enumerate(flag_combinations):
            # Run the function:
            (
                function_cells_gdf,
                function_aggr_mod_df,
                function_overlap_found,
                function_gap_found,
                function_big_dist_diff,
                function_big_area_diff,
                function_country_id,
            ) = gdet_cr_ind.generate_country_industrial_cells(
                country,
                col_lon,
                col_lat,
                width_EW,
                width_NS,
                id_string,
                precision_points,
                precision_cells,
                data_dir,
                data_dir,
                boundaries_type,
                consistency_checks,
                autoadjust_overlap_gap,
                in_crs,
                consistency_tol_dist,
                consistency_tol_area,
            )

            # Compare against expected results (simple variables):
            assert function_overlap_found == expected_overlap_found[k, i]
            assert function_gap_found == expected_gap_found[k, i]
            assert function_big_dist_diff == expected_big_dist_diff[k, i]
            assert function_big_area_diff == expected_big_area_diff[k, i]
            assert function_country_id == expected_country_id

            # Compare against expected updated aggregated model file:
            for col in numerical_columns:
                np.testing.assert_allclose(
                    function_aggr_mod_df[col].values,
                    expected_aggr_mod_df[col].values,
                    rtol=0.0,
                    atol=1e-08,
                )
            for col in non_numerical_columns:
                assert np.all(
                    function_aggr_mod_df[col].values == expected_aggr_mod_df[col].values
                )

            # Read the expected cells, whose geometries are stored as WKT strings:
            expected_cells_df = pd.read_csv(
                os.path.join(data_dir, "expected_cells_%s_%s.csv" % (country, i)), sep=","
            )
            expected_geoms = [
                shapely.wkt.loads(wkt_str) for wkt_str in expected_cells_df["geometry"].values
            ]
            expected_cells_gdf = gpd.GeoDataFrame(
                expected_cells_df, geometry=expected_geoms, crs=in_crs
            )

            # Compare against expected final cell geometries and IDs:
            assert np.all(function_cells_gdf["id"].values == expected_cells_gdf["id"].values)
            function_geoms = function_cells_gdf["geometry"].values
            for row, expected_geom in enumerate(expected_cells_gdf["geometry"].values):
                np.testing.assert_allclose(
                    function_geoms[row].bounds,
                    expected_geom.bounds,
                    rtol=0.0,
                    atol=1e-08,
                )
            for col in cell_limit_columns:
                np.testing.assert_allclose(
                    function_cells_gdf[col].values,
                    expected_cells_gdf[col].values,
                    rtol=0.0,
                    atol=1e-08,
                )