diff --git a/GDE_TOOLS_create_industrial_cells.py b/GDE_TOOLS_create_industrial_cells.py index 5f56c6193c034ddbd544090a29d2312c5bcedee0..1d6b8b67b7dc48d1a1372bd9b5c06b1be64c7f63 100644 --- a/GDE_TOOLS_create_industrial_cells.py +++ b/GDE_TOOLS_create_industrial_cells.py @@ -58,8 +58,8 @@ def define_corners(centr_lon, centr_lat, width_EW, width_NS): lat_s (float): Latitude of the south border of the cell, in degrees. lon_e (float): Longitude of the east border of the cell, in degrees. lat_n (float): Latitude of the north border of the cell, in degrees. - """ + if (width_EW <= 0.0) or (width_NS <= 0.0): print("ERROR in define_corners: width_EW and width_NS need to be positive numbers >=0") return 999.9, 999.9, 999.9, 999.9 @@ -83,8 +83,8 @@ def define_cell_polygon(lon_w, lat_s, lon_e, lat_n): Returns: Shapely polygon defined by the four coordinates given as input (rectangle). - """ + return Polygon([(lon_w, lat_s), (lon_e, lat_s), (lon_e, lat_n), (lon_w, lat_n)]) @@ -110,8 +110,8 @@ def define_cells_in_dataframe(in_df, col_lon, col_lat, width_EW, width_NS, in_cr out_gdf: A GeoPandas DataFrame containing the same columns as in_df, plus the geometry of cells defined around the centroids. Number of rows equal to the number of rows in in_df. - """ + # Return the input in_df if width_EW or width_NS are not valid: if (width_EW <= 0.0) or (width_NS <= 0.0): print("ERROR in define_corners: with_EW and width_NS need to be positive numbers >=0") @@ -177,17 +177,14 @@ def retrieve_unique_points(in_df, col_lon, col_lat, id_str, precision=4, in_crs= CRS: as per in_crs. ids_of_unique (array of str): IDs of the unique points (those of out_gpd["id"]) associated with each row of in_df. - """ + # Concatenate longitude and latitude as strings to identify unique points with a precision: pr_str = "{:.%sf}" % (precision) all_pts_str = np.array( [ "%s_%s" - % ( - pr_str.format(in_df[col_lon].values[i]), - pr_str.format(in_df[col_lat].values[i]) - ) + % (pr_str.format(in_df[col_lon].values[i]), pr_str.format(in_df[col_lat].values[i])) for i in range(in_df.shape[0]) ] ) @@ -217,3 +214,202 @@ def retrieve_unique_points(in_df, col_lon, col_lat, id_str, precision=4, in_crs= ids_of_unique = ids[unique_inv] return out_gpd, ids_of_unique + + +def create_dict_all_coordinates(in_gdf, rounding_precision): + """This function retrieves the four coordinates that define all cells in in_gdf and outputs + them in a dictionary both as floats and strings, the latter with number of decimal places + as indicated by rounding_precision. The output dictionary contains four keys, "lon_w", + "lat_s", "lon_e", and "lat_n", each of which contain two subkeys, "all_float" and "all_str". + The float values are provided in the output too (despite being directly copied from the + input without any further processing) to facilitate later operations with the output + dictionary. + + Args: + in_gdf (Geopandas GeoDataFrame): GeoDataFrame with the cells geometries and original + centroids. It is meant to be the output of function + define_cells_in_dataframe(). Its "geometry" column + contains the cells' polygons. It also contains the + columns "lon_w", "lat_s", "lon_e", and "lat_n", with + the four coordinates that define each cell (floats). + rounding_precision (dictionary): Dictionary with four keys, "lon_w", "lat_s", "lon_e", + and "lat_n". Each key contains a formatting string + such as '{:.3f}', for example. + + Returns: + coords_dict (dictionary): Dictionary with four keys, "lon_w", "lat_s", "lon_e", and + "lat_n". Each key contains the subkeys "all_float" and + "all_str". Each of these is an array with length equal to + the number of rows in in_gdf and their elements in the same + order. Both contain the coordinates, in one case in terms of + floats ("all_float") and in the other in terms of strings, + with number of decimal places as indicated by + rounding_precision ("all_str"). + """ + + # Check that in_gdf has the needed columns and terminate otherwise: + if ( + ("lon_w" not in in_gdf.columns) + or ("lat_s" not in in_gdf.columns) + or ("lon_e" not in in_gdf.columns) + or ("lat_n" not in in_gdf.columns) + ): + print("ERROR!! One or more of lon_w, lat_s, lon_e, lat_n missing as columns of in_gdf") + return {} + + coords_dict = {} # Initialise output dictionary + + for ct in ["lon_w", "lat_s", "lon_e", "lat_n"]: + coords_dict[ct] = {} + coords_dict[ct]["all_float"] = in_gdf[ct].values + # Array of coordinates formatted as strings with precision given by rounding_precision: + coords_dict[ct]["all_str"] = np.array( + [rounding_precision[ct].format(val) for val in in_gdf[ct].values] + ) + + return coords_dict + + +def create_dict_unique_coordinates(coords_dict): + """This function retrieves all the coordinates of coords_dict["lon_w"] and + coords_dict["lon_e"], determines the unique values present in both arrays (they both contain + strings), and outputs it as coords_uq["lon"]. It then retrieves all the coordinates of + coords_dict["lat_s"] and coords_dict["lat_n"], determines the unique values present in both + arrays, and outputs it as coords_uq["lat"]. + + Args: + coords_dict (dictionary): Dictionary with four keys, "lon_w", "lat_s", "lon_e", and + "lat_n". Each key contains the subkeys "all_float" and + "all_str". Both contain the coordinates, in one case in terms + of floats ("all_float") and in the other in terms of strings, + with a specific number of decimal places ("all_str"). All + these arrays have equal length and their elements correspond + to one another. It is meant to be the output of function + create_dict_all_coordinates(). + + Returns: + coords_uq (dictionary): Dictionary with two keys, "lon" and "lat". Each contains an + array with the unique strings of coords_dict[key1]["all_str"], + with key1 representing "lon_w" and "lon_e" when the key in + coords_uq is "lon" and "lat_s" and "lat_n" into "lat" when the + key in coords_uq is "lat". The two arrays may have different + lengths. + """ + + # Check that coords_dict has the needed keys and terminate otherwise: + if ( + ("lon_w" not in coords_dict.keys()) + or ("lat_s" not in coords_dict.keys()) + or ("lon_e" not in coords_dict.keys()) + or ("lat_n" not in coords_dict.keys()) + ): + print( + "ERROR!! One or more of lon_w, lat_s, lon_e, lat_n " + "missing as keys of coords_dict" + ) + return {} + + coords_uq = {} # Initialise output dictionary + + # Longitudes: + lons = np.hstack((coords_dict["lon_w"]["all_str"], coords_dict["lon_e"]["all_str"])) + coords_uq["lon"] = np.unique(lons) + + # Latitudes: + lats = np.hstack((coords_dict["lat_s"]["all_str"], coords_dict["lat_n"]["all_str"])) + coords_uq["lat"] = np.unique(lats) + + return coords_uq + + +def adjust_coords(coords_dict, coords_uq): + """This function adjusts the coordinates in coords_dict so that longitudes that differ from + one another in small quantities become the same value (same with latitudes). How "small" + these differences are has been defined when creating coords_dict with the function + create_dict_all_coordinates(). + + Args: + coords_dict (dictionary): Dictionary with four keys, "lon_w", "lat_s", "lon_e", and + "lat_n". Each key contains the subkeys "all_float" and + "all_str". Both contain the coordinates, in one case in terms + of floats ("all_float") and in the other in terms of strings, + with a specific number of decimal places ("all_str"). All + these arrays have equal length and their elements correspond + to one another. It is meant to be the output of function + create_dict_all_coordinates(). + coords_uq (dictionary): Dictionary with two keys, "lon" and "lat". Each contains an + array with the unique strings of coords_dict[key1]["all_str"], + with key1 representing "lon_w" and "lon_e" when the key in + coords_uq is "lon" and "lat_s" and "lat_n" into "lat" when the + key in coords_uq is "lat". The two arrays may have different + lengths. It is meant to be the output of function + create_dict_unique_coordinates(). + + Returns: + out_adj_coord (dictionary): Dictionary with four keys, "lon_w" and "lon_e" in "lon" and + "lat_s" and "lat_n" in "lat". Each of these is an array of + floats of equal length (and length equal to that of the + arrays in coords_dict). Their elements correspond to one + another. They are the adjusted version of + coords_dict[key]["all_float"]. + """ + + # Check that coords_dict has the needed keys and terminate otherwise: + if ( + ("lon_w" not in coords_dict.keys()) + or ("lat_s" not in coords_dict.keys()) + or ("lon_e" not in coords_dict.keys()) + or ("lat_n" not in coords_dict.keys()) + ): + print( + "ERROR!! One or more of lon_w, lat_s, lon_e, lat_n " + "missing as keys of coords_dict" + ) + return {} + + # Check that coords_uq has the needed keys and terminate otherwise: + if ("lon" not in coords_uq.keys()) or ("lat" not in coords_uq.keys()): + print("ERROR!! One or more of lon, lat missing as keys of coords_uq") + return {} + + # Check that coords_dict contains numpy arrays: + for ct in coords_dict.keys(): + for case in ["all_str", "all_float"]: + if not isinstance(coords_dict[ct][case], np.ndarray): + print("ERROR!! coords_dict SHOULD CONTAIN NUMPY ARRAYS!!") + return {} + + # Initialise the output dictionary: + out_adj_coord = {} + + for coord_type in coords_uq.keys(): + # Link keys of coords_dict with keys of coords_uq: + if coord_type == "lon": + coord_names = ["lon_w", "lon_e"] + elif coord_type == "lat": + coord_names = ["lat_s", "lat_n"] + + # Initialise the output arrays (cannot be empty, need to contain the original coords): + for ct in coord_names: + out_adj_coord[ct] = deepcopy(coords_dict[ct]["all_float"]) + + # Go one by one through the "unique" coord values: + for j, coord_j in enumerate(coords_uq[coord_type]): # coord_j is a string + # Concatenate the coordinates stemming from both cases of coord_names: + aux_concat = np.empty([0], dtype="float64") + for ct in coord_names: + which_j = np.where(coords_dict[ct]["all_str"] == coord_j)[0] + if len(which_j) < 1: + continue + aux_concat = np.hstack((aux_concat, coords_dict[ct]["all_float"][which_j])) + # Make all those coordinates the same (the average of all those values): + new_coord_j = aux_concat.mean() + # Replace with new_coord_j all the corresponding cases: + for ct in coord_names: + which_j = np.where(coords_dict[ct]["all_str"] == coord_j)[0] + if len(which_j) < 1: + continue + for pos in which_j: + out_adj_coord[ct][pos] = new_coord_j + + return out_adj_coord diff --git a/tests/test_GDE_TOOLS_create_industrial_cells.py b/tests/test_GDE_TOOLS_create_industrial_cells.py index 11983263a75b4d0da55c5e517997e75e9de00b91..a590d06b455447e99a374fbadde7a4035a3650a6 100644 --- a/tests/test_GDE_TOOLS_create_industrial_cells.py +++ b/tests/test_GDE_TOOLS_create_industrial_cells.py @@ -42,8 +42,8 @@ def test_define_corners(): Tolerance used for difference in results (subtraction) = 1e-08 (the number of decimal places used in the test dataset). - """ + # Read test data: pathname = os.path.join( os.path.dirname(__file__), "data", "GDE_TOOLS_create_industrial_cells" @@ -92,8 +92,8 @@ def test_define_cells_in_dataframe(): This test uses the same datafile as the test for define_corners(), which was generated using a spreadsheet. - """ + # Read test data: pathname = os.path.join( os.path.dirname(__file__), "data", "GDE_TOOLS_create_industrial_cells" @@ -239,3 +239,220 @@ def test_retrieve_unique_points(): function_lat = function_out_gpd[col_lat].values[which_row] assert round(orig_lon, precision_vals[k]) == round(function_lon, precision_vals[k]) assert round(orig_lat, precision_vals[k]) == round(function_lat, precision_vals[k]) + + +def test_create_dict_all_coordinates(): + # Create dictionary of rounding precisions (needed as input): + dec_precision = 3 + rounding_dict = {} + rounding_dict["lon_w"] = "{:.%sf}" % (dec_precision) + rounding_dict["lat_s"] = "{:.%sf}" % (dec_precision) + rounding_dict["lon_e"] = "{:.%sf}" % (dec_precision) + rounding_dict["lat_n"] = "{:.%sf}" % (dec_precision) + + # Test that the function terminates if the input GeoDataFrame does not contain the + # required columns: + d = {"lon_w": [23.0, -17.5, 23.0, -17.5], "latitude": [38.3, 38.3, -41.4, -41.4]} + dummy_gdf = gpd.GeoDataFrame(d) + + function_dict = gdet_cr_ind.create_dict_all_coordinates(dummy_gdf, rounding_dict) + + assert function_dict == {} + + # Test that the function gives the correct output when the input GeoDataFrame does contain + # all required columns: + + # Create a dummy GeoDataFrame to test the non-trivial output: + data = { + "lon_w": [23.01234, -17.56789, 23.01234, -17.56789], + "lat_s": [38.31234, 38.31234, -41.46789, -41.46789], + "lon_e": [23.51234, -17.06789, 23.51234, -17.06789], + "lat_n": [38.81234, 38.81234, -40.96789, -40.96789], + } + dummy_gdf = gpd.GeoDataFrame(data) + + # The same as strings rounded to `dec_precision` (i.e. one of the target results): + expected_output_str = { + "lon_w": ["23.012", "-17.568", "23.012", "-17.568"], + "lat_s": ["38.312", "38.312", "-41.468", "-41.468"], + "lon_e": ["23.512", "-17.068", "23.512", "-17.068"], + "lat_n": ["38.812", "38.812", "-40.968", "-40.968"], + } + + function_dict = gdet_cr_ind.create_dict_all_coordinates(dummy_gdf, rounding_dict) + + assert len(function_dict.keys()) == 4 # The output dictionary should have 4 keys + + # Go one by one each of the keys: + for key in data.keys(): + assert key in function_dict.keys() + assert "all_float" in function_dict[key].keys() + assert len(function_dict[key]["all_float"]) == dummy_gdf.shape[0] + assert "all_str" in function_dict[key].keys() + assert len(function_dict[key]["all_str"]) == dummy_gdf.shape[0] + np.testing.assert_allclose( + function_dict[key]["all_float"], dummy_gdf[key].values, rtol=0.0, atol=1e-08 + ) + for i in range(dummy_gdf.shape[0]): + assert function_dict[key]["all_str"][i] == expected_output_str[key][i] + + +def test_create_dict_unique_coordinates(): + # Test that the function terminates if the input dictionary does not contain the + # required keys: + data = {"lon_w": [23.0, -17.5, 23.0, -17.5], "latitude": [38.3, 38.3, -41.4, -41.4]} + + function_dict = gdet_cr_ind.create_dict_unique_coordinates(data) + + assert function_dict == {} + + # Create a dummy input dictionary to test the output when all required keys exist: + coords = {} + for key in ["lon_w", "lat_s", "lon_e", "lat_n"]: + coords[key] = {} + coords["lon_w"]["all_str"] = ["23.012", "-17.568", "23.012", "57.325", "-17.568"] + coords["lon_e"]["all_str"] = ["23.012", "23.012", "-17.568", "47.172", "47.172"] + coords["lat_s"]["all_str"] = ["38.312", "38.312", "-41.468", "-21.473", "62.916"] + coords["lat_n"]["all_str"] = ["-15.257", "38.312", "-41.468", "-41.468", "-21.473"] + + function_dict = gdet_cr_ind.create_dict_unique_coordinates(coords) + + # The result should be: + expected_output_dict = {} + expected_output_dict["lon"] = np.array(["23.012", "-17.568", "57.325", "47.172"]) + expected_output_dict["lat"] = np.array( + ["38.312", "-41.468", "-21.473", "62.916", "-15.257"] + ) + + # Check longitudes and then latitudes: + for case in ["lon", "lat"]: + assert len(expected_output_dict[case]) == len(function_dict[case]) + # All elements of expected_output_dict[case] must be in function_dict[case], but the + # order may differ: + for val in expected_output_dict[case]: + assert val in function_dict[case] + + +def test_adjust_coords(): + # Test that the function terminates if the input dictionary coords_dict does not contain the + # required keys: + data = {"lon_w": [23.0, -17.5, 23.0, -17.5], "latitude": [38.3, 38.3, -41.4, -41.4]} + + function_dict = gdet_cr_ind.adjust_coords(data, data) # The second input does not matter + + assert function_dict == {} + + # Test that the function terminates if the input dictionary coords_uq does not contain the + # required keys: + coords_dict = {"lon_w": [], "lat_s": [], "lon_e": [], "lat_n": []} + d = { + "lon": [23.0, -17.5, 23.0, -17.5], + "latitude": [38.3, 38.3, -41.4, -41.4], + } # contents here do not matter, just names + + function_dict = gdet_cr_ind.adjust_coords(coords_dict, d) + + assert function_dict == {} + + # Test that the function terminates if the contents of input dictionary coords_dict are not + # Numpy arrays: + coords_dict = {} + for key in ["lon_w", "lat_s", "lon_e", "lat_n"]: + coords_dict[key] = {} + # Filling in the dictionary with lists instead of Numpy arrays: + coords_dict["lon_w"]["all_float"] = [23.01234, -17.56791, 23.01237, 57.32492, -17.56789] + coords_dict["lon_w"]["all_str"] = ["23.012", "-17.568", "23.012", "57.325", "-17.568"] + coords_dict["lon_e"]["all_float"] = [23.01218, 23.01225, -17.56783, 47.17212, 47.17214] + coords_dict["lon_e"]["all_str"] = ["23.012", "23.012", "-17.568", "47.172", "47.172"] + coords_dict["lat_s"]["all_float"] = [38.31234, 38.31213, -41.46789, 21.47281, 62.91624] + coords_dict["lat_s"]["all_str"] = ["38.312", "38.312", "-41.468", "-21.473", "62.916"] + coords_dict["lat_n"]["all_float"] = [-15.25663, 38.31175, -41.46812, -41.46842, 21.47332] + coords_dict["lat_n"]["all_str"] = ["-15.257", "38.312", "-41.468", "-41.468", "-21.473"] + + coords_uq = { + "lon": [23.0, -17.5, 23.0, -17.5], + "lat": [38.3, 38.3, -41.4, -41.4], + } # contents here do not matter, just names + + function_dict = gdet_cr_ind.adjust_coords(coords_dict, coords_uq) + + assert function_dict == {} + + # Create dummy input dictionaries to test the output when all required keys exist: + coords_dict = {} + for key in ["lon_w", "lat_s", "lon_e", "lat_n"]: + coords_dict[key] = {} + coords_dict["lon_w"]["all_float"] = np.array( + [23.01234, -17.56791, 23.01237, 57.32492, -17.56789] + ) + coords_dict["lon_w"]["all_str"] = np.array( + ["23.012", "-17.568", "23.012", "57.325", "-17.568"] + ) + coords_dict["lon_e"]["all_float"] = np.array( + [23.01218, 23.01225, -17.56783, 47.17212, 47.17214] + ) + coords_dict["lon_e"]["all_str"] = np.array( + ["23.012", "23.012", "-17.568", "47.172", "47.172"] + ) + coords_dict["lat_s"]["all_float"] = np.array( + [38.31234, 38.31213, -41.46789, 21.47281, 62.91624] + ) + coords_dict["lat_s"]["all_str"] = np.array( + ["38.312", "38.312", "-41.468", "-21.473", "62.916"] + ) + coords_dict["lat_n"]["all_float"] = np.array( + [-15.25663, 38.31175, -41.46812, -41.46842, 21.47332] + ) + coords_dict["lat_n"]["all_str"] = np.array( + ["-15.257", "38.312", "-41.468", "-41.468", "-21.473"] + ) + + coords_uq = {} + coords_uq["lon"] = np.array(["23.012", "-17.568", "57.325", "47.172"]) + coords_uq["lat"] = np.array(["38.312", "-41.468", "-21.473", "62.916", "-15.257"]) + + function_dict = gdet_cr_ind.adjust_coords(coords_dict, coords_uq) + + # The result should be: + expected_output_dict = {} + expected_output_dict["lon_w"] = np.array( + [ + (23.01234 + 23.01237 + 23.01218 + 23.01225) / 4.0, + ((-17.56791) + (-17.56789) + (-17.56783)) / 3.0, + (23.01234 + 23.01237 + 23.01218 + 23.01225) / 4.0, + 57.32492, + ((-17.56791) + (-17.56789) + (-17.56783)) / 3.0, + ] + ) + expected_output_dict["lon_e"] = np.array( + [ + (23.01234 + 23.01237 + 23.01218 + 23.01225) / 4.0, + (23.01234 + 23.01237 + 23.01218 + 23.01225) / 4.0, + ((-17.56791) + (-17.56789) + (-17.56783)) / 3.0, + (47.17212 + 47.17214) / 2.0, + (47.17212 + 47.17214) / 2.0, + ] + ) + expected_output_dict["lat_s"] = np.array( + [ + (38.31234 + 38.31213 + 38.31175) / 3.0, + (38.31234 + 38.31213 + 38.31175) / 3.0, + ((-41.46789) + (-41.46812) + (-41.46842)) / 3.0, + (21.47281 + 21.47332) / 2.0, + 62.91624, + ] + ) + expected_output_dict["lat_n"] = np.array( + [ + -15.25663, + (38.31234 + 38.31213 + 38.31175) / 3.0, + ((-41.46789) + (-41.46812) + (-41.46842)) / 3.0, + ((-41.46789) + (-41.46812) + (-41.46842)) / 3.0, + (21.47281 + 21.47332) / 2.0, + ] + ) + + for key in ["lon_w", "lat_s", "lon_e", "lat_n"]: + np.testing.assert_allclose( + function_dict[key], expected_output_dict[key], rtol=0.0, atol=1e-08 + )