diff --git a/README.md b/README.md
index 63cd11e1acc137b817cbedcf952e5d314890e6d2..1af22cb6532caf9e5a2c2d7175eab3505c258aec 100644
--- a/README.md
+++ b/README.md
@@ -35,13 +35,26 @@ pip3 install .
 ## Running obmgapanalysis
 
 copy the config-example.yml to your working directory as config.yml and modify
-the variables regarding the data source, database credentials and tiles for input quadkeys.
+the variables regarding the data source, database credentials, multiprocessing
+framework and tiles for input quadkeys.
+
+To assess built-up areas with a configured dataset:
 
 ```bash
 cd /your/working/directory
 obmgapanalysis
 ```
 
+To assess built-up areas based on a `buildings_database`:
+```bash
+obmgapanalysis --obm_built_up
+```
+
+To insert entries from the `import_pathname` into the `target_database`:
+```bash
+obmgapanalysis --import_csv
+```
+
 ## Copyright and copyleft
 
 Copyright (C) 2021
diff --git a/config-example.yml b/config-example.yml
index 2879f485e3cc60345bf555d0e9348efaebdc2911..5bc2c4b975215be6cff8b1525c7873790265fb73 100644
--- a/config-example.yml
+++ b/config-example.yml
@@ -15,6 +15,7 @@ obm_output_pathname: ./obm_results
 import_pathname: ./results
 number_cores: 1
 batch_size: 1000
+get_geometry: False
 
 roads_database:
   host: your_host.dir.request_data
diff --git a/docs/03_Configuration_file.md b/docs/03_Configuration_file.md
index 8f367b22b869f6dc45331da1a32bd37ba80ee206..1261005b4572325a492e77249fdfbf09b9b5f9c6 100644
--- a/docs/03_Configuration_file.md
+++ b/docs/03_Configuration_file.md
@@ -35,8 +35,11 @@ amount of tiles to be handled per process. Each CSV file may contain maximum thi
 all of them provide built areas.
 output_pathname (str): Target path name for the csv file writing and reading.
+obm_output_pathname (str): Target path name for the OBM csv file writing and reading.
+import_pathname (str): Target path name with csv files to import.
 number_cores (int): Desired maximum number of parallel processes to execute.
-batch_size (int): Maximum amount of tiles to be handled per process
+batch_size (int): Maximum amount of tiles to be handled per process.
+get_geometry (bool): If True, geometries will be stored in the output csv files.
 
 The last sections refer to database connections. `database` holds a database
 from which roads can be extracted to refine built areas, also it may contain
 buildings if the program wants to calculate a
diff --git a/obmgapanalysis/fileprocessor.py b/obmgapanalysis/fileprocessor.py
index 390c1e94175c6fc76ae84a1ceef445beef6009f0..aba313eeb2478c51a703f1513d4b066a24156721 100644
--- a/obmgapanalysis/fileprocessor.py
+++ b/obmgapanalysis/fileprocessor.py
@@ -19,8 +19,8 @@ import os
 
 import logging
 
-import geopandas
 import pandas
+import geopandas
 
 # Initialize log
 logger = logging.getLogger(__name__)
@@ -29,7 +29,11 @@ logger = logging.getLogger(__name__)
 
 class FileProcessor:
     @staticmethod
     def write_tiles_to_csv(
-        list_of_dictionaries, output_pathname, column_geometry="built_area", crs="epsg:4326"
+        list_of_dictionaries,
+        output_pathname,
+        get_geometry=False,
+        column_geometry="built_area",
+        crs="epsg:4326",
     ):
         """Write a csv file from a list of dictionaries.
@@ -39,36 +43,30 @@ class FileProcessor:
 
             output_pathname (str): Target path name for the csv file.
 
+            get_geometry (bool): If True, the geometry will be written to the csv file.
+
             column_geometry (str): Name of the field that contains geometries.
                 Default = "built_area"
 
             crs (str): EPSG code of the data projection.
                 Default = "epsg:4326"
         """
-        tiles_gdf = geopandas.GeoDataFrame(
-            list_of_dictionaries, geometry=column_geometry, crs=crs
-        )
-        filepath_out = os.path.join(
-            output_pathname, "{}_{}.csv".format(tiles_gdf.quadkey.iloc[0], len(tiles_gdf.index))
-        )
-        logger.info("Creating {}".format(filepath_out))
-        tiles_gdf.to_csv(filepath_out, index=False)
-
-    @staticmethod
-    def write_obm_tiles_to_csv(list_of_dictionaries, output_pathname):
-        """Write a csv file from a list of dictionaries without geometries.
-
-        Args:
-            list_of_dictionaries (list): List of dictionaries with built-up areas to
-                write.
-
-            output_pathname (str): Target path name for the csv file.
-        """
-
-        tiles_df = pandas.DataFrame(list_of_dictionaries)
-        filepath_out = os.path.join(
-            output_pathname,
-            "OBM_{}_{}.csv".format(tiles_df.quadkey.iloc[0], len(tiles_df.index)),
-        )
-        logger.info("Creating {}".format(filepath_out))
-        tiles_df.to_csv(filepath_out, index=False)
+        if get_geometry is False:
+            tiles_df = pandas.DataFrame(list_of_dictionaries)
+            tiles_df = tiles_df.drop_duplicates(keep="first")
+            filepath_out = os.path.join(
+                output_pathname,
+                "{}_{}.csv".format(tiles_df.quadkey.iloc[0], len(tiles_df.index)),
+            )
+            logger.info("Creating {}".format(filepath_out))
+            tiles_df.to_csv(filepath_out, index=False)
+        else:
+            tiles_gdf = geopandas.GeoDataFrame(
+                list_of_dictionaries, geometry=column_geometry, crs=crs
+            )
+            filepath_out = os.path.join(
+                output_pathname,
+                "{}_{}.csv".format(tiles_gdf.quadkey.iloc[0], len(tiles_gdf.index)),
+            )
+            logger.info("Creating {}".format(filepath_out))
+            tiles_gdf.to_csv(filepath_out, index=False)
diff --git a/obmgapanalysis/obmgapanalysis.py b/obmgapanalysis/obmgapanalysis.py
index 128bb4233fc9029b6a50f8f2eaf36d70505c8e99..affb1e60832a93b6726ffb1f464cc6b1f8364c3c 100644
--- a/obmgapanalysis/obmgapanalysis.py
+++ b/obmgapanalysis/obmgapanalysis.py
@@ -65,6 +65,8 @@ if args.import_csv:
     target_db_config = config["target_database"]
     import_pathname = os.path.abspath(config["import_pathname"])
 
+get_geometry = config["get_geometry"]
+
 
 def multiprocess_built_estimation_batch(quadkey_batch):
     """
@@ -106,6 +108,7 @@ def multiprocess_built_estimation_batch(quadkey_batch):
             database_crs_number=roads_database_crs_number,
             table_config=db_config["roads_table"],
             buffer_magnitude=db_config["process_buffer_magnitude"],
+            get_geometry=get_geometry,
         )
         if result is not None:
             built_up_areas.append(result)
@@ -116,7 +119,10 @@ def multiprocess_built_estimation_batch(quadkey_batch):
 
     if built_up_areas:
         # Write output into a csv file
-        FileProcessor.write_tiles_to_csv(built_up_areas, output_pathname)
+        FileProcessor.write_tiles_to_csv(
+            built_up_areas, output_pathname, get_geometry=get_geometry
+        )
+        del built_up_areas
 
     roads_database.connection.close()
 
@@ -163,10 +169,11 @@ def multiprocess_buildings_batch(quadkey_batch):
 
     if obm_built_up_areas:
         # Write output into a csv file
-        FileProcessor.write_obm_tiles_to_csv(
+        FileProcessor.write_tiles_to_csv(
             list_of_dictionaries=obm_built_up_areas,
             output_pathname=obm_output_pathname,
         )
+        del obm_built_up_areas
 
     buildings_database.connection.close()
 
diff --git a/obmgapanalysis/tileprocessor.py b/obmgapanalysis/tileprocessor.py
index 8ae3bddbee675811a72a66b49663529294e80a0c..9d077ce77e33b30c4b65b2d5d880b852250221f7 100644
--- a/obmgapanalysis/tileprocessor.py
+++ b/obmgapanalysis/tileprocessor.py
@@ -272,12 +272,12 @@ class TileProcessor:
         return polygon.area
 
     @staticmethod
-    def build_dictionary(tile, datasource, built_polygon):
+    def build_dictionary(tile, datasource, built_polygon, get_geometry=False):
         """Returns a dictionary with the built-up area related attributes
         associated to the Tile and a given DataSource. Contains:
 
         quadkey (str): Tile quadkey
-        source_id (int): Integer associated to a predefined method
+        source_id (int): Integer associated to a predefined method.
         built_area (str): Polygon string projected to WGS84 coordinates.
         built_area_size (float): Area measured in squared meters.
         last_update (str): Date when the pickle was generated.
@@ -298,22 +298,40 @@ class TileProcessor:
         if built_polygon.is_empty:
             logging.info("No built area found in {}".format(tile.quadkey))
             return
-
-        results = {
-            "quadkey": tile.quadkey,
-            "source_id": datasource.source_id,
-            "built_area": TileProcessor.reproject_polygon(built_polygon, tile.crs, "epsg:4326"),
-            "built_area_size": TileProcessor.albers_area_calculation(built_polygon, tile.crs),
-            "last_update": str(date.today()),
-        }
+        if get_geometry is False:
+            results = {
+                "quadkey": tile.quadkey,
+                "source_id": datasource.source_id,
+                "built_area_size": TileProcessor.albers_area_calculation(
+                    built_polygon, tile.crs
+                ),
+                "last_update": str(date.today()),
+            }
+        else:
+            results = {
+                "quadkey": tile.quadkey,
+                "source_id": datasource.source_id,
+                "built_area": TileProcessor.reproject_polygon(
+                    built_polygon, tile.crs, "epsg:4326"
+                ),
+                "built_area_size": TileProcessor.albers_area_calculation(
+                    built_polygon, tile.crs
+                ),
+                "last_update": str(date.today()),
+            }
         if not results["source_id"]:
             del results["source_id"]
-
         return results
 
     @staticmethod
     def get_built_up_area(
-        quadkey, datasource, database, database_crs_number, table_config, buffer_magnitude
+        quadkey,
+        datasource,
+        database,
+        database_crs_number,
+        table_config,
+        buffer_magnitude,
+        get_geometry=False,
     ):
         """Run the complete processing of a quadkey and returns a dictionary
         created with TileProcessor.build_dictionary.
@@ -362,7 +380,9 @@ class TileProcessor:
         refined_built_area = TileProcessor.polygon_difference(
             clip_built_geometry, roads_processed
         )
-        result = TileProcessor.build_dictionary(tile, datasource, refined_built_area)
+        result = TileProcessor.build_dictionary(
+            tile, datasource, refined_built_area, get_geometry=get_geometry
+        )
         return result
 
     @staticmethod
diff --git a/setup.py b/setup.py
index 6f980ca944e0f746dfb5702e5e88104991bc8403..e2a81efb1f9291b4475551af38c41a28cb122d8b 100644
--- a/setup.py
+++ b/setup.py
@@ -36,6 +36,7 @@ setup(
         "babelgrid",
         "fiona",
         "rtree",
+        "pandas",
         "geopandas",
         "rasterio",
         "psycopg2-binary",
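Outside the patch itself, here is a minimal illustrative sketch of how the unified `FileProcessor.write_tiles_to_csv` behaves under the new `get_geometry` switch. The sample records, quadkey, and output directory below are hypothetical, and the `./results` directory is assumed to exist.

```python
# Illustrative sketch only (not part of the patch): exercising the new
# get_geometry switch of FileProcessor.write_tiles_to_csv.
from shapely.geometry import box

from obmgapanalysis.fileprocessor import FileProcessor

# get_geometry=False: TileProcessor.build_dictionary omits the geometry, so the
# records are plain dictionaries; they are deduplicated and written via pandas.
plain_tiles = [
    {
        "quadkey": "120210233",  # hypothetical quadkey
        "source_id": 1,
        "built_area_size": 1250.5,  # square meters
        "last_update": "2021-06-01",
    }
]
FileProcessor.write_tiles_to_csv(plain_tiles, "./results", get_geometry=False)

# get_geometry=True: records carry a "built_area" polygon, so they are written
# through a GeoDataFrame with crs="epsg:4326" and the geometry ends up in the CSV.
geometry_tiles = [
    {
        "quadkey": "120210233",
        "source_id": 1,
        "built_area": box(13.0, 52.0, 13.1, 52.1),  # hypothetical WGS84 polygon
        "built_area_size": 1250.5,
        "last_update": "2021-06-01",
    }
]
FileProcessor.write_tiles_to_csv(geometry_tiles, "./results", get_geometry=True)
```

Both calls write `./results/120210233_1.csv` (the second overwrites the first); only the `get_geometry=True` variant includes a `built_area` column, serialized as WKT.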