diff --git a/config-example.yml b/config-example.yml
index e6b56a38569b46529d2330c8ac1b79c2c1b1e6b2..84e6bbec8d36bfd219e0d039f273958f004a34ca 100644
--- a/config-example.yml
+++ b/config-example.yml
@@ -11,10 +11,11 @@ tiles:
   txt_filepath: tiles.txt
 
 output_pathname: ./results
+obm_output_pathname: ./obm_results
 number_cores: 1
 batch_size: 1000
 
-database:
+roads_database:
   host: your_host.dir.request_data
   dbname: gis
   port: 5433
@@ -25,6 +26,16 @@ database:
   geometry_field: way
   process_buffer_magnitude: False
 
+buildings_database:
+  host: your_host.dir.request_data
+  dbname: gis
+  port: 5433
+  username: postgres
+  password: secret_pass
+  buildings_table:
+    tablename: planet_osm_polygon
+    geometry_field: way
+
 target_database:
   host: your_host.dir.request_data
   dbname: gis
diff --git a/obmgapanalysis/fileprocessor.py b/obmgapanalysis/fileprocessor.py
index 829b7b51654f4182d4b33baa36449e42b119f03c..390c1e94175c6fc76ae84a1ceef445beef6009f0 100644
--- a/obmgapanalysis/fileprocessor.py
+++ b/obmgapanalysis/fileprocessor.py
@@ -20,6 +20,7 @@
 import os
 import logging
 import geopandas
+import pandas
 
 # Initialize log
 logger = logging.getLogger(__name__)
@@ -52,3 +53,22 @@
         )
         logger.info("Creating {}".format(filepath_out))
         tiles_gdf.to_csv(filepath_out, index=False)
+
+    @staticmethod
+    def write_obm_tiles_to_csv(list_of_dictionaries, output_pathname):
+        """Write a csv file from a list of dictionaries without geometries.
+
+        Args:
+            list_of_dictionaries (list): List of dictionaries with built-up areas to
+                write.
+
+            output_pathname (str): Target path name for the csv file.
+        """
+
+        tiles_df = pandas.DataFrame(list_of_dictionaries)
+        filepath_out = os.path.join(
+            output_pathname,
+            "OBM_{}_{}.csv".format(tiles_df.quadkey.iloc[0], len(tiles_df.index)),
+        )
+        logger.info("Creating {}".format(filepath_out))
+        tiles_df.to_csv(filepath_out, index=False)
diff --git a/obmgapanalysis/obmgapanalysis.py b/obmgapanalysis/obmgapanalysis.py
index 8663fa81d5102668d5d4cbf998e28a77db3b06b4..2756a8641e81e6ec4c1ab5b5a1e2c4780bd9935c 100644
--- a/obmgapanalysis/obmgapanalysis.py
+++ b/obmgapanalysis/obmgapanalysis.py
@@ -41,22 +41,31 @@ parser = argparse.ArgumentParser()
 parser.add_argument(
     "--import_csv", action="store_true", help="Import CSV files into target database"
 )
+parser.add_argument("--obm_built_up", action="store_true", help="Find built-up areas from OBM")
 args = parser.parse_args()
 
 # Get program configuration from config.yml
 with open("config.yml", "r") as ymlfile:
     config = yaml.load(ymlfile, Loader=yaml.FullLoader)
 
-db_config = config["database"]
-datasource_config = config["datasource"]
 tiles_config = config["tiles"]
+
+db_config = config["roads_database"]
+datasource_config = config["datasource"]
 output_pathname = os.path.abspath(config["output_pathname"])
-target_db_config = config["target_database"]
 
 if not os.path.exists(output_pathname):
     os.makedirs(output_pathname)
 
+buildings_db_config = config["buildings_database"]
+obm_output_pathname = os.path.abspath(config["obm_output_pathname"])
+if not os.path.exists(obm_output_pathname):
+    os.makedirs(obm_output_pathname)
+
+if args.import_csv:
+    target_db_config = config["target_database"]
+
-def multiprocess_chunk(quadkey_batch):
+def multiprocess_built_estimation_batch(quadkey_batch):
     """
     Wrapper funtion that writes a CSV file with built-up areas found in the
     quadkey_batch. The ouput is based on TileProcessor.build_dictionary.
@@ -86,10 +95,10 @@ def multiprocess_chunk(quadkey_batch):
     )
 
     # Build a list with all tiles with reported built-up areas
-    build_up_areas = []
+    built_up_areas = []
     for quadkey in quadkey_batch:
         try:
-            result = TileProcessor.get_build_up_area(
+            result = TileProcessor.get_built_up_area(
                 quadkey=quadkey,
                 database=roads_database,
                 datasource=datasource,
@@ -98,20 +107,70 @@
                 buffer_magnitude=db_config["process_buffer_magnitude"],
             )
             if result is not None:
-                build_up_areas.append(result)
+                built_up_areas.append(result)
 
         except Exception as e:
             logger.info("Error in quadkey: {}".format(quadkey))
             logger.info("Error caught: {}".format(e))
 
-    if build_up_areas:
+    if built_up_areas:
         # Write output into a csv file
-        FileProcessor.write_tiles_to_csv(build_up_areas, output_pathname)
+        FileProcessor.write_tiles_to_csv(built_up_areas, output_pathname)
 
     roads_database.connection.close()
 
 
-def multiprocess_csv(csv_filepath):
+def multiprocess_buildings_batch(quadkey_batch):
+    """
+    Wrapper function that writes a CSV file with OBM built-up areas found in the
+    quadkey_batch.
+
+    Output filenames are: OBM_<quadkey>_<number of tiles>.csv
+
+    Args:
+        quadkey_batch (list): List of quadkeys to process with OBM building data
+    """
+
+    # Connect to the OBM database
+    buildings_database = Database(**buildings_db_config)
+    buildings_database.create_connection_and_cursor()
+    buildings_database_crs_number = buildings_database.get_crs_from_geometry_field(
+        **buildings_db_config["buildings_table"]
+    )
+    logger.info(
+        "Connection established to {} in {}".format(
+            buildings_db_config["dbname"], buildings_db_config["host"]
+        )
+    )
+
+    # Build a list with all tiles with reported obm built-up areas
+    obm_built_up_areas = []
+    for quadkey in quadkey_batch:
+        try:
+            result = TileProcessor.get_obm_built_up_area(
+                quadkey=quadkey,
+                database=buildings_database,
+                database_crs_number=buildings_database_crs_number,
+                table_config=buildings_db_config["buildings_table"],
+            )
+            if result is not None:
+                obm_built_up_areas.append(result)
+
+        except Exception as e:
+            logger.info("Error in quadkey: {}".format(quadkey))
+            logger.info("Error caught: {}".format(e))
+
+    if obm_built_up_areas:
+        # Write output into a csv file
+        FileProcessor.write_obm_tiles_to_csv(
+            list_of_dictionaries=obm_built_up_areas,
+            output_pathname=obm_output_pathname,
+        )
+
+    buildings_database.connection.close()
+
+
+def multiprocess_write_csv(csv_filepath):
     """
     Wrapper function that imports a CSV file into a configured database table
 
@@ -159,7 +218,7 @@ def main():
         logging.info("Creating multiprocessing pool")
         with multiprocessing.Pool(processes=num_processes) as pool:
             logging.info("Start parallel processing")
-            pool.map(multiprocess_csv, csv_filepaths)
+            pool.map(multiprocess_write_csv, csv_filepaths)
             logging.info("Parallel processing finished, closing pool")
             pool.close()
 
@@ -183,10 +242,23 @@ def main():
         tiles_list[i : i + batch_size] for i in range(0, len(tiles_list), batch_size)
     ]
 
-    logging.info("Creating multiprocessing pool")
+    if args.obm_built_up:
+        logging.info("Creating multiprocessing pool for obm built-up areas")
+        with multiprocessing.Pool(processes=num_processes) as pool:
+            logging.info("Start parallel processing of {} batches".format(len(quadkey_batches)))
+            pool.map(multiprocess_buildings_batch, quadkey_batches)
+
+            logging.info("Parallel processing finished, closing pool")
+            pool.close()
+            pool.join()
+
+        logger.info("Task finished, closing obmgapanalysis")
+        sys.exit()
+
+    logging.info("Creating multiprocessing pool for settlement layer built-up areas")
     with multiprocessing.Pool(processes=num_processes) as pool:
         logging.info("Start parallel processing of {} batches".format(len(quadkey_batches)))
-        pool.map(multiprocess_chunk, quadkey_batches)
+        pool.map(multiprocess_built_estimation_batch, quadkey_batches)
         logging.info("Parallel processing finished, closing pool")
         pool.close()
 
diff --git a/obmgapanalysis/tileprocessor.py b/obmgapanalysis/tileprocessor.py
index ac773f69042abdb4a1a3490b46746e0c355f694e..ee21665a1c43fa19653dee9662ebf3db40f512aa 100644
--- a/obmgapanalysis/tileprocessor.py
+++ b/obmgapanalysis/tileprocessor.py
@@ -216,7 +216,9 @@
         """
 
         input_dataframe = input_dataframe.to_crs(tile.crs)
-        input_dataframe = geopandas.clip(input_dataframe, tile.geometry)
+        input_dataframe = geopandas.clip(
+            input_dataframe, tile.geometry.buffer(buffer_magnitude)
+        )
         geometry = input_dataframe.unary_union
         if buffer_magnitude > 0.0:
             geometry = geometry.buffer(buffer_magnitude)
@@ -304,7 +306,7 @@
         return results
 
     @staticmethod
-    def get_build_up_area(
+    def get_built_up_area(
         quadkey, datasource, database, database_crs_number, table_config, buffer_magnitude
     ):
         """Run the complete processing of a quadkey and returns a dictionary
@@ -356,3 +358,46 @@
         )
         result = TileProcessor.build_dictionary(tile, datasource, refined_built_area)
         return result
+
+    @staticmethod
+    def get_obm_built_up_area(quadkey, database, database_crs_number, table_config):
+        """Run the processing for obm built-up area extraction
+        of a quadkey and returns a dictionary created with quadkey and obm_built_area_size
+
+        Args:
+            quadkey (str): Quadkey code associated with a Bing quadtree tile.
+
+            database (database.Database): Database instance with credentials
+                and connection ready to perform data queries.
+
+            database_crs_number (str): SRID number of the target table.
+
+            table_config (dict): Dictionary with table name, schema and geometry_field.
+                This is part of the config.yml file.
+
+        Returns:
+            results (dictionary): Dictionary with built-up area information.
+        """
+
+        crs = "epsg:{}".format(database_crs_number)
+        tile = Tile(quadkey, crs)
+
+        # Find building footprints within the buildings database
+        buildings_in_tile = database.get_features_in_tile(
+            tile=tile, crs_number=database_crs_number, **table_config
+        )
+
+        if not buildings_in_tile.empty:
+            buildings_processed = TileProcessor.process_dataframe_with_tile(
+                buildings_in_tile, tile=tile
+            )
+
+            result = {
+                "quadkey": tile.quadkey,
+                "source_id": 0,
+                "built_area_size": TileProcessor.albers_area_calculation(
+                    buildings_processed, tile.crs
+                ),
+                "last_update": str(date.today()),
+            }
+            return result
diff --git a/tests/test_tileprocessor.py b/tests/test_tileprocessor.py
index 9abc1475f1773cb811df6c704bd25d3f6075c6dd..9ed76026587ee6ff0a64722093079f2fe68d7350 100644
--- a/tests/test_tileprocessor.py
+++ b/tests/test_tileprocessor.py
@@ -126,24 +126,20 @@ def test_clip_to_tile_extent():
 
 def test_process_dataframe_with_tile():
     expected_geometry_roads = (
-        "POLYGON ((2550002.184577069 4629637.952911595, 2550002.438602296 "
-        "4629670.059528879, 2550002.454491734 4629670.34505833, 2550007.486132718 "
-        "4629718.894118963, 2550007.534455809 4629719.202675823, 2550007.614596096 "
-        "4629719.504536704, 2550020.082379065 4629758.216188851, 2550020.178233149 "
-        "4629758.472967537, 2550020.297122533 4629758.719926265, 2550020.438054846 "
-        "4629758.955003661, 2550020.599853719 4629759.176237528, 2550020.781168612 "
-        "4629759.381781219, 2550030.443700413 4629769.375572084, 2550030.760692946 "
-        "4629769.659947647, 2550049.072185669 4629783.876796038, 2550053.931105311 "
-        "4629790.803233128, 2550060.593091394 4629790.803233128, 2550060.594888479 "
-        "4629790.73518196, 2550060.573776196 4629790.441533886, 2550060.523983029 "
-        "4629790.151369173, 2550060.445988514 4629789.867482267, 2550060.340543782 "
-        "4629789.592607153, 2550060.208664322 4629789.329391029, 2550060.051620206 "
-        "4629789.08036881, 2550053.724702562 4629780.061286326, 2550053.541161971 "
+        "POLYGON ((2550002.438602296 4629670.059528879, 2550002.454491734 "
+        "4629670.34505833, 2550007.486132718 4629718.894118963, 2550007.534455809 "
+        "4629719.202675823, 2550007.614596096 4629719.504536704, 2550020.082379065 "
+        "4629758.216188851, 2550020.178233149 4629758.472967537, 2550020.297122533 "
+        "4629758.719926265, 2550020.438054846 4629758.955003661, 2550020.599853719 "
+        "4629759.176237528, 2550020.781168612 4629759.381781219, 2550030.443700413 "
+        "4629769.375572084, 2550030.760692946 4629769.659947647, 2550049.072185669 "
+        "4629783.876796038, 2550053.931105311 4629790.803233128, 2550061.2602155 "
+        "4629790.803233128, 2550053.724702562 4629780.061286326, 2550053.541161971 "
         "4629779.825551415, 2550053.335084511 4629779.609241042, 2550053.108513971 "
         "4629779.41450048, 2550034.608849079 4629765.051557215, 2550025.57961757 "
         "4629755.712777978, 2550013.42183824 4629717.963666889, 2550008.437281965 "
-        "4629669.86891498, 2550008.184577072 4629637.929176555, 2550002.185554927 "
-        "4629637.929176555, 2550002.184577069 4629637.952911595))"
+        "4629669.86891498, 2550008.184577072 4629637.929176555, 2550002.184389279 "
+        "4629637.929176555, 2550002.438602296 4629670.059528879))"
     )
 
     roads_process = TileProcessor.process_dataframe_with_tile(