Commit 7ea49252 authored by shinde's avatar shinde Committed by Simantini Shinde
Browse files

Added separate function to import population data

parent 6ba9d0a4
Pipeline #37835 passed with stage
in 1 minute and 28 seconds
...@@ -24,6 +24,7 @@ import constants ...@@ -24,6 +24,7 @@ import constants
import shapely.wkt import shapely.wkt
import pyproj import pyproj
from shapely.ops import transform from shapely.ops import transform
import csv
# Initialize log # Initialize log
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -174,12 +175,13 @@ class JapanDatabase(SpatialiteDatabase): ...@@ -174,12 +175,13 @@ class JapanDatabase(SpatialiteDatabase):
# Create table PopulationDistribution # Create table PopulationDistribution
sql_statement = "CREATE TABLE PopulationDistribution (" sql_statement = "CREATE TABLE PopulationDistribution ("
sql_statement += "id INTEGER PRIMARY KEY, " sql_statement += "id INTEGER PRIMARY KEY, "
sql_statement += "district_id INTEGER, " sql_statement += "district_id INTEGER, "
sql_statement += "total INTEGER, " sql_statement += "total INTEGER, "
sql_statement += "male INTEGER, " sql_statement += "male INTEGER, "
sql_statement += "female INTEGER, " sql_statement += "female INTEGER, "
sql_statement += "number_household INTEGER)" sql_statement += "number_household INTEGER,"
sql_statement += "population_density REAL)"
self.connection.execute(sql_statement) self.connection.execute(sql_statement)
logger.debug("Table PopulationDistribution created") logger.debug("Table PopulationDistribution created")
...@@ -503,37 +505,6 @@ class JapanDatabase(SpatialiteDatabase): ...@@ -503,37 +505,6 @@ class JapanDatabase(SpatialiteDatabase):
) )
self.cursor.execute(sql_statement) self.cursor.execute(sql_statement)
def insert_population_distribution(
    self, district_id, total, male, female, number_household
):
    """
    Inserts a full dataset to the PopulationDistribution table.

    The values are bound through query placeholders instead of being
    formatted into the SQL text. Every value is coerced with int() first,
    so fractional input is truncated exactly as the former "%d" formatting
    did. The statement is executed on self.cursor; no commit is issued here.

    Args:
        district_id (int):
            ID of the district. Corresponds to District.id
        total (int):
            Total population of the district
        male (int):
            Male population of the district
        female (int):
            Female population of the district
        number_household (int):
            Total number of households of the district
    """
    sql_statement = (
        "INSERT INTO PopulationDistribution "
        "(district_id, total, male, female, number_household) "
        "VALUES (?, ?, ?, ?, ?)"
    )
    # int() keeps the integer-only contract of the old "%d" placeholders
    values = tuple(
        int(value)
        for value in (district_id, total, male, female, number_household)
    )
    self.cursor.execute(sql_statement, values)
def insert_building_type(self, building_type_id, description): def insert_building_type(self, building_type_id, description):
""" """
Inserts a building-type description to the BuildingType table. Inserts a building-type description to the BuildingType table.
...@@ -1090,6 +1061,60 @@ class JapanDatabase(SpatialiteDatabase): ...@@ -1090,6 +1061,60 @@ class JapanDatabase(SpatialiteDatabase):
numpy.seterr(divide="ignore", invalid="ignore") numpy.seterr(divide="ignore", invalid="ignore")
return districts return districts
def import_population_distribution(self, population_distribution_filepath):
    """
    Imports all population data from the 2015 Population Census file provided
    by E-Stat, Japan, and stores the population density of every district.
    The following file is needed:

    Number of people distributed by municipality (population_distribution_filepath)

    The CSV columns KEY_CODE, total_population, male_population,
    female_population and total_number_of_households are written to the
    PopulationDistribution table. Afterwards population_density is set to
    total divided by the area_size of the District row whose id equals
    district_id. Rows without a matching district keep a NULL density.
    Both steps are committed on self.connection.

    Args:
        population_distribution_filepath (str):
            File path to the CSV file of the population distribution
    """
    # Read columns district, total population, male population, female population
    # and number of households from the input CSV file. newline="" lets the csv
    # module handle line endings itself, as its documentation requires.
    with open(population_distribution_filepath, newline="") as population_file:
        population_to_db = [
            (
                row["KEY_CODE"],
                row["total_population"],
                row["male_population"],
                row["female_population"],
                row["total_number_of_households"],
            )
            for row in csv.DictReader(population_file)
        ]

    # Insert the data to the PopulationDistribution table
    # NOTE(review): KEY_CODE is stored as district_id unchanged; confirm that it
    # matches District.id, because the density calculation below joins on it.
    sql_statement = (
        "INSERT INTO PopulationDistribution (district_id, total, male, "
        "female, number_household) VALUES (?, ?, ?, ?, ?)"
    )
    self.cursor.executemany(sql_statement, population_to_db)
    self.connection.commit()
    logger.info("Population data added")

    # Calculate the population_density with the boundary area from the District
    # table and update the PopulationDistribution table. The total is cast to
    # REAL so that an integer-valued area_size cannot trigger integer division.
    sql_statement = (
        "UPDATE PopulationDistribution "
        "SET population_density = ("
        "SELECT CAST(PopulationDistribution.total AS REAL) / District.area_size "
        "FROM District "
        "WHERE District.id = PopulationDistribution.district_id)"
    )
    logger.debug(sql_statement)
    self.cursor.execute(sql_statement)
    self.connection.commit()
def import_exposure_data( def import_exposure_data(
self, self,
population_distribution_filepath, population_distribution_filepath,
...@@ -1099,13 +1124,17 @@ class JapanDatabase(SpatialiteDatabase): ...@@ -1099,13 +1124,17 @@ class JapanDatabase(SpatialiteDatabase):
dwelling_sizes_filepath, dwelling_sizes_filepath,
): ):
""" """
Imports all exposure data from the Excel files provided by E-Stat, Japan. The following Imports all exposure data from the CSV and Excel files provided by E-Stat, Japan.
files are needed: The following files are needed:
Number of people distributed by municipality (population_distribution_filepath)
List of numbers of buildings by municipality (building_numbers_filepath)
List of numbers of dwellings by municipality (dwelling_numbers_filepath) List of numbers of dwellings by municipality (dwelling_numbers_filepath)
List of numbers of households by municipality (household_numbers_filepath)
List of dwelling sizes by municipality (dwelling_sizes_filepath)
Args: Args:
population_distribution_filepath (str): population_distribution_filepath (str):
File path to the path of population distribution Filepath to the path of population distribution
building_numbers_filepath (str): building_numbers_filepath (str):
File path to the file of number of buildings File path to the file of number of buildings
dwelling_numbers_filepath (str): dwelling_numbers_filepath (str):
...@@ -1141,52 +1170,26 @@ class JapanDatabase(SpatialiteDatabase): ...@@ -1141,52 +1170,26 @@ class JapanDatabase(SpatialiteDatabase):
# Store all district (i.e. administrative unit) areas with their district_ids in a list # Store all district (i.e. administrative unit) areas with their district_ids in a list
districts = self.calculate_district_area() districts = self.calculate_district_area()
# Import population data into the database
self.import_population_distribution(population_distribution_filepath)
# Create an empty list to store population attributes # Create an empty list to store population attributes
population_list = [] population_list = []
# Read columns district, total population, male population, female population and number # Select district_id, total and population_density from PopulationDistribution
# of households from input CSV file # to be stored in a population_list
population_distribution_input = pandas.read_csv( sql_statement = (
population_distribution_filepath, usecols=[2, 9, 10, 11, 12] "SELECT district_id, total, population_density FROM PopulationDistribution"
) )
for index, row in population_distribution_input.iterrows(): self.cursor.execute(sql_statement)
admin_id = int(row["KEY_CODE"])
# Identify district_id based on admin_id from the District table
district_id_result = self.get_district_id(admin_id)
if district_id_result is None: # Only data for which a district exist matter
continue
district_id = district_id_result[0]
# Insert the data to the PopulationDistribution table
self.insert_population_distribution(
district_id,
int(row[constants.TOTAL_POPULATION]),
int(row[constants.MALE_POPULATION]),
int(row[constants.FEMALE_POPULATION]),
int(row[constants.NUMBER_HOUSEHOLD_POPULATION]),
)
# Store population attributes in the population_list
population = (district_id, int(row[constants.TOTAL_POPULATION]))
add_element_and_get_index(population, population_list)
logger.debug("PopulationDistribution for district %s added" % admin_id)
logger.info("Population data added")
# Calculate the population density and add it to the population_list # Store district_id (corresponding to the district_id in the Districts table),
for pop in population_list: # total and population_density from PopulationDistribution table to a population_list
for admin in districts: # TODO: remove
if pop[constants.DISTRICT_ID] == admin[constants.DISTRICT_ID]: for population in self.cursor:
unit_area = admin[constants.ADMINISTRATIVE_UNIT_AREA] for district in districts:
population_density = numpy.true_divide( if district[constants.DISTRICT_ID] == population[constants.DISTRICT_ID]:
pop[constants.TOTAL_POPULATION], unit_area add_element_and_get_index(population, population_list)
)
index = population_list.index(pop)
population_list[index] = pop + (population_density,)
numpy.seterr(divide="ignore", invalid="ignore")
else:
continue
# Create a list with construction material types total, wooden and non-wooden # Create a list with construction material types total, wooden and non-wooden
# from the building numbers input CSV file # from the building numbers input CSV file
......
...@@ -44,11 +44,11 @@ def main(): ...@@ -44,11 +44,11 @@ def main():
db.create_tables() db.create_tables()
db.read_districts_and_boundaries("data/Boundary.gpkg") db.read_districts_and_boundaries("data/Boundary.gpkg")
db.import_exposure_data( db.import_exposure_data(
"data/e008_3e.xlsx", "data/population_sub_municipal.csv",
"data/e039_3e.xlsx", "data/e039_3e.xlsx",
"data/e008_3e.xlsx",
"data/e011_2e.xlsx", "data/e011_2e.xlsx",
"data/e014e.xlsx", "data/e014e.xlsx",
"data/population_sub_municipal.csv",
) )
# Leave the program # Leave the program
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment