Commit 7ea49252 authored by shinde's avatar shinde Committed by Simantini Shinde
Browse files

Added separate function to import population data

parent 6ba9d0a4
Pipeline #37835 passed with stage
in 1 minute and 28 seconds
......@@ -24,6 +24,7 @@ import constants
import shapely.wkt
import pyproj
from shapely.ops import transform
import csv
# Initialize log
logger = logging.getLogger(__name__)
......@@ -174,12 +175,13 @@ class JapanDatabase(SpatialiteDatabase):
# Create table PopulationDistribution
sql_statement = "CREATE TABLE PopulationDistribution ("
sql_statement += "id INTEGER PRIMARY KEY, "
sql_statement += "district_id INTEGER, "
sql_statement += "total INTEGER, "
sql_statement += "male INTEGER, "
sql_statement += "female INTEGER, "
sql_statement += "number_household INTEGER)"
sql_statement += "id INTEGER PRIMARY KEY, "
sql_statement += "district_id INTEGER, "
sql_statement += "total INTEGER, "
sql_statement += "male INTEGER, "
sql_statement += "female INTEGER, "
sql_statement += "number_household INTEGER,"
sql_statement += "population_density REAL)"
self.connection.execute(sql_statement)
logger.debug("Table PopulationDistribution created")
......@@ -503,37 +505,6 @@ class JapanDatabase(SpatialiteDatabase):
)
self.cursor.execute(sql_statement)
def insert_population_distribution(
    self, district_id, total, male, female, number_household
):
    """
    Inserts a full dataset to the PopulationDistribution table.

    Args:
        district_id (int):
            ID of the district. Corresponds to District.id
        total (int):
            Total population of the district
        male (int):
            Male population of the district
        female (int):
            Female population of the district
        number_household (int):
            Total number of households of the district
    """
    # Bind the values as parameters instead of formatting them into the SQL text,
    # so the statement itself never contains caller-supplied data.
    sql_statement = (
        "INSERT INTO PopulationDistribution "
        "(district_id, total, male, female, number_household) "
        "VALUES (?, ?, ?, ?, ?)"
    )
    # int() keeps the integer coercion that the former "%d" placeholders applied.
    values = tuple(
        int(value)
        for value in (district_id, total, male, female, number_household)
    )
    self.cursor.execute(sql_statement, values)
def insert_building_type(self, building_type_id, description):
"""
Inserts a building-type description to the BuildingType table.
......@@ -1090,6 +1061,60 @@ class JapanDatabase(SpatialiteDatabase):
numpy.seterr(divide="ignore", invalid="ignore")
return districts
def import_population_distribution(self, population_distribution_filepath):
    """
    Imports all population data from the 2015 Population Census file provided
    by E-Stat, Japan. The following file is needed:
    Number of people distributed by municipality (population_distribution_filepath)

    The rows are inserted into the PopulationDistribution table; afterwards the
    population_density column is filled from the area_size of the matching row
    in the District table.

    Args:
        population_distribution_filepath (str):
            Path to the population distribution CSV file. The file must have the
            header columns KEY_CODE, total_population, male_population,
            female_population and total_number_of_households.
    """
    # Read columns district, total population, male population, female population and
    # number of households from the input CSV file. newline="" lets the csv module do
    # its own line-ending handling (required for quoted fields containing newlines).
    with open(population_distribution_filepath, newline="") as population_file:
        population_to_db = [
            (
                row["KEY_CODE"],
                row["total_population"],
                row["male_population"],
                row["female_population"],
                row["total_number_of_households"],
            )
            for row in csv.DictReader(population_file)
        ]

    # Insert the data to the PopulationDistribution table
    # NOTE(review): KEY_CODE is stored directly as district_id and later joined on
    # District.id -- confirm that both use the same identifier.
    sql_statement = (
        "INSERT INTO PopulationDistribution (district_id, total, male, "
        "female, number_household) VALUES (?, ?, ?, ?, ?)"
    )
    self.cursor.executemany(sql_statement, population_to_db)
    self.connection.commit()
    logger.info("Population data added")

    # Calculate the population_density with the boundary area from the District table
    # and update the PopulationDistribution table. The CAST forces real-valued
    # division (integer / integer would truncate). Rows without a matching district
    # get NULL from the empty subquery.
    sql_statement = (
        "UPDATE PopulationDistribution "
        "SET population_density = ("
        "SELECT CAST(PopulationDistribution.total AS REAL) / District.area_size "
        "FROM District "
        "WHERE District.id = PopulationDistribution.district_id)"
    )
    logger.debug(sql_statement)
    self.cursor.execute(sql_statement)
    self.connection.commit()
def import_exposure_data(
self,
population_distribution_filepath,
......@@ -1099,13 +1124,17 @@ class JapanDatabase(SpatialiteDatabase):
dwelling_sizes_filepath,
):
"""
Imports all exposure data from the Excel files provided by E-Stat, Japan. The following
files are needed:
Imports all exposure data from the CSV and Excel files provided by E-Stat, Japan.
The following files are needed:
Number of people distributed by municipality (population_distribution_filepath)
List of numbers of buildings by municipality (building_numbers_filepath)
List of numbers of dwellings by municipality (dwelling_numbers_filepath)
List of numbers of households by municipality (household_numbers_filepath)
List of dwelling sizes by municipality (dwelling_sizes_filepath)
Args:
population_distribution_filepath (str):
File path to the path of population distribution
Filepath to the path of population distribution
building_numbers_filepath (str):
File path to the file of number of buildings
dwelling_numbers_filepath (str):
......@@ -1141,52 +1170,26 @@ class JapanDatabase(SpatialiteDatabase):
# Store all district (i.e. administrative unit) areas with their district_ids in a list
districts = self.calculate_district_area()
# Import population data into the database
self.import_population_distribution(population_distribution_filepath)
# Create an empty list to store population attributes
population_list = []
# Read columns district, total population, male population, female population and number
# of households from input CSV file
population_distribution_input = pandas.read_csv(
population_distribution_filepath, usecols=[2, 9, 10, 11, 12]
# Select district_id, total and population_density from PopulationDistribution
# to be stored in a population_list
sql_statement = (
"SELECT district_id, total, population_density FROM PopulationDistribution"
)
for index, row in population_distribution_input.iterrows():
admin_id = int(row["KEY_CODE"])
# Identify district_id based on admin_id from the District table
district_id_result = self.get_district_id(admin_id)
if district_id_result is None: # Only data for which a district exist matter
continue
district_id = district_id_result[0]
# Insert the data to the PopulationDistribution table
self.insert_population_distribution(
district_id,
int(row[constants.TOTAL_POPULATION]),
int(row[constants.MALE_POPULATION]),
int(row[constants.FEMALE_POPULATION]),
int(row[constants.NUMBER_HOUSEHOLD_POPULATION]),
)
# Store population attributes in the population_list
population = (district_id, int(row[constants.TOTAL_POPULATION]))
add_element_and_get_index(population, population_list)
logger.debug("PopulationDistribution for district %s added" % admin_id)
logger.info("Population data added")
self.cursor.execute(sql_statement)
# Calculate the population density and add it to the population_list
for pop in population_list:
for admin in districts:
if pop[constants.DISTRICT_ID] == admin[constants.DISTRICT_ID]:
unit_area = admin[constants.ADMINISTRATIVE_UNIT_AREA]
population_density = numpy.true_divide(
pop[constants.TOTAL_POPULATION], unit_area
)
index = population_list.index(pop)
population_list[index] = pop + (population_density,)
numpy.seterr(divide="ignore", invalid="ignore")
else:
continue
# Store district_id (corresponding to the district_id in the Districts table),
# total and population_density from PopulationDistribution table to a population_list
# TODO: remove
for population in self.cursor:
for district in districts:
if district[constants.DISTRICT_ID] == population[constants.DISTRICT_ID]:
add_element_and_get_index(population, population_list)
# Create a list with construction material types total, wooden and non-wooden
# from the building numbers input CSV file
......
......@@ -44,11 +44,11 @@ def main():
db.create_tables()
db.read_districts_and_boundaries("data/Boundary.gpkg")
db.import_exposure_data(
"data/e008_3e.xlsx",
"data/population_sub_municipal.csv",
"data/e039_3e.xlsx",
"data/e008_3e.xlsx",
"data/e011_2e.xlsx",
"data/e014e.xlsx",
"data/population_sub_municipal.csv",
)
# Leave the program
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment