Commit 5f2618ac authored by Simantini Shinde
Browse files

Imported household data

parent a9a4fe01
Pipeline #39381 passed with stage
in 1 minute and 52 seconds
......@@ -147,7 +147,6 @@ class JapanDatabase(SpatialiteDatabase):
sql_statement += "building_type_id INTEGER, "
sql_statement += "dwelling_type_id INTEGER, "
sql_statement += "tenure_type_id INTEGER, "
sql_statement += "construction_material_id INTEGER, "
sql_statement += "number_dwelling REAL, "
sql_statement += "number_household REAL, "
sql_statement += "number_household_member REAL, "
......@@ -286,6 +285,36 @@ class JapanDatabase(SpatialiteDatabase):
sql_statement += "VALUES (%d, '%s')" % (story_number_id, description)
self.cursor.execute(sql_statement)
def insert_dwelling_type(self, dwelling_type_id, description):
    """
    Inserts a dwelling-type description to the DwellingType table.

    The values are bound as query parameters instead of being formatted into the
    SQL text, so descriptions containing quotes (e.g. "Owner's dwelling") are
    stored verbatim and cannot break or alter the statement.

    Args:
        dwelling_type_id (int):
            ID of the dwelling type
        description (str):
            Description of the dwelling type
    """

    sql_statement = "INSERT INTO DwellingType (id, description) VALUES (?, ?)"
    # int()/str() keep the coercions the former "%d"/"%s" formatting applied
    self.cursor.execute(sql_statement, (int(dwelling_type_id), str(description)))
def insert_tenure_type(self, tenure_type_id, description):
    """
    Inserts a tenure-type description to the TenureType table.

    The values are bound as query parameters instead of being formatted into the
    SQL text, so descriptions containing quotes are stored verbatim and cannot
    break or alter the statement.

    Args:
        tenure_type_id (int):
            ID of the tenure type
        description (str):
            Description of the tenure type
    """

    sql_statement = "INSERT INTO TenureType (id, description) VALUES (?, ?)"
    # int()/str() keep the coercions the former "%d"/"%s" formatting applied
    self.cursor.execute(sql_statement, (int(tenure_type_id), str(description)))
def import_districts_and_boundaries(self, district_boundary_filepath):
"""
Imports all districts and boundaries from a prepared geopackage file.
......@@ -1147,11 +1176,132 @@ class JapanDatabase(SpatialiteDatabase):
"Added dwelling numbers for districts with no information in DwellingNumber table"
)
def import_household_data(
    self, household_data_filepath, building_type_list, dwelling_type_list, tenure_type_list
):
    """
    Imports all household data from the 2018 building statistics files provided
    by E-Stat, Japan. The following file is needed:
        List of numbers of households and household members by municipality
        (household_data_filepath)

    The household data provide the household numbers and household members for a
    combination of different building attributes:
        - building types (total, detached house, tenement house, apartments and
          others),
        - dwelling types (total, used exclusively for living, used also for commerce
          or other purposes),
        - tenure types (total, owned houses, rented houses, rented houses owned by
          local government, rented houses owned by urban renaissance agency or
          public corporation, rented houses owned privately, issued houses)

    This household data is not available for all districts in Japan,
    i.e. the districts with population less than 15000 are ignored in the
    building statistics. Rows whose area code has no entry in the District table
    are skipped.

    Each remaining row is written to the HouseholdData table using a parameterized
    statement; all rows are committed together once the file has been processed.

    Args:
        household_data_filepath (str):
            Filepath to the file of numbers of households and household members
        building_type_list (list):
            Collection of types of buildings from the household data file.
            Passed to `add_element_and_get_index` for every row.
        dwelling_type_list (list):
            Collection of types of dwellings from the household data file.
            Passed to `add_element_and_get_index` for every row.
        tenure_type_list (list):
            Collection of the dwelling tenures from the household data file.
            Passed to `add_element_and_get_index` for every row.
    """

    def parse_number(value):
        # A cell holding only "-" is a placeholder and counts as zero. Only the
        # lone hyphen is replaced: substituting every "-" would corrupt negative
        # values and exponents (e.g. "1e-05" would turn into "1e005").
        text = str(value).strip()
        return 0.0 if text == "-" else float(text)

    # Columns holding the statistics, in the order of the INSERT statement below
    statistic_columns = (
        constants.NUMBER_DWELLING_HOUSEHOLD,
        constants.NUMBER_HOUSEHOLD,
        constants.NUMBER_HOUSEHOLD_MEMBER,
        constants.ROOMS_PER_DWELLING,
        constants.TATAMI_PER_DWELLING,
        constants.FLOORSPACE_PER_DWELLING,
        constants.TATAMI_PER_PERSON,
        constants.PERSON_PER_ROOM,
    )

    # The statement is identical for every row, only the bound values differ
    sql_statement = """
        INSERT INTO HouseholdData
        (
            district_id,
            building_type_id,
            dwelling_type_id,
            tenure_type_id,
            number_dwelling,
            number_household,
            number_household_member,
            rooms_per_dwelling,
            tatami_per_dwelling,
            floorspace_per_dwelling,
            tatami_per_person,
            person_per_room
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """

    # Read columns identification area code, district, building type, dwelling type,
    # tenure type, number of dwellings, number of households, number of household
    # members, rooms per dwelling, tatami per dwelling, floorspace per dwelling,
    # tatami per person and person per room from the input Excel file.
    household_data_input = pandas.read_excel(
        household_data_filepath,
        header=7,
        usecols=[4, 5, 7, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19],
    )

    # Iterate through each row (one combination of district and building attributes)
    for _, row in household_data_input.iterrows():
        # The admin ID is the part of the area classification before the first "_"
        admin_id = int(row["Area classification"].split("_")[0])

        # Identify district_id based on admin_id from the District table
        district_id_result = self.get_district_id(admin_id)
        if district_id_result is None:  # Only data for which a district exist matter
            continue
        district_id = district_id_result[0]

        # Get ID of building type, dwelling type, and tenure type
        building_type_id = add_element_and_get_index(
            row["Type of building"], building_type_list
        )
        dwelling_type_id = add_element_and_get_index(
            row["Type of dwelling"], dwelling_type_list
        )
        tenure_type_id = add_element_and_get_index(
            row["Tenure of dwelling"], tenure_type_list
        )

        # Read relevant household data
        statistics = [parse_number(row[column]) for column in statistic_columns]

        # Insert the data to the HouseholdData table; int() keeps the coercion of
        # the IDs that the former "%d" formatting applied
        self.cursor.execute(
            sql_statement,
            (
                int(district_id),
                int(building_type_id),
                int(dwelling_type_id),
                int(tenure_type_id),
                *statistics,
            ),
        )

    # Commit once after all rows have been inserted
    self.connection.commit()
    logger.info("Household data added")
def import_exposure_data(
self,
population_distribution_filepath,
building_numbers_filepath,
dwelling_numbers_filepath,
household_data_filepath,
):
"""
Imports all exposure data from the CSV and Excel files provided by E-Stat, Japan.
......@@ -1159,6 +1309,8 @@ class JapanDatabase(SpatialiteDatabase):
Number of people distributed by municipality (population_distribution_filepath)
List of numbers of buildings by municipality (building_numbers_filepath)
List of numbers of dwellings by municipality (dwelling_numbers_filepath)
List of numbers of households and household members by municipality
(household_data_filepath)
Args:
population_distribution_filepath (str):
......@@ -1167,12 +1319,15 @@ class JapanDatabase(SpatialiteDatabase):
Filepath to the file of number of buildings
dwelling_numbers_filepath (str):
Filepath to the file of number of dwellings
household_data_filepath (str):
Filepath to the file of numbers of households and household members
"""
# Create lists to store building attribute classifications
building_type_list = []
story_number_list = []
dwelling_type_list = []
tenure_type_list = []
# Construction material classifications
construction_material_list = [
......@@ -1213,6 +1368,11 @@ class JapanDatabase(SpatialiteDatabase):
# Calculate the dwelling numbers for districts with no dwelling information
self.calculate_dwelling_numbers_district_no_info()
# Import household data into the database
self.import_household_data(
household_data_filepath, building_type_list, dwelling_type_list, tenure_type_list
)
# Add the building types to the database
for building_type_id, building_type in enumerate(building_type_list):
self.insert_building_type(building_type_id, building_type)
......@@ -1230,4 +1390,14 @@ class JapanDatabase(SpatialiteDatabase):
self.insert_construction_material(construction_material_id, construction_material)
logger.info("Construction-material types added")
# Add the dwelling types to the database
for dwelling_type_id, dwelling_type in enumerate(dwelling_type_list):
self.insert_dwelling_type(dwelling_type_id, dwelling_type)
logger.info("Dwelling types added")
# Add the tenure types to the database
for tenure_type_id, tenure_type in enumerate(tenure_type_list):
self.insert_tenure_type(tenure_type_id, tenure_type)
logger.info("Tenure types added")
self.connection.commit()
......@@ -44,7 +44,10 @@ def main():
db.create_tables()
db.import_districts_and_boundaries("data/Boundary.gpkg")
db.import_exposure_data(
"data/population_sub_municipal.csv", "data/e039_3e.xlsx", "data/e008_3e.xlsx"
"data/population_sub_municipal.csv",
"data/e039_3e.xlsx",
"data/e008_3e.xlsx",
"data/e011_2e.xlsx",
)
# Leave the program
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment