Commit 158ff89e authored by Simantini Shinde
Browse files

Calculated frequency distributions for Dwelling Numbers

parent 274d7a1a
Pipeline #39194 passed with stage
in 1 minute and 38 seconds
......@@ -587,93 +587,7 @@ class JapanDatabase(SpatialiteDatabase):
self.connection.commit()
logger.info("Building numbers added")
def import_dwelling_numbers(
    self,
    dwelling_numbers_filepath,
    building_type_list,
    construction_material_list,
    story_number_list,
):
    """
    Imports all dwelling data from the 2018 building statistics files provided
    by E-Stat, Japan. The following file is needed:
        List of numbers of dwellings by municipality (dwelling_numbers_filepath)

    The dwelling data provide the dwelling numbers for a combination of different
    building attributes:
        - building types (total, detached house, tenement house, apartments and others),
        - construction material type (total, wooden, wooden (excluding wooden and
          fire-proofed), wooden and fire-proofed and non-wooden)
        - story number type (total, 1 story, 2 stories and over, 2 stories,
          3 to 5 stories and 6 stories) except for building type `Other` which is
          given for story number `total` only.

    These dwelling numbers are not available for all districts in Japan
    i.e. the districts with population less than 15000 are ignored in the
    building statistics.

    Rows whose area code has no matching district (`get_district_id` returns None)
    are skipped. All inserts are committed once, after the last row.

    Args:
        dwelling_numbers_filepath (str):
            Filepath of the file containing dwelling numbers
        building_type_list (list):
            Collection of types of buildings from the dwelling numbers file.
            Passed to `add_element_and_get_index` to obtain the building type ID.
        construction_material_list (list):
            Collection of types of construction materials from the dwelling numbers file.
            Only its length is used here: one value column is read per entry.
        story_number_list (list):
            Collection of the number of stories from the dwelling numbers file.
            Passed to `add_element_and_get_index` to obtain the story number ID.
    """

    # Read the columns holding area code, district, building type, story number type
    # and the number of dwellings per construction material from the input Excel file
    dwelling_numbers_input = pandas.read_excel(
        dwelling_numbers_filepath, header=10, usecols=[4, 5, 7, 9, 10, 11, 12, 13, 14]
    )

    # Parameterized statement, built once instead of once per inserted value
    insert_statement = (
        "INSERT INTO DwellingNumber (district_id, building_type_id, "
        "construction_material_id, story_number_id, number_dwelling) "
        "VALUES (?, ?, ?, ?, ?)"
    )

    # Iterate through each row of the statistics file
    for _, row in dwelling_numbers_input.iterrows():
        # The admin ID is the part of the area classification before the first underscore
        admin_id = int(row["Area classification"].split("_")[0])

        # Identify district_id based on admin_id from the District table
        district_id_result = self.get_district_id(admin_id)
        if district_id_result is None:  # Only data for which a district exist matter
            continue
        district_id = district_id_result[0]

        # Get ID of building type and number of stories
        building_type_id = add_element_and_get_index(
            row["Type of building"], building_type_list
        )
        story_number_id = add_element_and_get_index(
            row["Stories of building"], story_number_list
        )

        # Insert dwelling numbers for each construction material
        for construction_material_id in range(len(construction_material_list)):
            # The value columns are addressed by position, hence `.iloc`: plain
            # `row[<int>]` on a string-labelled Series is deprecated in pandas.
            raw_value = str(
                row.iloc[constants.NUMBER_DWELLING + construction_material_id]
            ).strip()
            # A lone "-" is treated as zero dwellings. Only the exact placeholder is
            # replaced so that signs or exponents of real numbers are left intact.
            # NOTE(review): empty cells presumably arrive as NaN and would be stored
            # as NULL by the database driver - confirm this is acceptable.
            dwelling_number = 0.0 if raw_value == "-" else float(raw_value)

            # Insert the data to the DwellingNumber table (IDs coerced to int, as the
            # former `%d` formatting did)
            self.cursor.execute(
                insert_statement,
                (
                    int(district_id),
                    int(building_type_id),
                    construction_material_id,
                    int(story_number_id),
                    dwelling_number,
                ),
            )

    self.connection.commit()
    logger.info("Dwelling numbers added")
def calculate_frequency_distributions(self):
def calculate_building_numbers_frequency_distributions(self):
"""
Fixes rounding-up errors observed between the sum of building numbers of the
building attributes with type not equals `total` (except for building type `Other`)
......@@ -767,6 +681,7 @@ class JapanDatabase(SpatialiteDatabase):
logger.debug(sql_statement)
self.cursor.execute(sql_statement)
self.connection.commit()
logger.info("Fixed rounding up errors for building numbers")
def calculate_building_numbers_district_no_info(self):
"""
......@@ -894,6 +809,201 @@ class JapanDatabase(SpatialiteDatabase):
logger.debug(sql_statement)
self.cursor.execute(sql_statement)
self.connection.commit()
logger.info(
"Added building numbers for districts with no information in BuildingNumber table"
)
def import_dwelling_numbers(
    self,
    dwelling_numbers_filepath,
    building_type_list,
    construction_material_list,
    story_number_list,
):
    """
    Imports all dwelling data from the 2018 building statistics files provided
    by E-Stat, Japan. The following file is needed:
        List of numbers of dwellings by municipality (dwelling_numbers_filepath)

    The dwelling data provide the dwelling numbers for a combination of different
    building attributes:
        - building types (total, detached house, tenement house, apartments and others),
        - construction material type (total, wooden, wooden (excluding wooden and
          fire-proofed), wooden and fire-proofed and non-wooden)
        - story number type (total, 1 story, 2 stories and over, 2 stories,
          3 to 5 stories and 6 stories) except for building type `Other` which is
          given for story number `total` only.

    These dwelling numbers are not available for all districts in Japan
    i.e. the districts with population less than 15000 are ignored in the
    building statistics.

    Rows whose area code has no matching district (`get_district_id` returns None)
    are skipped. All inserts are committed once, after the last row.

    Args:
        dwelling_numbers_filepath (str):
            Filepath of the file containing dwelling numbers
        building_type_list (list):
            Collection of types of buildings from the dwelling numbers file.
            Passed to `add_element_and_get_index` to obtain the building type ID.
        construction_material_list (list):
            Collection of types of construction materials from the dwelling numbers file.
            Only its length is used here: one value column is read per entry.
        story_number_list (list):
            Collection of the number of stories from the dwelling numbers file.
            Passed to `add_element_and_get_index` to obtain the story number ID.
    """

    # Read the columns holding area code, district, building type, story number type
    # and the number of dwellings per construction material from the input Excel file
    dwelling_numbers_input = pandas.read_excel(
        dwelling_numbers_filepath, header=10, usecols=[4, 5, 7, 9, 10, 11, 12, 13, 14]
    )

    # Parameterized statement, built once instead of once per inserted value
    insert_statement = (
        "INSERT INTO DwellingNumber (district_id, building_type_id, "
        "construction_material_id, story_number_id, number_dwelling) "
        "VALUES (?, ?, ?, ?, ?)"
    )

    # Iterate through each row of the statistics file
    for _, row in dwelling_numbers_input.iterrows():
        # The admin ID is the part of the area classification before the first underscore
        admin_id = int(row["Area classification"].split("_")[0])

        # Identify district_id based on admin_id from the District table
        district_id_result = self.get_district_id(admin_id)
        if district_id_result is None:  # Only data for which a district exist matter
            continue
        district_id = district_id_result[0]

        # Get ID of building type and number of stories
        building_type_id = add_element_and_get_index(
            row["Type of building"], building_type_list
        )
        story_number_id = add_element_and_get_index(
            row["Stories of building"], story_number_list
        )

        # Insert dwelling numbers for each construction material
        for construction_material_id in range(len(construction_material_list)):
            # The value columns are addressed by position, hence `.iloc`: plain
            # `row[<int>]` on a string-labelled Series is deprecated in pandas.
            raw_value = str(
                row.iloc[constants.NUMBER_DWELLING + construction_material_id]
            ).strip()
            # A lone "-" is treated as zero dwellings. Only the exact placeholder is
            # replaced so that signs or exponents of real numbers are left intact.
            # NOTE(review): empty cells presumably arrive as NaN and would be stored
            # as NULL by the database driver - confirm this is acceptable.
            dwelling_number = 0.0 if raw_value == "-" else float(raw_value)

            # Insert the data to the DwellingNumber table (IDs coerced to int, as the
            # former `%d` formatting did)
            self.cursor.execute(
                insert_statement,
                (
                    int(district_id),
                    int(building_type_id),
                    construction_material_id,
                    int(story_number_id),
                    dwelling_number,
                ),
            )

    self.connection.commit()
    logger.info("Dwelling numbers added")
def calculate_dwelling_numbers_frequency_distributions(self):
    """
    Fixes rounding-up errors observed between the sum of dwelling numbers of the
    building attributes with type not equal to `total` (except for building type `Other`
    and also construction_material_id is not `wooden (excluding wooden and
    fire-proofed)` and not `wooden and fire-proofed`) and dwelling numbers of the
    building attributes with type `total` by doing a frequency distribution in
    query Q with T.total_dwelling and S.sum_dwelling from the joined queries S and
    T, and updates the calculated target_number in the DwellingNumber table.

    The query S computes the sum of dwelling numbers grouped by district_id where
    building type is not `total` (i.e. building_type_id != 0) and construction material is
    neither `total`, `wooden (excluding wooden and fire-proofed)` nor
    `wooden and fire-proofed` (i.e. construction_material_id != 0 and != 2 and != 3) and
    story number is not `total` in the Japanese statistics data, except for when building
    type is `Other` (i.e. building_type_id = 4) then story number is `total`
    (i.e. story_number_id = 0). Districts whose sum is zero are left out of S, so
    their rows are not touched: dividing by a zero sum would otherwise yield NULL
    and overwrite their dwelling numbers with NULL.

    The query T selects the dwelling numbers when building type is `total`
    (i.e. building_type_id = 0) and construction material is `total`
    (i.e. construction_material_id = 0) and story number is `total`
    (i.e. story_number_id = 0).

    Each selected row is rescaled to
    number_dwelling * total_dwelling / sum_dwelling, and the change is committed.
    """

    sql_statement = """
        UPDATE DwellingNumber AS M
        SET number_dwelling = Q.target_number
        FROM
        (
            SELECT D.district_id, D.number_dwelling,
                   S.sum_dwelling, T.total_dwelling,
                   D.number_dwelling * T.total_dwelling / S.sum_dwelling AS target_number,
                   D.building_type_id, D.construction_material_id,
                   D.story_number_id
            FROM DwellingNumber AS D
            INNER JOIN
            (
                SELECT district_id,
                       SUM(number_dwelling) AS sum_dwelling
                FROM DwellingNumber
                WHERE
                (
                    (
                        building_type_id != 0
                        AND construction_material_id NOT IN (0, 2, 3)
                        AND story_number_id != 0
                    )
                    OR
                    (
                        building_type_id = 4
                        AND construction_material_id NOT IN (0, 2, 3)
                        AND story_number_id = 0
                    )
                )
                GROUP BY district_id
                HAVING SUM(number_dwelling) != 0
            ) AS S
            ON D.district_id = S.district_id
            INNER JOIN
            (
                SELECT district_id,
                       number_dwelling AS total_dwelling
                FROM DwellingNumber
                WHERE
                (
                    building_type_id = 0
                    AND construction_material_id = 0
                    AND story_number_id = 0
                )
            ) AS T
            ON D.district_id = T.district_id
            WHERE
            (
                (
                    D.building_type_id != 0
                    AND D.construction_material_id NOT IN (0, 2, 3)
                    AND D.story_number_id != 0
                )
                OR
                (
                    D.building_type_id = 4
                    AND D.construction_material_id NOT IN (0, 2, 3)
                    AND D.story_number_id = 0
                )
            )
        ) AS Q
        WHERE
        (
            M.district_id = Q.district_id
            AND M.building_type_id = Q.building_type_id
            AND M.construction_material_id = Q.construction_material_id
            AND M.story_number_id = Q.story_number_id
        )
        """
    logger.debug(sql_statement)
    self.cursor.execute(sql_statement)
    self.connection.commit()
    logger.info("Fixed rounding up errors for dwelling numbers")
def import_exposure_data(
self,
......@@ -939,6 +1049,13 @@ class JapanDatabase(SpatialiteDatabase):
building_numbers_filepath, building_type_list, story_number_list
)
# Calculate the frequency distributions of building numbers
# and update the BuildingNumber table
self.calculate_building_numbers_frequency_distributions()
# Calculate the building numbers for districts with no building information
self.calculate_building_numbers_district_no_info()
# Import dwelling numbers data into the database
self.import_dwelling_numbers(
dwelling_numbers_filepath,
......@@ -947,12 +1064,9 @@ class JapanDatabase(SpatialiteDatabase):
story_number_list,
)
# Calculate the frequency distributions of building numbers
# and update the BuildingNumber table
self.calculate_frequency_distributions()
# Calculate the building numbers for districts with no building information
self.calculate_building_numbers_district_no_info()
# Calculate the frequency distributions of dwelling numbers
# and update the DwellingNumber table
self.calculate_dwelling_numbers_frequency_distributions()
# Add the building types to the database
for building_type_id, building_type in enumerate(building_type_list):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment