Commit e3d5f319 authored by Simantini Shinde's avatar Simantini Shinde
Browse files

Calculated and updated building number for district with no info

parent 4356d182
Pipeline #39000 passed with stage
in 1 minute and 25 seconds
......@@ -505,6 +505,11 @@ class JapanDatabase(SpatialiteDatabase):
3 to 5 stories and 6 stories) except for building type `Other` which is
given for story number `total` only.
These building numbers are not available for all districts in Japan:
districts with a population of less than 15000 are omitted from the
building statistics. The building numbers of those districts are instead
estimated in the `calculate_building_numbers_district_no_info` function.
Args:
building_numbers_filepath (str):
Filepath of the file containing building numbers
......@@ -681,6 +686,132 @@ class JapanDatabase(SpatialiteDatabase):
self.cursor.execute(sql_statement)
self.connection.commit()
def calculate_building_numbers_district_no_info(self):
    """
    Estimates building numbers for districts that are missing from the building dataset.

    For every district that has no row in the `BuildingNumber` table, the building number
    of each (building type, construction material, story number) combination is estimated
    as the population of that district multiplied by the number of buildings per person
    observed in the ten districts with building information whose population density is
    closest to the density of the district without information. The estimates are
    inserted into `BuildingNumber` and the transaction is committed.

    The SQL statement is built from nested queries, described from the inside out:

    `SourcePopulationDistribution` lists every district present in `BuildingNumber`
    together with its population (`source_population`) and population density.

    `DistrictWithNoInformation` finds the districts that appear in `District` and
    `PopulationDistribution` but not in `BuildingNumber`, and cross-joins them with
    `SourcePopulationDistribution`, computing the absolute difference between the two
    population densities for every pair.

    `MappingToDistrictWithNoInfo` ranks, per district without information, the districts
    with information by that difference. The ordering is part of the window definition
    (`ORDER BY difference, district_id`): without it SQLite numbers the rows in arbitrary
    order, regardless of any `ORDER BY` in the subquery feeding the window.

    `ClosestMappingToDistrictWithNoInfo` keeps the ten best-ranked source districts.

    `MappingBuildingExposureToDistrictWithNoInfo` joins these source districts with
    their rows in `BuildingNumber`.

    `ToUpdateBuildingNumber` groups by target district and building attributes and
    averages, over the source districts, `number_building / source_population *
    [total]`. The ratio is computed per source district inside the aggregate so that
    every building count is divided by the population of its own district; dividing the
    averaged count by a bare `source_population` column would use the population of an
    arbitrarily chosen source district. The factor `1.0` forces floating-point division.
    """
    # NOTE(review): `NOT IN` matches nothing if `BuildingNumber.district_id` can be NULL;
    # presumably the column is declared NOT NULL - confirm against the table definition.
    sql_statement = """
        INSERT INTO BuildingNumber
        (
            district_id,
            building_type_id,
            construction_material_id,
            story_number_id,
            number_building
        )
        SELECT ToUpdateBuildingNumber.to_update_id,
               ToUpdateBuildingNumber.building_type_id,
               ToUpdateBuildingNumber.construction_material_id,
               ToUpdateBuildingNumber.story_number_id,
               ToUpdateBuildingNumber.to_update_number_building
        FROM
        (
            SELECT MappingBuildingExposureToDistrictWithNoInfo.to_update_id,
                   MappingBuildingExposureToDistrictWithNoInfo.building_type_id,
                   MappingBuildingExposureToDistrictWithNoInfo.construction_material_id,
                   MappingBuildingExposureToDistrictWithNoInfo.story_number_id,
                   AVG(
                       MappingBuildingExposureToDistrictWithNoInfo.number_building * 1.0
                       / MappingBuildingExposureToDistrictWithNoInfo.source_population
                       * MappingBuildingExposureToDistrictWithNoInfo.[total]
                   ) AS to_update_number_building
            FROM
            (
                SELECT *
                FROM
                (
                    SELECT MappingToDistrictWithNoInfo.id AS to_update_id,
                           MappingToDistrictWithNoInfo.district_id AS source_id,
                           MappingToDistrictWithNoInfo.rn,
                           MappingToDistrictWithNoInfo.[total],
                           MappingToDistrictWithNoInfo.source_population
                    FROM
                    (
                        SELECT DistrictWithNoInformation.id,
                               DistrictWithNoInformation.district_id,
                               row_number() OVER (
                                   PARTITION BY DistrictWithNoInformation.id
                                   ORDER BY DistrictWithNoInformation.difference ASC,
                                            DistrictWithNoInformation.district_id ASC
                               ) AS rn,
                               DistrictWithNoInformation.[total],
                               DistrictWithNoInformation.source_population
                        FROM
                        (
                            SELECT D.id,
                                   SourcePopulationDistribution.district_id,
                                   ABS(ToUpdatePopulationDistribution.population_density -
                                       SourcePopulationDistribution.population_density
                                   ) AS difference,
                                   SourcePopulationDistribution.source_population,
                                   ToUpdatePopulationDistribution.[total]
                            FROM PopulationDistribution AS ToUpdatePopulationDistribution
                                INNER JOIN District AS D
                                    ON ToUpdatePopulationDistribution.district_id = D.id
                                CROSS JOIN
                                (
                                    SELECT DISTINCT B.district_id,
                                           PD.[total] AS source_population,
                                           PD.population_density
                                    FROM BuildingNumber AS B
                                        INNER JOIN PopulationDistribution AS PD
                                            ON B.district_id = PD.district_id
                                ) AS SourcePopulationDistribution
                            WHERE D.id NOT IN
                            (
                                SELECT district_id
                                FROM BuildingNumber
                            )
                        ) AS DistrictWithNoInformation
                    ) AS MappingToDistrictWithNoInfo
                    WHERE rn < 11
                ) AS ClosestMappingToDistrictWithNoInfo
                    INNER JOIN BuildingNumber
                        ON ClosestMappingToDistrictWithNoInfo.source_id
                           = BuildingNumber.district_id
            ) AS MappingBuildingExposureToDistrictWithNoInfo
            GROUP BY MappingBuildingExposureToDistrictWithNoInfo.to_update_id,
                     MappingBuildingExposureToDistrictWithNoInfo.building_type_id,
                     MappingBuildingExposureToDistrictWithNoInfo.construction_material_id,
                     MappingBuildingExposureToDistrictWithNoInfo.story_number_id
        ) AS ToUpdateBuildingNumber
        """
    logger.debug(sql_statement)
    self.cursor.execute(sql_statement)
    self.connection.commit()
def import_exposure_data(self, population_distribution_filepath, building_numbers_filepath):
"""
Imports all exposure data from the CSV and Excel files provided by E-Stat, Japan.
......@@ -720,6 +851,9 @@ class JapanDatabase(SpatialiteDatabase):
# and update the BuildingNumber table
self.calculate_frequency_distributions()
# Calculate the building numbers for districts with no building information
self.calculate_building_numbers_district_no_info()
# Add the building types to the database
for building_type_id, building_type in enumerate(building_type_list):
self.insert_building_type(building_type_id, building_type)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment