Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Dynamic Exposure
Global Dynamic Exposure
exposure-japan
Commits
7ea49252
Commit
7ea49252
authored
Jan 24, 2022
by
shinde
Committed by
Simantini Shinde
Jan 27, 2022
Browse files
Added separate function to import population data
parent
6ba9d0a4
Pipeline
#37835
passed with stage
in 1 minute and 28 seconds
Changes
2
Pipelines
2
Show whitespace changes
Inline
Side-by-side
exposurejapan/database.py
View file @
7ea49252
...
...
@@ -24,6 +24,7 @@ import constants
import
shapely.wkt
import
pyproj
from
shapely.ops
import
transform
import
csv
# Initialize log
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -179,7 +180,8 @@ class JapanDatabase(SpatialiteDatabase):
sql_statement
+=
"total INTEGER, "
sql_statement
+=
"male INTEGER, "
sql_statement
+=
"female INTEGER, "
sql_statement
+=
"number_household INTEGER)"
sql_statement
+=
"number_household INTEGER,"
sql_statement
+=
"population_density REAL)"
self
.
connection
.
execute
(
sql_statement
)
logger
.
debug
(
"Table PopulationDistribution created"
)
...
...
@@ -503,37 +505,6 @@ class JapanDatabase(SpatialiteDatabase):
)
self
.
cursor
.
execute
(
sql_statement
)
def insert_population_distribution(self, district_id, total, male, female, number_household):
    """
    Inserts a full dataset to the PopulationDistribution table.

    The values are bound through SQL placeholders instead of being formatted
    into the statement text.

    Args:
        district_id (int):
            ID of the district. Corresponds to District.id
        total (int):
            Total population of the district
        male (int):
            Male population of the district
        female (int):
            Female population of the district
        number_household (int):
            Total number of households of the district
    """

    sql_statement = (
        "INSERT INTO PopulationDistribution "
        "(district_id, total, male, female, number_household) "
        "VALUES (?, ?, ?, ?, ?)"
    )
    # int() keeps the integer coercion that the former "%d" formatting applied
    # (e.g. 40.0 -> 40) and turns integer-like objects into plain ints that the
    # database driver can bind.
    parameters = tuple(
        int(value) for value in (district_id, total, male, female, number_household)
    )
    self.cursor.execute(sql_statement, parameters)
def
insert_building_type
(
self
,
building_type_id
,
description
):
"""
Inserts a building-type description to the BuildingType table.
...
...
@@ -1090,6 +1061,60 @@ class JapanDatabase(SpatialiteDatabase):
numpy
.
seterr
(
divide
=
"ignore"
,
invalid
=
"ignore"
)
return
districts
def import_population_distribution(self, population_distribution_filepath):
    """
    Imports the population data from a CSV file into the PopulationDistribution
    table and afterwards fills the population_density column using the
    area_size stored in the District table.

    The CSV file must provide the columns KEY_CODE, total_population,
    male_population, female_population and total_number_of_households.

    Args:
        population_distribution_filepath (str):
            Path to the CSV file of the population distribution
    """

    columns = (
        "KEY_CODE",
        "total_population",
        "male_population",
        "female_population",
        "total_number_of_households",
    )

    # Read the district key, total, male and female population and the number of
    # households from the input CSV file. The csv module requires the file to be
    # opened with newline="" so that line endings inside quoted fields are kept.
    with open(population_distribution_filepath, newline="") as population_file:
        reader = csv.DictReader(population_file)
        population_to_db = [tuple(row[column] for column in columns) for row in reader]

    # Insert the data to the PopulationDistribution table
    # NOTE(review): KEY_CODE is written to district_id unchanged - confirm that it
    # matches District.id, since the density update below joins on that column.
    sql_statement = (
        "INSERT INTO PopulationDistribution (district_id, total, male, "
        "female, number_household) VALUES (?, ?, ?, ?, ?)"
    )
    self.cursor.executemany(sql_statement, population_to_db)
    self.connection.commit()
    logger.info("Population data added")

    # Calculate the population_density with the boundary area from the District table
    # and update the PopulationDistribution table. The total is cast to REAL so that
    # the division is never truncated to an integer, whatever type area_size has.
    # Rows without a matching district keep a NULL population_density.
    sql_statement = (
        "WITH updates(ID, population_density) "
        "AS (SELECT PopulationDistribution.district_id, "
        "CAST(PopulationDistribution.total AS REAL) / District.area_size "
        "FROM PopulationDistribution "
        "INNER JOIN District ON District.id = PopulationDistribution.district_id) "
        "UPDATE PopulationDistribution "
        "SET population_density = (SELECT population_density FROM updates "
        "WHERE PopulationDistribution.district_id = ID)"
    )
    logger.debug(sql_statement)
    self.cursor.execute(sql_statement)
    self.connection.commit()
def
import_exposure_data
(
self
,
population_distribution_filepath
,
...
...
@@ -1099,13 +1124,17 @@ class JapanDatabase(SpatialiteDatabase):
dwelling_sizes_filepath
,
):
"""
Imports all exposure data from the Excel files provided by E-Stat, Japan. The following
files are needed:
Imports all exposure data from the CSV and Excel files provided by E-Stat, Japan.
The following files are needed:
Number of people distributed by municipality (population_distribution_filepath)
List of numbers of buildings by municipality (building_numbers_filepath)
List of numbers of dwellings by municipality (dwelling_numbers_filepath)
List of numbers of households by municipality (household_numbers_filepath)
List of dwelling sizes by municipality (dwelling_sizes_filepath)
Args:
population_distribution_filepath (str):
File
path to the path of population distribution
Path to the file of the population distribution
building_numbers_filepath (str):
File path to the file of number of buildings
dwelling_numbers_filepath (str):
...
...
@@ -1141,53 +1170,27 @@ class JapanDatabase(SpatialiteDatabase):
# Store all district (i.e. administrative unit) areas with their district_ids in a list
districts
=
self
.
calculate_district_area
()
# Import population data into the database
self
.
import_population_distribution
(
population_distribution_filepath
)
# Create an empty list to store population attributes
population_list
=
[]
# Read columns district, total population, male population, female population and number
# of households from input CSV file
population_distribution_input
=
pandas
.
read_csv
(
population_distribution_filepath
,
usecols
=
[
2
,
9
,
10
,
11
,
12
]
)
for
index
,
row
in
population_distribution_input
.
iterrows
():
admin_id
=
int
(
row
[
"KEY_CODE"
])
# Identify district_id based on admin_id from the District table
district_id_result
=
self
.
get_district_id
(
admin_id
)
if
district_id_result
is
None
:
# Only data for which a district exists matters
continue
district_id
=
district_id_result
[
0
]
# Insert the data to the PopulationDistribution table
self
.
insert_population_distribution
(
district_id
,
int
(
row
[
constants
.
TOTAL_POPULATION
]),
int
(
row
[
constants
.
MALE_POPULATION
]),
int
(
row
[
constants
.
FEMALE_POPULATION
]),
int
(
row
[
constants
.
NUMBER_HOUSEHOLD_POPULATION
]),
# Select district_id, total and population_density from PopulationDistribution
# to be stored in a population_list
sql_statement
=
(
"SELECT district_id, total, population_density FROM PopulationDistribution"
)
self
.
cursor
.
execute
(
sql_statement
)
# Store population attributes in the population_list
population
=
(
district_id
,
int
(
row
[
constants
.
TOTAL_POPULATION
]))
# Store district_id (corresponding to the district_id in the Districts table),
# total and population_density from PopulationDistribution table to a population_list
# TODO: remove
for
population
in
self
.
cursor
:
for
district
in
districts
:
if
district
[
constants
.
DISTRICT_ID
]
==
population
[
constants
.
DISTRICT_ID
]:
add_element_and_get_index
(
population
,
population_list
)
logger
.
debug
(
"PopulationDistribution for district %s added"
%
admin_id
)
logger
.
info
(
"Population data added"
)
# Calculate the population density and add it to the population_list
for
pop
in
population_list
:
for
admin
in
districts
:
if
pop
[
constants
.
DISTRICT_ID
]
==
admin
[
constants
.
DISTRICT_ID
]:
unit_area
=
admin
[
constants
.
ADMINISTRATIVE_UNIT_AREA
]
population_density
=
numpy
.
true_divide
(
pop
[
constants
.
TOTAL_POPULATION
],
unit_area
)
index
=
population_list
.
index
(
pop
)
population_list
[
index
]
=
pop
+
(
population_density
,)
numpy
.
seterr
(
divide
=
"ignore"
,
invalid
=
"ignore"
)
else
:
continue
# Create a list with construction material types total, wooden and non-wooden
# from the building numbers input CSV file
building_number_construction_material_list
=
[
"0_Total"
,
"1_Wooden"
,
"2_Non-wooden"
]
...
...
exposurejapan/exposurejapan.py
View file @
7ea49252
...
...
@@ -44,11 +44,11 @@ def main():
db
.
create_tables
()
db
.
read_districts_and_boundaries
(
"data/Boundary.gpkg"
)
db
.
import_exposure_data
(
"data/
e008_3e.xlsx
"
,
"data/
population_sub_municipal.csv
"
,
"data/e039_3e.xlsx"
,
"data/e008_3e.xlsx"
,
"data/e011_2e.xlsx"
,
"data/e014e.xlsx"
,
"data/population_sub_municipal.csv"
,
)
# Leave the program
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment