database.py 93.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#!/usr/bin/env python3

# Copyright (C) 2021:
#   Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.

import logging
20
from exposurelib.database import SpatialiteDatabase
21
import pandas
22
import constants
23
import shapely.wkt
24
import shapely.wkb
25
26
import pyproj
from shapely.ops import transform
27
import csv
28
29
30
31
32

# Initialize log
logger = logging.getLogger(__name__)


33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def add_element_and_get_index(element, element_list):
    """
    Checks if an element is in a list and adds it to the list if not.
    Returns the index of the element in the list.

    Args:
        element (str):
            Element to be added to the list
        element_list (list):
            List to add the element to

    Returns:
         Index of inserted element in the list
    """

    if element not in element_list:
        element_list.append(element)
    return element_list.index(element)


53
class JapanDatabase(SpatialiteDatabase):
54
55
    """
    The JapanDatabase class represents a Spatialite database for the Japan
56
57
58
59
60
61
62
63
64
65
66
67
68
69
    exposure model. It is derived from the generic Database class.

    Args:
        database_filepath (str):
            File path for the Spatialite database file.
        spatialite_filepath (str):
            File path of the Spatialite extension.

    Attributes:
        database_filepath (str): Spatialite database file path.
        spatialite_filepath (str): File path to the Spatialite extension.
    """

    def __init__(self, database_filepath, spatialite_filepath="mod_spatialite"):
70
        SpatialiteDatabase.__init__(self, database_filepath, spatialite_filepath)
71
72
73
74

    def create_tables(self):
        """
        Creates all necessary tables in the database. These are:
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
            District              : Stores the districts with their IDs, names and geometries
            DwellingNumber        : Stores the number of dwellings depending on building types,
                                    construction material and number of stories for each
                                    district
            BuildingNumber        : Stores the number of buildings depending on building types,
                                    construction material and number of stories for each
                                    district
            HouseholdData         : Stores different parameters describing the household
                                    numbers, household members, and household spaces for each
                                    district
            DwellingFloorspace    : Stores the floorspace per dwelling depending on the
                                    building, dwelling, tenure types and construction material
                                    for each district
            DwellingDistribution  : Stores the number of dwellings of different sizes depending
                                    on the building, dwelling, tenure types and construction
                                    material for each district
            BuildingType          : Stores the different types of buildings
            DwellingType          : Stores the different types of dwellings
            TenureType            : Stores the different types of tenures
            ConstructionMaterial  : Stores the construction-material types
            StoryNumber           : Stores the number (e.g. 1, 2, 3, 4, ...) and classifications
                                    (1-2, 3-5, ...) of numbers of stories.
            DwellingSizeType      : Stores the different types of dwelling sizes
            BuildingClassesMapping: Stores GEM taxonomy-based building classes
99
            BuildingAsset         : Stores required building exposure assets
100
101
102
103
        """

        # Create table District
        sql_statement = "CREATE TABLE District ("
shinde's avatar
shinde committed
104
105
106
        sql_statement += "id         INTEGER PRIMARY KEY, "
        sql_statement += "name       TEXT, "
        sql_statement += "area_size  REAL)"
107
108
109
110
111
112
        self.connection.execute(sql_statement)
        sql_statement = "SELECT AddGeometryColumn('District', 'geom', 4326, "
        sql_statement += "'MULTIPOLYGON', 'XY')"
        self.connection.execute(sql_statement)
        logger.debug("Table District created")

113
114
115
116
117
118
        # Create table DwellingNumber
        sql_statement = "CREATE TABLE DwellingNumber ("
        sql_statement += "id                        INTEGER PRIMARY KEY AUTOINCREMENT, "
        sql_statement += "district_id               INTEGER, "
        sql_statement += "building_type_id          INTEGER, "
        sql_statement += "construction_material_id  INTEGER, "
119
        sql_statement += "story_number_id           INTEGER, "
120
121
122
123
        sql_statement += "number_dwelling           REAL)"
        self.connection.execute(sql_statement)
        logger.debug("Table DwellingNumber created")

124
125
126
127
128
129
130
        # Create table BuildingNumber
        sql_statement = "CREATE TABLE BuildingNumber ("
        sql_statement += "id                        INTEGER PRIMARY KEY AUTOINCREMENT, "
        sql_statement += "district_id               INTEGER, "
        sql_statement += "building_type_id          INTEGER, "
        sql_statement += "construction_material_id  INTEGER, "
        sql_statement += "story_number_id           INTEGER, "
131
        sql_statement += "number_building           REAL);"
132
133
134
        self.connection.execute(sql_statement)
        logger.debug("Table BuildingNumber created")

135
136
137
138
139
140
141
142
143
144
145
146
147
        # Create index for columns in the BuildingNumber table
        sql_statement = """
            CREATE INDEX "BN_Index" ON "BuildingNumber"
            (
                "building_type_id"	        ASC,
                "construction_material_id"  ASC,
                "story_number_id"	        ASC,
                "number_building"	        ASC
            )
            """
        self.connection.execute(sql_statement)
        logger.debug("Index for BuildingNumber created")

148
149
150
151
152
153
154
        # Create table HouseholdData
        sql_statement = "CREATE TABLE HouseholdData ("
        sql_statement += "id                        INTEGER PRIMARY KEY AUTOINCREMENT, "
        sql_statement += "district_id               INTEGER, "
        sql_statement += "building_type_id          INTEGER, "
        sql_statement += "dwelling_type_id          INTEGER, "
        sql_statement += "tenure_type_id            INTEGER, "
155
        sql_statement += "construction_material_id  INTEGER, "
156
        sql_statement += "story_number_id           INTEGER, "
157
158
159
160
161
162
163
164
165
166
167
        sql_statement += "number_dwelling           REAL, "
        sql_statement += "number_household          REAL, "
        sql_statement += "number_household_member   REAL, "
        sql_statement += "rooms_per_dwelling        REAL, "
        sql_statement += "tatami_per_dwelling       REAL, "
        sql_statement += "floorspace_per_dwelling   REAL, "
        sql_statement += "tatami_per_person         REAL, "
        sql_statement += "person_per_room           REAL)"
        self.connection.execute(sql_statement)
        logger.debug("Table HouseholdData created")

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
        # Create table DwellingFloorspace
        sql_statement = "CREATE TABLE DwellingFloorspace ("
        sql_statement += "id                        INTEGER PRIMARY KEY, "
        sql_statement += "district_id               INTEGER, "
        sql_statement += "building_type_id          INTEGER, "
        sql_statement += "dwelling_type_id          INTEGER, "
        sql_statement += "tenure_type_id            INTEGER, "
        sql_statement += "construction_material_id  INTEGER, "
        sql_statement += "floorspace                REAL)"
        self.connection.execute(sql_statement)
        logger.debug("Table DwellingFloorspace created")

        # Create table DwellingDistribution
        sql_statement = "CREATE TABLE DwellingDistribution ("
        sql_statement += "id                        INTEGER PRIMARY KEY, "
        sql_statement += "district_id               INTEGER, "
        sql_statement += "building_type_id          INTEGER, "
        sql_statement += "dwelling_type_id          INTEGER, "
        sql_statement += "tenure_type_id            INTEGER, "
        sql_statement += "construction_material_id  INTEGER, "
        sql_statement += "dwelling_size_type_id     INTEGER, "
        sql_statement += "number_dwelling           REAL)"
        self.connection.execute(sql_statement)
        logger.debug("Table DwellingDistribution created")

shinde's avatar
shinde committed
193
194
        # Create table PopulationDistribution
        sql_statement = "CREATE TABLE PopulationDistribution ("
195
196
        sql_statement += "id                  INTEGER PRIMARY KEY, "
        sql_statement += "district_id         INTEGER, "
Simantini Shinde's avatar
Simantini Shinde committed
197
198
199
        sql_statement += "total_population    INTEGER, "
        sql_statement += "male_population     INTEGER, "
        sql_statement += "female_population   INTEGER, "
200
201
        sql_statement += "number_household    INTEGER,"
        sql_statement += "population_density  REAL)"
shinde's avatar
shinde committed
202
203
204
        self.connection.execute(sql_statement)
        logger.debug("Table PopulationDistribution created")

205
206
207
208
209
        # Create table BuildingType
        sql_statement = "CREATE TABLE BuildingType ("
        sql_statement += "id           INTEGER PRIMARY KEY, "
        sql_statement += "description  TEXT)"
        self.connection.execute(sql_statement)
210
        logger.debug("Table BuildingType created")
211

212
213
214
215
216
        # Create table DwellingType
        sql_statement = "CREATE TABLE DwellingType ("
        sql_statement += "id           INTEGER PRIMARY KEY, "
        sql_statement += "description  TEXT)"
        self.connection.execute(sql_statement)
217
        logger.debug("Table DwellingType created")
218
219
220
221
222
223

        # Create table TenureType
        sql_statement = "CREATE TABLE TenureType ("
        sql_statement += "id           INTEGER PRIMARY KEY, "
        sql_statement += "description  TEXT)"
        self.connection.execute(sql_statement)
224
        logger.debug("Table TenureType created")
225

226
227
228
229
230
        # Create table ConstructionMaterial
        sql_statement = "CREATE TABLE ConstructionMaterial ("
        sql_statement += "id           INTEGER PRIMARY KEY, "
        sql_statement += "description  TEXT)"
        self.connection.execute(sql_statement)
231
        logger.debug("Table ConstructionMaterial created")
232

233
        # Create table StoryNumber
234
        sql_statement = "CREATE TABLE StoryNumber ("
235
236
237
        sql_statement += "id           INTEGER PRIMARY KEY, "
        sql_statement += "description  TEXT)"
        self.connection.execute(sql_statement)
238
239
240
241
242
243
244
245
        logger.debug("Table StoryNumber created")

        # Create table DwellingSizeType
        sql_statement = "CREATE TABLE DwellingSizeType ("
        sql_statement += "id           INTEGER PRIMARY KEY, "
        sql_statement += "description  TEXT)"
        self.connection.execute(sql_statement)
        logger.debug("Table DwellingSizeType created")
246

247
248
249
250
251
252
253
254
255
256
        # Create table BuildingClassesMapping
        sql_statement = "CREATE TABLE BuildingClassesMapping ("
        sql_statement += "id                        INTEGER PRIMARY KEY, "
        sql_statement += "building_type_id          INTEGER, "
        sql_statement += "construction_material_id  INTEGER, "
        sql_statement += "story_number_id           INTEGER, "
        sql_statement += "building_class            TEXT)"
        self.connection.execute(sql_statement)
        logger.debug("Table BuildingClassesMapping created")

257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
        # Create table BuildingAsset
        sql_statement = "CREATE TABLE BuildingAsset ("
        sql_statement += "id                       INTEGER PRIMARY KEY, "
        sql_statement += "admin_id                 TEXT, "
        sql_statement += "admin_name               TEXT, "
        sql_statement += "occupancy                TEXT, "
        sql_statement += "taxonomy                 INTEGER, "
        sql_statement += "number                   REAL, "
        sql_statement += "structural               REAL, "
        sql_statement += "night                    REAL)"
        self.connection.execute(sql_statement)
        sql_statement = "SELECT AddGeometryColumn('BuildingAsset', 'geom', 4326, "
        sql_statement += "'MULTIPOLYGON', 'XY')"
        self.connection.execute(sql_statement)
        logger.debug("Table BuildingAsset created")

273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
    def insert_building_type(self, building_type_id, description):
        """
        Inserts a building-type description to the BuildingType table.

        Args:
            building_type_id (int):
                ID of the building type
            description (str):
                Description of the building type
        """

        sql_statement = "INSERT INTO BuildingType "
        sql_statement += "(id, description) "
        sql_statement += "VALUES (%d, '%s')" % (building_type_id, description)
        self.cursor.execute(sql_statement)

    def insert_construction_material(self, construction_material_id, description):
        """
        Inserts a construction-material description to the ConstructionMaterial table.

        Args:
            construction_material_id (int):
                ID of the construction material
            description (str):
                Description of the construction material
        """

        sql_statement = "INSERT INTO ConstructionMaterial "
        sql_statement += "(id, description) "
        sql_statement += "VALUES (%d, '%s')" % (construction_material_id, description)
        self.cursor.execute(sql_statement)

305
    def insert_story_number(self, story_number_id, description):
306
        """
307
        Inserts a story-number description to the StoryNumber table.
308
309

        Args:
310
            story_number_id (int):
311
312
313
314
315
                ID of the number of stories entry
            description (str):
                Description of the number of stories entry
        """

316
        sql_statement = "INSERT INTO StoryNumber "
317
        sql_statement += "(id, description) "
318
        sql_statement += "VALUES (%d, '%s')" % (story_number_id, description)
319
320
        self.cursor.execute(sql_statement)

Simantini Shinde's avatar
Simantini Shinde committed
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
    def insert_dwelling_type(self, dwelling_type_id, description):
        """
        Inserts a dwelling-type description to the DwellingType table.

        Args:
            dwelling_type_id (int):
                ID of the dwelling type
            description (str):
                Description of the dwelling type
        """

        sql_statement = "INSERT INTO DwellingType (id, description) "
        sql_statement += "VALUES (%d, '%s')" % (dwelling_type_id, description)
        self.cursor.execute(sql_statement)

    def insert_tenure_type(self, tenure_type_id, description):
        """
        Inserts a tenure-type description to the TenureType table.

        Args:
            tenure_type_id (int):
                ID of the tenure type
            description (str):
                Description of the tenure type
        """

        sql_statement = "INSERT INTO TenureType (id, description) "
        sql_statement += "VALUES (%d, '%s')" % (tenure_type_id, description)
        self.cursor.execute(sql_statement)

351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
    def insert_dwelling_size_type(self, dwelling_size_type_id, description):
        """
        Inserts a dwelling-size description to the DwellingSizeType table.

        Args:
            dwelling_size_type_id (int):
                ID of the dwelling-size type entry
            description (str):
                Description of the dwelling-size type entry
        """

        sql_statement = "INSERT INTO DwellingSizeType (id, description) "
        sql_statement += "VALUES (%d, '%s')" % (dwelling_size_type_id, description)
        self.cursor.execute(sql_statement)

366
    def import_districts_and_boundaries(self, district_boundary_filepath):
367
        """
368
        Imports all districts and boundaries from a prepared geopackage file.
369
370
371
372
373
374
375
376
        ('estat_bound_municipal`). The function opens this file as Spatialite database
        and copies all necessary district and boundary information into the `District` table.

        Args:
            district_boundary_filepath (str):
                File path to the boundary file
        """

377
        # Attach Geopackage boundary database to the Japan database
shinde's avatar
shinde committed
378
379
380
381
382
383
        sql_statement = "SELECT EnableGpkgAmphibiousMode();"
        self.connection.execute(sql_statement)
        sql_statement = "ATTACH '%s' AS BoundaryDatabase" % district_boundary_filepath
        logger.debug(sql_statement)
        self.connection.execute(sql_statement)

Simantini Shinde's avatar
Simantini Shinde committed
384
        # From the boundary database select id, name and geometry
shinde's avatar
shinde committed
385
        # and insert into Districts table in the Japan database
386
        sql_statement = (
Simantini Shinde's avatar
Simantini Shinde committed
387
388
            "INSERT INTO District (id, name, geom) "
            "SELECT key_code_ward, CITY_NAME, "
389
390
            "ST_Transform(CastToMultiPolygon(GeomFromGPB(geom)), "
            "%d) " % constants.WGS84
shinde's avatar
shinde committed
391
392
393
394
395
396
397
398
        )
        sql_statement += "FROM BoundaryDatabase.Boundary"
        logger.debug(sql_statement)
        self.cursor.execute(sql_statement)
        self.connection.commit()

        # Calculate the area for each district using Albers equal area transformation
        # and update the Districts table
399
        sql_statement = "SELECT id, AsBinary(geom) FROM District"
shinde's avatar
shinde committed
400
401
402
        self.cursor.execute(sql_statement)
        districts = self.cursor.fetchall()
        for district in districts:
403
            district_geom = shapely.wkb.loads(district[1])
shinde's avatar
shinde committed
404
405
406
407
408
409
410
            district_size = (
                self.albers_area_calculation(district_geom, "epsg:%d" % constants.WGS84)
                * 0.000001  # Convert from square meters to square kilometers
            )
            sql_statement = "UPDATE District SET area_size = %f WHERE id = %s" % (
                district_size,
                district[0],
411
            )
shinde's avatar
shinde committed
412
413
            logger.debug(sql_statement)
            self.cursor.execute(sql_statement)
414
        self.connection.commit()
shinde's avatar
shinde committed
415

416
        # Detach Geopackage boundary database from the Japan database
shinde's avatar
shinde committed
417
418
419
        sql_statement = "DETACH DATABASE 'BoundaryDatabase'"
        logger.debug(sql_statement)
        self.connection.execute(sql_statement)
420
        logger.info("Districts and boundaries added")
421

422
    def get_district_id(self, district_id):
shinde's avatar
shinde committed
423
424
425
426
427
        """
        Gets the ID of the district in the Districts table corresponding
        to the ID of the administrative district specified.

        Args:
428
            district_id (int):
shinde's avatar
shinde committed
429
430
431
432
433
                ID of the administrative district
        """

        # Identify district_id based on admin_id from the District table
        sql_statement = "SELECT id FROM District "
434
        sql_statement += "WHERE id = %d" % district_id
shinde's avatar
shinde committed
435
436
437
438
        self.cursor.execute(sql_statement)
        result = self.cursor.fetchone()
        return result

439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
    @staticmethod
    def reproject_polygon(input_polygon, input_crs, target_crs):
        """
        Returns a (multi)polygon object transformed from input_crs to target_crs. if
        "aea" is specified as target crs, returns the Albers equal area transformed
        polygon.

        More information on the Albers equal area can be found at:
            https://pro.arcgis.com/en/pro-app/latest/help/mapping/properties/albers.htm

        Args:
            input_polygon (shapely.geometry.polygon.Polygon):
                Polygon object to reproject.

            input_crs (str):
                Initial crs of the input_polygon (e.g. "epsg:4326").

            target_crs (str):
                Target crs for the final polygon object. If "aea" is
                specified, process the complete Albers equal area transformation.

        Returns:
            geometry (shapely.geometry.polygon.Polygon):
                Polygon object reprojected to target_crs.
        """
        if target_crs == "aea":
            project = pyproj.Transformer.from_proj(
                pyproj.Proj(input_crs), pyproj.Proj("epsg:4326"), always_xy=True
            )
            input_polygon = transform(project.transform, input_polygon)

            # Get the bounding box of the polygon and sort latitudes and longitudes
            bbox = input_polygon.bounds
            minx, maxx = sorted([bbox[0], bbox[2]])
            miny, maxy = sorted([bbox[1], bbox[3]])

            # Use the resulting coordinates to perform the transformation
            project_albers = pyproj.Proj(
                "+proj=aea +lat_1={} +lat_2={} +lat_0={} +lon_0={}".format(
                    miny, maxy, (miny + maxy) / 2.0, (minx + maxx) / 2.0
                )
            )
            geometry = transform(project_albers, input_polygon)

        else:
            project = pyproj.Transformer.from_proj(
                pyproj.Proj(input_crs), pyproj.Proj(target_crs), always_xy=True
            )
            geometry = transform(project.transform, input_polygon)

        return geometry

    def albers_area_calculation(self, input_polygon, polygon_crs):
        """
        Return the area of a polygon in the Albers equal area projection.

        Args:
            input_polygon (shapely.geometry.polygon.Polygon):
                Input polygon object for area calculation.

            polygon_crs (str):
                Initial crs associated to input_polygon.

        Returns:
            float:
                Area measured in squared meters for the input_polygon projected to the Albers
                equal area system.
        """

        polygon = self.reproject_polygon(input_polygon, polygon_crs, "aea")

        return polygon.area

512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
    def import_population_distribution(self, population_distribution_filepath):
        """
        Imports all population data from the 2015 Population Census file provided
        by E-Stat, Japan. The following file is needed:
            Number of people distributed by municipality (population_distribution_filepath)

        Args:
            population_distribution_filepath (str):
                Filepath to the path of population distribution
        """

        # Read columns district, total population, male population, female population and number
        # of households from input CSV file
        with open(population_distribution_filepath) as population_file:
            population = csv.DictReader(population_file)
            population_to_db = [
                (
                    i["KEY_CODE"],
                    i["total_population"],
                    i["male_population"],
                    i["female_population"],
                    i["total_number_of_households"],
                )
                for i in population
            ]

            # Insert the data to the PopulationDistribution table
539
540
            sql_statement = """
                INSERT INTO PopulationDistribution
Simantini Shinde's avatar
Simantini Shinde committed
541
542
543
544
545
                (district_id,
                total_population,
                male_population,
                female_population,
                number_household)
546
547
                VALUES (?, ?, ?, ?, ?)
            """
548
549
550
551
552
553
554
            self.cursor.executemany(sql_statement, population_to_db)

        self.connection.commit()
        logger.info("Population data added")

        # Calculate the population_density with the boundary area from Districts table
        # and update the PopulationDistribution table
555
556
        sql_statement = """
            UPDATE PopulationDistribution
557
558
            SET population_density = Q.density
            FROM
559
            (
Simantini Shinde's avatar
Simantini Shinde committed
560
                SELECT PopulationDistribution.total_population/District.area_size AS density,
561
562
563
                    PopulationDistribution.district_id AS id
                FROM PopulationDistribution
                INNER JOIN District
564
565
566
567
                ON District.id = PopulationDistribution.district_id
            ) AS Q
            WHERE PopulationDistribution.district_id = Q.id
            """
568
569
570
571
        logger.debug(sql_statement)
        self.cursor.execute(sql_statement)
        self.connection.commit()

572
573
    def import_building_numbers(
        self, building_numbers_filepath, building_type_list, story_number_list
574
    ):
575
        """
576
        Imports all building data from the 2018 building statistics files provided
577
        by E-Stat, Japan. The following file is needed:
578
            List of numbers of buildings by municipality (building_numbers_filepath)
579

580
        The building data provide the building numbers for a combination of different
581
582
583
584
585
586
        building attributes:
        - building types (total, detached house, tenement house, apartments and others)
        - construction material type (total, wooden, non-wooden)
        - story number type (total, 1 story, 2 stories and over, 2 stories, 3 to 5 stories
          and 6 stories) except for building type `Other` which is
          given for story number `total` only.
587

588
589
590
591
592
        These building numbers are not available for all districts in Japan
        i.e. the districts with population less than 15000 are ignored in the
        building statistics and are calculated in the
        `calculate_building_numbers_district_no_info` function.

593
        Args:
594
            building_numbers_filepath (str):
595
596
597
598
599
                Filepath of the file containing building numbers
            building_type_list (list):
                Collection of types of buildings from the building numbers file.
            story_number_list (list):
                Collection of the number of stories from the building numbers file.
600
601
        """

602
        # Create a list with construction material types total, wooden and non-wooden
603
604
        building_number_construction_material_list = ["0_Total", "1_Wooden", "2_Non-wooden"]

605
        # Read columns identification area code, district, building type, construction material
606
        # type, story number type and number of buildings from input Excel file
607
608
609
        building_numbers_input = pandas.read_excel(
            building_numbers_filepath, header=10, usecols=[4, 5, 7, 9, 10, 11, 12]
        )
610

611
612
        for index, row in building_numbers_input.iterrows():
            admin_id = int((row["Area classification"].split("_"))[0])
613

614
615
616
617
618
            # Identify district_id based on admin_id from the District table
            district_id_result = self.get_district_id(admin_id)
            if district_id_result is None:  # Only data for which a district exist matter
                continue
            district_id = district_id_result[0]
619

620
621
622
623
624
625
626
            # Get ID of building type and number of stories
            building_type_id = add_element_and_get_index(
                row["Type of building"], building_type_list
            )
            story_number_id = add_element_and_get_index(
                row["Stories of building"], story_number_list
            )
627

628
629
630
631
632
633
634
            # Insert building numbers for total, wooden and non-wooden construction material
            for construction_material_index in range(
                len(building_number_construction_material_list)
            ):
                building_number = float(
                    str(row[constants.NUMBER_BUILDING + construction_material_index]).replace(
                        "-", "0"
635
                    )
636
637
638
639
                )

                # Map the column index to the construction_material_id
                construction_material_id = construction_material_index
640

641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
                # Re-assign construction_material_id for non-wooden from the
                # construction_material_list
                if (
                    construction_material_id
                    == constants.WOODEN_EXCLUDING_WOODEN_AND_FIRE_PROOFED
                ):
                    construction_material_id = constants.NON_WOODEN

                # Insert the data to the BuildingNumber table
                sql_statement = (
                    "INSERT INTO BuildingNumber (district_id, building_type_id, "
                    "construction_material_id, story_number_id, number_building) "
                    "VALUES (%d, %d, %d, %d, %f)"
                    % (
                        district_id,
                        building_type_id,
                        construction_material_id,
                        story_number_id,
                        building_number,
660
                    )
661
662
                )
                self.cursor.execute(sql_statement)
663

664
665
        self.connection.commit()
        logger.info("Building numbers added")
666

667
    def calculate_building_numbers_frequency_distributions(self):
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
        """
        Fixes rounding-up errors observed between the sum of building numbers of the
        building attributes with type not equals `total` (except for building type `Other`)
        and building numbers of the building attributes with type `total` by doing a frequency
        distribution in query Q with T.total_building and S.sum_building from the
        joined queries S, T and updates the calculated target_number in the
        BuildingNumber table.

        The query S computes the sum of building numbers grouped by district_id where
        building type is not `total` (i.e. building_type_id != 0) and construction material is
        not `total` (i.e. construction_material_id != 0) and story number is not `total` in the
        Japanese statistics data, except for when building type is `Other`
        (i.e. building_type_id = 4) then story number is `total` (i.e. story_number_id = 0).

        The query T selects the building numbers when building type is `total`
        (i.e. building_type_id = 0) and construction material is `total`
        (i.e. construction_material_id = 0) and story number is `total`
        (i.e. story_number_id = 0).
        """

        sql_statement = """
            UPDATE BuildingNumber AS M
            SET number_building = Q.target_number
            FROM
            (
                SELECT B.district_id, B.number_building,
                    S.sum_building, T.total_building,
                    B.number_building * T.total_building
                    / S.sum_building AS target_number,
                    B.building_type_id, B.construction_material_id,
                    B.story_number_id
                FROM BuildingNumber AS B
                INNER JOIN
                (
                    SELECT district_id,
                        SUM(number_building) AS sum_building
                    FROM BuildingNumber
                    WHERE
                    (
                        (
                            building_type_id != 0
                            AND construction_material_id != 0
                            AND story_number_id != 0
                        )
                        OR
                        (
                            building_type_id == 4
                            AND construction_material_id != 0
                            AND story_number_id == 0
                        )
                    )
                    GROUP BY district_id
                ) AS S
                ON B.district_id = S.district_id
                INNER JOIN
                (
                    SELECT district_id,
                        number_building AS total_building
                    FROM BuildingNumber
                    WHERE
                    (
                        building_type_id = 0
                        AND construction_material_id = 0
                        AND story_number_id = 0
                    )
                ) AS T
                ON B.District_id = T.district_id
                WHERE
                (
                    (
                        B.building_type_id != 0
                        AND B.construction_material_id != 0
                        AND B.story_number_id != 0
                    )
                    OR
                    (
                        B.building_type_id == 4
                        AND B.construction_material_id != 0
                        AND B.story_number_id == 0
                    )
                )
            ) AS Q
            WHERE
            (
                M.district_id = Q.district_id
                AND M.building_type_id = Q.building_type_id
                AND M.construction_material_id = Q.construction_material_id
                AND M.story_number_id = Q.story_number_id
            )
            """
        logger.debug(sql_statement)
        self.cursor.execute(sql_statement)
        self.connection.commit()
761
        logger.info("Fixed rounding up errors for building numbers")
762

763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
    def calculate_building_numbers_district_no_info(self):
        """
        Calculates the building numbers for districts that do not have building information in
        the building dataset.

        The building numbers are calculated in query `ToUpdateBuildingNumber` by multiplying
        the population for districts with no building information to a factor of the number
        of buildings per person as taken from the districts with population densities closest
        to the population density of the district with no building information.

        The innermost query `DistrictWithNoInformation` initially finds the districts not
        present in the BuildingNumber table by comparing it to the Districts and
        PopulationDistribution tables. These districts are then mapped to their corresponding
        population densities and populations. This final table is cross-joined with all
        districts and their corresponding population and population densities from the
        BuildingNumber (i.e. with building information) table in the query
        SourcePopulationDistribution.

        The next outer query `MappingToDistrictWithNoInfo` calculates the absolute value
        of the difference between the population densities of districts with and without
        information and sorts it in ascending order to find the districts with information
        which are closest in population density to the districts without building information.

        The following outer query `ClosestMappingToDistrictWithNoInfo` selects only the
        first ten districts from districts in query MappingToDistrictWithNoInfo sorted
        by the aforementioned difference.

        The next outer query `MappingBuildingExposureToDistrictWithNoInfo` maps the building
        attributes and their building numbers from the BuildingNumber table to the districts
        with no information and calculates the building numbers for districts with no building
        information. The mapping is done on the district_id of the ten closest districts
        (with information) found in the query `ClosestMappingToDistrictWithNoInfo`.

        The final query `ToUpdateBuildingNumber` selects the district_id, building exposure
        attributes and building number to be inserted into the BuildingNumber table.
        """

        sql_statement = """
Simantini Shinde's avatar
Simantini Shinde committed
801
802
803
804
805
806
807
808
809
810
811
812
813
            INSERT INTO BuildingNumber
            (
                district_id,
                building_type_id,
                construction_material_id,
                story_number_id,
                number_building
            )
            SELECT ToUpdateBuildingNumber.to_update_id,
                ToUpdateBuildingNumber.building_type_id,
                ToUpdateBuildingNumber.construction_material_id,
                ToUpdateBuildingNumber.story_number_id,
                ToUpdateBuildingNumber.to_update_number_building
814
815
            FROM
            (
Simantini Shinde's avatar
Simantini Shinde committed
816
817
818
819
820
821
822
                SELECT MappingBuildingExposureToDistrictWithNoInfo.to_update_id,
                    MappingBuildingExposureToDistrictWithNoInfo.source_id,
                    MappingBuildingExposureToDistrictWithNoInfo.building_type_id,
                    MappingBuildingExposureToDistrictWithNoInfo.construction_material_id,
                    MappingBuildingExposureToDistrictWithNoInfo.story_number_id,
                    MappingBuildingExposureToDistrictWithNoInfo.number_building,
                    sum(MappingBuildingExposureToDistrictWithNoInfo.number_building),
823
824
                    AVG(MappingBuildingExposureToDistrictWithNoInfo.number_building/
                        MappingBuildingExposureToDistrictWithNoInfo.source_population)
Simantini Shinde's avatar
Simantini Shinde committed
825
826
                        * MappingBuildingExposureToDistrictWithNoInfo.to_update_population
                    AS to_update_number_building
827
828
                FROM
                (
Simantini Shinde's avatar
Simantini Shinde committed
829
                    SELECT *
830
831
                    FROM
                    (
Simantini Shinde's avatar
Simantini Shinde committed
832
833
834
835
836
                        SELECT MappingToDistrictWithNoInfo.id AS to_update_id,
                            MappingToDistrictWithNoInfo.district_id AS source_id,
                            MappingToDistrictWithNoInfo.rn,
                            MappingToDistrictWithNoInfo.to_update_population,
                            MappingToDistrictWithNoInfo.source_population
837
838
                        FROM
                        (
Simantini Shinde's avatar
Simantini Shinde committed
839
840
841
842
843
844
845
                            SELECT DistrictWithNoInformation.id,
                                DistrictWithNoInformation.district_id,
                                row_number() OVER (PARTITION BY DistrictWithNoInformation.id)
                                AS rn,
                                DistrictWithNoInformation.to_update_population,
                                DistrictWithNoInformation.source_population
                            FROM
846
                            (
Simantini Shinde's avatar
Simantini Shinde committed
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
                                SELECT D.id,
                                    SourcePopulationDistribution.district_id,
                                    ABS(ToUpdatePopulationDistribution.population_density -
                                        SourcePopulationDistribution.population_density
                                    ) AS difference,
                                    SourcePopulationDistribution.source_population,
                                    ToUpdatePopulationDistribution.[total_population]
                                    AS to_update_population
                                FROM PopulationDistribution AS ToUpdatePopulationDistribution
                                INNER JOIN District AS D
                                ON ToUpdatePopulationDistribution.district_id = D.id
                                CROSS JOIN
                                (
                                    SELECT DISTINCT B.district_id,
                                        PD.[total_population] as source_population,
                                        PD.population_density
                                    FROM BuildingNumber AS B
                                    INNER JOIN PopulationDistribution AS PD
                                    ON B.district_id = PD.district_id
                                ) AS SourcePopulationDistribution
                                WHERE D.id NOT IN
                                (
                                    SELECT district_id
                                    FROM BuildingNumber
                                )
                                ORDER BY D.id, difference ASC
                            ) AS DistrictWithNoInformation
                        ) AS MappingToDistrictWithNoInfo
                        WHERE rn < 11
                    ) AS ClosestMappingToDistrictWithNoInfo
877
878
879
                    INNER JOIN BuildingNumber
                    ON ClosestMappingToDistrictWithNoInfo.source_id = BuildingNumber.district_id
                    ORDER BY ClosestMappingToDistrictWithNoInfo.to_update_id
Simantini Shinde's avatar
Simantini Shinde committed
880
                ) AS MappingBuildingExposureToDistrictWithNoInfo
881
                GROUP BY MappingBuildingExposureToDistrictWithNoInfo.to_update_id,
Simantini Shinde's avatar
Simantini Shinde committed
882
883
884
885
                    MappingBuildingExposureToDistrictWithNoInfo.building_type_id,
                    MappingBuildingExposureToDistrictWithNoInfo.construction_material_id,
                    MappingBuildingExposureToDistrictWithNoInfo.story_number_id
            ) AS ToUpdateBuildingNumber
886
887
888
889
        """
        logger.debug(sql_statement)
        self.cursor.execute(sql_statement)
        self.connection.commit()
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
        logger.info(
            "Added building numbers for districts with no information in BuildingNumber table"
        )

    def import_dwelling_numbers(
        self,
        dwelling_numbers_filepath,
        building_type_list,
        construction_material_list,
        story_number_list,
    ):
        """
        Imports all dwelling data from the 2018 building statistics files provided
        by E-Stat, Japan. The following file is needed:
            List of numbers of dwellings by municipality (dwelling_numbers_filepath)

        The dwelling data provide the dwelling numbers for a combination of different
        building attributes:
        - building types (total, detached house, tenement house,apartments and others),
        - construction material type (total, wooden, wooden (excluding wooden and
          fire-proofed), wooden and fire-proofed and non-wooden)
        - story number type (total, 1 story, 2 stories and over, 2 stories,
          3 to 5 stories and 6 stories) except for building type `Other` which is
          given for story number `total` only.

        These dwelling numbers are not available for all districts in Japan
        i.e. the districts with population less than 15000 are ignored in the
        building statistics.

        Args:
            dwelling_numbers_filepath (str):
                Filepath of the file containing dwelling numbers
            building_type_list (list):
                Collection of types of buildings from the dwelling numbers file.
            construction_material_list (list):
                Collection of types of construction materials from the dwelling numbers file.
            story_number_list (list):
                Collection of the number of stories from the dwelling numbers file.
        """

        # Read columns identification area code, district, building type, construction material
        # type, story number type and number of dwellings from input csv file
        dwelling_numbers_input = pandas.read_excel(
            dwelling_numbers_filepath, header=10, usecols=[4, 5, 7, 9, 10, 11, 12, 13, 14]
        )

        # Iterate through each district
        for index, row in dwelling_numbers_input.iterrows():
            admin_id = int((row["Area classification"].split("_"))[0])

            # Identify district_id based on admin_id from the District table
            district_id_result = self.get_district_id(admin_id)
            if district_id_result is None:  # Only data for which a district exist matter
                continue
            district_id = district_id_result[0]

            # Get ID of building type and number of stories
            building_type_id = add_element_and_get_index(
                row["Type of building"], building_type_list
            )
            story_number_id = add_element_and_get_index(
                row["Stories of building"], story_number_list
            )

            # Insert dwelling numbers for each construction material
            for construction_material_id in range(len(construction_material_list)):
                dwelling_number = float(
                    str(row[constants.NUMBER_DWELLING + construction_material_id]).replace(
                        "-", "0"
                    )
                )

                # Insert the data to the DwellingNumber table
                sql_statement = (
                    "INSERT INTO DwellingNumber (district_id, building_type_id, "
                    "construction_material_id, story_number_id, number_dwelling) "
                    "VALUES (%d, %d, %d, %d, %f)"
                    % (
                        district_id,
                        building_type_id,
                        construction_material_id,
                        story_number_id,
                        dwelling_number,
                    )
                )
                self.cursor.execute(sql_statement)

        self.connection.commit()
        logger.info("Dwelling numbers added")

    def calculate_dwelling_numbers_frequency_distributions(self):
        """
        Fixes rounding-up errors observed between the sum of dwelling numbers of the
        building attributes with type not equal to `total` (except for building type `Other`
        and also construction_material_id is not `wooden (excluding wooden and
        fire-proofed)` and not `wooden and fire-proofed`) and dwelling numbers of the
        building attributes with type `total` by doing a frequency distribution in
        query Q with T.total_building and S.sum_building from the joined queries S and
        T, and updates the calculated target_number in the DwellingNumber table.

        The query S computes the sum of dwelling numbers grouped by district_id where
        building type is not `total` (i.e. building_type_id != 0) and construction material is
        neither `total`, `wooden (excluding wooden and fire-proofed)` nor
        `wooden and fire-proofed` (i.e. construction_material_id != 0 and != 2 and != 3) and
        story number is not `total` in the Japanese statistics data, except for when building
        type is `Other` (i.e. building_type_id = 4) then story number is `total`
        (i.e. story_number_id = 0).

        The query T selects the dwelling numbers when building type is `total`
        (i.e. building_type_id = 0) and construction material is `total`
        (i.e. construction_material_id = 0) and story number is `total`
For faster browsing, not all history is shown. View entire blame