# -*- coding: utf-8 -*-

# py_tools_ds - A collection of geospatial data analysis tools that simplify standard
# operations when handling geospatial raster and vector data as well as projections.
#
# Copyright (C) 2016-2021
# - Daniel Scheffler (GFZ Potsdam, daniel.scheffler@gfz-potsdam.de)
# - Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences Potsdam,
#   Germany (https://www.gfz-potsdam.de/)
#
# This software was developed within the context of the GeoMultiSens project funded
# by the German Federal Ministry of Education and Research
# (project grant code: 01 IS 14 010 A-C).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import zipfile
import tarfile
30
31
32
import gzip
from logging import getLogger
import shutil
33
34
35
36

__author__ = 'Daniel Scheffler'


37
def decompress(compressed_file, outputpath=None, logger=getLogger('decompressor')):
    """Decompress ZIP, TAR, TAR.GZ, TGZ and GZ archives to a given output path.

    ZIP and TAR archives are extracted into the directory given by the output path, a plain GZ
    file is decompressed into a single file at the output path. Archive members that already
    exist at the destination with an identical size are skipped.

    :param compressed_file: path of the archive; the format is selected from the file extension
                            (.zip, .tar, .tar.gz, .tgz or .gz)
    :param outputpath:      destination path; its folder part defaults to the folder of the archive
                            and its last component defaults to the archive file name up to the
                            first dot
    :param logger:          instance of logging.Logger
    :raises AssertionError: if a file ending with .zip is not a valid ZIP archive
    :raises ValueError:     if the file extension is not supported
    """
    # define output folder and filename
    in_folder, in_filename = os.path.split(compressed_file)
    out_folder, out_filename = os.path.split(outputpath) if outputpath else ('', '')
    out_filename = out_filename or in_filename.partition(".")[0]
    out_folder = out_folder or in_folder
    outputpath = os.path.join(out_folder, out_filename)

    # decompress
    logger.info('Extracting ' + in_filename + '...')

    # out_folder is empty if the archive is given as a bare file name within the current working
    # directory; os.makedirs('') would raise in that case
    if out_folder and not os.path.isdir(out_folder):
        os.makedirs(out_folder)

    if compressed_file.endswith(".zip"):
        # explicit check instead of an assert statement, which is stripped when running with -O;
        # AssertionError is kept so that existing callers still catch the same exception type
        if not zipfile.is_zipfile(compressed_file):
            msg = compressed_file + " is not a valid zipfile!"
            logger.critical(msg)
            raise AssertionError(msg)

        with zipfile.ZipFile(compressed_file) as zf:
            entries = [(info.filename, info.file_size, info) for info in zf.infolist()]
            _extract_members(entries, zf.extract, in_filename, outputpath, logger)

    elif compressed_file.endswith((".tar", ".tar.gz", ".tgz")):
        # NOTE(review): TAR members are extracted as stored in the archive; only use this on
        #               archives from trusted sources (see the tarfile documentation).
        with tarfile.open(compressed_file) as tf:
            entries = [(member.name, member.size, member) for member in tf.getmembers()]
            _extract_members(entries, tf.extract, in_filename, outputpath, logger)

    elif compressed_file.endswith(".gz"):
        with gzip.open(compressed_file, 'rb') as f_in:
            with open(outputpath, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)

    else:
        raise ValueError('Unexpected file extension of compressed file. Supported file extensions are: '
                         '*.zip, *.tar, *.tar.gz, *.tgz and *.gz')


def _extract_members(entries, extract, in_filename, outputpath, logger):
    """Extract all archive members that do not yet exist with identical size below outputpath.

    :param entries:     iterable of (member name, uncompressed size, member object) tuples
    :param extract:     bound extract method of the opened archive, called as extract(member, path)
    :param in_filename: file name of the archive (only used for log messages)
    :param outputpath:  directory to extract the members to
    :param logger:      instance of logging.Logger
    """
    count_extracted = 0

    for name, size, member in entries:
        target = os.path.join(outputpath, name)

        if os.path.exists(target) and size == os.stat(target).st_size:
            logger.warning("file '%s' from '%s' already exists in the directory: '%s'"
                           % (name, in_filename, outputpath))
            continue

        _extract_with_retry(extract, member, outputpath, logger)
        logger.info("Extracting %s..." % name)
        count_extracted += 1

    if count_extracted == 0:
        logger.warning("No files of %s have been decompressed.\n" % in_filename)
    else:
        logger.info("Extraction of '" + in_filename + "' was successful\n")


def _extract_with_retry(extract, member, outputpath, logger, wait_seconds=5):
    """Run extract(member, outputpath) and retry as long as the target device is out of space.

    :param extract:      bound extract method of the opened archive
    :param member:       archive member to extract
    :param outputpath:   directory to extract the member to
    :param logger:       instance of logging.Logger
    :param wait_seconds: pause between two attempts
    :raises OSError:     any OSError other than 'no space left on device'
    """
    import errno
    import time

    while True:
        try:
            extract(member, outputpath)
        except OSError as e:
            if e.errno != errno.ENOSPC:
                raise
            logger.warning('No space left on device. Waiting..')
            # pause before the next attempt instead of spinning at full CPU load
            time.sleep(wait_seconds)
        else:
            return