options_schema.py 19.2 KB
Newer Older
1
2
3
4
# -*- coding: utf-8 -*-

# gms_preprocessing, spatial and spectral homogenization of satellite remote sensing data
#
5
# Copyright (C) 2020  Daniel Scheffler (GFZ Potsdam, daniel.scheffler@gfz-potsdam.de)
6
7
8
9
10
11
#
# This software was developed within the context of the GeoMultiSens project funded
# by the German Federal Ministry of Education and Research
# (project grant code: 01 IS 14 010 A-C).
#
# This program is free software: you can redistribute it and/or modify it under
12
# the terms of the GNU General Public License as published by the Free Software
13
14
# Foundation, either version 3 of the License, or (at your option) any later version.
# Please note the following exception: `gms_preprocessing` depends on tqdm, which
15
16
17
# is distributed under the Mozilla Public Licence (MPL) v2.0 except for the files
# "tqdm/_tqdm.py", "setup.py", "README.rst", "MANIFEST.in" and ".gitignore".
# Details can be found here: https://github.com/tqdm/tqdm/blob/master/LICENCE.
18
19
20
21
22
23
24
25
26
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with this program.  If not, see <http://www.gnu.org/licenses/>.

27
"""Definition of gms options schema (as used by cerberus library)."""
28
29


30
gms_schema_input = dict(
31
    global_opts=dict(
32
        type='dict', required=False,
33
        schema=dict(
34
            inmem_serialization=dict(type='boolean', required=False),
35
            parallelization_level=dict(type='string', required=False, allowed=['scenes', 'tiles']),
36
37
            spatial_index_server_host=dict(type='string', required=False),
            spatial_index_server_port=dict(type='integer', required=False),
38
            CPUs=dict(type='integer', required=False, nullable=True),
39
            CPUs_all_jobs=dict(type='integer', required=False, nullable=True),
40
41
            max_mem_usage=dict(type='integer', required=False, min=0, max=100),
            critical_mem_usage=dict(type='integer', required=False, min=0, max=100),
42
            max_parallel_reads_writes=dict(type='integer', required=False, min=0),
43
            allow_subMultiprocessing=dict(type='boolean', required=False),
44
            delete_old_output=dict(type='boolean', required=False),
45
            disable_exception_handler=dict(type='boolean', required=False),
46
47
            disable_IO_locks=dict(type='boolean', required=False),
            disable_CPU_locks=dict(type='boolean', required=False),
48
            disable_DB_locks=dict(type='boolean', required=False),
49
50
            disable_memory_locks=dict(type='boolean', required=False),
            min_version_mem_usage_stats=dict(type='string', required=False),
51
52
            log_level=dict(type='string', required=False, allowed=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']),
            tiling_block_size_XY=dict(type='list', required=False, schema=dict(type="integer"), minlength=2,
53
                                      maxlength=2),
54
55
56
            is_test=dict(type='boolean', required=False),
            profiling=dict(type='boolean', required=False),
            benchmark_global=dict(type='boolean', required=False),
57
        )),
58
    paths=dict(
59
        type='dict', required=False,
60
        schema=dict(
61
62
63
64
65
66
67
68
69
70
71
72
73
            path_fileserver=dict(type='string', required=False),
            path_archive=dict(type='string', required=False),
            path_procdata_scenes=dict(type='string', required=False),
            path_procdata_MGRS=dict(type='string', required=False),
            path_tempdir=dict(type='string', required=False),
            path_benchmarks=dict(type='string', required=False),
            path_job_logs=dict(type='string', required=False),
            path_spatIdxSrv=dict(type='string', required=False),
            path_SNR_models=dict(type='string', required=False),
            path_dem_proc_srtm_90m=dict(type='string', required=False),
            path_earthSunDist=dict(type='string', required=False),
            path_solar_irr=dict(type='string', required=False),
            path_cloud_classif=dict(type='string', required=False),
74
            path_custom_sicor_options=dict(type='string', required=False, nullable=True),
75
            path_ECMWF_db=dict(type='string', required=False),
76
            path_spechomo_classif=dict(type='string', required=False, nullable=True),
77
78
        )),
    processors=dict(
79
        type='dict', required=False,
80
        schema=dict(
81
82
83
84
85
86
87
88
            general_opts=dict(type='dict', required=False, schema=dict(
                skip_thermal=dict(type='boolean', required=False),
                skip_pan=dict(type='boolean', required=False),
                sort_bands_by_cwl=dict(type='boolean', required=False),
                target_radunit_optical=dict(type='string', required=False, allowed=['Rad', 'TOA_Ref', 'BOA_Ref']),
                target_radunit_thermal=dict(type='string', required=False, allowed=['Rad', 'Temp']),
                scale_factor_TOARef=dict(type='integer', required=False),
                scale_factor_BOARef=dict(type='integer', required=False),
89
90
                mgrs_pixel_buffer=dict(type='integer', required=False),
                output_data_compression=dict(type='boolean', required=False),
91
                write_ENVIclassif_cloudmask=dict(type='boolean', required=False),
92
                )),
93
94
95
96
            L1A=dict(type='dict', required=False, schema=dict(
                run_processor=dict(type='boolean', required=False),
                write_output=dict(type='boolean', required=False),
                delete_output=dict(type='boolean', required=False),
97
                SZA_SAA_calculation_accurracy=dict(type='string', required=False, allowed=['coarse', 'fine']),
98
                export_VZA_SZA_SAA_RAA_stats=dict(type='boolean', required=False),
99
                )),
100
101
102
103
104
            L1B=dict(type='dict', required=False, schema=dict(
                run_processor=dict(type='boolean', required=False),
                write_output=dict(type='boolean', required=False),
                delete_output=dict(type='boolean', required=False),
                skip_coreg=dict(type='boolean', required=False),
105
106
107
108
109
                spatial_ref_min_overlap=dict(type='float', required=False, min=0, max=100),
                spatial_ref_min_cloudcov=dict(type='float', required=False, min=0, max=100),
                spatial_ref_max_cloudcov=dict(type='float', required=False, min=0, max=100),
                spatial_ref_plusminus_days=dict(type='integer', required=False),
                spatial_ref_plusminus_years=dict(type='integer', required=False),
110
111
112
                coreg_band_wavelength_for_matching=dict(type='integer', required=False, min=350, max=2500),
                coreg_max_shift_allowed=dict(type='float', required=False, min=0),
                coreg_window_size=dict(type='list', required=False, minlength=0, maxlength=2,
113
                                       schema=dict(type='integer', required=False, min=8)),
114
                )),
115
116
117
118
            L1C=dict(type='dict', required=False, schema=dict(
                run_processor=dict(type='boolean', required=False),
                write_output=dict(type='boolean', required=False),
                delete_output=dict(type='boolean', required=False),
119
                cloud_masking_algorithm=dict(type='dict', required=False, schema={
120
121
122
123
124
125
126
127
                    'Landsat-4': dict(type='string', required=False, allowed=['FMASK', 'Classical Bayesian', 'SICOR']),
                    'Landsat-5': dict(type='string', required=False, allowed=['FMASK', 'Classical Bayesian', 'SICOR']),
                    'Landsat-7': dict(type='string', required=False, allowed=['FMASK', 'Classical Bayesian', 'SICOR']),
                    'Landsat-8': dict(type='string', required=False, allowed=['FMASK', 'Classical Bayesian', 'SICOR']),
                    'Sentinel-2A': dict(type='string', required=False, allowed=['FMASK', 'Classical Bayesian',
                                                                                'SICOR']),
                    'Sentinel-2B': dict(type='string', required=False, allowed=['FMASK', 'Classical Bayesian',
                                                                                'SICOR']),
128
                    }),
129
130
                export_L1C_obj_dumps=dict(type='boolean', required=False),
                auto_download_ecmwf=dict(type='boolean', required=False),
131
                ac_fillnonclear_areas=dict(type='boolean', required=False),
Daniel Scheffler's avatar
Daniel Scheffler committed
132
133
                ac_clear_area_labels=dict(type='list', required=False, schema=dict(type='string', allowed=[
                    "Clear", "Snow", "Water", "Shadow", "Cirrus", "Cloud"])),
134
135
                ac_scale_factor_errors=dict(type='integer', required=False),
                ac_max_ram_gb=dict(type='integer', required=False),
136
                ac_estimate_accuracy=dict(type='boolean', required=False),
137
                ac_bandwise_accuracy=dict(type='boolean', required=False),
138
                )),
139
140
141
142
143
144
            L2A=dict(type='dict', required=False, schema=dict(
                run_processor=dict(type='boolean', required=False),
                write_output=dict(type='boolean', required=False),
                delete_output=dict(type='boolean', required=False),
                align_coord_grids=dict(type='boolean', required=False),
                match_gsd=dict(type='boolean', required=False),
145
146
147
148
                spatial_resamp_alg=dict(type='string', required=False,
                                        allowed=['nearest', 'bilinear', 'cubic', 'cubic_spline', 'lanczos', 'average',
                                                 'mode', 'max', 'min', 'med', 'q1', 'q3']),
                clip_to_extent=dict(type='boolean', required=False),
149
                spathomo_estimate_accuracy=dict(type='boolean', required=False),
150
                )),
151
152
153
154
            L2B=dict(type='dict', required=False, schema=dict(
                run_processor=dict(type='boolean', required=False),
                write_output=dict(type='boolean', required=False),
                delete_output=dict(type='boolean', required=False),
155
                spechomo_method=dict(type='string', required=False, allowed=['LI', 'LR', 'QR', 'RFR']),
156
                spechomo_n_clusters=dict(type='integer', required=False, allowed=[1, 5, 10, 15, 20, 30, 40, 50]),
157
                spechomo_classif_alg=dict(type='string', required=False, allowed=['MinDist', 'kNN', 'SAM', 'SID']),
158
                spechomo_kNN_n_neighbors=dict(type='integer', required=False, min=0),
159
                spechomo_estimate_accuracy=dict(type='boolean', required=False),
160
                spechomo_bandwise_accuracy=dict(type='boolean', required=False),
161
                )),
162
163
164
165
            L2C=dict(type='dict', required=False, schema=dict(
                run_processor=dict(type='boolean', required=False),
                write_output=dict(type='boolean', required=False),
                delete_output=dict(type='boolean', required=False),
166
167
168
                )),
        )),
    usecase=dict(
169
170
171
172
        type='dict', required=False, schema=dict(
            virtual_sensor_id=dict(type='integer', required=False),  # TODO add possible values
            datasetid_spatial_ref=dict(type='integer', required=False, nullable=True),
            datasetid_spectral_ref=dict(type='integer', required=False, nullable=True),
173
174
175
            target_CWL=dict(type='list', required=False, schema=dict(type='float')),
            target_FWHM=dict(type='list', required=False, schema=dict(type='float')),
            target_gsd=dict(type='list', required=False, schema=dict(type='float'),  maxlength=2),
176
            target_epsg_code=dict(type='integer', required=False, nullable=True),
177
178
179
180
            spatial_ref_gridx=dict(type='list', required=False, schema=dict(type='float'), maxlength=2),
            spatial_ref_gridy=dict(type='list', required=False, schema=dict(type='float'), maxlength=2),
        )),
)
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200


def get_updated_schema(source_schema, key2update, new_value):
    def deep_update(schema, key2upd, new_val):
        """Return true if update, else false"""

        for key in schema:
            if key == key2upd:
                schema[key] = new_val
            elif isinstance(schema[key], dict):
                deep_update(schema[key], key2upd, new_val)

        return schema

    from copy import deepcopy
    tgt_schema = deepcopy(source_schema)
    return deep_update(tgt_schema, key2update, new_value)


gms_schema_config_output = get_updated_schema(gms_schema_input, key2update='required', new_value=True)
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218


parameter_mapping = dict(
    # global opts
    inmem_serialization=('global_opts', 'inmem_serialization'),
    parallelization_level=('global_opts', 'parallelization_level'),
    spatial_index_server_host=('global_opts', 'spatial_index_server_host'),
    spatial_index_server_port=('global_opts', 'spatial_index_server_port'),
    CPUs=('global_opts', 'CPUs'),
    CPUs_all_jobs=('global_opts', 'CPUs_all_jobs'),
    max_mem_usage=('global_opts', 'max_mem_usage'),
    critical_mem_usage=('global_opts', 'critical_mem_usage'),
    max_parallel_reads_writes=('global_opts', 'max_parallel_reads_writes'),
    allow_subMultiprocessing=('global_opts', 'allow_subMultiprocessing'),
    delete_old_output=('global_opts', 'delete_old_output'),
    disable_exception_handler=('global_opts', 'disable_exception_handler'),
    disable_IO_locks=('global_opts', 'disable_IO_locks'),
    disable_CPU_locks=('global_opts', 'disable_CPU_locks'),
219
    disable_DB_locks=('global_opts', 'disable_DB_locks'),
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
    disable_memory_locks=('global_opts', 'disable_memory_locks'),
    min_version_mem_usage_stats=('global_opts', 'min_version_mem_usage_stats'),
    log_level=('global_opts', 'log_level'),
    tiling_block_size_XY=('global_opts', 'tiling_block_size_XY'),
    is_test=('global_opts', 'is_test'),
    profiling=('global_opts', 'profiling'),
    benchmark_global=('global_opts', 'benchmark_global'),

    # paths
    path_fileserver=('paths', 'path_fileserver'),
    path_archive=('paths', 'path_archive'),
    path_procdata_scenes=('paths', 'path_procdata_scenes'),
    path_procdata_MGRS=('paths', 'path_procdata_MGRS'),
    path_tempdir=('paths', 'path_tempdir'),
    path_benchmarks=('paths', 'path_benchmarks'),
    path_job_logs=('paths', 'path_job_logs'),
    path_spatIdxSrv=('paths', 'path_spatIdxSrv'),
    path_SNR_models=('paths', 'path_SNR_models'),
    path_dem_proc_srtm_90m=('paths', 'path_dem_proc_srtm_90m'),
    path_earthSunDist=('paths', 'path_earthSunDist'),
    path_solar_irr=('paths', 'path_solar_irr'),
    path_cloud_classif=('paths', 'path_cloud_classif'),
    path_custom_sicor_options=('paths', 'path_custom_sicor_options'),
    path_ECMWF_db=('paths', 'path_ECMWF_db'),
    path_spechomo_classif=('paths', 'path_spechomo_classif'),

    # processors > general opts
    skip_thermal=('processors', 'general_opts', 'skip_thermal'),
    skip_pan=('processors', 'general_opts', 'skip_pan'),
    sort_bands_by_cwl=('processors', 'general_opts', 'sort_bands_by_cwl'),
    target_radunit_optical=('processors', 'general_opts', 'target_radunit_optical'),
    target_radunit_thermal=('processors', 'general_opts', 'target_radunit_thermal'),
    scale_factor_TOARef=('processors', 'general_opts', 'scale_factor_TOARef'),
    scale_factor_BOARef=('processors', 'general_opts', 'scale_factor_BOARef'),
    mgrs_pixel_buffer=('processors', 'general_opts', 'mgrs_pixel_buffer'),
    output_data_compression=('processors', 'general_opts', 'output_data_compression'),
    write_ENVIclassif_cloudmask=('processors', 'general_opts', 'write_ENVIclassif_cloudmask'),

    # processors > L1A
    exec_L1AP=('processors', 'L1A', ['run_processor', 'write_output', 'delete_output']),
    SZA_SAA_calculation_accurracy=('processors', 'L1A', 'SZA_SAA_calculation_accurracy'),
    export_VZA_SZA_SAA_RAA_stats=('processors', 'L1A', 'export_VZA_SZA_SAA_RAA_stats'),

    # processors > L1B
    exec_L1BP=('processors', 'L1B', ['run_processor', 'write_output', 'delete_output']),
    skip_coreg=('processors', 'L1B', 'skip_coreg'),
    spatial_ref_min_overlap=('processors', 'L1B', 'spatial_ref_min_overlap'),
267
    spatial_ref_min_cloudcov=('processors', 'L1B', 'spatial_ref_min_cloudcov'),
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
    spatial_ref_max_cloudcov=('processors', 'L1B', 'spatial_ref_max_cloudcov'),
    spatial_ref_plusminus_days=('processors', 'L1B', 'spatial_ref_plusminus_days'),
    spatial_ref_plusminus_years=('processors', 'L1B', 'spatial_ref_plusminus_years'),
    coreg_band_wavelength_for_matching=('processors', 'L1B', 'coreg_band_wavelength_for_matching'),
    coreg_max_shift_allowed=('processors', 'L1B', 'coreg_max_shift_allowed'),
    coreg_window_size=('processors', 'L1B', 'coreg_window_size'),

    # processors > L1C
    exec_L1CP=('processors', 'L1C', ['run_processor', 'write_output', 'delete_output']),
    cloud_masking_algorithm=('processors', 'L1C', 'cloud_masking_algorithm'),
    export_L1C_obj_dumps=('processors', 'L1C', 'export_L1C_obj_dumps'),
    auto_download_ecmwf=('processors', 'L1C', 'auto_download_ecmwf'),
    ac_fillnonclear_areas=('processors', 'L1C', 'ac_fillnonclear_areas'),
    ac_clear_area_labels=('processors', 'L1C', 'ac_clear_area_labels'),
    ac_scale_factor_errors=('processors', 'L1C', 'ac_scale_factor_errors'),
    ac_max_ram_gb=('processors', 'L1C', 'ac_max_ram_gb'),
    ac_estimate_accuracy=('processors', 'L1C', 'ac_estimate_accuracy'),
    ac_bandwise_accuracy=('processors', 'L1C', 'ac_bandwise_accuracy'),

    # processors > L2A
    exec_L2AP=('processors', 'L2A', ['run_processor', 'write_output', 'delete_output']),
    align_coord_grids=('processors', 'L2A', 'align_coord_grids'),
    match_gsd=('processors', 'L2A', 'match_gsd'),
    spatial_resamp_alg=('processors', 'L2A', 'spatial_resamp_alg'),
    clip_to_extent=('processors', 'L2A', 'clip_to_extent'),
    spathomo_estimate_accuracy=('processors', 'L2A', 'spathomo_estimate_accuracy'),

    # processors > L2B
    exec_L2BP=('processors', 'L2B', ['run_processor', 'write_output', 'delete_output']),
    spechomo_method=('processors', 'L2B', 'spechomo_method'),
298
299
300
    spechomo_n_clusters=('processors', 'L2B', 'spechomo_n_clusters'),
    spechomo_classif_alg=('processors', 'L2B', 'spechomo_classif_alg'),
    spechomo_kNN_n_neighbors=('processors', 'L2B', 'spechomo_kNN_n_neighbors'),
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
    spechomo_estimate_accuracy=('processors', 'L2B', 'spechomo_estimate_accuracy'),
    spechomo_bandwise_accuracy=('processors', 'L2B', 'spechomo_bandwise_accuracy'),

    # processors > L2C
    exec_L2CP=('processors', 'L2C', ['run_processor', 'write_output', 'delete_output']),

    # usecase
    virtual_sensor_id=('usecase', 'virtual_sensor_id'),
    datasetid_spatial_ref=('usecase', 'datasetid_spatial_ref'),
    virtual_sensor_name=('usecase', 'virtual_sensor_name'),
    datasetid_spectral_ref=('usecase', 'datasetid_spectral_ref'),
    target_CWL=('usecase', 'target_CWL'),
    target_FWHM=('usecase', 'target_FWHM'),
    target_gsd=('usecase', 'target_gsd'),
    target_epsg_code=('usecase', 'target_epsg_code'),
    spatial_ref_gridx=('usecase', 'spatial_ref_gridx'),
    spatial_ref_gridy=('usecase', 'spatial_ref_gridy'),
)


def get_param_from_json_config(paramname, json_config):
    keymap = parameter_mapping[paramname]  # tuple

    dict2search = json_config
    for i, k in enumerate(keymap):
        if i < len(keymap) - 1:
            # not the last element of the tuple -> contains a sub-dictionary
            dict2search = dict2search[k]
        elif isinstance(k, list):
            return [dict2search[sk] for sk in k]
        else:
            return dict2search[k]