diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..fedaf3e747a69392702f812aa7ad7dab4760df1a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +*.zip filter=lfs diff=lfs merge=lfs -text +*.bsq filter=lfs diff=lfs merge=lfs -text +*.tif filter=lfs diff=lfs merge=lfs -text diff --git a/HISTORY.rst b/HISTORY.rst index 9d6b8301f91246b1f2402c143a4081f5d7dea9a3..6f9e0aba86facd0e8d03e280ea0311364bb945f4 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,20 @@ History ======= +0.14.1 (coming soon) +-------------------- + +* Revised GeoArray.save() which fixes two GDAL warnings when writing cloud optimized GeoTiff (COG) format: + + * *Warning 1*: This file used to have optimizations in its layout, but those have been, at least partly, + invalidated by later changes + * *Warning 2*: The IFD has been rewritten at the end of the file, which breaks COG layout. + +* Fixed unsupported band-specific nodata values in case of ENVI output format. +* Added two tests that validate the metadata written by GeoArray.save() for ENVI and GTiff format. +* Track tif and bsq files with git-lfs. + + 0.14.0 (2021-07-26) ------------------- diff --git a/geoarray/baseclasses.py b/geoarray/baseclasses.py index bd4d3c6d391852b8ddb41bfeb3991fae2724cd3d..03d48ce2233f00f7546fb8560f257a0f541e2440 100644 --- a/geoarray/baseclasses.py +++ b/geoarray/baseclasses.py @@ -998,65 +998,51 @@ class GeoArray(object): driver = gdal.GetDriverByName(fmt) if driver is None: - raise Exception("'%s' is not a supported GDAL driver. Refer to www.gdal.org/formats_list.html for full " - "list of GDAL driver codes." % fmt) + raise Exception("'%s' is not a supported GDAL driver. Refer to https://gdal.org/drivers/raster/index.html " + "for full list of GDAL driver codes." % fmt) if not os.path.isdir(os.path.dirname(out_path)): os.makedirs(os.path.dirname(out_path)) envi_metadict = self.metadata.to_ENVI_metadict() - ##################### - # write raster data # - ##################### + ########################### + # get source GDAL dataset # + ########################### - if self.is_inmem: - # write in-memory data - ds_inmem = get_GDAL_ds_inmem(self.arr, # expects rows,columns,bands - self.geotransform, self.projection, - self._nodata) # avoid to compute the nodata value here, so use private attrib. - - # write dataset - ds_out = driver.CreateCopy(out_path, ds_inmem, - options=creationOptions or []) # noqa + ds_src: gdal.Dataset + ds_out: gdal.Dataset - # # rows, columns, bands => bands, rows, columns - # out_arr = self.arr if self.ndim == 2 else np.swapaxes(np.swapaxes(self.arr, 0, 2), 1, 2) - # gdalnumeric.SaveArray(out_arr, out_path, format=fmt, prototype=ds_inmem) # expects bands,rows,columns - # ds_out = gdal.Open(out_path) - - del ds_inmem - del ds_out + if self.is_inmem: + ds_src = get_GDAL_ds_inmem(self.arr, # expects rows,columns,bands + self.geotransform, self.projection, + self._nodata) # avoid to compute the nodata value here, so use private attrib. else: - # transform linked image dataset (not in-memory) to target format - src_ds = gdal.Open(self.filePath) - if not src_ds: - raise Exception('Error reading file: ' + gdal.GetLastErrorMsg()) - + ds_src = gdal.Open(self.filePath) # metadomains = {dom: src_ds.GetMetadata(dom) for dom in src_ds.GetMetadataDomainList()} - gdal.Translate(out_path, src_ds, format=fmt, creationOptions=creationOptions) - del src_ds - - if not os.path.exists(out_path): - raise Exception(gdal.GetLastErrorMsg()) - - ################ - # set metadata # - ################ - - # NOTE: The dataset has to be written BEFORE metadata are added. Otherwise, metadata are not written. + if not ds_src: + raise Exception('Error reading file: ' + gdal.GetLastErrorMsg()) - ds_out = gdal.Open(out_path, gdal.GA_Update) + ######################################### + # write output dataset and set metadata # + ######################################### try: - if not ds_out: - raise Exception('Error reading file: ' + gdal.GetLastErrorMsg()) + # disable to write separate metadata XML files + os.environ['GDAL_PAM_ENABLED'] = 'NO' # ENVI # ######## if fmt == 'ENVI': + # NOTE: The dataset has to be written BEFORE metadata are added. Otherwise, metadata are not written. + + # write ds_src to disk and re-open it to add the metadata + gdal.Translate(out_path, ds_src, format=fmt, creationOptions=creationOptions) + del ds_src + ds_out = gdal.Open(out_path, gdal.GA_Update) + for bidx in range(self.bands): band = ds_out.GetRasterBand(bidx + 1) @@ -1065,24 +1051,22 @@ class GeoArray(object): band.SetDescription(bandname) assert band.GetDescription() == bandname - # NOTE: band-specific nodata values are not supported by the ENVI header format - if 'nodata' in envi_metadict: - nodataVal = self.metadata.band_meta['nodata'][bidx] - band.SetNoDataValue(nodataVal) - assert band.GetNoDataValue() == nodataVal - del band # avoid that band names are written to global meta if 'band_names' in envi_metadict: del envi_metadict['band_names'] + # the expected key name is 'data_ignore_value', see below if 'nodata' in envi_metadict: del envi_metadict['nodata'] # set data_ignore_value in case self.metadata.band_meta contains a unique nodata value - if 'nodata' in self.metadata.band_meta and len(set(self.metadata.band_meta['nodata'])) == 1: - envi_metadict['data_ignore_value'] = str(self.metadata.band_meta['nodata'][0]) + if 'nodata' in self.metadata.band_meta: + if len(set(self.metadata.band_meta['nodata'])) == 1: + envi_metadict['data_ignore_value'] = str(self.metadata.band_meta['nodata'][0]) + else: + warnings.warn("Band-specific nodata values are not supported by the ENVI header format.") ds_out.SetMetadata(envi_metadict, 'ENVI') @@ -1090,44 +1074,60 @@ class GeoArray(object): ds_out.SetDescription(envi_metadict['description']) ds_out.FlushCache() - gdal.Unlink(out_path + '.aux.xml') + # gdal.Unlink(out_path + '.aux.xml') - elif self.metadata.all_meta: - # set global domain metadata - if self.metadata.global_meta: - ds_out.SetMetadata(dict((k, repr(v)) for k, v in self.metadata.global_meta.items())) + else: + ds_out = ds_src - if 'description' in envi_metadict: - ds_out.SetDescription(envi_metadict['description']) + # set metadata + if self.metadata.all_meta: - # set band domain metadata - bandmeta_dict = self.metadata.to_DataFrame().astype(str).to_dict() + # set global domain metadata + if self.metadata.global_meta: + ds_out.SetMetadata(dict((k, repr(v)) for k, v in self.metadata.global_meta.items())) - for bidx in range(self.bands): - band = ds_out.GetRasterBand(bidx + 1) - bandmeta = bandmeta_dict[bidx].copy() + if 'description' in envi_metadict: + ds_out.SetDescription(envi_metadict['description']) - # filter global metadata out - bandmeta = {k: v for k, v in bandmeta.items() if k not in self.metadata.global_meta} - # meta2write = dict((k, repr(v)) for k, v in self.metadata.band_meta.items() if v is not np.nan) + # set band domain metadata + bandmeta_dict = self.metadata.to_DataFrame().astype(str).to_dict() - if 'band_names' in bandmeta: - band.SetDescription(self.metadata.band_meta['band_names'][bidx].strip()) - del bandmeta['band_names'] + for bidx in range(self.bands): + band = ds_out.GetRasterBand(bidx + 1) + bandmeta = bandmeta_dict[bidx].copy() - if 'nodata' in bandmeta: - band.SetNoDataValue(self.metadata.band_meta['nodata'][bidx]) - del bandmeta['nodata'] + # filter global metadata out + bandmeta = {k: v for k, v in bandmeta.items() if k not in self.metadata.global_meta} + # meta2write = dict((k, repr(v)) for k, v in self.metadata.band_meta.items() if v is not np.nan) - if bandmeta: - band.SetMetadata(bandmeta) + if 'band_names' in bandmeta: + band.SetDescription(self.metadata.band_meta['band_names'][bidx].strip()) + del bandmeta['band_names'] - band.FlushCache() - del band + if 'nodata' in bandmeta: + band.SetNoDataValue(self.metadata.band_meta['nodata'][bidx]) + del bandmeta['nodata'] - ds_out.FlushCache() + if bandmeta: + band.SetMetadata(bandmeta) + + band.FlushCache() + del band + + ds_out.FlushCache() + + # write ds_out to disk, + # -> writes the in-memory array or transforms the linked dataset into the target format + gdal.Translate(out_path, ds_out, format=fmt, creationOptions=creationOptions) + del ds_src finally: + if 'GDAL_PAM_ENABLED' in os.environ: + del os.environ['GDAL_PAM_ENABLED'] + + if not os.path.exists(out_path): + raise Exception(gdal.GetLastErrorMsg()) + del ds_out def dump(self, out_path): diff --git a/tests/data/L8_2bands_extract10x11.tif b/tests/data/L8_2bands_extract10x11.tif index 16349cf7b89cde3ba8a02758be49790863ab9c9c..6397094ad79f28697f87a88b86b2cc9d4b9e7668 100755 Binary files a/tests/data/L8_2bands_extract10x11.tif and b/tests/data/L8_2bands_extract10x11.tif differ diff --git a/tests/data/L8_BadDataMask10x11.tif b/tests/data/L8_BadDataMask10x11.tif index 7192b2e583c04de2ffc5b48bbd08c74b84cb4fb2..da6f607c15bdec384a9258c7392d24f8419f1d71 100644 Binary files a/tests/data/L8_BadDataMask10x11.tif and b/tests/data/L8_BadDataMask10x11.tif differ diff --git a/tests/data/subset_metadata.bsq b/tests/data/subset_metadata.bsq index 6152d302f50701274774237fa944c639251a5814..0863c7e88de4e562e0ea2237c2ae184ca4be85f2 100644 Binary files a/tests/data/subset_metadata.bsq and b/tests/data/subset_metadata.bsq differ diff --git a/tests/data/subset_metadata.hdr b/tests/data/subset_metadata.hdr index 2516f5a249e24f73fbf91058ae4baf50b967c977..0324354a506ffbd05412e8d3869f8533f15cca7f 100644 --- a/tests/data/subset_metadata.hdr +++ b/tests/data/subset_metadata.hdr @@ -1,7 +1,6 @@ ENVI description = { - File Resize Result, x resize factor: 1.000000, y resize factor: 1.000000. - [Wed Aug 08 13:35:48 2018]} +File Resize Result, x resize factor: 1.000000, y resize factor: 1.000000. [Wed Aug 08 13:35:48 2018]} samples = 5 lines = 5 bands = 10 @@ -9,32 +8,65 @@ header offset = 0 file type = ENVI Standard data type = 2 interleave = bsq -sensor type = Unknown byte order = 0 -x start = 350 -y start = 10500 -map info = {UTM, 1.000, 1.000, 287232.433, 4080098.314, 1.4200000000e+001, 1.4200000000e+001, 11, North, WGS-84, units=Meters, rotation=-12.00000000} +map info = {UTM, 1, 1, 287232.433, 4080098.314, 14.2, 14.2, 11, North,WGS-84, rotation=-12} coordinate system string = {PROJCS["UTM_Zone_11N",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",500000.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-117.0],PARAMETER["Scale_Factor",0.9996],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]} -wavelength units = Nanometers band names = { - Resize (Band 5:f141006t01p00r16_refl_BSQ.bsq), - Resize (Band 6:f141006t01p00r16_refl_BSQ.bsq), - Resize (Band 7:f141006t01p00r16_refl_BSQ.bsq), - Resize (Band 8:f141006t01p00r16_refl_BSQ.bsq), - Resize (Band 9:f141006t01p00r16_refl_BSQ.bsq), - Resize (Band 10:f141006t01p00r16_refl_BSQ.bsq), - Resize (Band 11:f141006t01p00r16_refl_BSQ.bsq), - Resize (Band 12:f141006t01p00r16_refl_BSQ.bsq), - Resize (Band 13:f141006t01p00r16_refl_BSQ.bsq), - Resize (Band 14:f141006t01p00r16_refl_BSQ.bsq)} -wavelength = { - 404.596649, 414.278656, 423.964661, 433.654663, 443.349670, 453.049652, - 462.752655, 472.460663, 482.173645, 491.890656} -fwhm = { - 9.645100, 9.599000, 9.555200, 9.513600, 9.474300, 9.437300, 9.402500, - 9.370000, 9.339700, 9.311700} -bbl = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} -correction factors = { - 1.10059, 1.08712, 1.0534, 0.99914, 0.98411, 0.99055, 0.96899, 0.98119, - 0.9779, 0.97234} +Resize (Band 5:f141006t01p00r16_refl_BSQ.bsq), +Resize (Band 6:f141006t01p00r16_refl_BSQ.bsq), +Resize (Band 7:f141006t01p00r16_refl_BSQ.bsq), +Resize (Band 8:f141006t01p00r16_refl_BSQ.bsq), +Resize (Band 9:f141006t01p00r16_refl_BSQ.bsq), +Resize (Band 10:f141006t01p00r16_refl_BSQ.bsq), +Resize (Band 11:f141006t01p00r16_refl_BSQ.bsq), +Resize (Band 12:f141006t01p00r16_refl_BSQ.bsq), +Resize (Band 13:f141006t01p00r16_refl_BSQ.bsq), +Resize (Band 14:f141006t01p00r16_refl_BSQ.bsq)} +bbl = { 1, +1, +1, +1, +1, +1, +1, +1, +1, +1 } +corner coordinates lonlat = [[72.45254791675194, 40.30295607534694], [72.74642602223325, 40.30905731897239], [72.46095641250194, 40.07798579894774], [72.75386662265068, 40.08403894986721]] +correction factors = { 1.100590, +1.087120, +1.053400, +0.999140, +0.984110, +0.990550, +0.968990, +0.981190, +0.977900, +0.972340 } +data gain values = 0.010000, 0.010000 +data offset values = 0, 0 +EarthSunDist = 1.005349 +fwhm = { 9.645100, +9.599000, +9.555200, +9.513600, +9.474300, +9.437300, +9.402500, +9.370000, +9.339700, +9.311700 } +sensor type = Unknown +wavelength = { 404.596649, +414.278656, +423.964661, +433.654663, +443.349670, +453.049652, +462.752655, +472.460663, +482.173645, +491.890656 } +wavelength units = Nanometers +x start = 350 +y start = 10500 diff --git a/tests/data/subset_metadata.tif b/tests/data/subset_metadata.tif new file mode 100644 index 0000000000000000000000000000000000000000..b492d9251c920b6c5d04c6e12b8f8e2b9575f60a --- /dev/null +++ b/tests/data/subset_metadata.tif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1583c10f07b4df0879fa9397831ac013c79aae57c432f7467c0cd59d52290b91 +size 1270 diff --git a/tests/test_geoarray.py b/tests/test_geoarray.py index 8be0ef54c65e449de1a927c3fd05099efbb8dcb8..ab5480584f8bef8f16cbe0a1b58c932b2e631ad6 100644 --- a/tests/test_geoarray.py +++ b/tests/test_geoarray.py @@ -34,6 +34,9 @@ import tempfile import os from collections import OrderedDict from typing import Iterable +from tempfile import TemporaryDirectory +from difflib import unified_diff +from subprocess import getoutput import numpy as np from shapely.geometry import Polygon @@ -52,18 +55,21 @@ __author__ = ["Daniel Scheffler", "Jessica Palka"] # Path of the tests-directory in the geoarray-package. tests_path = os.path.abspath(os.path.join(geoarray_root, "..", "..")) -path_data = os.path.join(tests_path, "tests", "data", "L8_2bands_extract10x11.tif") +path_data_tif = os.path.join(tests_path, "tests", "data", "L8_2bands_extract10x11.tif") +path_subset_bsq = os.path.join(tests_path, "tests", "data", "subset_metadata.bsq") +path_subset_hdr = os.path.join(tests_path, "tests", "data", "subset_metadata.hdr") +path_subset_tif = os.path.join(tests_path, "tests", "data", "subset_metadata.tif") # disable matplotlib figure popups matplotlib.use('Template') # disables matplotlib figure popups -def _get_gA_inMem_notInMem(): - gA_inMem = GeoArray(path_data) +def _get_gA_inMem_notInMem(path_file=path_data_tif): + gA_inMem = GeoArray(path_file) gA_inMem.to_mem() gA_inMem.filePath = "" - gA_notInMem = GeoArray(path_data) + gA_notInMem = GeoArray(path_file) params_inMem_NotInMem = [("inMem", gA_inMem), ("notInMem", gA_notInMem)] @@ -84,10 +90,10 @@ class Test_GeoArray(TestCase): with tempfile.TemporaryDirectory() as td: path_zip = os.path.join(td, 'temp_archive.zip') - file_basename = os.path.basename(path_data) + file_basename = os.path.basename(path_data_tif) with zipfile.ZipFile(path_zip, 'w') as zf: - zf.write(path_data, file_basename) + zf.write(path_data_tif, file_basename) gA = GeoArray(f'/vsizip/{path_zip}/{file_basename}') self.assertIsInstance(gA[:], np.ndarray) @@ -482,10 +488,49 @@ class Test_GeoArray(TestCase): # TODO validate written metadata (inMem, notInMem, in case a notInMem dataset already has custom metadata) + @parameterized.expand(_get_gA_inMem_notInMem(path_subset_bsq)) + def test_save_meta_ENVI(self, _, gA): + with TemporaryDirectory(prefix='geoarray__') as td: + p_out = os.path.join(td, 'outfile.bsq') + + gA.save(p_out, fmt='ENVI') + + # compare gdalinfo output + gdalinfo_out = [i + '\n' for i in getoutput(f'gdalinfo -mdd ENVI {p_out}').split('\n')] + gdalinfo_orig = [i + '\n' for i in getoutput(f'gdalinfo -mdd ENVI {path_subset_bsq}').split('\n')] + + diff = ''.join(list(unified_diff(gdalinfo_orig[3:], gdalinfo_out[3:]))) + self.assertFalse(diff) + + # compare header files + with open(p_out[:-4] + '.hdr', 'r') as inF: + new = inF.readlines() + with open(path_subset_hdr, 'r') as inF: + old = inF.readlines() + + diff = ''.join(list(unified_diff(old[3:], new[3:]))) + self.assertFalse(diff) + # os.system(f'gdalinfo {p_out}') + + @parameterized.expand(_get_gA_inMem_notInMem(path_subset_tif)) + def test_save_meta_GTiff(self, _, gA): + with TemporaryDirectory(prefix='geoarray__') as td: + p_out = os.path.join(td, 'outfile.tif') + + gA.save(p_out, fmt='GTiff') + + # compare gdalinfo output + gdalinfo_out = [i + '\n' for i in getoutput(f'gdalinfo {p_out}').split('\n')] + gdalinfo_orig = [i + '\n' for i in getoutput(f'gdalinfo {path_subset_tif}').split('\n')] + + diff = ''.join(list(unified_diff(gdalinfo_orig[2:], gdalinfo_out[2:]))) + self.assertFalse(diff) + # os.system(f'gdalinfo {p_out}') + def test_show(self): # test 3D case self.gA.show() - self.gA.show(interactive=True) # only works if test is started with ipython. + self.gA.show(interactive=True) # only works if test is started with IPython. # test 2D case gA = GeoArray(self.gA[:, :, 0])