Commit ede4c037 authored by Daniel Scheffler
Browse files

Fixed issue #5 (Classification map does not label nodata pixels when calling...

Fixed issue #5 (Classification map does not label nodata pixels when calling classify_image() with unclassified_threshold=None). Added default CMAP nodata value.

Signed-off-by: Daniel Scheffler <danschef@gfz-potsdam.de>
parent c2b3f28e
Pipeline #16948 passed with stage in 3 minutes and 32 seconds
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
"""Base classes for specclassify.""" """Base classes for specclassify."""
import numpy as np import numpy as np
from typing import Union, List # noqa F401 # flake8 issue from typing import Union, List, Tuple # noqa F401 # flake8 issue
from multiprocessing import Pool from multiprocessing import Pool
from tqdm import tqdm from tqdm import tqdm
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
...@@ -76,7 +76,8 @@ class _ImageClassifier(object): ...@@ -76,7 +76,8 @@ class _ImageClassifier(object):
return tilepos, cmap, dists return tilepos, cmap, dists
def classify(self, image_cube, in_nodataVal=None, cmap_nodataVal=None, tiledims=(100, 100)): def classify(self, image_cube, in_nodataVal=None, cmap_nodataVal=-9999, tiledims=(100, 100)):
# type: (Union[GeoArray, np.ndarray], int, int, Tuple[int, int]) -> GeoArray
"""Classify the image. """Classify the image.
:param image_cube: :param image_cube:
...@@ -85,16 +86,23 @@ class _ImageClassifier(object): ...@@ -85,16 +86,23 @@ class _ImageClassifier(object):
:param tiledims: :param tiledims:
:return: :return:
""" """
if not isinstance(cmap_nodataVal, int):
raise TypeError(cmap_nodataVal, "Expected an integer.")
self._cmap_nodataVal = cmap_nodataVal self._cmap_nodataVal = cmap_nodataVal
dtype_cmap = np.int16 dtype_cmap = np.int16
if cmap_nodataVal is not None and not np.can_cast(cmap_nodataVal, dtype_cmap): if cmap_nodataVal is not None and not np.can_cast(cmap_nodataVal, dtype_cmap):
dtype_cmap = np.find_common_type(np.array(self.train_labels), np.array([cmap_nodataVal])) dtype_cmap = np.find_common_type(np.array(self.train_labels), np.array([cmap_nodataVal]))
image_cube_gA = GeoArray(image_cube, nodata=in_nodataVal) # lazily read in tiles to save memory # lazily read in tiles to save memory
image_cube_gA = GeoArray(image_cube, nodata=in_nodataVal)
bounds_alltiles = get_array_tilebounds(image_cube_gA.shape, tiledims) bounds_alltiles = get_array_tilebounds(image_cube_gA.shape, tiledims)
# run classification #
######################
print('Performing %s image classification...' % self.clf_name) print('Performing %s image classification...' % self.clf_name)
if self.CPUs is None or self.CPUs > 1: if self.CPUs is None or self.CPUs > 1:
with Pool(self.CPUs, initializer=_mp_initializer, initargs=(self.train_spectra, image_cube_gA)) as pool: with Pool(self.CPUs, initializer=_mp_initializer, initargs=(self.train_spectra, image_cube_gA)) as pool:
...@@ -105,7 +113,8 @@ class _ImageClassifier(object): ...@@ -105,7 +113,8 @@ class _ImageClassifier(object):
tiles_results = [self._predict_tilewise(bounds) for bounds in tqdm(bounds_alltiles)] tiles_results = [self._predict_tilewise(bounds) for bounds in tqdm(bounds_alltiles)]
# use a local variable to avoid pickling in multiprocessing # use a local variable to avoid pickling in multiprocessing
cmap_dist_shape = (image_cube_gA.rows, image_cube_gA.cols) if tiles_results[0][1].ndim == 2 else \ cmap_dist_shape = \
(image_cube_gA.rows, image_cube_gA.cols) if tiles_results[0][1].ndim == 2 else \
(image_cube_gA.rows, image_cube_gA.cols, tiles_results[0][1].shape[2]) (image_cube_gA.rows, image_cube_gA.cols, tiles_results[0][1].shape[2])
cmap = GeoArray(np.empty(cmap_dist_shape, dtype=dtype_cmap), nodata=cmap_nodataVal) cmap = GeoArray(np.empty(cmap_dist_shape, dtype=dtype_cmap), nodata=cmap_nodataVal)
cmap.unclassified_val = None cmap.unclassified_val = None
...@@ -114,9 +123,12 @@ class _ImageClassifier(object): ...@@ -114,9 +123,12 @@ class _ImageClassifier(object):
for tile_res in tiles_results: for tile_res in tiles_results:
((rS, rE), (cS, cE)), tile_cm = tile_res[:2] ((rS, rE), (cS, cE)), tile_cm = tile_res[:2]
cmap[rS: rE + 1, cS: cE + 1] = tile_cm cmap[rS: rE + 1, cS: cE + 1] = tile_cm
if len(tile_res) == 3: if len(tile_res) == 3:
dist[rS: rE + 1, cS: cE + 1] = tile_res[2] dist[rS: rE + 1, cS: cE + 1] = tile_res[2]
######################
self.cmap = cmap self.cmap = cmap
if len(tiles_results[0]) == 3: if len(tiles_results[0]) == 3:
self._distance_metrics = dist self._distance_metrics = dist
...@@ -133,8 +145,9 @@ class _ImageClassifier(object): ...@@ -133,8 +145,9 @@ class _ImageClassifier(object):
:param imdata: :param imdata:
:return: :return:
""" """
if imdata.nodata is not None and self._cmap_nodataVal is not None: if imdata.nodata is not None:
mask_anynodata = np.any(imdata[:] == imdata.nodata, axis=2) mask_anynodata = np.any(imdata[:] == imdata.nodata, axis=2)
cmap[mask_anynodata] = self._cmap_nodataVal cmap[mask_anynodata] = self._cmap_nodataVal
return cmap return cmap
......
...@@ -35,7 +35,7 @@ global_shared_endmembers = None # type: Union[None, np.ndarray] ...@@ -35,7 +35,7 @@ global_shared_endmembers = None # type: Union[None, np.ndarray]
global_shared_im2classify = None # type: Union[None, GeoArray] global_shared_im2classify = None # type: Union[None, GeoArray]
def classify_image(image, train_spectra, train_labels, classif_alg, in_nodataVal=None, cmap_nodataVal=None, def classify_image(image, train_spectra, train_labels, classif_alg, in_nodataVal=None, cmap_nodataVal=-9999,
tiledims=(1000, 1000), CPUs=None, return_distance=False, unclassified_threshold=None, tiledims=(1000, 1000), CPUs=None, return_distance=False, unclassified_threshold=None,
unclassified_pixVal=-1, **kwargs): unclassified_pixVal=-1, **kwargs):
# type: (Union[np.ndarray, GeoArray], np.ndarray, Union[np.ndarray, List[int]], str, int, int, tuple, int, bool, Union[int, float, str], int, dict) -> Union[GeoArray, Tuple[GeoArray, np.ndarray]] # noqa E501 # type: (Union[np.ndarray, GeoArray], np.ndarray, Union[np.ndarray, List[int]], str, int, int, tuple, int, bool, Union[int, float, str], int, dict) -> Union[GeoArray, Tuple[GeoArray, np.ndarray]] # noqa E501
......
...@@ -73,11 +73,15 @@ def _get_testIm(shape, nodataVal): ...@@ -73,11 +73,15 @@ def _get_testIm(shape, nodataVal):
size=spec.shape[0]//10), :] = \ size=spec.shape[0]//10), :] = \
cluster_centers[i, :] cluster_centers[i, :]
# fill remaining nodata positions with first spectrum
spec[spec[:, 0] == nodataVal, :] = cluster_centers[0, :]
# add 10% noise to each spectrum # add 10% noise to each spectrum
spec = spec * np.random.normal(1, .1, spec.size).reshape(spec.shape) noise_factor = np.random.normal(1, .1, spec.size).reshape(spec.shape)
spec_out = spec * noise_factor
# return in desired image dimensions # return in desired image dimensions
return spec.reshape(shape) return spec_out.reshape(shape)
test_gA = GeoArray(_get_testIm(shape=(1010, 1010, 6), test_gA = GeoArray(_get_testIm(shape=(1010, 1010, 6),
...@@ -109,12 +113,12 @@ class Test_MinimumDistance_Classifier(unittest.TestCase): ...@@ -109,12 +113,12 @@ class Test_MinimumDistance_Classifier(unittest.TestCase):
def test_label_unclassified_pixels_absolute_th(self): def test_label_unclassified_pixels_absolute_th(self):
MDC = MinimumDistance_Classifier(cluster_centers, cluster_labels, CPUs=1) MDC = MinimumDistance_Classifier(cluster_centers, cluster_labels, CPUs=1)
MDC.classify(test_gA, in_nodataVal=-9999, cmap_nodataVal=-9999, tiledims=(400, 200)) MDC.classify(test_gA, in_nodataVal=-9999, cmap_nodataVal=-9999, tiledims=(400, 200))
MDC.label_unclassified_pixels(label_unclassified=-1, threshold=6000) MDC.label_unclassified_pixels(label_unclassified=-1, threshold=6000)
def test_label_unclassified_pixels_relative_th(self): def test_label_unclassified_pixels_relative_th(self):
MDC = MinimumDistance_Classifier(cluster_centers, cluster_labels, CPUs=1) MDC = MinimumDistance_Classifier(cluster_centers, cluster_labels, CPUs=1)
MDC.classify(test_gA, in_nodataVal=-9999, cmap_nodataVal=-9999, tiledims=(400, 200)) MDC.classify(test_gA, in_nodataVal=-9999, cmap_nodataVal=-9999, tiledims=(400, 200))
MDC.label_unclassified_pixels(label_unclassified=-1, threshold='10%') MDC.label_unclassified_pixels(label_unclassified=-1, threshold='10%')
...@@ -226,22 +230,40 @@ class Test_kNN_SAM_Classifier(unittest.TestCase): ...@@ -226,22 +230,40 @@ class Test_kNN_SAM_Classifier(unittest.TestCase):
def test_classification_outputs(self): def test_classification_outputs(self):
SC = kNN_SAM_Classifier(cluster_centers, n_neighbors=self.n_neighbors, CPUs=1) SC = kNN_SAM_Classifier(cluster_centers, n_neighbors=self.n_neighbors, CPUs=1)
cmap = SC.classify(test_gA, in_nodataVal=None, cmap_nodataVal=None, tiledims=(400, 200)) cmap = SC.classify(test_gA, in_nodataVal=-9999, tiledims=(400, 200))
# check if first 20 pixels in col 0 contain the same cmap value (input image is nodata in all bands there) # check if first 20 pixels in col 0 contain the nodata value (input image is nodata in all bands there)
vals = np.unique(cmap[:20, 0, :]) vals = np.unique(cmap[:20, 0, :])
self.assertEqual(len(vals), self.n_neighbors) # n differnt neighbors self.assertEqual(len(vals), 1) # only the unclassified value
self.assertEqual(vals[0], -9999) # default nodata value
# check if first 20 pixels in col 1 contain the nodata value (input image is nodata in one band there)
# (nodata in one band would cause faulty results due to wrong spectral distance measures)
vals = np.unique(cmap[:20, 1, :])
self.assertEqual(len(vals), 1) # only the unclassified value
self.assertEqual(vals[0], -9999) # default nodata value
# check if the cmap contains only values between 0 and 49 in the rest of the image
vals = np.unique(cmap[:, 2:, :])
self.assertTrue(0 <= min(vals) < cluster_centers.shape[0]) # cmap values min between 0 and 49 self.assertTrue(0 <= min(vals) < cluster_centers.shape[0]) # cmap values min between 0 and 49
self.assertTrue(0 <= max(vals) < cluster_centers.shape[0]) # cmap values max between 0 and 49 self.assertTrue(0 <= max(vals) < cluster_centers.shape[0]) # cmap values max between 0 and 49
# check if first 20 pixels in col 1 contain the different cmap value (input image has values in some bands) def test_classification_outputs_inNodataVal_not_given(self):
vals = np.unique(cmap[:20, 1, :]) gA = GeoArray(test_gA[:], nodata=None)
self.assertGreater(len(vals), self.n_neighbors) # n different neighbors
SC = kNN_SAM_Classifier(cluster_centers, n_neighbors=self.n_neighbors, CPUs=1)
cmap = SC.classify(gA, tiledims=(400, 200))
# check if first 20 pixels in col 0 contain the unclassified value (input image is nodata in all bands there)
# (if no nodata values are given, the nodata positions must at least be labelled as unclassified via the dists)
vals = np.unique(cmap[:20, 0, :])
self.assertEqual(len(vals), self.n_neighbors) # n different neighbors
self.assertTrue(0 <= min(vals) < cluster_centers.shape[0]) # cmap values min between 0 and 49 self.assertTrue(0 <= min(vals) < cluster_centers.shape[0]) # cmap values min between 0 and 49
self.assertTrue(0 <= max(vals) < cluster_centers.shape[0]) # cmap values max between 0 and 49 self.assertTrue(0 <= max(vals) < cluster_centers.shape[0]) # cmap values max between 0 and 49
# check if the cmap contains only values between 0 and 49 in the rest of the image # check if first 20 pixels in col 1 contain varying cmap values (input image has values in some bands)
vals = np.unique(cmap[:, 2:, :]) vals = np.unique(cmap[:20, 1, :])
self.assertGreater(len(vals), self.n_neighbors) # n different neighbors
self.assertTrue(0 <= min(vals) < cluster_centers.shape[0]) # cmap values min between 0 and 49 self.assertTrue(0 <= min(vals) < cluster_centers.shape[0]) # cmap values min between 0 and 49
self.assertTrue(0 <= max(vals) < cluster_centers.shape[0]) # cmap values max between 0 and 49 self.assertTrue(0 <= max(vals) < cluster_centers.shape[0]) # cmap values max between 0 and 49
...@@ -277,16 +299,20 @@ class Test_kNN_SAM_Classifier(unittest.TestCase): ...@@ -277,16 +299,20 @@ class Test_kNN_SAM_Classifier(unittest.TestCase):
def test_label_unclassified_pixels_cmapNodataVal_not_given(self): def test_label_unclassified_pixels_cmapNodataVal_not_given(self):
SC = kNN_SAM_Classifier(cluster_centers, n_neighbors=self.n_neighbors, CPUs=None) SC = kNN_SAM_Classifier(cluster_centers, n_neighbors=self.n_neighbors, CPUs=None)
SC.classify(test_gA, in_nodataVal=-9999, cmap_nodataVal=None, tiledims=(400, 200)) SC.classify(test_gA, in_nodataVal=-9999, tiledims=(400, 200))
# now label all pixels with SAM above 10 degrees with -1 # now label all pixels with SAM above 10 degrees with -1
cmap = SC.label_unclassified_pixels(label_unclassified=-1, threshold=10) cmap = SC.label_unclassified_pixels(label_unclassified=-1, threshold=10)
# check if first 20 pixels in cols 0 and 1 contain only -1 # check if first 20 pixels in cols 0 and 1 keep untouched (must contain -9999 due to nodata in the input image)
# (nodata pixels in the input image should appear as unclassified in the cmap (-1) if cmap_nodataVal=None) # (nodata pixels in the input image should appear as nodata in the cmap (-9999)
vals = np.unique(cmap[:20, :2, :]) vals = np.unique(cmap[:20, :2, :])
self.assertEqual(len(vals), 1) self.assertEqual(len(vals), 1)
self.assertEqual(vals[0], -1) self.assertEqual(vals[0], -9999)
# check if there are pixels in the rest of the image that are labelled as unclassified
vals = np.unique(cmap[:, 2:, :])
self.assertTrue(-1 in vals)
class Test_FEDSA_Classifier(unittest.TestCase): class Test_FEDSA_Classifier(unittest.TestCase):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment