Commit 52b1f00b authored by Daniel Scheffler's avatar Daniel Scheffler
Browse files

Merge remote-tracking branch 'remotes/origin/feature/spectral_homogenization'


Former-commit-id: e34c1814
Former-commit-id: 8afcf6ea
parents 4d4b9e2b 7f8570f9
......@@ -53,6 +53,7 @@ class L2B_object(L2A_object):
self.proc_level = 'L2B'
def spectral_homogenization(self):
"""Apply spectral homogenization, i.e., prediction of the spectral bands of the target sensor."""
#################################################################
# collect some information specifying the needed homogenization #
#################################################################
......@@ -120,13 +121,19 @@ class L2B_object(L2A_object):
class SpectralHomogenizer(object):
"""Class for applying spectral homogenization by applying an interpolation or machine learning approach."""
def __init__(self, classifier_rootDir='', logger=None):
"""Get instance of SpectralHomogenizer.
:param classifier_rootDir: root directory where machine learning classifiers are stored.
:param logger: instance of logging.Logger
"""
self.classifier_rootDir = classifier_rootDir or CFG.path_spechomo_classif
self.logger = logger or logging.getLogger(self.__class__.__name__) # FIXME own logger logs nothing
def interpolate_cube(self, arrcube, source_CWLs, target_CWLs, kind='linear'):
# type: (Union[np.ndarray, GeoArray], list, list) -> np.ndarray
"""Spectrally nterpolate the spectral bands of a remote sensing image to new band positions.
"""Spectrally interpolate the spectral bands of a remote sensing image to new band positions.
:param arrcube: array to be spectrally interpolated
:param source_CWLs: list of source central wavelength positions
......@@ -153,6 +160,23 @@ class SpectralHomogenizer(object):
def predict_by_machine_learner(self, arrcube, method, src_satellite, src_sensor, src_LBA,
tgt_satellite, tgt_sensor, tgt_LBA, nodataVal=None, **fallback_argskwargs):
# type: (Union[np.ndarray, GeoArray], str, str, str, list, str, str, list, int, dict) -> np.ndarray
"""Predict spectral bands of target sensor by applying a machine learning approach.
:param arrcube: input image array for target sensor spectral band prediction (rows x cols x bands)
:param method: machine learning approach to be used for spectral bands prediction
'LR': Linear Regression
'RR': Ridge Regression
:param src_satellite: source satellite, e.g., 'Landsat-8'
:param src_sensor: source sensor, e.g., 'OLI_TIRS'
:param src_LBA: source LayerBandsAssignment
:param tgt_satellite: target satellite, e.g., 'Landsat-8'
:param tgt_sensor: target sensor, e.g., 'OLI_TIRS'
:param tgt_LBA: target LayerBandsAssignment
:param nodataVal: no data value
:param fallback_argskwargs: arguments and keyword arguments for fallback algorithm ({'args': {}, 'kwargs': {}})
:return: predicted array (rows x columns x bands)
"""
# TODO: add LBA validation to .predict()
PR = RSImage_Predictor(method=method, classifier_rootDir=self.classifier_rootDir)
......@@ -335,6 +359,7 @@ class SpectralResampler(object):
class KMeansRSImage(object):
"""Class for clustering a given input image by using K-Means algorithm."""
def __init__(self, im, n_clusters, CPUs=1, v=False):
# type: (GeoArray, int) -> None
......@@ -459,7 +484,15 @@ class KMeansRSImage(object):
class TrainingData(object):
"""Class for analyzing statistical relations between a pair of machine learning training data cubes."""
def __init__(self, im_X, im_Y, test_size):
# type: (Union[GeoArray, np.ndarray], Union[GeoArray, np.ndarray], Union[float, int]) -> None
"""Get instance of TrainingData.
:param im_X: input image X
:param im_Y: input image Y
:param test_size: test size (proportion as float between 0 and 1) or absolute value as integer
"""
self.im_X = GeoArray(im_X)
self.im_Y = GeoArray(im_Y)
......@@ -474,6 +507,7 @@ class TrainingData(object):
train_test_split(self.spectra_X, self.spectra_Y, test_size=test_size, shuffle=True, random_state=0)
def plot_scatter_matrix(self, figsize=(15, 15), mode='intersensor'):
# TODO complete this function
train_X = self.train_X[np.random.choice(self.train_X.shape[0], 1000, replace=False), :]
train_Y = self.train_Y[np.random.choice(self.train_Y.shape[0], 1000, replace=False), :]
......@@ -516,6 +550,7 @@ class TrainingData(object):
plt.suptitle('Image Y band to band correlation')
def plot_scattermatrix(self):
# TODO complete this function
import seaborn
fig, axes = plt.subplots(self.im_X.data.bands, self.im_Y.data.bands,
......@@ -542,6 +577,7 @@ class TrainingData(object):
axes[i, j].plot([0, 1], [0, 1], c='red')
def show_band_scatterplot(self, band_src_im, band_tgt_im):
# TODO complete this function
from scipy.stats import gaussian_kde
x = self.im_X.data[band_src_im].flatten()[:10000]
......@@ -557,13 +593,21 @@ class TrainingData(object):
def im2spectra(geoArr):
    # type: (Union[GeoArray, np.ndarray]) -> np.ndarray
    """Convert an image to an array of spectra samples (rows: samples; cols: spectral information).

    :param geoArr:  image to be converted (rows x columns x bands); a 2D input (rows x columns) is treated
                    as a single-band image
    :return:        2D array with one row per pixel and one column per spectral band
    """
    # Derive the dimensions from .shape instead of .rows / .cols / .bands: plain numpy arrays (which the type
    # comment explicitly allows) do not provide these attributes.
    # NOTE(review): assumes GeoArray.shape is ordered (rows, cols[, bands]) like the underlying array - confirm.
    dims = geoArr.shape
    n_rows, n_cols = dims[0], dims[1]
    n_bands = dims[2] if len(dims) > 2 else 1

    return geoArr.reshape((n_rows * n_cols, n_bands))
def spectra2im(spectra, rows, cols):
    """Reshape a 2D array of spectra samples (rows: samples; cols: spectral information) into a 3D image.

    :param spectra: 2D array with rows: spectral samples / columns: spectral information (bands)
    :param rows:    number of image rows of the returned array
    :param cols:    number of image columns of the returned array
    :return:        3D array (rows x columns x spectral bands)
    """
    # the band dimension is kept as is, only the sample dimension is unfolded into rows and columns
    n_bands = spectra.shape[1]
    return spectra.reshape(rows, cols, n_bands)
def spectra2im(spectra, tgt_rows, tgt_cols):
    # type: (Union[GeoArray, np.ndarray], int, int) -> np.ndarray
    """Reshape a 2D array of spectra samples (rows: samples; cols: spectral information) into a 3D image.

    :param spectra:     2D array with rows: spectral samples / columns: spectral information (bands)
    :param tgt_rows:    number of target image rows
    :param tgt_cols:    number of target image columns
    :return:            3D array (rows x columns x spectral bands)
    """
    # the band dimension is kept as is, only the sample dimension is unfolded into rows and columns
    n_bands = spectra.shape[1]
    return spectra.reshape(tgt_rows, tgt_cols, n_bands)
# class _MachineLearner_RSImage(object):
......@@ -627,137 +671,137 @@ def spectra2im(spectra, rows, cols):
# super(RidgeRegression_RSImage, self).fit(train_X, train_Y, **kwargs)
class ReferenceCube_Generator_OLD(object):
    """Legacy class for generating a reference cube of random spectra samples from a list of input images."""

    def __init__(self, filelist_refs, v=False, logger=None, CPUs=None):
        # type: (List[str], bool, logging.Logger, Union[None, int]) -> None
        """Get an instance of ReferenceCube_Generator_OLD.

        :param filelist_refs:   list of reference images
        :param v:               verbose mode: if True, the SRFs and the KMeans cluster plots are shown
        :param logger:          logger to be used (default: GMS_logger(__name__))
        :param CPUs:            number of CPUs to be used (default: return value of cpu_count())
        """
        self.ims_ref = filelist_refs
        self.ref_cube = None
        self.v = v
        self.logger = logger or GMS_logger(__name__)  # must be pickable
        self.CPUs = CPUs or cpu_count()
        self.tmpdir_multiproc = ''

    def generate_reference_cube(self, tgt_satellite, tgt_sensor, n_clusters=10, tgt_n_samples=1000, path_out='',
                                fmt_out='ENVI', progress=True):
        # type: (str, str, int, int, str, str, bool) -> np.ndarray
        """Generate reference spectra from all hyperspectral input images.

        The hyperspectral images are spectrally resampled to the target sensor specifications. The resulting target
        sensor image is then clustered and the same number of spectra is randomly selected from each cluster. All
        spectra are combined into a single 'reference cube' containing the same number of spectra for each cluster
        whereas the spectra originate from all the input images.

        :param tgt_satellite:   target satellite, e.g., 'Landsat-8'
        :param tgt_sensor:      target sensor, e.g., 'OLI_TIRS'
        :param n_clusters:      number of clusters to be used for clustering the input images (KMeans)
        :param tgt_n_samples:   number of spectra to be collected from each input image
        :param path_out:        output path for the generated reference cube (nothing is saved if empty)
        :param fmt_out:         output format (GDAL driver code)
        :param progress:        show progress bar (default: True)
        :return:                np.array: [tgt_n_samples x images x spectral bands of the target sensor]
        """
        self.logger.info('Generating reference spectra from all input images...')

        # get SRFs
        self.logger.info('Reading spectral response functions of target sensor...')
        tgt_srf = SRF(dict(Satellite=tgt_satellite, Sensor=tgt_sensor, Subsystem=None, image_type='RSD',
                           proc_level='L1A', logger=self.logger))

        if self.v:
            tgt_srf.plot_srfs()

        # Build the 3D reference cube from random samples of each image
        # => rows: tgt_n_samples, columns: images, bands: spectral information
        # generate random spectra samples equally for each KMeans cluster
        self.ref_cube = np.zeros((tgt_n_samples, len(self.ims_ref), len(tgt_srf.bands)))

        bar = ProgressBar(prefix='\t overall progress:')
        if progress:
            bar.print_progress(0 / len(self.ims_ref) * 100)

        for im_num, im in enumerate(self.ims_ref):
            self.logger.info('Generating random samples for %s (shape: %s)'
                             % (os.path.basename(im), GeoArray(im).shape))

            im_rsp = \
                self.perform_spectral_resampling(im, tgt_srf, progress=progress)
            random_samples = \
                self.cluster_image_and_get_uniform_samples(im_rsp, n_clusters, tgt_n_samples)

            self.logger.info('Adding random samples of %s to reference cube...'
                             % os.path.basename(self.ims_ref[im_num]))
            # NOTE(review): the samples are collected as (tgt_n_samples // n_clusters) spectra per cluster, so this
            #               assignment presumably requires tgt_n_samples to be a multiple of n_clusters - confirm.
            self.ref_cube[:, im_num, :] = random_samples

            if progress:
                bar.print_progress((im_num + 1) / len(self.ims_ref) * 100)

        # save
        if path_out:
            GeoArray(self.ref_cube).save(out_path=path_out, fmt=fmt_out)

        return self.ref_cube

    def perform_spectral_resampling(self, src_im, tgt_srf, progress=False):
        # type: (Union[str, GeoArray], SRF, bool) -> Union[GeoArray, None]
        """Perform spectral resampling of the given image to match the given spectral response functions.

        :param src_im:      source image to be resampled (file path or GeoArray instance)
        :param tgt_srf:     target spectral response functions to be used for spectral resampling
        :param progress:    show progress bar (default: false)
        :return:            GeoArray holding the resampled image as int16 (one band per band of tgt_srf) together
                            with the geotransform and projection of the source image
        """
        # handle src_im provided as file path or GeoArray instance
        if isinstance(src_im, str):
            im_name = os.path.basename(src_im)
            im_gA = GeoArray(src_im)
        else:
            im_name = src_im.basename
            im_gA = src_im

        # read input image
        self.logger.info('Reading the input image %s...' % im_name)
        # NOTE: the builtin float is used here because the alias np.float has been removed from NumPy (>= 1.24);
        #       np.float was nothing else than the builtin float, so the resulting dtype is unchanged
        im_gA.cwl = np.array(im_gA.meta.loc['wavelength'], dtype=float).flatten()

        # perform spectral resampling of input image to match spectral properties of target sensor
        self.logger.info('Performing spectral resampling to match spectral properties of target sensor...')
        SR = SpectralResampler(im_gA.cwl, tgt_srf)

        tgt_im = GeoArray(np.zeros((*im_gA.shape[:2], len(tgt_srf.bands)), dtype=np.int16), im_gA.gt, im_gA.prj)
        tiles = im_gA.tiles((1000, 1000))  # use tiles to save memory
        for ((rS, rE), (cS, cE)), tiledata in (tqdm(tiles) if progress else tiles):
            # the tile bounds are used as inclusive end positions here, hence the + 1 in the slices
            tgt_im[rS: rE + 1, cS: cE + 1, :] = SR.resample_image(tiledata.astype(np.int16), CPUs=self.CPUs)

        return tgt_im

    def cluster_image_and_get_uniform_samples(self, im, n_clusters, tgt_n_samples):
        # type: (Union[GeoArray, np.ndarray], int, int) -> np.ndarray
        """Compute KMeans clusters for the given image and return an array of uniform random samples.

        :param im:              image to be clustered
        :param n_clusters:      number of clusters to use
        :param tgt_n_samples:   number of returned random samples
                                (tgt_n_samples // n_clusters spectra are requested from each cluster)
        :return:                2D array (rows: random samples, columns: spectral information / bands)
        """
        # compute KMeans clusters for the spectrally resampled image
        self.logger.info('Computing %s KMeans clusters...' % n_clusters)
        kmeans = KMeansRSImage(im, n_clusters=n_clusters, CPUs=self.CPUs)

        if self.v:
            kmeans.plot_cluster_centers()
            kmeans.plot_cluster_histogram()

        # randomly grab the given number of spectra from each cluster
        self.logger.info('Getting %s random spectra from each cluster...' % (tgt_n_samples // n_clusters))
        random_samples = kmeans.get_random_spectra_from_each_cluster(samplesize=tgt_n_samples // n_clusters)

        # combine the spectra (2D arrays) of all clusters to a single 2D array
        self.logger.info('Combining random samples from all clusters.')
        random_samples = np.vstack([random_samples[clusterlabel] for clusterlabel in random_samples])

        return random_samples
# class ReferenceCube_Generator_OLD(object):
# def __init__(self, filelist_refs, v=False, logger=None, CPUs=None):
# # type: (List[str], bool, logging.Logger, Union[None, int]) -> None
# """
#
# :param filelist_refs: list of reference images
# """
# self.ims_ref = filelist_refs
# self.ref_cube = None
# self.v = v
# self.logger = logger or GMS_logger(__name__) # must be pickable
# self.CPUs = CPUs or cpu_count()
# self.tmpdir_multiproc = ''
#
# def generate_reference_cube(self, tgt_satellite, tgt_sensor, n_clusters=10, tgt_n_samples=1000, path_out='',
# fmt_out='ENVI', progress=True):
# # type: (str, str, int, int, str, str, bool) -> np.ndarray
# """Generate reference spectra from all hyperspectral input images.
#
# The hyperspectral images are spectrally resampled to the target sensor specifications. The resulting target
# sensor image is then clustered and the same number of spectra is randomly selected from each cluster. All
# spectra are combined into a single 'reference cube' containing the same number of spectra for each cluster
# whereas the spectra orginate from all the input images.
#
# :param tgt_satellite: target satellite, e.g., 'Landsat-8'
# :param tgt_sensor: target sensor, e.g.. 'OLI_TIRS'
# :param n_clusters: number of clusters to be used for clustering the input images (KMeans)
# :param tgt_n_samples: number o spectra to be collected from each input image
# :param path_out: output path for the generated reference cube
# :param fmt_out: output format (GDAL driver code)
# :param progress: show progress bar (default: True)
# :return: np.array: [tgt_n_samples x images x spectral bands of the target sensor]
# """
# self.logger.info('Generating reference spectra from all input images...')
#
# # get SRFs
# self.logger.info('Reading spectral response functions of target sensor...')
# tgt_srf = SRF(dict(Satellite=tgt_satellite, Sensor=tgt_sensor, Subsystem=None, image_type='RSD',
# proc_level='L1A', logger=self.logger))
#
# if self.v:
# tgt_srf.plot_srfs()
#
# # Build the 3D reference cube from random samples of each image
# # => rows: tgt_n_samples, columns: images, bands: spectral information
# # generate random spectra samples equally for each KMeans cluster
# self.ref_cube = np.zeros((tgt_n_samples, len(self.ims_ref), len(tgt_srf.bands)))
#
# bar = ProgressBar(prefix='\t overall progress:')
# if progress:
# bar.print_progress(0 / len(self.ims_ref) * 100)
# for im_num, im in enumerate(self.ims_ref):
# self.logger.info('Generating random samples for %s (shape: %s)'
# % (os.path.basename(im), GeoArray(im).shape))
#
# im_rsp = \
# self.perform_spectral_resampling(im, tgt_srf, progress=progress)
# random_samples = \
# self.cluster_image_and_get_uniform_samples(im_rsp, n_clusters, tgt_n_samples)
#
# self.logger.info('Adding random samples of %s to reference cube...'
# % os.path.basename(self.ims_ref[im_num]))
# self.ref_cube[:, im_num, :] = random_samples
#
# if progress:
# bar.print_progress((im_num + 1) / len(self.ims_ref) * 100)
#
# # save
# if path_out:
# GeoArray(self.ref_cube).save(out_path=path_out, fmt=fmt_out)
#
# return self.ref_cube
#
# def perform_spectral_resampling(self, src_im, tgt_srf, progress=False):
# # type: (Union[str, GeoArray], SRF, bool) -> Union[GeoArray, None]
# """Perform spectral resampling of the given image to match the given spectral response functions.
#
# :param src_im: source image to be resampled
# :param tgt_srf: target spectral response functions to be used for spectral resampling
# :param progress: show progress bar (default: false)
# :return:
# """
# # handle src_im provided as file path or GeoArray instance
# if isinstance(src_im, str):
# im_name = os.path.basename(src_im)
# im_gA = GeoArray(src_im)
# else:
# im_name = src_im.basename
# im_gA = src_im
#
# # read input image
# self.logger.info('Reading the input image %s...' % im_name)
# im_gA.cwl = np.array(im_gA.meta.loc['wavelength'], dtype=np.float).flatten()
#
# # perform spectral resampling of input image to match spectral properties of target sensor
# self.logger.info('Performing spectral resampling to match spectral properties of target sensor...')
# SR = SpectralResampler(im_gA.cwl, tgt_srf)
#
# tgt_im = GeoArray(np.zeros((*im_gA.shape[:2], len(tgt_srf.bands)), dtype=np.int16), im_gA.gt, im_gA.prj)
# tiles = im_gA.tiles((1000, 1000)) # use tiles to save memory
# for ((rS, rE), (cS, cE)), tiledata in (tqdm(tiles) if progress else tiles):
# tgt_im[rS: rE + 1, cS: cE + 1, :] = SR.resample_image(tiledata.astype(np.int16), CPUs=self.CPUs)
#
# return tgt_im
#
# def cluster_image_and_get_uniform_samples(self, im, n_clusters, tgt_n_samples):
# # type: (Union[GeoArray, np.ndarray], int, int) -> np.ndarray
# """Compute KMeans clusters for the given image and return the an array of uniform random samples.
#
# :param im: image to be clustered
# :param n_clusters: number of clusters to use
# :param tgt_n_samples: number of returned random samples
# :return: 2D array (rows: tgt_n_samples, columns: spectral information / bands
# """
# # compute KMeans clusters for the spectrally resampled image
# self.logger.info('Computing %s KMeans clusters...' % n_clusters)
# kmeans = KMeansRSImage(im, n_clusters=n_clusters, CPUs=self.CPUs)
#
# if self.v:
# kmeans.plot_cluster_centers()
# kmeans.plot_cluster_histogram()
#
# # randomly grab the given number of spectra from each cluster
# self.logger.info('Getting %s random spectra from each cluster...' % (tgt_n_samples // n_clusters))
# random_samples = kmeans.get_random_spectra_from_each_cluster(samplesize=tgt_n_samples // n_clusters)
#
# # combine the spectra (2D arrays) of all clusters to a single 2D array
# self.logger.info('Combining random samples from all clusters.')
# random_samples = np.vstack([random_samples[clusterlabel] for clusterlabel in random_samples])
#
# return random_samples
class ReferenceCube_Generator(object):
......@@ -811,6 +855,8 @@ class ReferenceCube_Generator(object):
@property
def refcubes(self):
"""Return a dictionary holding instances of RefCube for each target satellite / sensor of self.tgt_sat_sen_list.
"""
# type: () -> Dict[Tuple[str, str]: RefCube]
if not self._refcubes:
......@@ -835,13 +881,37 @@ class ReferenceCube_Generator(object):
return self._refcubes
def _get_tgt_GMS_identifier(self, tgt_sat, tgt_sen):
# type: (str, str) -> dict
"""Get a GMS identifier for the specified target sensor such that all possible bands are included (L1A)
:param tgt_sat: target satellite
:param tgt_sen: target sensor
:return:
"""
return dict(Satellite=tgt_sat, Sensor=tgt_sen, Subsystem=None, image_type='RSD',
proc_level='L1A', logger=self.logger) # use L1A to have all bands available
def _get_tgt_LayerBandsAssignment(self, tgt_sat, tgt_sen):
    # type: (str, str) -> list
    """Return the LayerBandsAssignment of the given target sensor.

    NOTE:   The returned bands list always contains all possible bands. Specific band selections are later done
            using np.take().

    :param tgt_sat:     target satellite
    :param tgt_sen:     target sensor
    :return:            result of get_LayerBandsAssignment() for the L1A GMS identifier of the target sensor
                        (called with no_pan=False)
    """
    gms_identifier = self._get_tgt_GMS_identifier(tgt_sat, tgt_sen)
    return get_LayerBandsAssignment(gms_identifier, no_pan=False)
def _get_tgt_SRF_object(self, tgt_sat, tgt_sen):
    # type: (str, str) -> SRF
    """Return an SRF instance containing the spectral response functions for the specified target sensor.

    :param tgt_sat:     target satellite
    :param tgt_sen:     target sensor
    :return:            SRF instance created from the L1A GMS identifier of the target sensor
                        (created with no_pan=False)
    """
    gms_identifier = self._get_tgt_GMS_identifier(tgt_sat, tgt_sen)
    return SRF(gms_identifier, no_pan=False)
def generate_reference_cubes(self, fmt_out='ENVI', progress=True):
......@@ -988,7 +1058,16 @@ class ReferenceCube_Generator(object):
class RefCube(object):
"""Data model class for reference cubes holding the training data for later fitted machine learning classifiers."""
def __init__(self, filepath='', satellite='', sensor='', LayerBandsAssignment=None):
# type: (str, str, str, list) -> None
"""Get instance of RefCube.
:param filepath: file path for importing an existing reference cube from disk
:param satellite: the satellite for which the reference cube holds its spectral data
:param sensor: the sensor for which the reference cube holds its spectral data
:param LayerBandsAssignment: the LayerBandsAssignment for which the reference cube holds its spectral data
"""
# type: (str, str, str, list) -> None
# privates
self._col_imName_dict = dict()
......@@ -1008,15 +1087,18 @@ class RefCube(object):
@property
def n_images(self):
"""Return the number training images from which the reference cube contains spectral samples."""
return self.data.shape[1]
@property
def n_signatures(self):
"""Return the number spectral signatures per training image included in the reference cube."""
return self.data.shape[0]
@property
def col_imName_dict(self):
# type: () -> OrderedDict
"""Return an ordered dict containing the file base names of the original training images for each column."""
return OrderedDict((col, imName) for col, imName in zip(range(self.n_images), self.srcImNames))
@col_imName_dict.setter
......@@ -1027,6 +1109,14 @@ class RefCube(object):
def add_refcube_array(self, refcube_array, src_imnames, LayerBandsAssignment):
# type: (Union[str, np.ndarray], list, list) -> None
"""Add the given array to the RefCube instance.
:param refcube_array: 3D array or file path of the reference cube to be added
(spectral samples /signatures x training images x spectral bands)
:param src_imnames: list of training image file base names from which the given cube received data
:param LayerBandsAssignment: LayerBandsAssignment of the spectral bands of the given 3D array
:return:
"""
# validation
assert LayerBandsAssignment == self.LayerBandsAssignment, \
"%s != %s" % (LayerBandsAssignment, self.LayerBandsAssignment)
......@@ -1073,15 +1163,16 @@ class RefCube(object):
@property
def metadata(self):
"""Return an ordered dictionary holding the metadata of the reference cube."""
attrs2include = ['satellite', 'sensor', 'filepath', 'n_signatures', 'n_images', 'col_imName_dict',
'LayerBandsAssignment']
return OrderedDict((k, getattr(self, k)) for k in attrs2include)
def get_band_combination(self, tgt_LBA):
# type: (List[str]) -> GeoArray
"""
"""Get an array according to the bands order given by a target LayerBandsAssignment.
:param tgt_LBA:
:param tgt_LBA: target LayerBandsAssignment
:return:
"""
if tgt_LBA != self.LayerBandsAssignment:
......@@ -1096,12 +1187,13 @@ class RefCube(object):
# type: (List[str]) -> None
"""Rearrange the spectral bands of the reference cube according to the given LayerBandsAssignment.
:param tgt_LBA:
:param tgt_LBA: target LayerBandsAssignment
"""
self.data = self.get_band_combination(tgt_LBA)
self.LayerBandsAssignment = tgt_LBA
def from_filepath(self, filepath):
# TODO convert to class method
self.data = GeoArray(filepath)
with open(os.path.splitext(filepath)[0] + '.meta', 'r') as metaF:
......@@ -1113,6 +1205,13 @@ class RefCube(object):
setattr(self, k, v)
def save(self, path_out, fmt='ENVI'):
# type: (str, str) -> None
"""Save the reference cube to disk.
:param path_out: output path on disk
:param fmt: output format as GDAL format code
:return:
"""
# type: (str, str) -> None
self.data.save(out_path=path_out, fmt=fmt)
......@@ -1125,16 +1224,21 @@ class RefCube(object):
class Classifier_Generator(object):
"""Class for creating collections of machine learning classifiers that can be used for spectral homogenization."""
def __init__(self, list_refcubes):
# type: (List[Union[str, RefCube]]) -> None
"""Get an instance of Classifier_Generator.
:param list_refcubes: list of RefCube instances for which the classifiers are to be created.
"""
self.refcubes = [RefCube(inRC) if isinstance(inRC, str) else inRC for inRC in list_refcubes]
@staticmethod
def _get_derived_LayerBandsAssignments(satellite, sensor):
"""
"""Get a list of possible LayerBandsAssignments in which the spectral training data may be arranged.
:param satellite:
:param sensor:
:param satellite: satellite to return LayerBandsAssignments for
:param sensor: sensor to return LayerBandsAssignments for
:return: e.g. for Landsat-8 OLI_TIRS:
[['1', '2', '3', '4', '5', '9', '6', '7'],
['1', '2', '3', '4', '5', '9', '6', '7', '8'],
......@@ -1156,6 +1260,16 @@ class Classifier_Generator(object):
]
def create_classifiers(self, outDir, method='LR', *args, **kwargs):
"""Create classifiers for all combinations of the reference cubes given in __init__().
:param outDir: output directory for the created classifier collections
:param method: type of machine learning classifiers to be included in classifier collections
'LR': Linear Regression
'RR': Ridge Regression
:param args: arguments to be passed to the fit() function of the machine learners
:param kwargs: keyword arguments to be passed to the fit() function of machine learners
:return:
"""
for src_cube in self.refcubes:
cls_collection = nested_dict()
fName_cls = get_filename_classifier_collection(method, src_cube.satellite, src_cube.sensor)
......@@ -1184,7 +1298,7 @@ class Classifier_Generator(object):
# train the model
ML = specHomoApproaches[method]()
ML.fit(train_X, train_Y)
ML.fit(train_X, train_Y, *args, **kwargs)
# append some metadata
ML.scores = dict(train=ML.score(train_X, train_Y), test=ML.score(test_X, test_Y))
......@@ -1210,13 +1324,9 @@ specHomoApproaches = dict(
)
def get_classifier_h5filename(method, src_sat, src_sen):
    """Return the HDF5 file name '<method>__<src_sat>__<src_sen>.hdf5' built from the given strings.

    :param method:      machine learning approach, e.g., 'LR'
    :param src_sat:     source satellite, e.g., 'Landsat-8'
    :param src_sen:     source sensor, e.g., 'OLI_TIRS'
    :return:            file name (without directory)
    """
    basename = "__".join((method, src_sat, src_sen))
    return basename + ".hdf5"
def get_classifier_filename(method, src_satellite, src_sensor, src_LBA_name, tgt_satellite, tgt_sensor, tgt_LBA_name):
    """Return the file name '<method>__<source parts>__to__<target parts>.dill' built from the given strings.

    :param method:          machine learning approach, e.g., 'LR'
    :param src_satellite:   source satellite, e.g., 'Landsat-8'
    :param src_sensor:      source sensor, e.g., 'OLI_TIRS'
    :param src_LBA_name:    name of the source LayerBandsAssignment
    :param tgt_satellite:   target satellite
    :param tgt_sensor:      target sensor
    :param tgt_LBA_name:    name of the target LayerBandsAssignment
    :return:                file name (without directory)
    """
    # all parts are separated by a double underscore, source and target are linked by '__to__'
    source_part = '__'.join([method, src_satellite, src_sensor, src_LBA_name])
    target_part = '__'.join([tgt_satellite, tgt_sensor, tgt_LBA_name])

    return source_part + '__to__' + target_part + '.dill'
# def get_classifier_filename(method, src_satellite, src_sensor, src_LBA_name, tgt_satellite, tgt_sensor, tgt_LBA_name):
# return '__'.join([method, src_satellite, src_sensor, src_LBA_name]) + \
# '__to__' + '__'.join([tgt_satellite, tgt_sensor, tgt_LBA_name]) + '.dill'
def get_filename_classifier_collection(method, src_satellite, src_sensor):
......@@ -1236,12 +1346,32 @@ class ClassifierCollection(object):
class RSImage_Predictor(object):
"""Predictor class applying the predict() function of a machine learning classifier described by the given args."""
def __init__(self, method='LR', classifier_rootDir=''):
"""Get an instance of RSImage_Predictor.
:param method: machine learning approach to be used for spectral bands prediction
'LR': Linear Regression
'RR': Ridge Regression