outer_procedure.r 13.2 KB
Newer Older
carstennh's avatar
carstennh committed
1
2
3
#' Perform Habitat Sampling and Probability Mapping
#'
#'This is the main function that performs everything: specify the input imagery, select model type, initiate sampling and model building, generates interactive maps and produce final probability raster output
4
#'
carstennh's avatar
carstennh committed
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#' @param in.raster satellite time series stack (rasterBrickObject) or just any type of image (*rasterObject)
#' @param init.samples starting number of spatial locations
#' @param sample_type distribution of spatial locations c("random","regular")
#' @param nb_models number of models (independent classifiers) to collect
#' @param nb_it number of iterations for model accuracy
#' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size)
#' @param reference reference spectra either SpatialPointsDataFrame (shape file) or data.frame with lines = classes, column = predictors]
#' @param model which machine learning classifier to use c("rf", "svm") for random forest or suppurt vector machine implementation
#' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors)
#' @param last only true for one class classifier c("FALSE", TRUE")
#' @param seed set seed for reproducable results
#' @param init.seed "sample" for new or use run1@seeds to reproduce previous steps
#' @param outPath output path for saving results
#' @param step at which step should the procedure start, e.g. use step = 2 if the first habitat is already extracted
carstennh's avatar
carstennh committed
19
#' @param  classNames character vector with class names in the order of reference spectra
carstennh's avatar
carstennh committed
20
21
#' @param n_classes total number of classes (habitat types) to be separated
#' @param multiTest number of test runs to compare different probability outputs
22
#' @param RGB rgb channel numbers for image plot
23
#' @param overwrite overwrite the results file
24
#' @param save_runs if it saves the Habitat object or not (default TRUE)
25
#' @param parallel_mode run loops using all available cores
26
#' @param plot_on_browser plot on the browser or inline in a notebook (default TRUE)
27

carstennh's avatar
carstennh committed
28
#'
29
30
#' @return 4 files per step:
#' 1) Habitat type probability map as geocoded *.kml layer and *.tif raster files and  *.png image output
31
#' 2) A Habitat object (only if save_runs is set to TRUE) consisting of 7 slots: \cr
32
33
34
35
36
37
38
39
#' run1@models - list of selcted classifiers \cr
#' run1@ref_samples - list of SpatialPointsDataFrames with same length as run1@models holding reference labels [1,2] for each selected model \cr
#' run1@switch - vector of lenght run1@models indicating if target class equals 2, if not NA the labels need to be switched \cr
#' run1@layer - raster map of habitat type probability \cr
#' run1@mod_all - list of all classifiers (equals nb_models) \cr
#' run1@class_ind - vector of predictive distance measure for all habitats \cr
#' run1@seeds - vector of seeds for random sampling \cr
#' all files are saved with step number, the *.tif file is additionally saved with class names
carstennh's avatar
carstennh committed
40
#'
carstennh's avatar
carstennh committed
41
42
43
44
45
#' @examples
#' ###################
#' library(HaSa)
#' raster::plotRGB(Sentinel_Stack_2018, r = 19, g = 20, b = 21, stretch = "lin", axes = T)
#' sp::plot(Example_Reference_Points, pch = 21, bg = "red", col = "yellow", cex = 1.9, lwd = 2.5, add = T)
carstennh's avatar
carstennh committed
46
#' #specify a valid output path e.g.  "C:/Users/.../"
47
48
49
#' multi_Class_Sampling(in.raster = Sentinel_Stack_2018, init.samples = 30, sample_type = "regular", nb_models = 200, nb_it = 10, buffer = 15,
#' reference = Example_Reference_Points, model = "rf", mtry = 10, last = F, seed = 3, init.seed = "sample", outPath="C:/User/", step = 1,
#' classNames = c("deciduous", "coniferous", "heath_young", "heath_old", "heath_shrub", "bare_ground", "xeric_grass"), n_classes = 7,
carstennh's avatar
carstennh committed
50
51
#' multiTest = 1, RGB = c(19, 20, 21))
#' ###################
carstennh's avatar
carstennh committed
52
#' for threshold evaluation an interactive map is plotted in the web browser
carstennh's avatar
carstennh committed
53
54
#'
#' next steps start automatically, after command line input of:
carstennh's avatar
carstennh committed
55
#' 1) number of the apropriate map if multiTest > 1
56
#' 2) probability threshold for habitat type extraction
carstennh's avatar
carstennh committed
57
58
59
#' 3) decision to sample again y/n
#' 4) adjust starting number of samples and number of models
#'
60
61
62
63
64
65
#'
#'
#' if convergence fails / no models can be selected / init.samples are to little / or another error occurs, restart next step with:
#' in.raster = out.raster
#' reference = out.reference
#' step = specify next step number
carstennh's avatar
carstennh committed
66
67
68
#' classNames = out.names
#'
#' @export
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
multi_Class_Sampling <- function(in.raster,
                                 init.samples = 30,
                                 sample_type = "regular",
                                 nb_models = 200,
                                 nb_it = 10,
                                 buffer,
                                 reference,
                                 model = "rf",
                                 mtry = 10,
                                 last = F,
                                 seed = 3,
                                 init.seed = "sample",
                                 outPath,
                                 step = 1,
                                 classNames,
                                 n_classes,
                                 multiTest = 1,
86
                                 RGB = c(19, 20, 21),
87
                                 overwrite = TRUE,
88
                                 save_runs = TRUE,
89
90
                                 parallel_mode = FALSE,
                                 plot_on_browser = TRUE) {
91
92
93
94
  ###first steps: data preparation
  if (class(reference) == "SpatialPointsDataFrame") {
    reference <- as.data.frame(raster::extract(in.raster, reference))
  }
carstennh's avatar
carstennh committed
95

96
  input_raster <- in.raster
97
98
  area <- as(raster::extent(in.raster), 'SpatialPolygons')
  area <- sp::SpatialPolygonsDataFrame(area, data.frame(ID = 1:length(area)))
99
100
  #sp::proj4string(area) <- sp::proj4string(in.raster)
  crs(area) <- crs(in.raster)
carstennh's avatar
carstennh committed
101

102
103
104
105
106
  col <- colorRampPalette(c("lightgrey",
                            "orange",
                            "yellow",
                            "limegreen",
                            "forestgreen"))
carstennh's avatar
carstennh committed
107

108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
  ##############################################################################
  r <- n_classes
  if (names(in.raster)[1] != colnames(reference)[1]) {
    colnames(reference) <- names(in.raster)
  }
  if (step != 1) {
    if (step < 11) {
      load(paste(outPath,
                 paste("threshold_step_0", step - 1, sep = ""),
                 sep = ""))
    } else{
      load(paste(outPath,
                 paste("threshold_step_", step - 1, sep = ""),
                 sep = ""))
    }
  }
carstennh's avatar
carstennh committed
124

125
126
127
128
129
130
131
132
  for (i in step:r) {
    print(paste(
      paste("init.samples = ", init.samples),
      paste("models = ", nb_models)
    ))
    if (i == r) {
      last = T
    }
carstennh's avatar
carstennh committed
133

134
135
136
137
138
139
140
    if (multiTest > 1) {
      test <- list()
      maFo <- list()
      new.names <- list()
      new.acc <- list()
      decision = "0"
      ##########################################################################
carstennh's avatar
carstennh committed
141

142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
      while (decision == "0") {
        for (rs in 1:multiTest) {
          ########################
          maFo_rf <- sample_nb(
            raster = in.raster,
            nb_samples = seq(init.samples, init.samples, init.samples),
            sample_type = sample_type,
            nb_mean = nb_models,
            nb_it = nb_it,
            buffer = buffer,
            reference = reference,
            model = model,
            area = area,
            mtry = mtry,
            last = last,
            seed = seed,
158
159
            init.seed = init.seed,
            parallel_mode = parallel_mode
160
          )
carstennh's avatar
carstennh committed
161

162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
          index <- maFo_rf$index
          acc <- maFo_rf$acc
          maFo_rf <- maFo_rf$obj
          ########################
          maFo[[rs]] <- maFo_rf
          test[[rs]] <- maFo_rf@layer[[1]]
          new.names[[rs]] <- index
          new.acc[[rs]] <- acc
          if (rs == multiTest) {
            par(mar = c(2, 2, 2, 3), mfrow = n2mfrow(multiTest))
            for (rr in 1:length(test)) {
              plot(
                test[[rr]],
                col = col(200),
                main = "",
                legend.shrink = 1)
              mtext(side = 3,
                    paste(rr, classNames[new.names[[rr]]], sep = " "),
                    font = 2)
            }
          }
        }
        decision <-
          readline("Which distribution is acceptable/ or sample again [../0]:  ")
      }
      maFo_rf <- maFo[[as.numeric(decision)]]
      index <- new.names[[as.numeric(decision)]]
      acc <- new.acc[[as.numeric(decision)]]
190
191
192
      remove(maFo)
      remove(new.names)
      remove(new.acc)
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
      ##########################################################################
    } else{
      ########################
      maFo_rf <- sample_nb(
        raster = in.raster,
        nb_samples = seq(init.samples, init.samples, init.samples),
        sample_type = sample_type,
        nb_mean = nb_models,
        nb_it = nb_it,
        buffer = buffer,
        reference = reference,
        model = model,
        area = area,
        mtry = mtry,
        last = last,
        seed = seed,
209
210
        init.seed = init.seed,
        parallel_mode = parallel_mode
211
      )
carstennh's avatar
carstennh committed
212

213
214
215
216
217
      index <- maFo_rf$index
      acc <- maFo_rf$acc
      maFo_rf <- maFo_rf$obj
      ########################
    }
carstennh's avatar
carstennh committed
218

219
220
221
    dummy <- maFo_rf@layer[[1]]
    iplot(
      x = dummy,
222
      y = input_raster,
223
224
225
226
227
      HaTy = classNames[index],
      r = RGB[1],
      g = RGB[2],
      b = RGB[3],
      acc = acc,
228
229
      outPath = outPath,
      plot_on_browser = plot_on_browser
230
231
232
233
234
235
236
237
    )

    decision <-
      readline("Threshold for Habitat Extraction or Sample Again [../0]:  ")

    sample2 <- init.samples
    models2 <- nb_models
    while (decision == "0") {
238
      remove(maFo_rf)
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
      decision2 <-
        readline("Adjust init.samples/nb.models or auto [../.. or 0]:  ")
      if (decision2 != "0") {
        sample2 <- as.numeric(strsplit(decision2, split = "/")[[1]][1])
        models2 <- as.numeric(strsplit(decision2, split = "/")[[1]][2])
      } else {
        sample2 <- sample2 + 50
        models2 <- models2 + 15
      }
      print(paste(
        paste("init.samples = ", sample2),
        paste("models = ", models2)
      ))
      maFo_rf <- sample_nb(
        raster = in.raster,
        nb_samples = seq(sample2, sample2, sample2),
        sample_type = sample_type,
        nb_mean = models2,
        nb_it = nb_it,
        buffer = buffer,
        reference = reference,
        model = model,
        area = area,
        mtry = mtry,
        last = last,
        seed = seed,
265
266
        init.seed = init.seed,
        parallel_mode = parallel_mode
267
268
269
270
271
272
      )

      index <- maFo_rf$index
      acc <- maFo_rf$acc
      maFo_rf <- maFo_rf$obj
      ########################
carstennh's avatar
carstennh committed
273

274
275
276
      dummy <- maFo_rf@layer[[1]]
      iplot(
        x = dummy,
277
        y = input_raster,
278
279
280
281
282
        HaTy = classNames[index],
        r = RGB[1],
        g = RGB[2],
        b = RGB[3],
        acc = acc,
283
284
        outPath = outPath,
        plot_on_browser = plot_on_browser
285
286
287
288
      )

      decision <-
        readline("Threshold for Habitat Extraction or Sample Again [../0]:  ")
carstennh's avatar
carstennh committed
289
    }
290
291
292
293
294
295

    if (i < 10) {
      ni <- paste("0", i, sep = "")
    } else{
      ni <- i
    }
296
297
298
299
300
301

    if ( save_runs == TRUE) {
      run1 <- maFo_rf
      save(run1, file = paste(outPath, paste("Run", ni, sep = ""), sep = ""))
      remove(run1)
    }
302
303
304
305
306
307
308
309
310
311
312
313
314
    ###rgdal version issue
    not_good_workaround <- comment(dummy@crs)
    comment(dummy@crs) <- ""
    ###
    raster::writeRaster(
      dummy,
      filename = paste(outPath,
                       paste("step_",
                             ni,
                             paste("_", classNames[index], sep = ""),
                             ".tif",
                             sep = ""),
                       sep = ""),
315
316
      format = "GTiff",
      overwrite = overwrite)
317
318
319
320
321
322
323

    ###rgdal version issue
    comment(dummy@crs) <- not_good_workaround
    ###
    kml <- raster::projectRaster(dummy,
                                 crs = "+proj=longlat +datum=WGS84",
                                 method = 'ngb')
324
    raster::KML(kml, paste(outPath, paste("step_", ni, sep = ""), sep = ""), overwrite = overwrite)
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343

    thres <- as.numeric(decision)
    dummy <- maFo_rf@layer[[1]]
    dummy[dummy < thres] <- 1
    dummy[dummy >= thres] <- NA
    reference <- reference[-index,]
    out.reference <<- reference
    classNames <- classNames[-index]
    out.names <<- classNames
    in.raster <- in.raster * dummy
    out.raster <<- in.raster

    print(paste(paste("Habitat", i), "Done"))

    colnames(reference)  <-  names(in.raster)
    if (i == 1) {
      threshold  <- thres
      save(threshold,
           file = paste(outPath,
344
                        paste("threshold_step_", ni, sep = ""),
345
346
347
348
349
350
351
352
353
                        sep = ""))
    } else {
      threshold <- append(threshold, thres)
      save(threshold,
           file = paste(outPath,
                        paste("threshold_step_", ni, sep = ""),
                        sep = ""))
    }

354
355
356
357
358
    if (i == r) {
      print("Congratulation - you finally made it towards the last habitat")
      break()
    }

359
360
361
362
363
364
365
366
367
368
    decision2 <- readline("Adjust init.samples/nb.models or auto [../.. or 0]:  ")

    if (decision2 != "0") {
      init.samples <- as.numeric(strsplit(decision2, split = "/")[[1]][1])
      nb_models <- as.numeric(strsplit(decision2, split = "/")[[1]][2])
    } else {
      init.samples <- init.samples + 50
      nb_models <- nb_models + 15
    }
  }
carstennh's avatar
carstennh committed
369
}