outer_procedure.r 13.7 KB
Newer Older
carstennh's avatar
carstennh committed
1
2
3
#' Perform Habitat Sampling and Probability Mapping
#'
#'This is the main function that performs everything: specify the input imagery, select model type, initiate sampling and model building, generates interactive maps and produce final probability raster output
4
#'
carstennh's avatar
carstennh committed
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#' @param in.raster satellite time series stack (rasterBrickObject) or just any type of image (*rasterObject)
#' @param init.samples starting number of spatial locations
#' @param sample_type distribution of spatial locations c("random","regular")
#' @param nb_models number of models (independent classifiers) to collect
#' @param nb_it number of iterations for model accuracy
#' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size)
#' @param reference reference spectra either SpatialPointsDataFrame (shape file) or data.frame with lines = classes, column = predictors]
#' @param model which machine learning classifier to use c("rf", "svm") for random forest or suppurt vector machine implementation
#' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors)
#' @param last only true for one class classifier c("FALSE", TRUE")
#' @param seed set seed for reproducable results
#' @param init.seed "sample" for new or use run1@seeds to reproduce previous steps
#' @param outPath output path for saving results
#' @param step at which step should the procedure start, e.g. use step = 2 if the first habitat is already extracted
carstennh's avatar
carstennh committed
19
#' @param  classNames character vector with class names in the order of reference spectra
carstennh's avatar
carstennh committed
20
21
#' @param n_classes total number of classes (habitat types) to be separated
#' @param multiTest number of test runs to compare different probability outputs
22
#' @param RGB rgb channel numbers for image plot
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
23
24
25
26
#' @param overwrite overwrite the KML and raster files from previous runs (default TRUE)
#' @param save_runs to save to disk and memory the Habitat object or not (default TRUE)
#' @param parallel_mode run loops using all available cores (default FALSE)
#' @param max_num_cores maximum number of cores for parallelism (default 5)
27
#' @param plot_on_browser plot on the browser or inline in a notebook (default TRUE)
carstennh's avatar
carstennh committed
28
#'
29
30
#' @return 4 files per step:
#' 1) Habitat type probability map as geocoded *.kml layer and *.tif raster files and  *.png image output
31
#' 2) A Habitat object (only if save_runs is set to TRUE) consisting of 7 slots: \cr
32
33
34
35
36
37
38
39
#' run1@models - list of selcted classifiers \cr
#' run1@ref_samples - list of SpatialPointsDataFrames with same length as run1@models holding reference labels [1,2] for each selected model \cr
#' run1@switch - vector of lenght run1@models indicating if target class equals 2, if not NA the labels need to be switched \cr
#' run1@layer - raster map of habitat type probability \cr
#' run1@mod_all - list of all classifiers (equals nb_models) \cr
#' run1@class_ind - vector of predictive distance measure for all habitats \cr
#' run1@seeds - vector of seeds for random sampling \cr
#' all files are saved with step number, the *.tif file is additionally saved with class names
carstennh's avatar
carstennh committed
40
#'
carstennh's avatar
carstennh committed
41
42
43
44
45
#' @examples
#' ###################
#' library(HaSa)
#' raster::plotRGB(Sentinel_Stack_2018, r = 19, g = 20, b = 21, stretch = "lin", axes = T)
#' sp::plot(Example_Reference_Points, pch = 21, bg = "red", col = "yellow", cex = 1.9, lwd = 2.5, add = T)
carstennh's avatar
carstennh committed
46
#' #specify a valid output path e.g.  "C:/Users/.../"
47
48
49
#' multi_Class_Sampling(in.raster = Sentinel_Stack_2018, init.samples = 30, sample_type = "regular", nb_models = 200, nb_it = 10, buffer = 15,
#' reference = Example_Reference_Points, model = "rf", mtry = 10, last = F, seed = 3, init.seed = "sample", outPath="C:/User/", step = 1,
#' classNames = c("deciduous", "coniferous", "heath_young", "heath_old", "heath_shrub", "bare_ground", "xeric_grass"), n_classes = 7,
carstennh's avatar
carstennh committed
50
51
#' multiTest = 1, RGB = c(19, 20, 21))
#' ###################
carstennh's avatar
carstennh committed
52
#' for threshold evaluation an interactive map is plotted in the web browser
carstennh's avatar
carstennh committed
53
54
#'
#' next steps start automatically, after command line input of:
carstennh's avatar
carstennh committed
55
#' 1) number of the apropriate map if multiTest > 1
56
#' 2) probability threshold for habitat type extraction
carstennh's avatar
carstennh committed
57
58
59
#' 3) decision to sample again y/n
#' 4) adjust starting number of samples and number of models
#'
60
61
62
63
64
65
#'
#'
#' if convergence fails / no models can be selected / init.samples are to little / or another error occurs, restart next step with:
#' in.raster = out.raster
#' reference = out.reference
#' step = specify next step number
carstennh's avatar
carstennh committed
66
67
68
#' classNames = out.names
#'
#' @export
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
multi_Class_Sampling <- function(in.raster,
                                 init.samples = 30,
                                 sample_type = "regular",
                                 nb_models = 200,
                                 nb_it = 10,
                                 buffer,
                                 reference,
                                 model = "rf",
                                 mtry = 10,
                                 last = F,
                                 seed = 3,
                                 init.seed = "sample",
                                 outPath,
                                 step = 1,
                                 classNames,
                                 n_classes,
                                 multiTest = 1,
86
                                 RGB = c(19, 20, 21),
87
                                 overwrite = TRUE,
88
                                 save_runs = TRUE,
89
                                 parallel_mode = FALSE,
90
                                 max_num_cores = 5,
91
                                 plot_on_browser = TRUE) {
92
93
94
95
  ###first steps: data preparation
  if (class(reference) == "SpatialPointsDataFrame") {
    reference <- as.data.frame(raster::extract(in.raster, reference))
  }
carstennh's avatar
carstennh committed
96

97
  input_raster <- in.raster
98
99
  area <- as(raster::extent(in.raster), 'SpatialPolygons')
  area <- sp::SpatialPolygonsDataFrame(area, data.frame(ID = 1:length(area)))
100
101
  #sp::proj4string(area) <- sp::proj4string(in.raster)
  crs(area) <- crs(in.raster)
carstennh's avatar
carstennh committed
102

103
104
105
106
107
  col <- colorRampPalette(c("lightgrey",
                            "orange",
                            "yellow",
                            "limegreen",
                            "forestgreen"))
carstennh's avatar
carstennh committed
108

109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
  ##############################################################################
  r <- n_classes
  if (names(in.raster)[1] != colnames(reference)[1]) {
    colnames(reference) <- names(in.raster)
  }
  if (step != 1) {
    if (step < 11) {
      load(paste(outPath,
                 paste("threshold_step_0", step - 1, sep = ""),
                 sep = ""))
    } else{
      load(paste(outPath,
                 paste("threshold_step_", step - 1, sep = ""),
                 sep = ""))
    }
  }
carstennh's avatar
carstennh committed
125

126
127
128
129
130
131
132
133
  for (i in step:r) {
    print(paste(
      paste("init.samples = ", init.samples),
      paste("models = ", nb_models)
    ))
    if (i == r) {
      last = T
    }
carstennh's avatar
carstennh committed
134

135
136
137
138
139
140
141
    if (multiTest > 1) {
      test <- list()
      maFo <- list()
      new.names <- list()
      new.acc <- list()
      decision = "0"
      ##########################################################################
carstennh's avatar
carstennh committed
142

143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
      while (decision == "0") {
        for (rs in 1:multiTest) {
          ########################
          maFo_rf <- sample_nb(
            raster = in.raster,
            nb_samples = seq(init.samples, init.samples, init.samples),
            sample_type = sample_type,
            nb_mean = nb_models,
            nb_it = nb_it,
            buffer = buffer,
            reference = reference,
            model = model,
            area = area,
            mtry = mtry,
            last = last,
            seed = seed,
159
            init.seed = init.seed,
160
            save_runs = save_runs,
161
162
            parallel_mode = parallel_mode,
            max_num_cores = max_num_cores
163
          )
carstennh's avatar
carstennh committed
164

165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
          index <- maFo_rf$index
          acc <- maFo_rf$acc
          maFo_rf <- maFo_rf$obj
          ########################
          maFo[[rs]] <- maFo_rf
          test[[rs]] <- maFo_rf@layer[[1]]
          new.names[[rs]] <- index
          new.acc[[rs]] <- acc
          if (rs == multiTest) {
            par(mar = c(2, 2, 2, 3), mfrow = n2mfrow(multiTest))
            for (rr in 1:length(test)) {
              plot(
                test[[rr]],
                col = col(200),
                main = "",
                legend.shrink = 1)
              mtext(side = 3,
                    paste(rr, classNames[new.names[[rr]]], sep = " "),
                    font = 2)
            }
          }
        }
        decision <-
          readline("Which distribution is acceptable/ or sample again [../0]:  ")
      }
      maFo_rf <- maFo[[as.numeric(decision)]]
      index <- new.names[[as.numeric(decision)]]
      acc <- new.acc[[as.numeric(decision)]]
193
194
195
      remove(maFo)
      remove(new.names)
      remove(new.acc)
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
      ##########################################################################
    } else{
      ########################
      maFo_rf <- sample_nb(
        raster = in.raster,
        nb_samples = seq(init.samples, init.samples, init.samples),
        sample_type = sample_type,
        nb_mean = nb_models,
        nb_it = nb_it,
        buffer = buffer,
        reference = reference,
        model = model,
        area = area,
        mtry = mtry,
        last = last,
        seed = seed,
212
        init.seed = init.seed,
213
        save_runs = save_runs,
214
215
        parallel_mode = parallel_mode,
        max_num_cores = max_num_cores
216
      )
carstennh's avatar
carstennh committed
217

218
219
220
221
222
      index <- maFo_rf$index
      acc <- maFo_rf$acc
      maFo_rf <- maFo_rf$obj
      ########################
    }
carstennh's avatar
carstennh committed
223

224
225
226
    dummy <- maFo_rf@layer[[1]]
    iplot(
      x = dummy,
227
      y = input_raster,
228
229
230
231
232
      HaTy = classNames[index],
      r = RGB[1],
      g = RGB[2],
      b = RGB[3],
      acc = acc,
233
234
      outPath = outPath,
      plot_on_browser = plot_on_browser
235
236
237
238
239
240
241
242
    )

    decision <-
      readline("Threshold for Habitat Extraction or Sample Again [../0]:  ")

    sample2 <- init.samples
    models2 <- nb_models
    while (decision == "0") {
243
      remove(maFo_rf)
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
      decision2 <-
        readline("Adjust init.samples/nb.models or auto [../.. or 0]:  ")
      if (decision2 != "0") {
        sample2 <- as.numeric(strsplit(decision2, split = "/")[[1]][1])
        models2 <- as.numeric(strsplit(decision2, split = "/")[[1]][2])
      } else {
        sample2 <- sample2 + 50
        models2 <- models2 + 15
      }
      print(paste(
        paste("init.samples = ", sample2),
        paste("models = ", models2)
      ))
      maFo_rf <- sample_nb(
        raster = in.raster,
        nb_samples = seq(sample2, sample2, sample2),
        sample_type = sample_type,
        nb_mean = models2,
        nb_it = nb_it,
        buffer = buffer,
        reference = reference,
        model = model,
        area = area,
        mtry = mtry,
        last = last,
        seed = seed,
270
        init.seed = init.seed,
271
        save_runs = save_runs,
272
273
        parallel_mode = parallel_mode,
        max_num_cores = max_num_cores
274
275
276
277
278
279
      )

      index <- maFo_rf$index
      acc <- maFo_rf$acc
      maFo_rf <- maFo_rf$obj
      ########################
carstennh's avatar
carstennh committed
280

281
282
283
      dummy <- maFo_rf@layer[[1]]
      iplot(
        x = dummy,
284
        y = input_raster,
285
286
287
288
289
        HaTy = classNames[index],
        r = RGB[1],
        g = RGB[2],
        b = RGB[3],
        acc = acc,
290
291
        outPath = outPath,
        plot_on_browser = plot_on_browser
292
293
294
295
      )

      decision <-
        readline("Threshold for Habitat Extraction or Sample Again [../0]:  ")
carstennh's avatar
carstennh committed
296
    }
297
298
299
300
301
302

    if (i < 10) {
      ni <- paste("0", i, sep = "")
    } else{
      ni <- i
    }
303
304
305
306
307
308

    if ( save_runs == TRUE) {
      run1 <- maFo_rf
      save(run1, file = paste(outPath, paste("Run", ni, sep = ""), sep = ""))
      remove(run1)
    }
309
310
311
312
313
314
315
316
317
318
319
320
321
    ###rgdal version issue
    not_good_workaround <- comment(dummy@crs)
    comment(dummy@crs) <- ""
    ###
    raster::writeRaster(
      dummy,
      filename = paste(outPath,
                       paste("step_",
                             ni,
                             paste("_", classNames[index], sep = ""),
                             ".tif",
                             sep = ""),
                       sep = ""),
322
323
      format = "GTiff",
      overwrite = overwrite)
324
325
326
327
328
329
330

    ###rgdal version issue
    comment(dummy@crs) <- not_good_workaround
    ###
    kml <- raster::projectRaster(dummy,
                                 crs = "+proj=longlat +datum=WGS84",
                                 method = 'ngb')
331
    raster::KML(kml, paste(outPath, paste("step_", ni, sep = ""), sep = ""), overwrite = overwrite)
332
333
334
335
336

    thres <- as.numeric(decision)
    dummy <- maFo_rf@layer[[1]]
    dummy[dummy < thres] <- 1
    dummy[dummy >= thres] <- NA
337
    in.raster <- in.raster * dummy
338
339
    reference <- reference[-index,]
    classNames <- classNames[-index]
340
341
342
343
344
345
    if (save_runs) {
      out.reference <<- reference
      out.names <<- classNames
      out.raster <<- in.raster
    }
    remove(dummy)
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
346
    remove(maFo_rf)
347
348
349
350
351
352
353
354

    print(paste(paste("Habitat", i), "Done"))

    colnames(reference)  <-  names(in.raster)
    if (i == 1) {
      threshold  <- thres
      save(threshold,
           file = paste(outPath,
355
                        paste("threshold_step_", ni, sep = ""),
356
357
358
359
360
361
362
363
                        sep = ""))
    } else {
      threshold <- append(threshold, thres)
      save(threshold,
           file = paste(outPath,
                        paste("threshold_step_", ni, sep = ""),
                        sep = ""))
    }
364
    # Release memory
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
365
    gc(full = TRUE)
366

367
368
369
370
371
    if (i == r) {
      print("Congratulation - you finally made it towards the last habitat")
      break()
    }

372
373
374
375
376
377
378
379
380
381
    decision2 <- readline("Adjust init.samples/nb.models or auto [../.. or 0]:  ")

    if (decision2 != "0") {
      init.samples <- as.numeric(strsplit(decision2, split = "/")[[1]][1])
      nb_models <- as.numeric(strsplit(decision2, split = "/")[[1]][2])
    } else {
      init.samples <- init.samples + 50
      nb_models <- nb_models + 15
    }
  }
carstennh's avatar
carstennh committed
382
}