inner_procedure.r 10.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
#' Perform Habitat Sampling and Probability Mapping
#'
#'This is the function that performs: initiate sampling and model building
#'
#' @param raster satellite time series stack (rasterBrickObject) or just any type of image (*rasterObject)
#' @param nb_samples number of spatial locations
#' @param sample_type distribution of spatial locations c("random","regular")
#' @param nb_models number of models (independent classifiers) to collect
#' @param nb_mean number of iterations for model accuracy
#' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size)
#' @param reference reference spectra either SpatialPointsDataFrame (shape file) or data.frame with lines = classes, column = predictors]
#' @param model which machine learning classifier to use c("rf", "svm") for random forest or suppurt vector machine implementation
#' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors)
14
15
#' @param mod.error threshold for model error until which iteration is being executed
#' @param in.memory boolean for raster processing (memory = "TRUE", from disk = "FALSE")
16
17
18
#' @param last only true for one class classifier c("FALSE", TRUE")
#' @param seed set seed for reproducable results
#' @param init.seed "sample" for new or use run1@seeds to reproduce previous steps
19
#' @param save_runs if the user wants to save the runs, if TRUE the complete Habitat Class object is returned
20
21
#' @param parallel_mode run loops in parallel
#' @param max_num_cores maximum number of cores for parallelism
22
#' @param progress_bar if true use a normal progress bar, otherwise a shiny's progress bar
23
#'
24
25
26
27
28
29
#' @return a list with 5 elements:
#' 1) returns 0 succeeded, 1 increase init.samples, or 2 increase init.samples and nb_models
#' 2) An index
#' 3) num_models number of models selected
#' 4) Accuracy vector
#' 5) A vector with a Habitat objects, each consisting of 7 slots: \cr
30
31
32
#' run1@models - list of selected classifiers (only if save_runs is TRUE) \cr
#' run1@ref_samples - list of SpatialPointsDataFrames with same length as run1@models holding reference labels [1,2] for each selected model (only if save_runs is TRUE) \cr
#' run1@switch - vector of length run1@models indicating if target class equals 2, if not NA the labels need to be switched (only if save_runs is TRUE) \cr
33
#' run1@layer - raster map of habitat type probability \cr
34
35
36
#' run1@mod_all - list of all classifiers (equals nb_models) (only if save_runs is TRUE) \cr
#' run1@class_ind - vector of predictive distance measure for all habitats (only if save_runs is TRUE) \cr
#' run1@seeds - vector of seeds for random sampling (only if save_runs is TRUE) \cr
37
38
#' all files are saved with step number, the *.tif file is additionally saved with class names
#' @keywords internal
39
#' @export
40
41
42


###################################################################################
43
44
45
46
47
48
49
50
51
sample_nb <- function(raster,
                      nb_samples,
                      sample_type,
                      nb_mean,
                      nb_it,
                      buffer,
                      reference,
                      model,
                      mtry,
Carsten Neumann's avatar
Carsten Neumann committed
52
                      mod.error,
53
54
                      last,
                      seed,
55
                      init.seed,
Carsten Neumann's avatar
Carsten Neumann committed
56
                      in.memory,
57
                      save_runs,
58
                      parallel_mode,
59
60
61
62
                      max_num_cores,
                      progress_bar = TRUE) {
  print(paste(paste("init.samples = ", nb_samples[1]),
              paste("models = ", nb_mean)))
63
64
  ###
  n_channel <- length(names(raster))
Carsten Neumann's avatar
Carsten Neumann committed
65
  ###raster in MEMORY or NOT
66
  if (in.memory == TRUE && raster::fromDisk(raster) == TRUE) { rast <- raster::readAll(raster) }else { rast <-raster }
67
68
69
70
71
72
73
74
75
76
77
78
79
  ###
  l <- 1        ###6. opt=260
  pbtn1 <- matrix(1, nrow = 1, ncol = 1)
  m <- vector("numeric", length = length(nb_samples))
  layer <- list()
  for (r in nb_samples) {
    ############################################################################
    if (last == T) {
      reference <- rbind(reference, rep(3000, ncol(reference)))
    }
    n <- nb_it
    sample_size <- r
    max_samples_per_class <- sample_size * 5
80
    if (class(init.seed) == "character" && init.seed == "sample") {
81
82
83
84
85
      seed2 <- sample(c(1:1000000), size = nb_mean, replace = F)
    } else {
      seed2 <- init.seed
    }
    oobe <- matrix(NA, nrow = n, ncol = nb_mean)
86
87
    models_list <- list()
    points_list <- list()
88
89
90
    dif <- matrix(NA, nrow = nb_mean, ncol = nrow(reference))
    channel <- matrix(NA, nrow = nb_mean, ncol = nrow(reference))
    switch <- matrix(NA, nrow = nb_mean, ncol = nrow(reference))
91
92
93
94
95
    if (progress_bar == TRUE) {
        pb <- utils::txtProgressBar(min = 1,
                                    max = nb_mean,
                                    style = 3)
    }
96
97
98

    if (parallel_mode == TRUE) {
      cores = parallel::detectCores( logical = TRUE)
99
100
101
102
103
104
      if (cores > max_num_cores) {
        cores <- max_num_cores
      } else {
        cores <- cores - 1
      }

105
106
107
108
109
110
111
112
113
114
115
116
117
      res <- parallel::mclapply(
        1:nb_mean,
        model_opt_r,
        raster = raster,
        sample_type = sample_type,
        buffer = buffer,
        model = model,
        seed = seed,
        n = n,
        sample_size = sample_size,
        n_channel = n_channel,
        seed2 = seed2,
        mtry = mtry,
Carsten Neumann's avatar
Carsten Neumann committed
118
        mod.error = mod.error,
119
        pbtn1 = pbtn1,
Carsten Neumann's avatar
Carsten Neumann committed
120
        rast = rast,
121
        max_samples_per_class = max_samples_per_class,
122
123
124
        mc.cores = cores,
        mc.preschedule = TRUE,
        mc.cleanup = TRUE
125
126
      )
      for (k in 1:nb_mean) {
127
128
        points_list[[k]] <- res[["k" = k]][["points"]]
        models_list[[k]] <- res[["k" = k]][["models"]]
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
        oobe[, k] <- res[["k" = k]][["oobe"]][, 1]
      }
    } else {
      for (k in 1:nb_mean) {
        res <- model_opt_r(
          raster = raster,
          sample_type = sample_type,
          buffer = buffer,
          model = model,
          seed = seed,
          k = k,
          n = n,
          sample_size = sample_size,
          n_channel = n_channel,
          seed2 = seed2,
          mtry = mtry,
Carsten Neumann's avatar
Carsten Neumann committed
145
          mod.error = mod.error,
146
          pbtn1 = pbtn1,
Carsten Neumann's avatar
Carsten Neumann committed
147
          rast = rast,
148
149
          max_samples_per_class = max_samples_per_class
        )
150
151
        points_list[[k]] <- res$points
        models_list[[k]] <- res$models
152
        oobe[, k] <- res$oobe[, 1]
153
154
155
156
157
158
159
160
        if (progress_bar) {
            setTxtProgressBar(pb, k)
        } else {
            shinybusy::update_modal_progress(
              value = k / (nb_mean + nrow(reference)),
              text = paste("Doing sampling for model", k)
            )
        }
161
162
163
      }
    }

164
    #which_models_null <- which(models == "NULL")
165
166
167
    which_models_null <- vapply(models_list, is.not.null <- function(x){!is.null(x)}, FALSE)
    if (length(models_list) == 0 |
        length(which_models_null[which_models_null == FALSE]) == length(models_list)) {
168
169
      remove(points_list)
      remove(models_list)
170
171
172
173
174
175
176
177
      out <- list(
        returns = 1,
        index = NULL,
        num_models = 0,
        acc = NULL,
        obj = NULL
      )
      return(out)
178
    }
179
    if (length(which_models_null[which_models_null == FALSE]) > 0) {
180
      models <- models_list[which_models_null]
181
182
    } else {
      models <- models_list
183
    }
184
    remove(models_list)
185

186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
    for (jj in 1:nrow(reference)) {
      ref <- jj
      rr = 3
      for (i in 1:length(models)) {
        if (i == 1) {
          dummy <- as.numeric(
            as.character(stats::predict(models[[i]], newdata = reference)))
          if (dummy[ref] != 2) {
            dummy[dummy == 1] <- 3
            dummy[dummy == 2] <- 1
            dummy[dummy == 3] <- 2
            switch[i, jj] <- i
          }
        } else {
          dummy2 <- as.numeric(
            as.character(stats::predict(models[[i]], newdata = reference)))

          if (dummy2[ref] != 2) {
            dummy2[dummy2 == 1] <- 3
            dummy2[dummy2 == 2] <- 1
            dummy2[dummy2 == 3] <- 2
            switch[i, jj] <- i
          }

          dummy_set <- dummy
          dummy <- dummy + dummy2

          dummy3 <- dummy / dummy[ref]
          dif[i, jj] <- dummy3[ref] - max(dummy3[-ref])

          if (i > 2) {
            if (dummy3[ref] - max(dummy3[-ref]) > dif[(rr - 1), jj]) {
              dummy <- dummy
              channel[i, jj] <- i
              dif[(rr - 1), jj] <- dif[i, jj]
            } else {
              dummy <- dummy_set
            }
          }
        }
      }
      m[l] <- max(dif[2,], na.rm = T)
228
229
230
231
232
233
      if (!progress_bar) {
          shinybusy::update_modal_progress(
            value = (nb_mean+jj) / (nb_mean + nrow(reference)),
            text = paste("Doing prediction for class", jj)
          )
      }
234
235
236
237
    }
    index <- which.max(dif[2,])
    ch <- as.numeric(na.omit(channel[, index]))
    if (length(ch) == 0) {
238
239
240
241
242
243
      out <- list(
        returns = 2,
        index = NULL,
        num_models = 0,
        acc = NULL,
        obj = NULL
244
      )
245
      return(out)
246
247
248
    }
    acc <- (round(m[l] ^ 2, 2) / 0.25)

249
    cat("\n")
250
251
252
253
    print(paste("class=", index, "  difference=", (round(m[l] ^ 2, 2) / 0.25),
                sep = ""))
    l <- l + 1
  }
254
255
256
  if (progress_bar == TRUE) {
    close(pb)
  }
257

258
259
  if (save_runs == TRUE) {
    mod_all <- models
260
261
  } else {
    mod_all = list()
262
  }
263
  models <- models[ch]
264
265
266
  num_models <- length(models)
  print(paste("n_models =", num_models))
  flush(stdout())
267
  switch <- switch[ch, index]
268
  points <- points_list[ch]
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
  dif <- dif[2,]
  ##############################################################################
  ###Vohersage
  ch <- c(1:length(models))
  switch <- switch

  j <- 1
  for (i in ch) {
    if (j == 1) {
      result1 <- raster::predict(object = raster, model = models[[i]])

      if (is.na(switch[i]) == F) {
        result1 <-
          raster::reclassify(result1, rbind(c(0.5, 1.5, 2), c(1.6, 2.5, 1)))
      }
    } else {
      result1 <-
        raster::stack( result1, raster::predict(object = raster, model = models[[i]]))

      if (is.na(switch[i]) == F) {
        result1[[j]] <-
          raster::reclassify(result1[[j]], rbind(c(0.5, 1.5, 2), c(1.6, 2.5, 1)))
      }
    }
293
    #print(j)
294
295
296
297
298
299
300
    j <- j + 1
  }
  ###
  dummy <- raster::brick(result1)
  dummy <- raster::calc(dummy, fun = sum)
  layer[[1]] <- dummy

301
302
303
304
305
306
307
308
309
310
311
312
313
314
  if (save_runs == TRUE) {
    obj <- new(
      "Habitat",
      models = models,
      ref_samples = points,
      switch = switch,
      layer = layer,
      mod_all = mod_all,
      class_ind = dif,
      seeds = seed2
    )
  } else {
    obj <- new(
      "Habitat",
315
      models = list(),
316
317
      ref_samples = points,
      switch = switch,
318
      layer = layer,
319
320
321
      mod_all = list(),
      class_ind = 0,
      seeds = 0
322
323
    )
  }
324
325
326
327
328
  remove(models)
  remove(points)
  remove(switch)
  remove(layer)
  remove(mod_all)
329
  remove(dif)
330
  remove(seed2)
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
331
  gc(full = TRUE)
332

333
334
335
336
337
338
339
  out <- list(
    returns = 0,
    index = index,
    num_models = num_models,
    acc = acc,
    obj = obj
  )
340
  return(out)
carstennh's avatar
carstennh committed
341
}