inner_procedure.r 9.61 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#' Perform Habitat Sampling and Probability Mapping
#'
#'This is the function that performs: initiate sampling and model building
#'
#' @param raster satellite time series stack (rasterBrickObject) or just any type of image (*rasterObject)
#' @param nb_samples number of spatial locations
#' @param sample_type distribution of spatial locations c("random","regular")
#' @param nb_models number of models (independent classifiers) to collect
#' @param nb_mean number of iterations for model accuracy
#' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size)
#' @param reference reference spectra either SpatialPointsDataFrame (shape file) or data.frame with lines = classes, column = predictors]
#' @param model which machine learning classifier to use c("rf", "svm") for random forest or suppurt vector machine implementation
#' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors)
#' @param last only true for one class classifier c("FALSE", TRUE")
#' @param seed set seed for reproducable results
#' @param init.seed "sample" for new or use run1@seeds to reproduce previous steps
17
#' @param save_runs if the user wants to save the runs, if TRUE the complete Habitat Class object is returned
18
19
#' @param parallel_mode run loops in parallel
#' @param max_num_cores maximum number of cores for parallelism
20
21
22
23
24
#'
#' @return a list with 3 elements:
#' 1) An index
#' 2) Accuracy vector
#' 3) A vector with a Habitat objects, each consisting of 7 slots: \cr
25
26
27
#' run1@models - list of selected classifiers (only if save_runs is TRUE) \cr
#' run1@ref_samples - list of SpatialPointsDataFrames with same length as run1@models holding reference labels [1,2] for each selected model (only if save_runs is TRUE) \cr
#' run1@switch - vector of length run1@models indicating if target class equals 2, if not NA the labels need to be switched (only if save_runs is TRUE) \cr
28
#' run1@layer - raster map of habitat type probability \cr
29
30
31
#' run1@mod_all - list of all classifiers (equals nb_models) (only if save_runs is TRUE) \cr
#' run1@class_ind - vector of predictive distance measure for all habitats (only if save_runs is TRUE) \cr
#' run1@seeds - vector of seeds for random sampling (only if save_runs is TRUE) \cr
32
33
#' all files are saved with step number, the *.tif file is additionally saved with class names
#' @keywords internal
34
35
36


###################################################################################
37
38
39
40
41
42
43
44
45
sample_nb <- function(raster,
                      nb_samples,
                      sample_type,
                      nb_mean,
                      nb_it,
                      buffer,
                      reference,
                      model,
                      mtry,
Carsten Neumann's avatar
Carsten Neumann committed
46
                      mod.error,
47
48
                      last,
                      seed,
49
                      init.seed,
Carsten Neumann's avatar
Carsten Neumann committed
50
                      in.memory,
51
                      save_runs,
52
53
                      parallel_mode,
                      max_num_cores) {
54
55
  ###
  n_channel <- length(names(raster))
Carsten Neumann's avatar
Carsten Neumann committed
56
57
  ###raster in MEMORY or NOT
  if (in.memory == TRUE) { rast <- raster::readAll(raster) }else { rast <-raster }
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
  ###
  l <- 1        ###6. opt=260
  pbtn1 <- matrix(1, nrow = 1, ncol = 1)
  pbtn2 <- matrix(2, nrow = 1, ncol = 1)
  m <- vector("numeric", length = length(nb_samples))
  layer <- list()
  for (r in nb_samples) {
    ############################################################################
    if (last == T) {
      reference <- rbind(reference, rep(3000, ncol(reference)))
    }
    n <- nb_it
    sample_size <- r
    max_samples_per_class <- sample_size * 5
    if (init.seed == "sample") {
      seed2 <- sample(c(1:1000000), size = nb_mean, replace = F)
    } else {
      seed2 <- init.seed
    }
    oobe <- matrix(NA, nrow = n, ncol = nb_mean)
78
79
    models_list <- list()
    points_list <- list()
80
81
82
    dif <- matrix(NA, nrow = nb_mean, ncol = nrow(reference))
    channel <- matrix(NA, nrow = nb_mean, ncol = nrow(reference))
    switch <- matrix(NA, nrow = nb_mean, ncol = nrow(reference))
83
84
85
86
87
88
    pb <- utils::txtProgressBar(min = 1,
                                max = nb_mean,
                                style = 3)

    if (parallel_mode == TRUE) {
      cores = parallel::detectCores( logical = TRUE)
89
90
91
92
93
94
      if (cores > max_num_cores) {
        cores <- max_num_cores
      } else {
        cores <- cores - 1
      }

95
96
97
98
99
100
101
102
103
104
105
106
107
      res <- parallel::mclapply(
        1:nb_mean,
        model_opt_r,
        raster = raster,
        sample_type = sample_type,
        buffer = buffer,
        model = model,
        seed = seed,
        n = n,
        sample_size = sample_size,
        n_channel = n_channel,
        seed2 = seed2,
        mtry = mtry,
Carsten Neumann's avatar
Carsten Neumann committed
108
        mod.error = mod.error,
109
110
        pbtn1 = pbtn1,
        pbtn2 = pbtn2,
Carsten Neumann's avatar
Carsten Neumann committed
111
        rast = rast,
112
        max_samples_per_class = max_samples_per_class,
113
114
115
        mc.cores = cores,
        mc.preschedule = TRUE,
        mc.cleanup = TRUE
116
117
      )
      for (k in 1:nb_mean) {
118
119
        points_list[[k]] <- res[["k" = k]][["points"]]
        models_list[[k]] <- res[["k" = k]][["models"]]
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
        oobe[, k] <- res[["k" = k]][["oobe"]][, 1]
      }
    } else {
      for (k in 1:nb_mean) {
        res <- model_opt_r(
          raster = raster,
          sample_type = sample_type,
          buffer = buffer,
          model = model,
          seed = seed,
          k = k,
          n = n,
          sample_size = sample_size,
          n_channel = n_channel,
          seed2 = seed2,
          mtry = mtry,
Carsten Neumann's avatar
Carsten Neumann committed
136
          mod.error = mod.error,
137
138
          pbtn1 = pbtn1,
          pbtn2 = pbtn2,
Carsten Neumann's avatar
Carsten Neumann committed
139
          rast = rast,
140
141
          max_samples_per_class = max_samples_per_class
        )
142
143
        points_list[[k]] <- res$points
        models_list[[k]] <- res$models
144
145
        oobe[, k] <- res$oobe[, 1]
        setTxtProgressBar(pb, k)
146
147
148
      }
    }

149
    #which_models_null <- which(models == "NULL")
150
151
152
    which_models_null <- vapply(models_list, is.not.null <- function(x){!is.null(x)}, FALSE)
    if (length(models_list) == 0 |
        length(which_models_null[which_models_null == FALSE]) == length(models_list)) {
153
154
155
      remove(points_list)
      remove(models_list)
      remove(ooe)
156
157
      stop("No Models - would you be so kind to increase init.samples, please")
    }
158
    if (length(which_models_null[which_models_null == FALSE]) > 0) {
159
      models <- models_list[which_models_null]
160
161
    } else {
      models <- models_list
162
    }
163
    remove(models_list)
164

165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
    for (jj in 1:nrow(reference)) {
      ref <- jj
      rr = 3
      for (i in 1:length(models)) {
        if (i == 1) {
          dummy <- as.numeric(
            as.character(stats::predict(models[[i]], newdata = reference)))
          if (dummy[ref] != 2) {
            dummy[dummy == 1] <- 3
            dummy[dummy == 2] <- 1
            dummy[dummy == 3] <- 2
            switch[i, jj] <- i
          }
        } else {
          dummy2 <- as.numeric(
            as.character(stats::predict(models[[i]], newdata = reference)))

          if (dummy2[ref] != 2) {
            dummy2[dummy2 == 1] <- 3
            dummy2[dummy2 == 2] <- 1
            dummy2[dummy2 == 3] <- 2
            switch[i, jj] <- i
          }

          dummy_set <- dummy
          dummy <- dummy + dummy2

          dummy3 <- dummy / dummy[ref]
          dif[i, jj] <- dummy3[ref] - max(dummy3[-ref])

          if (i > 2) {
            if (dummy3[ref] - max(dummy3[-ref]) > dif[(rr - 1), jj]) {
              dummy <- dummy
              channel[i, jj] <- i
              dif[(rr - 1), jj] <- dif[i, jj]
            } else {
              dummy <- dummy_set
            }
          }
        }
      }
      m[l] <- max(dif[2,], na.rm = T)
    }
    index <- which.max(dif[2,])
    ch <- as.numeric(na.omit(channel[, index]))
    if (length(ch) == 0) {
      stop(
        "No optimal classifier - would you be so kind to adjust init.samples & nb_models, please"
      )
    }
    acc <- (round(m[l] ^ 2, 2) / 0.25)

    print(paste("class=", index, "  difference=", (round(m[l] ^ 2, 2) / 0.25),
                sep = ""))
    l <- l + 1
  }
  close(pb)

223
224
  if (save_runs == TRUE) {
    mod_all <- models
225
226
  } else {
    mod_all = list()
227
  }
228
229
230
  models <- models[ch]
  print(paste("n_models =", length(models)))
  switch <- switch[ch, index]
231
232
  points <- points_list[ch]
  remove(points_list)
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
  dif <- dif[2,]
  ##############################################################################
  ###Vohersage
  ch <- c(1:length(models))
  switch <- switch

  j <- 1
  for (i in ch) {
    if (j == 1) {
      result1 <- raster::predict(object = raster, model = models[[i]])

      if (is.na(switch[i]) == F) {
        result1 <-
          raster::reclassify(result1, rbind(c(0.5, 1.5, 2), c(1.6, 2.5, 1)))
      }
    } else {
      result1 <-
        raster::stack( result1, raster::predict(object = raster, model = models[[i]]))

      if (is.na(switch[i]) == F) {
        result1[[j]] <-
          raster::reclassify(result1[[j]], rbind(c(0.5, 1.5, 2), c(1.6, 2.5, 1)))
      }
    }
    print(j)
    j <- j + 1
  }
  ###
  dummy <- raster::brick(result1)
  dummy <- raster::calc(dummy, fun = sum)
  layer[[1]] <- dummy

265
266
267
268
269
270
271
272
273
274
275
276
277
278
  if (save_runs == TRUE) {
    obj <- new(
      "Habitat",
      models = models,
      ref_samples = points,
      switch = switch,
      layer = layer,
      mod_all = mod_all,
      class_ind = dif,
      seeds = seed2
    )
  } else {
    obj <- new(
      "Habitat",
279
280
281
      models = list(),
      ref_samples = list(),
      switch = vector(),
282
      layer = layer,
283
284
285
      mod_all = list(),
      class_ind = 0,
      seeds = 0
286
287
    )
  }
288
289
290
291
292
  remove(models)
  remove(points)
  remove(switch)
  remove(layer)
  remove(mod_all)
293
  remove(dif)
294
  remove(seed2)
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
295
  gc(full = TRUE)
296

297
298
  out <- list(index = index, acc = acc, obj = obj)
  return(out)
carstennh's avatar
carstennh committed
299
}