inner_procedure.r 9.43 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#' Perform Habitat Sampling and Probability Mapping
#'
#'This is the function that performs: initiate sampling and model building
#'
#' @param raster satellite time series stack (rasterBrickObject) or just any type of image (*rasterObject)
#' @param nb_samples number of spatial locations
#' @param sample_type distribution of spatial locations c("random","regular")
#' @param nb_models number of models (independent classifiers) to collect
#' @param nb_mean number of iterations for model accuracy
#' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size)
#' @param reference reference spectra either SpatialPointsDataFrame (shape file) or data.frame with lines = classes, column = predictors]
#' @param model which machine learning classifier to use c("rf", "svm") for random forest or suppurt vector machine implementation
#' @param area extent where the the classification is happening
#' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors)
#' @param last only true for one class classifier c("FALSE", TRUE")
#' @param seed set seed for reproducable results
#' @param init.seed "sample" for new or use run1@seeds to reproduce previous steps
18
#' @param save_runs if the user wants to save the runs, if TRUE the complete Habitat Class object is returned
19
20
#' @param parallel_mode run loops in parallel
#' @param max_num_cores maximum number of cores for parallelism
21
22
23
24
25
#'
#' @return a list with 3 elements:
#' 1) An index
#' 2) Accuracy vector
#' 3) A vector with a Habitat objects, each consisting of 7 slots: \cr
26
27
28
#' run1@models - list of selected classifiers (only if save_runs is TRUE) \cr
#' run1@ref_samples - list of SpatialPointsDataFrames with same length as run1@models holding reference labels [1,2] for each selected model (only if save_runs is TRUE) \cr
#' run1@switch - vector of length run1@models indicating if target class equals 2, if not NA the labels need to be switched (only if save_runs is TRUE) \cr
29
#' run1@layer - raster map of habitat type probability \cr
30
31
32
#' run1@mod_all - list of all classifiers (equals nb_models) (only if save_runs is TRUE) \cr
#' run1@class_ind - vector of predictive distance measure for all habitats (only if save_runs is TRUE) \cr
#' run1@seeds - vector of seeds for random sampling (only if save_runs is TRUE) \cr
33
34
#' all files are saved with step number, the *.tif file is additionally saved with class names
#' @keywords internal
35
36
37


###################################################################################
38
39
40
41
42
43
44
45
46
47
48
49
sample_nb <- function(raster,
                      nb_samples,
                      sample_type,
                      nb_mean,
                      nb_it,
                      buffer,
                      reference,
                      model,
                      area,
                      mtry,
                      last,
                      seed,
50
                      init.seed,
51
                      save_runs,
52
53
                      parallel_mode,
                      max_num_cores) {
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
  ###
  n_channel <- length(names(raster))
  ###velox
  rID = raster[[1]]
  rID[] = 1:(nrow(rID) * ncol(rID))
  r = raster::stack(rID, raster)
  ras.vx <- velox::velox(r)
  ###
  l <- 1        ###6. opt=260
  pbtn1 <- matrix(1, nrow = 1, ncol = 1)
  pbtn2 <- matrix(2, nrow = 1, ncol = 1)
  m <- vector("numeric", length = length(nb_samples))
  layer <- list()
  for (r in nb_samples) {
    ############################################################################
    if (last == T) {
      reference <- rbind(reference, rep(3000, ncol(reference)))
    }
    n <- nb_it
    sample_size <- r
    max_samples_per_class <- sample_size * 5
    if (init.seed == "sample") {
      seed2 <- sample(c(1:1000000), size = nb_mean, replace = F)
    } else {
      seed2 <- init.seed
    }
    oobe <- matrix(NA, nrow = n, ncol = nb_mean)
81
82
    models_list <- list()
    points_list <- list()
83
84
85
    dif <- matrix(NA, nrow = nb_mean, ncol = nrow(reference))
    channel <- matrix(NA, nrow = nb_mean, ncol = nrow(reference))
    switch <- matrix(NA, nrow = nb_mean, ncol = nrow(reference))
86
87
88
89
90
91
    pb <- utils::txtProgressBar(min = 1,
                                max = nb_mean,
                                style = 3)

    if (parallel_mode == TRUE) {
      cores = parallel::detectCores( logical = TRUE)
92
93
94
95
96
97
      if (cores > max_num_cores) {
        cores <- max_num_cores
      } else {
        cores <- cores - 1
      }

98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
      res <- parallel::mclapply(
        1:nb_mean,
        model_opt_r,
        raster = raster,
        sample_type = sample_type,
        buffer = buffer,
        model = model,
        area = area,
        seed = seed,
        n = n,
        sample_size = sample_size,
        n_channel = n_channel,
        seed2 = seed2,
        mtry = mtry,
        pbtn1 = pbtn1,
        pbtn2 = pbtn2,
        ras_vx = ras.vx,
        max_samples_per_class = max_samples_per_class,
        mc.cores = cores
      )
      for (k in 1:nb_mean) {
119
120
        points_list[[k]] <- res[["k" = k]][["points"]]
        models_list[[k]] <- res[["k" = k]][["models"]]
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
        oobe[, k] <- res[["k" = k]][["oobe"]][, 1]
      }
    } else {
      for (k in 1:nb_mean) {
        res <- model_opt_r(
          raster = raster,
          sample_type = sample_type,
          buffer = buffer,
          model = model,
          area = area,
          seed = seed,
          k = k,
          n = n,
          sample_size = sample_size,
          n_channel = n_channel,
          seed2 = seed2,
          mtry = mtry,
          pbtn1 = pbtn1,
          pbtn2 = pbtn2,
          ras_vx = ras.vx,
          max_samples_per_class = max_samples_per_class
        )
143
144
        points_list[[k]] <- res$points
        models_list[[k]] <- res$models
145
146
        oobe[, k] <- res$oobe[, 1]
        setTxtProgressBar(pb, k)
147
148
149
      }
    }

150
    #which_models_null <- which(models == "NULL")
151
152
153
    which_models_null <- vapply(models_list, is.not.null <- function(x){!is.null(x)}, FALSE)
    if (length(models_list) == 0 |
        length(which_models_null[which_models_null == FALSE]) == length(models_list)) {
154
155
      stop("No Models - would you be so kind to increase init.samples, please")
    }
156
    if (length(which_models_null[which_models_null == FALSE]) > 0) {
157
158
      models <- models_list[which_models_null]
      remove(models_list)
159
    }
160

161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
    for (jj in 1:nrow(reference)) {
      ref <- jj
      rr = 3
      for (i in 1:length(models)) {
        if (i == 1) {
          dummy <- as.numeric(
            as.character(stats::predict(models[[i]], newdata = reference)))
          if (dummy[ref] != 2) {
            dummy[dummy == 1] <- 3
            dummy[dummy == 2] <- 1
            dummy[dummy == 3] <- 2
            switch[i, jj] <- i
          }
        } else {
          dummy2 <- as.numeric(
            as.character(stats::predict(models[[i]], newdata = reference)))

          if (dummy2[ref] != 2) {
            dummy2[dummy2 == 1] <- 3
            dummy2[dummy2 == 2] <- 1
            dummy2[dummy2 == 3] <- 2
            switch[i, jj] <- i
          }

          dummy_set <- dummy
          dummy <- dummy + dummy2

          dummy3 <- dummy / dummy[ref]
          dif[i, jj] <- dummy3[ref] - max(dummy3[-ref])

          if (i > 2) {
            if (dummy3[ref] - max(dummy3[-ref]) > dif[(rr - 1), jj]) {
              dummy <- dummy
              channel[i, jj] <- i
              dif[(rr - 1), jj] <- dif[i, jj]
            } else {
              dummy <- dummy_set
            }
          }
        }
      }
      m[l] <- max(dif[2,], na.rm = T)
    }
    index <- which.max(dif[2,])
    ch <- as.numeric(na.omit(channel[, index]))
    if (length(ch) == 0) {
      stop(
        "No optimal classifier - would you be so kind to adjust init.samples & nb_models, please"
      )
    }
    acc <- (round(m[l] ^ 2, 2) / 0.25)

    print(paste("class=", index, "  difference=", (round(m[l] ^ 2, 2) / 0.25),
                sep = ""))
    l <- l + 1
  }
  close(pb)

219
220
221
  if (save_runs == TRUE) {
    mod_all <- models
  }
222
223
224
  models <- models[ch]
  print(paste("n_models =", length(models)))
  switch <- switch[ch, index]
225
226
  points <- points_list[ch]
  remove(points_list)
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
  dif <- dif[2,]
  ##############################################################################
  ###Vohersage
  ch <- c(1:length(models))
  switch <- switch

  j <- 1
  for (i in ch) {
    if (j == 1) {
      result1 <- raster::predict(object = raster, model = models[[i]])

      if (is.na(switch[i]) == F) {
        result1 <-
          raster::reclassify(result1, rbind(c(0.5, 1.5, 2), c(1.6, 2.5, 1)))
      }
    } else {
      result1 <-
        raster::stack( result1, raster::predict(object = raster, model = models[[i]]))

      if (is.na(switch[i]) == F) {
        result1[[j]] <-
          raster::reclassify(result1[[j]], rbind(c(0.5, 1.5, 2), c(1.6, 2.5, 1)))
      }
    }
    print(j)
    j <- j + 1
  }
  ###
  dummy <- raster::brick(result1)
  dummy <- raster::calc(dummy, fun = sum)
  layer[[1]] <- dummy

259
260
261
262
263
264
265
266
267
268
269
270
271
272
  if (save_runs == TRUE) {
    obj <- new(
      "Habitat",
      models = models,
      ref_samples = points,
      switch = switch,
      layer = layer,
      mod_all = mod_all,
      class_ind = dif,
      seeds = seed2
    )
  } else {
    obj <- new(
      "Habitat",
273
274
275
      models = list(),
      ref_samples = list(),
      switch = vector(),
276
      layer = layer,
277
278
279
      mod_all = list(),
      class_ind = 0,
      seeds = 0
280
281
    )
  }
282
283
284
285
286
287
288
289
  remove(models)
  remove(points)
  remove(switch)
  remove(layer)
  remove(mod_all)
  remove(diff)
  remove(seed2)

290
291
  out <- list(index = index, acc = acc, obj = obj)
  return(out)
carstennh's avatar
carstennh committed
292
}