model_opt.r 7.35 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#' Perform Habitat Sampling and Probability Mapping
#'
#' This function finds the best model (mmax) for a set of sampled points.
#'
#' @param k Iteration value for the models.
#' @param raster satellite time series stack (rasterBrickObject) or just any type of image (*rasterObject)
#' @param sample_type distribution of spatial locations c("random","regular")
#' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size)
#' @param model which machine learning classifier to use c("rf", "svm") for random forest or support vector machine implementation
#' @param seed set seed for reproducible results
#' @param n number of iterations for model accuracy
#' @param sample_size number of spatial locations
#' @param n_channel number of channels
#' @param seed2 spatial points sample
#' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors)
16
#' @param mod.error threshold for model error until which iteration is being executed
17
18
19
20
21
22
#' @param pbtn1 matrix for points
#' @param max_samples_per_class maximum number of samples per class
#' 
#' @return a list with 4 elements:
#' 1) k To identify the used model \cr
#' 2) model The model - mmax \cr
23
#' 3) points List of points used part of the sample. \cr
24
25
26
27
28
29
30
31
32
33
34
35
36
#' 4) oobe The accuracy achieved by the model \cr
#' @keywords internal
model_opt_r <- function(k,
                        raster,
                        sample_type,
                        buffer,
                        model,
                        seed,
                        n,
                        sample_size,
                        n_channel,
                        seed2,
                        mtry,
Carsten Neumann's avatar
Carsten Neumann committed
37
                        mod.error,
38
39
                        pbtn1,
                        max_samples_per_class) {
40
  points <- NULL
41
42
  models <- NULL
  oobe <- matrix(NA, nrow = n, ncol = 1)
43
  rast <- raster::mask(raster, raster::calc(raster, fun = sum))
44
45
46
47
48
49
50
  for (j in 1:n) {
    ###Vorbereitung Klassifizierung
    if (j == 1) {
      classes <- as.factor(c(1, 1))
      if (sample_type == "random") {
        set.seed(seed2[k])
        pbt <-
51
          raster::sampleRandom(rast, size = sample_size, sp = T)
52
53
      }
      if (sample_type == "regular") {
54
        pbt <- raster::sampleRegular(rast, size = sample_size, sp = T)
55
      }
56
57
58
59
     #f <- which(is.na(pbt@data[1]))
     #if (length(f) != 0) {
     #  pbt <- pbt[-f,]
     #}
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
      set.seed(seed2[k])
      classes <-
        as.factor(sample(c(1:2), size = nrow(pbt), replace = T))
      if (length(levels(classes)) < 2) {
        break
      }
      data <- as.data.frame(cbind(classes, pbt@data))
    }
    ########################################################################
    if (model == "rf") {
      model1 <-
        randomForest::randomForest(as.factor(classes) ~ .,
                                   data = data,
                                   mtry = mtry)
      if (is.na(mean(model1$err.rate[, 1])) == TRUE) {
        break
      }
      oobe[j, 1] <- mean(model1$err.rate[, 1])
    }
    ###
    if (model == "svm") {
      model1 <- e1071::svm(as.factor(classes) ~ ., data = data)
      co <-
        length(which(
          as.numeric(as.character(model1$fitted)) - as.numeric(as.character(classes)) == 0
        ))
      if (co == 0) {
        break
      }
      oobe[j, 1] <- 1 - (co / length(classes))
    }
    
    model_pre <- model1
    pbtn1_pre <- pbtn1
    #if ( j > 1) {if (oobe[j,k] < 0.02 || abs(oobe[(j-1),k]-oobe[j,k]) <= 0.011 )
    if (j > 1) {
Carsten Neumann's avatar
Carsten Neumann committed
96
      if (oobe[j, 1] < mod.error) {
97
        models <- model1
Carsten Neumann's avatar
Carsten Neumann committed
98
        points <- pbtn1
99
100
101
102
103
        break
      }
      
      if (oobe[(j - 1), 1] <= oobe[j, 1]) {
        models <- model_pre
Carsten Neumann's avatar
Carsten Neumann committed
104
        points <- pbtn1_pre
105
106
107
        break
      }
      
Carsten Neumann's avatar
Carsten Neumann committed
108
      if (j == n & oobe[j, 1] >= mod.error) {
109
        models <- NULL
110
        points <- NULL
111
112
113
        break
      }
    }
114

115
116
117
118
119
120
121
122
123
124
125
    ########################################################################
    if (model == "rf") {
      correct <-
        which(as.numeric(as.character(classes)) - as.numeric(as.character(model1$predicted)) == 0)
    }
    if (model == "svm") {
      correct <-
        which(as.numeric(as.character(model1$fitted)) - as.numeric(as.character(classes)) == 0)
    }
    ########################################################################
    which_classes_correct <- which(classes[correct] == 1)
Carsten Neumann's avatar
Carsten Neumann committed
126
127
    which_classes_correct_2 <- which(classes[correct] == 2)
    if (length(which_classes_correct) == 0 || length(which_classes_correct_2) == 0) {
128
129
130
131
132
      if (j == 1) {
        break
      }
    } else {
      d1 <- correct[which_classes_correct]
Carsten Neumann's avatar
Carsten Neumann committed
133
134
      d2 <- correct[which_classes_correct_2]
              
135
      ###generate new samples from only correctly classified samples [label 1]
Carsten Neumann's avatar
Carsten Neumann committed
136
      p1 <- pbt@coords[append(d1,d2), ]
137
      pbtn1 <-
Carsten Neumann's avatar
Carsten Neumann committed
138
        as.data.frame(cbind(c(classes[d1],classes[d2]), matrix(p1, ncol = 2)))
139
      sp::coordinates(pbtn1) <- c("V2", "V3")
140
      #sp::proj4string(pbtn1) <- sp::proj4string(pbt)
Carsten Neumann's avatar
Carsten Neumann committed
141
      raster::crs(pbtn1) <- raster::crs(pbt)
142
143
144
145
      
      poly <- rgeos::gBuffer(spgeom = pbtn1,
                             width = buffer,
                             byid = TRUE)
Carsten Neumann's avatar
Carsten Neumann committed
146
                             
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
147
148
149
150
151
152
      test1 <-
        na.omit(raster::as.matrix(fasterize::fasterize(sf::st_as_sf(poly), rast[[1]]) *
                                    rast))
      nam <-
        as.vector(fasterize::fasterize(sf::st_as_sf(poly), rast[[1]], field = "V1"))[-attr(test1, "na.action")]
      
153
      co <- raster::xyFromCell(rast, c(1:raster::ncell(rast))[-attr(test1,"na.action")])
Carsten Neumann's avatar
Carsten Neumann committed
154
      pbtn1 <- as.data.frame(cbind(nam, co))
155
      sp::coordinates(pbtn1) <- c("x", "y")
Carsten Neumann's avatar
Carsten Neumann committed
156
157
                 
     if (ncol(test1) == 1) {
158
159
        test1 <- t(test1)
      }
160
      colnames(test1) <- names(rast)
Carsten Neumann's avatar
Carsten Neumann committed
161
          }
162
163
164
    if (class(test1)[1] == "numeric") {
      test1 <- t(matrix(test1))
    }
Carsten Neumann's avatar
Carsten Neumann committed
165
    if (length(levels(as.factor(nam))) < 2) {
166
167
168
      break
    }
    ######################################
Carsten Neumann's avatar
Carsten Neumann committed
169
    ###balancing sample size
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
170
    di <- c(sum(pbtn1@data$nam == 1), sum(pbtn1@data$nam == 2))
Carsten Neumann's avatar
Carsten Neumann committed
171
    if (abs(di[1] - di[2]) > min(di) * 0.3) {
172
173
      if (which.min(di) == 2) {
        set.seed(seed)
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
174
        d3 <- sample(which(pbtn1@data$nam == 1), di[1] - di[2], replace = F)
Carsten Neumann's avatar
Carsten Neumann committed
175
176
        pbtn1 <- pbtn1[-d3, ]
        test1 <- test1[-d3, ]
177
178
      } else {
        set.seed(seed)
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
179
        d4 <- sample(which(pbtn1@data$nam == 2), di[2] - di[1], replace = F)
Carsten Neumann's avatar
Carsten Neumann committed
180
181
        pbtn1 <- pbtn1[-d4, ]
        test1 <- test1[-d4, ]
182
183
184
      }
    }
    #####################################
Carsten Neumann's avatar
Carsten Neumann committed
185
    ###maximum sample size
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
186
    if (sum(pbtn1@data$nam == 1) > max_samples_per_class) {
187
188
      set.seed(seed)
      dr <-
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
189
190
191
        sample(which(pbtn1@data$nam == 1),
               sum(pbtn1@data$nam == 1) - max_samples_per_class,
               replace = F)
Carsten Neumann's avatar
Carsten Neumann committed
192
193
      pbtn1 <- pbtn1[-dr, ]
      test1 <- test1[-dr, ]
194
    }
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
195
    if (sum(pbtn1@data$nam == 2) > max_samples_per_class) {
196
197
      set.seed(seed)
      dr <-
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
198
199
200
        sample(which(pbtn1@data$nam == 2),
               sum(pbtn1@data$nam == 2) - max_samples_per_class,
               replace = F)
Carsten Neumann's avatar
Carsten Neumann committed
201
202
      pbtn1 <- pbtn1[-dr, ]
      test1 <- test1[-dr, ]
203
204
205
    }
    ########################################################################
    data <-
Carsten Neumann's avatar
Carsten Neumann committed
206
      as.data.frame(cbind(pbtn1@data$nam, test1)) ##data
207
208
    names(data)[1] <- "classes"
    classes <- data$classes
Carsten Neumann's avatar
Carsten Neumann committed
209
    pbt <- pbtn1
210
  }
211
212
  remove(pbt)

213
214
215
  return(list(
    "k" = k,
    "models" = models,
216
    "points" = points,
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
217
    "oobe" = oobe
218
219
  ))
}