model_opt.r 7.25 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#' Perform Habitat Sampling and Probability Mapping
#'
#' This function finds the best model (mmax) for a set of sampled points.
#'
#' @param k Iteration value for the models.
#' @param raster satellite time series stack (rasterBrickObject) or just any type of image (*rasterObject)
#' @param sample_type distribution of spatial locations c("random","regular")
#' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size)
#' @param model which machine learning classifier to use c("rf", "svm") for random forest or support vector machine implementation
#' @param seed set seed for reproducible results
#' @param n number of iterations for model accuracy
#' @param sample_size number of spatial locations
#' @param n_channel number of channels
#' @param seed2 spatial points sample
#' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors)
16
#' @param mod.error threshold for model error until which iteration is being executed
17
#' @param pbtn1 matrix for points
18
#' @param rast raster
19
20
21
22
23
#' @param max_samples_per_class maximum number of samples per class
#' 
#' @return a list with 4 elements:
#' 1) k To identify the used model \cr
#' 2) model The model - mmax \cr
24
#' 3) points List of points used part of the sample. \cr
25
26
27
28
29
30
31
32
33
34
35
36
37
#' 4) oobe The accuracy achieved by the model \cr
#' @keywords internal
model_opt_r <- function(k,
                        raster,
                        sample_type,
                        buffer,
                        model,
                        seed,
                        n,
                        sample_size,
                        n_channel,
                        seed2,
                        mtry,
Carsten Neumann's avatar
Carsten Neumann committed
38
                        mod.error,
39
                        pbtn1,
Carsten Neumann's avatar
Carsten Neumann committed
40
                        rast,
41
                        max_samples_per_class) {
42
  points <- NULL
43
44
  models <- NULL
  oobe <- matrix(NA, nrow = n, ncol = 1)
45
  rast <- raster::mask(rast, raster::calc(rast, fun = sum))
46
47
48
49
50
51
52
  for (j in 1:n) {
    ###Vorbereitung Klassifizierung
    if (j == 1) {
      classes <- as.factor(c(1, 1))
      if (sample_type == "random") {
        set.seed(seed2[k])
        pbt <-
53
          raster::sampleRandom(rast, size = sample_size, sp = T)
54
55
      }
      if (sample_type == "regular") {
56
        pbt <- raster::sampleRegular(rast, size = sample_size, sp = T)
57
      }
58
59
60
61
     #f <- which(is.na(pbt@data[1]))
     #if (length(f) != 0) {
     #  pbt <- pbt[-f,]
     #}
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
      set.seed(seed2[k])
      classes <-
        as.factor(sample(c(1:2), size = nrow(pbt), replace = T))
      if (length(levels(classes)) < 2) {
        break
      }
      data <- as.data.frame(cbind(classes, pbt@data))
    }
    ########################################################################
    if (model == "rf") {
      model1 <-
        randomForest::randomForest(as.factor(classes) ~ .,
                                   data = data,
                                   mtry = mtry)
      if (is.na(mean(model1$err.rate[, 1])) == TRUE) {
        break
      }
      oobe[j, 1] <- mean(model1$err.rate[, 1])
    }
    ###
    if (model == "svm") {
      model1 <- e1071::svm(as.factor(classes) ~ ., data = data)
      co <-
        length(which(
          as.numeric(as.character(model1$fitted)) - as.numeric(as.character(classes)) == 0
        ))
      if (co == 0) {
        break
      }
      oobe[j, 1] <- 1 - (co / length(classes))
    }
    
    model_pre <- model1
    pbtn1_pre <- pbtn1
    #if ( j > 1) {if (oobe[j,k] < 0.02 || abs(oobe[(j-1),k]-oobe[j,k]) <= 0.011 )
    if (j > 1) {
Carsten Neumann's avatar
Carsten Neumann committed
98
      if (oobe[j, 1] < mod.error) {
99
        models <- model1
Carsten Neumann's avatar
Carsten Neumann committed
100
        points <- pbtn1
101
102
103
104
105
        break
      }
      
      if (oobe[(j - 1), 1] <= oobe[j, 1]) {
        models <- model_pre
Carsten Neumann's avatar
Carsten Neumann committed
106
        points <- pbtn1_pre
107
108
109
        break
      }
      
Carsten Neumann's avatar
Carsten Neumann committed
110
      if (j == n & oobe[j, 1] >= mod.error) {
111
        models <- NULL
112
        points <- NULL
113
114
115
        break
      }
    }
116

117
118
119
120
121
122
123
124
125
126
127
    ########################################################################
    if (model == "rf") {
      correct <-
        which(as.numeric(as.character(classes)) - as.numeric(as.character(model1$predicted)) == 0)
    }
    if (model == "svm") {
      correct <-
        which(as.numeric(as.character(model1$fitted)) - as.numeric(as.character(classes)) == 0)
    }
    ########################################################################
    which_classes_correct <- which(classes[correct] == 1)
Carsten Neumann's avatar
Carsten Neumann committed
128
129
    which_classes_correct_2 <- which(classes[correct] == 2)
    if (length(which_classes_correct) == 0 || length(which_classes_correct_2) == 0) {
130
131
132
133
134
      if (j == 1) {
        break
      }
    } else {
      d1 <- correct[which_classes_correct]
Carsten Neumann's avatar
Carsten Neumann committed
135
136
      d2 <- correct[which_classes_correct_2]
              
137
      ###generate new samples from only correctly classified samples [label 1]
Carsten Neumann's avatar
Carsten Neumann committed
138
      p1 <- pbt@coords[append(d1,d2), ]
139
      pbtn1 <-
Carsten Neumann's avatar
Carsten Neumann committed
140
        as.data.frame(cbind(c(classes[d1],classes[d2]), matrix(p1, ncol = 2)))
141
      sp::coordinates(pbtn1) <- c("V2", "V3")
142
      #sp::proj4string(pbtn1) <- sp::proj4string(pbt)
Carsten Neumann's avatar
Carsten Neumann committed
143
      raster::crs(pbtn1) <- raster::crs(pbt)
144
145
146
147
      
      poly <- rgeos::gBuffer(spgeom = pbtn1,
                             width = buffer,
                             byid = TRUE)
Carsten Neumann's avatar
Carsten Neumann committed
148
149
150
151
152
153
                             
     test1<-na.omit(raster::as.matrix(fasterize::fasterize(sf::st_as_sf(poly), rast[[1]])*rast))
     nam<-as.vector(fasterize::fasterize(sf::st_as_sf(poly), rast[[1]], field="V1"))[-attr(test1,"na.action")]
        
      co<-raster::xyFromCell(rast, c(1:raster::ncell(rast))[-attr(test1,"na.action")])
      pbtn1 <- as.data.frame(cbind(nam, co))
154
      sp::coordinates(pbtn1) <- c("x", "y")
Carsten Neumann's avatar
Carsten Neumann committed
155
156
                 
     if (ncol(test1) == 1) {
157
158
        test1 <- t(test1)
      }
159
      colnames(test1) <- names(rast)
Carsten Neumann's avatar
Carsten Neumann committed
160
          }
161
162
163
    if (class(test1)[1] == "numeric") {
      test1 <- t(matrix(test1))
    }
Carsten Neumann's avatar
Carsten Neumann committed
164
    if (length(levels(as.factor(nam))) < 2) {
165
166
167
      break
    }
    ######################################
Carsten Neumann's avatar
Carsten Neumann committed
168
169
170
    ###balancing sample size
    di <- c(sum(pbtn1@data$nam==1), sum(pbtn1@data$nam==2))
    if (abs(di[1] - di[2]) > min(di) * 0.3) {
171
172
      if (which.min(di) == 2) {
        set.seed(seed)
Carsten Neumann's avatar
Carsten Neumann committed
173
174
175
        d3 <- sample(which(pbtn1@data$nam==1), di[1]-di[2], replace = F)
        pbtn1 <- pbtn1[-d3, ]
        test1 <- test1[-d3, ]
176
177
      } else {
        set.seed(seed)
Carsten Neumann's avatar
Carsten Neumann committed
178
179
180
        d4 <- sample(which(pbtn1@data$nam==2), di[2]-di[1], replace = F)
        pbtn1 <- pbtn1[-d4, ]
        test1 <- test1[-d4, ]
181
182
183
      }
    }
    #####################################
Carsten Neumann's avatar
Carsten Neumann committed
184
185
    ###maximum sample size
    if (sum(pbtn1@data$nam==1) > max_samples_per_class) {
186
187
      set.seed(seed)
      dr <-
Carsten Neumann's avatar
Carsten Neumann committed
188
189
190
        sample(which(pbtn1@data$nam==1), sum(pbtn1@data$nam==1) - max_samples_per_class, replace = F)
      pbtn1 <- pbtn1[-dr, ]
      test1 <- test1[-dr, ]
191
    }
Carsten Neumann's avatar
Carsten Neumann committed
192
    if (sum(pbtn1@data$nam==2) > max_samples_per_class) {
193
194
      set.seed(seed)
      dr <-
Carsten Neumann's avatar
Carsten Neumann committed
195
196
197
        sample(which(pbtn1@data$nam==2), sum(pbtn1@data$nam==2) - max_samples_per_class, replace = F)
      pbtn1 <- pbtn1[-dr, ]
      test1 <- test1[-dr, ]
198
199
200
    }
    ########################################################################
    data <-
Carsten Neumann's avatar
Carsten Neumann committed
201
      as.data.frame(cbind(pbtn1@data$nam, test1)) ##data
202
203
    names(data)[1] <- "classes"
    classes <- data$classes
Carsten Neumann's avatar
Carsten Neumann committed
204
    pbt <- pbtn1
205
  }
206
207
  remove(pbt)

208
209
210
  return(list(
    "k" = k,
    "models" = models,
211
    "points" = points,
Romulo Pereira Goncalves's avatar
Romulo Pereira Goncalves committed
212
    "oobe" = oobe
213
214
  ))
}