Commit 4ea21e7c authored by Romulo Pereira Goncalves's avatar Romulo Pereira Goncalves
Browse files

Parallelize the loop that fine tunes a model for a set of sampled points.

parent 8d93fd78
#' Perform Habitat Sampling and Probability Mapping
#'
#' This function finds the best model (mmax) for a set of sampled points.
#'
#' @param k Iteration value for the models.
#' @param raster satellite time series stack (rasterBrickObject) or just any type of image (*rasterObject)
#' @param sample_type distribution of spatial locations c("random","regular")
#' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size)
#' @param model which machine learning classifier to use c("rf", "svm") for random forest or support vector machine implementation
#' @param area extent where the the classification is happening
#' @param seed set seed for reproducible results
#' @param n number of iterations for model accuracy
#' @param sample_size number of spatial locations
#' @param n_channel number of channels
#' @param seed2 spatial points sample
#' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors)
#' @param pbtn1 matrix for points
#' @param pbtn2 matrix for points
#' @param ras_vx velox raster
#' @param max_samples_per_class maximum number of samples per class
#'
#' @return a list with 4 elements:
#' 1) k To identify the used model \cr
#' 2) model The model - mmax \cr
#' 3) points_list List of points used part of the sample. \cr
#' 4) oobe The accuracy achieved by the model \cr
#' @keywords internal
model_opt_r <- function(k,
raster,
sample_type,
buffer,
model,
area,
seed,
n,
sample_size,
n_channel,
seed2,
mtry,
pbtn1,
pbtn2,
ras_vx,
max_samples_per_class) {
points_list <- NULL
models <- NULL
oobe <- matrix(NA, nrow = n, ncol = 1)
for (j in 1:n) {
###Vorbereitung Klassifizierung
if (j == 1) {
classes <- as.factor(c(1, 1))
if (sample_type == "random") {
set.seed(seed2[k])
pbt <-
raster::sampleRandom(raster, size = sample_size, sp = T)
}
if (sample_type == "regular") {
pbt <- raster::sampleRegular(raster, size = sample_size, sp = T)
}
pbt <- spatialEco::point.in.poly(pbt, area)[, 1:n_channel]
#f <- which(is.na(pbt@data[1]))
#if (length(f) != 0) {
# pbt <- pbt[-f,]
#}
pbt@data <- pbt@data[complete.cases(pbt@data[1]), ]
set.seed(seed2[k])
classes <-
as.factor(sample(c(1:2), size = nrow(pbt), replace = T))
if (length(levels(classes)) < 2) {
break
}
data <- as.data.frame(cbind(classes, pbt@data))
}
########################################################################
if (model == "rf") {
model1 <-
randomForest::randomForest(as.factor(classes) ~ .,
data = data,
mtry = mtry)
if (is.na(mean(model1$err.rate[, 1])) == TRUE) {
break
}
oobe[j, 1] <- mean(model1$err.rate[, 1])
}
###
if (model == "svm") {
model1 <- e1071::svm(as.factor(classes) ~ ., data = data)
co <-
length(which(
as.numeric(as.character(model1$fitted)) - as.numeric(as.character(classes)) == 0
))
if (co == 0) {
break
}
oobe[j, 1] <- 1 - (co / length(classes))
}
model_pre <- model1
pbtn1_pre <- pbtn1
pbtn2_pre <- pbtn2
#if ( j > 1) {if (oobe[j,k] < 0.02 || abs(oobe[(j-1),k]-oobe[j,k]) <= 0.011 )
if (j > 1) {
if (oobe[j, 1] < 0.02) {
models <- model1
points_list <- rbind(pbtn1, pbtn2)
break
}
if (oobe[(j - 1), 1] <= oobe[j, 1]) {
models <- model_pre
points_list <- rbind(pbtn1_pre, pbtn2_pre)
break
}
if (j == n & oobe[j, 1] >= 0.02) {
models <- NULL
points_list <- NULL
break
}
}
model_pre <- model1
pbtn1_pre <- pbtn1
pbtn2_pre <- pbtn2
oobe <- oobe
########################################################################
if (model == "rf") {
correct <-
which(as.numeric(as.character(classes)) - as.numeric(as.character(model1$predicted)) == 0)
}
if (model == "svm") {
correct <-
which(as.numeric(as.character(model1$fitted)) - as.numeric(as.character(classes)) == 0)
}
########################################################################
which_classes_correct <- which(classes[correct] == 1)
if (length(which_classes_correct) == 0) {
if (j == 1) {
break
} else{
pbtn1 <- pbtn1
}
} else {
d1 <- correct[which_classes_correct]
###generate new samples from only correctly classified samples [label 1]
p1 <- pbt@coords[d1, ]
pbtn1 <-
as.data.frame(cbind(classes[d1], matrix(p1, ncol = 2)))
sp::coordinates(pbtn1) <- c("V2", "V3")
sp::proj4string(pbtn1) <- sp::proj4string(pbt)
poly <- rgeos::gBuffer(spgeom = pbtn1,
width = buffer,
byid = TRUE)
test <- ras_vx$extract(sp = poly)
for (i in 1:length(test)) {
s1 <- dim(test[[i]])[1]
#if (s1 <= 5) {
# test[[i]] <-
# test[[i]]
#} else {
if (s1 > 5) {
set.seed(seed)
test[[i]] <-
test[[i]][sample(c(1:s1), 5, replace = F), ]
}
}
for (i in 1:length(test)) {
if (i == 1) {
co <- raster::xyFromCell(raster, test[[i]][, 1])
} else {
co <- rbind(co, raster::xyFromCell(raster, test[[i]][, 1]))
}
}
pbtn1 <- as.data.frame(cbind(rep(1, nrow(co)), co))
sp::coordinates(pbtn1) <- c("x", "y")
test1 <- as.matrix(do.call(rbind, test)[, -1])
if (ncol(test1) == 1) {
test1 <- t(test1)
}
colnames(test1) <- names(raster)
if (length(which(is.na(test1))) > 0) {
pbtn1 <- pbtn1[complete.cases(test1), ]
test1 <- test1[complete.cases(test1), ]
}
}
if (class(test1)[1] == "numeric") {
test1 <- t(matrix(test1))
}
if (nrow(test1) == 0) {
break
}
##############################
##############################
which_classes_correct_2 <- which(classes[correct] == 2)
if (length(which_classes_correct_2) == 0) {
if (j == 1) {
break
} else{
pbtn2 <- pbtn2
}
} else {
d2 <- correct[which_classes_correct_2]
###generate new samples from only correctly classified samples [label 2]
p2 <- pbt@coords[d2, ]
pbtn2 <-
as.data.frame(cbind(classes[d2], matrix(p2, ncol = 2)))
sp::coordinates(pbtn2) <- c("V2", "V3")
sp::proj4string(pbtn2) <- sp::proj4string(pbt)
poly <- rgeos::gBuffer(spgeom = pbtn2,
width = buffer,
byid = TRUE)
test <- ras_vx$extract(sp = poly)
for (i in 1:length(test)) {
s1 <- dim(test[[i]])[1]
#if (s1 <= 5) {
# test[[i]] <-
# test[[i]]
#} else {
if (s1 > 5) {
set.seed(seed)
test[[i]] <-
test[[i]][sample(c(1:s1), 5, replace = F), ]
}
}
for (i in 1:length(test)) {
if (i == 1) {
co <- raster::xyFromCell(raster, test[[i]][, 1])
} else {
co <- rbind(co, raster::xyFromCell(raster, test[[i]][, 1]))
}
}
pbtn2 <- as.data.frame(cbind(rep(2, nrow(co)), co))
sp::coordinates(pbtn2) <- c("x", "y")
test2 <- as.matrix(do.call(rbind, test)[, -1])
if (ncol(test2) == 1) {
test2 <- t(test2)
}
colnames(test2) <- names(raster)
if (length(which(is.na(test2))) > 0) {
pbtn2 <- pbtn2[complete.cases(test2), ]
test2 <- test2[complete.cases(test2), ]
}
}
if (class(test2)[1] == "numeric") {
test2 <- t(matrix(test2))
}
if (nrow(test2) == 0) {
break
}
######################################
###Gleichverteilung samples in Klassen
di <- c(nrow(pbtn1), nrow(pbtn2))
if (abs(nrow(pbtn1) - nrow(pbtn2)) > min(di) * 0.3) {
if (which.min(di) == 2) {
set.seed(seed)
d3 <- sample(1:nrow(pbtn1), nrow(pbtn2), replace = F)
pbtn1 <- pbtn1[d3, ]
test1 <- test1[d3, ]
} else {
set.seed(seed)
d4 <- sample(1:nrow(pbtn2), nrow(pbtn1), replace = F)
pbtn2 <- pbtn2[d4, ]
test2 <- test2[d4, ]
}
}
#####################################
###max Klassenbelegungswert
if (nrow(pbtn1) > max_samples_per_class) {
set.seed(seed)
dr <-
sample(1:nrow(pbtn1), max_samples_per_class, replace = F)
pbtn1 <- pbtn1[dr, ]
test1 <- test1[dr, ]
}
if (nrow(pbtn2) > max_samples_per_class) {
set.seed(seed)
dr <-
sample(1:nrow(pbtn2), max_samples_per_class, replace = F)
pbtn2 <- pbtn2[dr, ]
test2 <- test2[dr, ]
}
########################################################################
data <-
as.data.frame(cbind(append(pbtn1@data$V1, pbtn2@data$V1),
rbind(test1, test2))) ##data
names(data)[1] <- "classes"
classes <- data$classes
pbt <- rbind(pbtn1, pbtn2)
}
return(list(
"k" = k,
"models" = models,
"points_list" = points_list,
"oobe" = oobe,
))
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment