Commit d7702099 authored by Daniela Rabe's avatar Daniela Rabe
Browse files

Merge branch 'carsten_optimization' into 'master'

Carsten optimization

See merge request !26
parents 8edae516 ba94edfd
Pipeline #26154 passed with stages
in 8 minutes and 22 seconds
...@@ -22,12 +22,11 @@ Imports: ...@@ -22,12 +22,11 @@ Imports:
randomForest, randomForest,
e1071, e1071,
devtools, devtools,
velox, fasterize,
leaflet, leaflet,
leafem, leafem,
IRdisplay, IRdisplay,
htmlwidgets htmlwidgets
Remotes:url::https://cran.r-project.org/src/contrib/Archive/velox/velox_0.2.0.tar.gz
Encoding: UTF-8 Encoding: UTF-8
LazyData: true LazyData: true
Roxygen: list(markdown = TRUE) Roxygen: list(markdown = TRUE)
......
...@@ -10,8 +10,9 @@ ...@@ -10,8 +10,9 @@
#' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size) #' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size)
#' @param reference reference spectra either SpatialPointsDataFrame (shape file) or data.frame with lines = classes, column = predictors] #' @param reference reference spectra either SpatialPointsDataFrame (shape file) or data.frame with lines = classes, column = predictors]
#' @param model which machine learning classifier to use c("rf", "svm") for random forest or suppurt vector machine implementation #' @param model which machine learning classifier to use c("rf", "svm") for random forest or suppurt vector machine implementation
#' @param area extent where the the classification is happening
#' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors) #' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors)
#' @param mod.error threshold for model error until which iteration is being executed
#' @param in.memory boolean for raster processing (memory = "TRUE", from disk = "FALSE")
#' @param last only true for one class classifier c("FALSE", TRUE") #' @param last only true for one class classifier c("FALSE", TRUE")
#' @param seed set seed for reproducable results #' @param seed set seed for reproducable results
#' @param init.seed "sample" for new or use run1@seeds to reproduce previous steps #' @param init.seed "sample" for new or use run1@seeds to reproduce previous steps
...@@ -47,11 +48,12 @@ sample_nb <- function(raster, ...@@ -47,11 +48,12 @@ sample_nb <- function(raster,
buffer, buffer,
reference, reference,
model, model,
area,
mtry, mtry,
mod.error,
last, last,
seed, seed,
init.seed, init.seed,
in.memory,
save_runs, save_runs,
parallel_mode, parallel_mode,
max_num_cores, max_num_cores,
...@@ -60,15 +62,11 @@ sample_nb <- function(raster, ...@@ -60,15 +62,11 @@ sample_nb <- function(raster,
paste("models = ", nb_mean))) paste("models = ", nb_mean)))
### ###
n_channel <- length(names(raster)) n_channel <- length(names(raster))
###velox ###raster in MEMORY or NOT
rID = raster[[1]] if (in.memory == TRUE && raster::fromDisk(raster) == TRUE) { rast <- raster::readAll(raster) }else { rast <-raster }
rID[] = 1:(nrow(rID) * ncol(rID))
r = raster::stack(rID, raster)
ras.vx <- velox::velox(r)
### ###
l <- 1 ###6. opt=260 l <- 1 ###6. opt=260
pbtn1 <- matrix(1, nrow = 1, ncol = 1) pbtn1 <- matrix(1, nrow = 1, ncol = 1)
pbtn2 <- matrix(2, nrow = 1, ncol = 1)
m <- vector("numeric", length = length(nb_samples)) m <- vector("numeric", length = length(nb_samples))
layer <- list() layer <- list()
for (r in nb_samples) { for (r in nb_samples) {
...@@ -111,16 +109,15 @@ sample_nb <- function(raster, ...@@ -111,16 +109,15 @@ sample_nb <- function(raster,
sample_type = sample_type, sample_type = sample_type,
buffer = buffer, buffer = buffer,
model = model, model = model,
area = area,
seed = seed, seed = seed,
n = n, n = n,
sample_size = sample_size, sample_size = sample_size,
n_channel = n_channel, n_channel = n_channel,
seed2 = seed2, seed2 = seed2,
mtry = mtry, mtry = mtry,
mod.error = mod.error,
pbtn1 = pbtn1, pbtn1 = pbtn1,
pbtn2 = pbtn2, rast = rast,
ras_vx = ras.vx,
max_samples_per_class = max_samples_per_class, max_samples_per_class = max_samples_per_class,
mc.cores = cores, mc.cores = cores,
mc.preschedule = TRUE, mc.preschedule = TRUE,
...@@ -138,7 +135,6 @@ sample_nb <- function(raster, ...@@ -138,7 +135,6 @@ sample_nb <- function(raster,
sample_type = sample_type, sample_type = sample_type,
buffer = buffer, buffer = buffer,
model = model, model = model,
area = area,
seed = seed, seed = seed,
k = k, k = k,
n = n, n = n,
...@@ -146,9 +142,9 @@ sample_nb <- function(raster, ...@@ -146,9 +142,9 @@ sample_nb <- function(raster,
n_channel = n_channel, n_channel = n_channel,
seed2 = seed2, seed2 = seed2,
mtry = mtry, mtry = mtry,
mod.error = mod.error,
pbtn1 = pbtn1, pbtn1 = pbtn1,
pbtn2 = pbtn2, rast = rast,
ras_vx = ras.vx,
max_samples_per_class = max_samples_per_class max_samples_per_class = max_samples_per_class
) )
points_list[[k]] <- res$points points_list[[k]] <- res$points
......
...@@ -7,16 +7,15 @@ ...@@ -7,16 +7,15 @@
#' @param sample_type distribution of spatial locations c("random","regular") #' @param sample_type distribution of spatial locations c("random","regular")
#' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size) #' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size)
#' @param model which machine learning classifier to use c("rf", "svm") for random forest or support vector machine implementation #' @param model which machine learning classifier to use c("rf", "svm") for random forest or support vector machine implementation
#' @param area extent where the the classification is happening
#' @param seed set seed for reproducible results #' @param seed set seed for reproducible results
#' @param n number of iterations for model accuracy #' @param n number of iterations for model accuracy
#' @param sample_size number of spatial locations #' @param sample_size number of spatial locations
#' @param n_channel number of channels #' @param n_channel number of channels
#' @param seed2 spatial points sample #' @param seed2 spatial points sample
#' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors) #' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors)
#' @param mod.error threshold for model error until which iteration is being executed
#' @param pbtn1 matrix for points #' @param pbtn1 matrix for points
#' @param pbtn2 matrix for points #' @param rast raster
#' @param ras_vx velox raster
#' @param max_samples_per_class maximum number of samples per class #' @param max_samples_per_class maximum number of samples per class
#' #'
#' @return a list with 4 elements: #' @return a list with 4 elements:
...@@ -30,16 +29,15 @@ model_opt_r <- function(k, ...@@ -30,16 +29,15 @@ model_opt_r <- function(k,
sample_type, sample_type,
buffer, buffer,
model, model,
area,
seed, seed,
n, n,
sample_size, sample_size,
n_channel, n_channel,
seed2, seed2,
mtry, mtry,
mod.error,
pbtn1, pbtn1,
pbtn2, rast,
ras_vx,
max_samples_per_class) { max_samples_per_class) {
points <- NULL points <- NULL
models <- NULL models <- NULL
...@@ -56,14 +54,12 @@ model_opt_r <- function(k, ...@@ -56,14 +54,12 @@ model_opt_r <- function(k,
if (sample_type == "regular") { if (sample_type == "regular") {
pbt <- raster::sampleRegular(raster, size = sample_size, sp = T) pbt <- raster::sampleRegular(raster, size = sample_size, sp = T)
} }
pbt <- spatialEco::point.in.poly(pbt, area)[, 1:n_channel]
f <- which(is.na(pbt@data[1]))
f <- which(is.na(pbt@data[1])) if (length(f) != 0) {
if (length(f) != 0) {
pbt <- pbt[-f,] pbt <- pbt[-f,]
} }
set.seed(seed2[k]) set.seed(seed2[k])
classes <- classes <-
as.factor(sample(c(1:2), size = nrow(pbt), replace = T)) as.factor(sample(c(1:2), size = nrow(pbt), replace = T))
...@@ -99,22 +95,21 @@ model_opt_r <- function(k, ...@@ -99,22 +95,21 @@ model_opt_r <- function(k,
model_pre <- model1 model_pre <- model1
pbtn1_pre <- pbtn1 pbtn1_pre <- pbtn1
pbtn2_pre <- pbtn2
#if ( j > 1) {if (oobe[j,k] < 0.02 || abs(oobe[(j-1),k]-oobe[j,k]) <= 0.011 ) #if ( j > 1) {if (oobe[j,k] < 0.02 || abs(oobe[(j-1),k]-oobe[j,k]) <= 0.011 )
if (j > 1) { if (j > 1) {
if (oobe[j, 1] < 0.02) { if (oobe[j, 1] < mod.error) {
models <- model1 models <- model1
points <- rbind(pbtn1, pbtn2) points <- pbtn1
break break
} }
if (oobe[(j - 1), 1] <= oobe[j, 1]) { if (oobe[(j - 1), 1] <= oobe[j, 1]) {
models <- model_pre models <- model_pre
points <- rbind(pbtn1_pre, pbtn2_pre) points <- pbtn1_pre
break break
} }
if (j == n & oobe[j, 1] >= 0.02) { if (j == n & oobe[j, 1] >= mod.error) {
models <- NULL models <- NULL
points <- NULL points <- NULL
break break
...@@ -132,17 +127,19 @@ model_opt_r <- function(k, ...@@ -132,17 +127,19 @@ model_opt_r <- function(k,
} }
######################################################################## ########################################################################
which_classes_correct <- which(classes[correct] == 1) which_classes_correct <- which(classes[correct] == 1)
if (length(which_classes_correct) == 0) { which_classes_correct_2 <- which(classes[correct] == 2)
if (length(which_classes_correct) == 0 || length(which_classes_correct_2) == 0) {
if (j == 1) { if (j == 1) {
break break
} }
} else { } else {
d1 <- correct[which_classes_correct] d1 <- correct[which_classes_correct]
d2 <- correct[which_classes_correct_2]
###generate new samples from only correctly classified samples [label 1] ###generate new samples from only correctly classified samples [label 1]
p1 <- pbt@coords[d1, ] p1 <- pbt@coords[append(d1,d2), ]
pbtn1 <- pbtn1 <-
as.data.frame(cbind(classes[d1], matrix(p1, ncol = 2))) as.data.frame(cbind(c(classes[d1],classes[d2]), matrix(p1, ncol = 2)))
sp::coordinates(pbtn1) <- c("V2", "V3") sp::coordinates(pbtn1) <- c("V2", "V3")
#sp::proj4string(pbtn1) <- sp::proj4string(pbt) #sp::proj4string(pbtn1) <- sp::proj4string(pbt)
raster::crs(pbtn1) <- raster::crs(pbt) raster::crs(pbtn1) <- raster::crs(pbt)
...@@ -150,148 +147,63 @@ model_opt_r <- function(k, ...@@ -150,148 +147,63 @@ model_opt_r <- function(k,
poly <- rgeos::gBuffer(spgeom = pbtn1, poly <- rgeos::gBuffer(spgeom = pbtn1,
width = buffer, width = buffer,
byid = TRUE) byid = TRUE)
test <- ras_vx$extract(sp = poly)
test1<-na.omit(raster::as.matrix(fasterize::fasterize(sf::st_as_sf(poly), rast[[1]])*rast))
for (i in 1:length(test)) { nam<-as.vector(fasterize::fasterize(sf::st_as_sf(poly), rast[[1]], field="V1"))[-attr(test1,"na.action")]
s1 <- dim(test[[i]])[1]
#if (s1 <= 5) { co<-raster::xyFromCell(rast, c(1:raster::ncell(rast))[-attr(test1,"na.action")])
# test[[i]] <- pbtn1 <- as.data.frame(cbind(nam, co))
# test[[i]]
#} else {
if (s1 > 5) {
set.seed(seed)
test[[i]] <-
test[[i]][sample(c(1:s1), 5, replace = F), ]
}
}
for (i in 1:length(test)) {
if (i == 1) {
co <- raster::xyFromCell(raster, test[[i]][, 1])
} else {
co <- rbind(co, raster::xyFromCell(raster, test[[i]][, 1]))
}
}
pbtn1 <- as.data.frame(cbind(rep(1, nrow(co)), co))
sp::coordinates(pbtn1) <- c("x", "y") sp::coordinates(pbtn1) <- c("x", "y")
test1 <- as.matrix(do.call(rbind, test)[, -1]) if (ncol(test1) == 1) {
if (ncol(test1) == 1) {
test1 <- t(test1) test1 <- t(test1)
} }
colnames(test1) <- names(raster) colnames(test1) <- names(raster)
if (length(which(is.na(test1))) > 0) { }
pbtn1 <- pbtn1[complete.cases(test1), ]
test1 <- test1[complete.cases(test1), ]
}
}
if (class(test1)[1] == "numeric") { if (class(test1)[1] == "numeric") {
test1 <- t(matrix(test1)) test1 <- t(matrix(test1))
} }
if (nrow(test1) == 0) { if (length(levels(as.factor(nam))) < 2) {
break
}
##############################
##############################
which_classes_correct_2 <- which(classes[correct] == 2)
if (length(which_classes_correct_2) == 0) {
if (j == 1) {
break
}
} else {
d2 <- correct[which_classes_correct_2]
###generate new samples from only correctly classified samples [label 2]
p2 <- pbt@coords[d2, ]
pbtn2 <-
as.data.frame(cbind(classes[d2], matrix(p2, ncol = 2)))
sp::coordinates(pbtn2) <- c("V2", "V3")
#sp::proj4string(pbtn2) <- sp::proj4string(pbt)
raster::crs(pbtn2) <- raster::crs(pbt)
poly <- rgeos::gBuffer(spgeom = pbtn2,
width = buffer,
byid = TRUE)
test <- ras_vx$extract(sp = poly)
for (i in 1:length(test)) {
s1 <- dim(test[[i]])[1]
#if (s1 <= 5) {
# test[[i]] <-
# test[[i]]
#} else {
if (s1 > 5) {
set.seed(seed)
test[[i]] <-
test[[i]][sample(c(1:s1), 5, replace = F), ]
}
}
for (i in 1:length(test)) {
if (i == 1) {
co <- raster::xyFromCell(raster, test[[i]][, 1])
} else {
co <- rbind(co, raster::xyFromCell(raster, test[[i]][, 1]))
}
}
pbtn2 <- as.data.frame(cbind(rep(2, nrow(co)), co))
sp::coordinates(pbtn2) <- c("x", "y")
test2 <- as.matrix(do.call(rbind, test)[, -1])
if (ncol(test2) == 1) {
test2 <- t(test2)
}
colnames(test2) <- names(raster)
if (length(which(is.na(test2))) > 0) {
pbtn2 <- pbtn2[complete.cases(test2), ]
test2 <- test2[complete.cases(test2), ]
}
}
if (class(test2)[1] == "numeric") {
test2 <- t(matrix(test2))
}
if (nrow(test2) == 0) {
break break
} }
###################################### ######################################
###Gleichverteilung samples in Klassen ###balancing sample size
di <- c(nrow(pbtn1), nrow(pbtn2)) di <- c(sum(pbtn1@data$nam==1), sum(pbtn1@data$nam==2))
if (abs(nrow(pbtn1) - nrow(pbtn2)) > min(di) * 0.3) { if (abs(di[1] - di[2]) > min(di) * 0.3) {
if (which.min(di) == 2) { if (which.min(di) == 2) {
set.seed(seed) set.seed(seed)
d3 <- sample(1:nrow(pbtn1), nrow(pbtn2), replace = F) d3 <- sample(which(pbtn1@data$nam==1), di[1]-di[2], replace = F)
pbtn1 <- pbtn1[d3, ] pbtn1 <- pbtn1[-d3, ]
test1 <- test1[d3, ] test1 <- test1[-d3, ]
} else { } else {
set.seed(seed) set.seed(seed)
d4 <- sample(1:nrow(pbtn2), nrow(pbtn1), replace = F) d4 <- sample(which(pbtn1@data$nam==2), di[2]-di[1], replace = F)
pbtn2 <- pbtn2[d4, ] pbtn1 <- pbtn1[-d4, ]
test2 <- test2[d4, ] test1 <- test1[-d4, ]
} }
} }
##################################### #####################################
###max Klassenbelegungswert ###maximum sample size
if (nrow(pbtn1) > max_samples_per_class) { if (sum(pbtn1@data$nam==1) > max_samples_per_class) {
set.seed(seed) set.seed(seed)
dr <- dr <-
sample(1:nrow(pbtn1), max_samples_per_class, replace = F) sample(which(pbtn1@data$nam==1), sum(pbtn1@data$nam==1) - max_samples_per_class, replace = F)
pbtn1 <- pbtn1[dr, ] pbtn1 <- pbtn1[-dr, ]
test1 <- test1[dr, ] test1 <- test1[-dr, ]
} }
if (nrow(pbtn2) > max_samples_per_class) { if (sum(pbtn1@data$nam==2) > max_samples_per_class) {
set.seed(seed) set.seed(seed)
dr <- dr <-
sample(1:nrow(pbtn2), max_samples_per_class, replace = F) sample(which(pbtn1@data$nam==2), sum(pbtn1@data$nam==2) - max_samples_per_class, replace = F)
pbtn2 <- pbtn2[dr, ] pbtn1 <- pbtn1[-dr, ]
test2 <- test2[dr, ] test1 <- test1[-dr, ]
} }
######################################################################## ########################################################################
data <- data <-
as.data.frame(cbind(append(pbtn1@data$V1, pbtn2@data$V1), as.data.frame(cbind(pbtn1@data$nam, test1)) ##data
rbind(test1, test2))) ##data
names(data)[1] <- "classes" names(data)[1] <- "classes"
classes <- data$classes classes <- data$classes
pbt <- rbind(pbtn1, pbtn2) pbt <- pbtn1
} }
remove(pbt) remove(pbt)
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#' @param reference reference spectra as a data.frame with (lines = classes, column = predictors) #' @param reference reference spectra as a data.frame with (lines = classes, column = predictors)
#' @param model which machine learning classifier to use c("rf", "svm") for random forest or support vector machine implementation #' @param model which machine learning classifier to use c("rf", "svm") for random forest or support vector machine implementation
#' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors) #' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors)
#' @param mod.error threshold for model error until which iteration is being executed
#' @param last only true for one class classifier c("FALSE", TRUE") #' @param last only true for one class classifier c("FALSE", TRUE")
#' @param seed set seed for reproducible results #' @param seed set seed for reproducible results
#' @param init.seed "sample" for new or use run1@seeds to reproduce previous steps #' @param init.seed "sample" for new or use run1@seeds to reproduce previous steps
...@@ -20,7 +21,8 @@ ...@@ -20,7 +21,8 @@
#' @param n_classes total number of classes (habitat types) to be separated #' @param n_classes total number of classes (habitat types) to be separated
#' @param multiTest number of test runs to compare different probability outputs #' @param multiTest number of test runs to compare different probability outputs
#' @param RGB rgb channel numbers for image plot #' @param RGB rgb channel numbers for image plot
#' @param color color pallet #' @param in.memory boolean for raster processing (memory = "TRUE", from disk = "FALSE")
#' @param color single colors for continuous color palette interpolation
#' @param overwrite overwrite the KML and raster files from previous runs (default TRUE) #' @param overwrite overwrite the KML and raster files from previous runs (default TRUE)
#' @param save_runs an Habitat object is saved into disk for each run (default TRUE) #' @param save_runs an Habitat object is saved into disk for each run (default TRUE)
#' @param parallel_mode run loops using all available cores (default FALSE) #' @param parallel_mode run loops using all available cores (default FALSE)
...@@ -30,7 +32,7 @@ ...@@ -30,7 +32,7 @@
#' @return 4 files per step: #' @return 4 files per step:
#' 1) Habitat type probability map as geocoded *.kmz file (with a *.kml layer and *.png image output), and *.tif raster file #' 1) Habitat type probability map as geocoded *.kmz file (with a *.kml layer and *.png image output), and *.tif raster file
#' 2) A Habitat object (only if save_runs is set to TRUE) consisting of 7 slots: \cr #' 2) A Habitat object (only if save_runs is set to TRUE) consisting of 7 slots: \cr
#' run1@models - list of selcted classifiers \cr #' run1@models - list of selected classifiers \cr
#' run1@ref_samples - list of SpatialPointsDataFrames with same length as run1@models holding reference labels [1,2] for each selected model \cr #' run1@ref_samples - list of SpatialPointsDataFrames with same length as run1@models holding reference labels [1,2] for each selected model \cr
#' run1@switch - vector of lenght run1@models indicating if target class equals 2, if not NA the labels need to be switched \cr #' run1@switch - vector of lenght run1@models indicating if target class equals 2, if not NA the labels need to be switched \cr
#' run1@layer - raster map of habitat type probability \cr #' run1@layer - raster map of habitat type probability \cr
...@@ -76,6 +78,7 @@ multi_Class_Sampling <- function(in.raster, ...@@ -76,6 +78,7 @@ multi_Class_Sampling <- function(in.raster,
reference, reference,
model = "rf", model = "rf",
mtry = 10, mtry = 10,
mod.error=0.02,
last = F, last = F,
seed = 3, seed = 3,
init.seed = "sample", init.seed = "sample",
...@@ -85,6 +88,7 @@ multi_Class_Sampling <- function(in.raster, ...@@ -85,6 +88,7 @@ multi_Class_Sampling <- function(in.raster,
n_classes, n_classes,
multiTest = 1, multiTest = 1,
RGB = c(19, 20, 21), RGB = c(19, 20, 21),
in.memory = TRUE,
color = c("lightgrey", "orange", "yellow", "limegreen", "forestgreen"), color = c("lightgrey", "orange", "yellow", "limegreen", "forestgreen"),
overwrite = TRUE, overwrite = TRUE,
save_runs = TRUE, save_runs = TRUE,
...@@ -94,7 +98,7 @@ multi_Class_Sampling <- function(in.raster, ...@@ -94,7 +98,7 @@ multi_Class_Sampling <- function(in.raster,
# Checks if its a new or a resumed run and asks the user to remove all step_*.tif # Checks if its a new or a resumed run and asks the user to remove all step_*.tif
# files from the results folder in case of a new run. # files from the results folder in case of a new run.
if(step == 1){ if (step == 1) {
if (length(list.files( if (length(list.files(
outPath, outPath,
pattern = "step_(.*).tif", pattern = "step_(.*).tif",
...@@ -113,11 +117,6 @@ multi_Class_Sampling <- function(in.raster, ...@@ -113,11 +117,6 @@ multi_Class_Sampling <- function(in.raster,
} }
input_raster <- in.raster