Commit e50cf8c9 authored by Carsten Neumann's avatar Carsten Neumann
Browse files

faster, shorter, better

parent 903261da
......@@ -11,9 +11,9 @@ Description: Calculates samples and related classifiers for mapping gradual prob
License: GPL-3
Imports:
BH (<= 1.69.0-1),
sf (<= 0.8-1),
sp (<= 1.4-1),
rgdal (<= 1.4-8),
sf,
sp,
rgdal,
raster,
geojsonio,
maptools,
......@@ -22,12 +22,11 @@ Imports:
randomForest,
e1071,
devtools,
velox,
fasterize,
leaflet,
leafem,
IRdisplay,
htmlwidgets
Remotes:url::https://cran.r-project.org/src/contrib/Archive/velox/velox_0.2.0.tar.gz
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
......
......@@ -10,7 +10,6 @@
#' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size)
#' @param reference reference spectra either SpatialPointsDataFrame (shape file) or data.frame with lines = classes, column = predictors]
#' @param model which machine learning classifier to use c("rf", "svm") for random forest or suppurt vector machine implementation
#' @param area extent where the the classification is happening
#' @param mtry number of predictor used at random forest splitting nodes (mtry << n predictors)
#' @param last only true for one class classifier c("FALSE", TRUE")
#' @param seed set seed for reproducable results
......@@ -43,21 +42,19 @@ sample_nb <- function(raster,
buffer,
reference,
model,
area,
mtry,
mod.error,
last,
seed,
init.seed,
in.memory,
save_runs,
parallel_mode,
max_num_cores) {
###
n_channel <- length(names(raster))
###velox
rID = raster[[1]]
rID[] = 1:(nrow(rID) * ncol(rID))
r = raster::stack(rID, raster)
ras.vx <- velox::velox(r)
###raster in MEMORY or NOT
if (in.memory == TRUE) { rast <- raster::readAll(raster) }else { rast <-raster }
###
l <- 1 ###6. opt=260
pbtn1 <- matrix(1, nrow = 1, ncol = 1)
......@@ -102,16 +99,16 @@ sample_nb <- function(raster,
sample_type = sample_type,
buffer = buffer,
model = model,
area = area,
seed = seed,
n = n,
sample_size = sample_size,
n_channel = n_channel,
seed2 = seed2,
mtry = mtry,
mod.error = mod.error,
pbtn1 = pbtn1,
pbtn2 = pbtn2,
ras_vx = ras.vx,
rast = rast,
max_samples_per_class = max_samples_per_class,
mc.cores = cores,
mc.preschedule = TRUE,
......@@ -129,7 +126,6 @@ sample_nb <- function(raster,
sample_type = sample_type,
buffer = buffer,
model = model,
area = area,
seed = seed,
k = k,
n = n,
......@@ -137,9 +133,10 @@ sample_nb <- function(raster,
n_channel = n_channel,
seed2 = seed2,
mtry = mtry,
mod.error = mod.error,
pbtn1 = pbtn1,
pbtn2 = pbtn2,
ras_vx = ras.vx,
rast = rast,
max_samples_per_class = max_samples_per_class
)
points_list[[k]] <- res$points
......
......@@ -7,7 +7,6 @@
#' @param sample_type distribution of spatial locations c("random","regular")
#' @param buffer distance (in m) for new sample collection around initial samples (depends on pixel size)
#' @param model which machine learning classifier to use c("rf", "svm") for random forest or support vector machine implementation
#' @param area extent where the the classification is happening
#' @param seed set seed for reproducible results
#' @param n number of iterations for model accuracy
#' @param sample_size number of spatial locations
......@@ -30,16 +29,16 @@ model_opt_r <- function(k,
sample_type,
buffer,
model,
area,
seed,
n,
sample_size,
n_channel,
seed2,
mtry,
mod.error,
pbtn1,
pbtn2,
ras_vx,
rast,
max_samples_per_class) {
points <- NULL
models <- NULL
......@@ -56,14 +55,12 @@ model_opt_r <- function(k,
if (sample_type == "regular") {
pbt <- raster::sampleRegular(raster, size = sample_size, sp = T)
}
pbt <- spatialEco::point.in.poly(pbt, area)[, 1:n_channel]
f <- which(is.na(pbt@data[1]))
if (length(f) != 0) {
f <- which(is.na(pbt@data[1]))
if (length(f) != 0) {
pbt <- pbt[-f,]
}
}
set.seed(seed2[k])
classes <-
as.factor(sample(c(1:2), size = nrow(pbt), replace = T))
......@@ -99,22 +96,21 @@ model_opt_r <- function(k,
model_pre <- model1
pbtn1_pre <- pbtn1
pbtn2_pre <- pbtn2
#if ( j > 1) {if (oobe[j,k] < 0.02 || abs(oobe[(j-1),k]-oobe[j,k]) <= 0.011 )
if (j > 1) {
if (oobe[j, 1] < 0.02) {
if (oobe[j, 1] < mod.error) {
models <- model1
points <- rbind(pbtn1, pbtn2)
points <- pbtn1
break
}
if (oobe[(j - 1), 1] <= oobe[j, 1]) {
models <- model_pre
points <- rbind(pbtn1_pre, pbtn2_pre)
points <- pbtn1_pre
break
}
if (j == n & oobe[j, 1] >= 0.02) {
if (j == n & oobe[j, 1] >= mod.error) {
models <- NULL
points <- NULL
break
......@@ -132,166 +128,83 @@ model_opt_r <- function(k,
}
########################################################################
which_classes_correct <- which(classes[correct] == 1)
if (length(which_classes_correct) == 0) {
which_classes_correct_2 <- which(classes[correct] == 2)
if (length(which_classes_correct) == 0 || length(which_classes_correct_2) == 0) {
if (j == 1) {
break
}
} else {
d1 <- correct[which_classes_correct]
d2 <- correct[which_classes_correct_2]
###generate new samples from only correctly classified samples [label 1]
p1 <- pbt@coords[d1, ]
p1 <- pbt@coords[append(d1,d2), ]
pbtn1 <-
as.data.frame(cbind(classes[d1], matrix(p1, ncol = 2)))
as.data.frame(cbind(c(classes[d1],classes[d2]), matrix(p1, ncol = 2)))
sp::coordinates(pbtn1) <- c("V2", "V3")
#sp::proj4string(pbtn1) <- sp::proj4string(pbt)
crs(pbtn1) <- crs(pbt)
raster::crs(pbtn1) <- raster::crs(pbt)
poly <- rgeos::gBuffer(spgeom = pbtn1,
width = buffer,
byid = TRUE)
test <- ras_vx$extract(sp = poly)
for (i in 1:length(test)) {
s1 <- dim(test[[i]])[1]
#if (s1 <= 5) {
# test[[i]] <-
# test[[i]]
#} else {
if (s1 > 5) {
set.seed(seed)
test[[i]] <-
test[[i]][sample(c(1:s1), 5, replace = F), ]
}
}
for (i in 1:length(test)) {
if (i == 1) {
co <- raster::xyFromCell(raster, test[[i]][, 1])
} else {
co <- rbind(co, raster::xyFromCell(raster, test[[i]][, 1]))
}
}
pbtn1 <- as.data.frame(cbind(rep(1, nrow(co)), co))
test1<-na.omit(raster::as.matrix(fasterize::fasterize(sf::st_as_sf(poly), rast[[1]])*rast))
nam<-as.vector(fasterize::fasterize(sf::st_as_sf(poly), rast[[1]], field="V1"))[-attr(test1,"na.action")]
co<-raster::xyFromCell(rast, c(1:raster::ncell(rast))[-attr(test1,"na.action")])
pbtn1 <- as.data.frame(cbind(nam, co))
sp::coordinates(pbtn1) <- c("x", "y")
test1 <- as.matrix(do.call(rbind, test)[, -1])
if (ncol(test1) == 1) {
if (ncol(test1) == 1) {
test1 <- t(test1)
}
colnames(test1) <- names(raster)
if (length(which(is.na(test1))) > 0) {
pbtn1 <- pbtn1[complete.cases(test1), ]
test1 <- test1[complete.cases(test1), ]
}
}
}
if (class(test1)[1] == "numeric") {
test1 <- t(matrix(test1))
}
if (nrow(test1) == 0) {
break
}
##############################
##############################
which_classes_correct_2 <- which(classes[correct] == 2)
if (length(which_classes_correct_2) == 0) {
if (j == 1) {
break
}
} else {
d2 <- correct[which_classes_correct_2]
###generate new samples from only correctly classified samples [label 2]
p2 <- pbt@coords[d2, ]
pbtn2 <-
as.data.frame(cbind(classes[d2], matrix(p2, ncol = 2)))
sp::coordinates(pbtn2) <- c("V2", "V3")
#sp::proj4string(pbtn2) <- sp::proj4string(pbt)
crs(pbtn2) <- crs(pbt)
poly <- rgeos::gBuffer(spgeom = pbtn2,
width = buffer,
byid = TRUE)
test <- ras_vx$extract(sp = poly)
for (i in 1:length(test)) {
s1 <- dim(test[[i]])[1]
#if (s1 <= 5) {
# test[[i]] <-
# test[[i]]
#} else {
if (s1 > 5) {
set.seed(seed)
test[[i]] <-
test[[i]][sample(c(1:s1), 5, replace = F), ]
}
}
for (i in 1:length(test)) {
if (i == 1) {
co <- raster::xyFromCell(raster, test[[i]][, 1])
} else {
co <- rbind(co, raster::xyFromCell(raster, test[[i]][, 1]))
}
}
pbtn2 <- as.data.frame(cbind(rep(2, nrow(co)), co))
sp::coordinates(pbtn2) <- c("x", "y")
test2 <- as.matrix(do.call(rbind, test)[, -1])
if (ncol(test2) == 1) {
test2 <- t(test2)
}
colnames(test2) <- names(raster)
if (length(which(is.na(test2))) > 0) {
pbtn2 <- pbtn2[complete.cases(test2), ]
test2 <- test2[complete.cases(test2), ]
}
}
if (class(test2)[1] == "numeric") {
test2 <- t(matrix(test2))
}
if (nrow(test2) == 0) {
if (length(levels(as.factor(nam))) < 2) {
break
}
######################################
###Gleichverteilung samples in Klassen
di <- c(nrow(pbtn1), nrow(pbtn2))
if (abs(nrow(pbtn1) - nrow(pbtn2)) > min(di) * 0.3) {
###balancing sample size
di <- c(sum(pbtn1@data$nam==1), sum(pbtn1@data$nam==2))
if (abs(di[1] - di[2]) > min(di) * 0.3) {
if (which.min(di) == 2) {
set.seed(seed)
d3 <- sample(1:nrow(pbtn1), nrow(pbtn2), replace = F)
pbtn1 <- pbtn1[d3, ]
test1 <- test1[d3, ]
d3 <- sample(which(pbtn1@data$nam==1), di[1]-di[2], replace = F)
pbtn1 <- pbtn1[-d3, ]
test1 <- test1[-d3, ]
} else {
set.seed(seed)
d4 <- sample(1:nrow(pbtn2), nrow(pbtn1), replace = F)
pbtn2 <- pbtn2[d4, ]
test2 <- test2[d4, ]
d4 <- sample(which(pbtn1@data$nam==2), di[2]-di[1], replace = F)
pbtn1 <- pbtn1[-d4, ]
test1 <- test1[-d4, ]
}
}
#####################################
###max Klassenbelegungswert
if (nrow(pbtn1) > max_samples_per_class) {
###maximum sample size
if (sum(pbtn1@data$nam==1) > max_samples_per_class) {
set.seed(seed)
dr <-
sample(1:nrow(pbtn1), max_samples_per_class, replace = F)
pbtn1 <- pbtn1[dr, ]
test1 <- test1[dr, ]
sample(which(pbtn1@data$nam==1), sum(pbtn1@data$nam==1) - max_samples_per_class, replace = F)
pbtn1 <- pbtn1[-dr, ]
test1 <- test1[-dr, ]
}
if (nrow(pbtn2) > max_samples_per_class) {
if (sum(pbtn1@data$nam==2) > max_samples_per_class) {
set.seed(seed)
dr <-
sample(1:nrow(pbtn2), max_samples_per_class, replace = F)
pbtn2 <- pbtn2[dr, ]
test2 <- test2[dr, ]
sample(which(pbtn1@data$nam==2), sum(pbtn1@data$nam==2) - max_samples_per_class, replace = F)
pbtn1 <- pbtn1[-dr, ]
test1 <- test1[-dr, ]
}
########################################################################
data <-
as.data.frame(cbind(append(pbtn1@data$V1, pbtn2@data$V1),
rbind(test1, test2))) ##data
as.data.frame(cbind(pbtn1@data$nam, test1)) ##data
names(data)[1] <- "classes"
classes <- data$classes
pbt <- rbind(pbtn1, pbtn2)
pbt <- pbtn1
}
remove(pbt)
......
......@@ -75,6 +75,7 @@ multi_Class_Sampling <- function(in.raster,
reference,
model = "rf",
mtry = 10,
mod.error=0.02,
last = F,
seed = 3,
init.seed = "sample",
......@@ -84,6 +85,7 @@ multi_Class_Sampling <- function(in.raster,
n_classes,
multiTest = 1,
RGB = c(19, 20, 21),
in.memory = TRUE,
overwrite = TRUE,
save_runs = TRUE,
parallel_mode = FALSE,
......@@ -95,11 +97,7 @@ multi_Class_Sampling <- function(in.raster,
}
input_raster <- in.raster
area <- as(raster::extent(in.raster), 'SpatialPolygons')
area <- sp::SpatialPolygonsDataFrame(area, data.frame(ID = 1:length(area)))
#sp::proj4string(area) <- sp::proj4string(in.raster)
crs(area) <- crs(in.raster)
col <- colorRampPalette(c("lightgrey",
"orange",
"yellow",
......@@ -152,11 +150,12 @@ multi_Class_Sampling <- function(in.raster,
buffer = buffer,
reference = reference,
model = model,
area = area,
mtry = mtry,
mod.error = mod.error,
last = last,
seed = seed,
init.seed = init.seed,
in.memory = in.memory,
save_runs = save_runs,
parallel_mode = parallel_mode,
max_num_cores = max_num_cores
......@@ -205,11 +204,12 @@ multi_Class_Sampling <- function(in.raster,
buffer = buffer,
reference = reference,
model = model,
area = area,
mtry = mtry,
mod.error = mod.error,
last = last,
seed = seed,
init.seed = init.seed,
in.memory = in.memory,
save_runs = save_runs,
parallel_mode = parallel_mode,
max_num_cores = max_num_cores
......@@ -263,11 +263,12 @@ multi_Class_Sampling <- function(in.raster,
buffer = buffer,
reference = reference,
model = model,
area = area,
mtry = mtry,
mod.error = mod.error,
last = last,
seed = seed,
init.seed = init.seed,
in.memory = in.memory,
save_runs = save_runs,
parallel_mode = parallel_mode,
max_num_cores = max_num_cores
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment