Commit e05e265a authored by Marco De Lucia's avatar Marco De Lucia
Browse files

MDL: added demo-kin-surr-xgboost.R

parent 3d961987
## Licence: LGPL version 2.1
## Time-stamp: "Last modified 2020-09-21 17:22:31 delucia"
library(RedModRphree)
require(xgboost)
setwd("/home/work/simR/Rphree/trunk/smart_kd/Paper")
db <- RPhreeFile(system.file("extdata", "phreeqc_kin.dat", package="RedModRphree"), is.db=TRUE)
base <- c("SOLUTION 1", "units mol/kgw", "temp 25.0", "water 1",
"pH 9.91 charge", "pe 4.0", "C 1.2279E-04",
"Ca 1.2279E-04", "Mg 0.001", "Cl 0.002", "PURE 1",
"O2g -0.1675 10", "KINETICS 1", "-steps 100",
"-bad_step_max 1000", "Calcite", "-m 0.000207",
"-parms 3.20", "Dolomite", "-m 0.0", "-parms 0.32",
"END")
initsim <- c("SELECTED_OUTPUT", "-high_precision true",
"-reset false", "USER_PUNCH",
"-head C Ca Cl Mg H e O2g Calcite Dolomite",
"10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), ACT(\"H+\"), ACT(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")",
"SOLUTION 1", "units mol/kgw", "temp 25.0", "water 1",
"pH 9.91 charge", "pe 4.0", "C 1.2279E-04",
"Ca 1.2279E-04", "Cl 1E-12", "Mg 1E-12", "PURE 1",
"O2g -0.6788 10.0", "Calcite 0.0 2.07E-3",
"Dolomite 0.0 0.0", "END")
selout <- c("SELECTED_OUTPUT", "-high_precision true", "-reset false",
"-time", "-soln", "-temperature true", "-water true",
"-pH", "-pe", "-totals C Ca Cl Mg",
"-kinetic_reactants Calcite Dolomite", "-equilibrium O2g")
prop <- c("C","Ca","Cl","Mg","pH","pe","O2g", "Calcite","Dolomite")
maxsim <- 20 ## minimum number of single chemical simulations in each time step to spawn parallelization
grid <- 50 ## grid discretization (number of elements)
L <- 0.5 ## total system length [m]
U <- 9.375e-6 ## imposed (constant) Darcy velocity [m3/s]
Cf <- 1 ## Courant number to add some numerical dispersion
## boundary conditions (concentration of the injected fluid entering into the 1D column)
bound <- c(H=1E-7, e=1E-4, Mg=0.001, Cl=0.002)
exper <- c(50,100,200)
expsim <- vector(mode="list",length=length(exper))
timexpsim <- vector(mode="list",length=length(exper))
for (i in seq_along(exper)) {
setup <- list(n=exper[i], len=L, U=U, base=base, first=selout, Cf=Cf, bound=bound, dt=210,
prop=prop, immobile=c(7,8,9), kin=c(8,9), ann=list(O2g=-0.1675), initsim=initsim)
timexpsim[[i]] <- system.time({
expsim[[i]] <- ReactTranspBalanceKin(setup=setup, db=db, step="fix_dt", maxtime=60000,
procs=1, maxsim=maxsim, writeout=FALSE,
reduce=FALSE, surrogate=FALSE)
})
}
saveRDS("demo-kin-xgb-refsim-60k.rds", object=list(time=timexpsim, sims=expsim))
## par(mfrow=c(3,1))
## MatplotSingle(sim=expsim[[1]], xlab="Domain element", ylab="Ca, Mg, Calcite, Dolomite", main="Kinetic simulation, grid 50")
## MatplotSingle(sim=expsim[[2]], xlab="Domain element", ylab="Ca, Mg, Calcite, Dolomite", main="Kinetic simulation, grid 100")
## MatplotSingle(sim=expsim[[3]], xlab="Domain element", ylab="Ca, Mg, Calcite, Dolomite", main="Kinetic simulation, grid 200")
## pack all the "true" PHREEQC inputs/outputs in one dataset
## We extract only the first 21000 s, 101 iterations
refsim <- lapply(expsim, function(x) return(x[1:200]))
sam <- ExtractSamples(refsim,
var_design=c("C","Ca","Cl","Mg", "pH", "Calcite", "Dolomite"),
var_result=c("C","Ca","Cl","Mg", "pH", "Calcite", "Dolomite"))
############################### Train the surrogate
source("RFun_xgboost.R")
source("RFun_Eval.R")
FitXGB <- function(data, sc.factor=1000) {
require(xgboost)
des <- data.matrix(data$design)
res <- data.matrix(data$result)
## remove Cl from res
res <- res[, -which(colnames(res)=="Cl")]
maxes <- apply(res, 2, max)
scaledres <- scale(res, center=FALSE, scale=maxes/sc.factor)
retscale <- list(scale=maxes/sc.factor)
models <- vector(mode="list", length=ncol(res))
names(models) <- colnames(res)
for (i in seq(1, ncol(res))){
models[[i]] <- xgboost(data = des, label = scaledres[, i],
max.depth = 200, eta = 0.4,
nrounds = 1000, early_stopping_rounds = 50,
objective = "reg:tweedie", eval_metric="rmse",
tweedie_variance_power = 1.2, verbose = 2)
}
return(list(xgb=models, scale=retscale))
}
xgbsurrogate <- FitXGB(sam)
## Function called during the coupled simulations
MySurrKin <- function(state, model){
excl <- match(c("O2g","pe"), colnames(state))
remember_state_names <- colnames(state)
des <- state[,-excl]
if (NA %in% des) {
print(des)
stop("Mysurr: NA in data")
}
if (NaN %in% des) {
print(des)
stop("Mysurr: NaN in data")
}
order_for_surrogate <- c("C","Ca","Cl","Mg","pH","Calcite","Dolomite")
rem_attr <- attr(state,"immobile")
ss <- data.matrix(state[, order_for_surrogate])
prunsc <- PredUnscaled(ss, model$xgb)
pred <- ScaleBack(prunsc, model$scale)
colnames(pred) <- names(model$xgb)
## reattach auxiliary variables and Cl
out <- cbind(as.data.frame(pred), state[,excl], state[, "Cl"])
colnames(out) <- c(names(model$xgb),"O2g","pe","Cl")
out$pe <- rep(4.0, nrow(out))
## order back to how it is expected in "state"
out <- out[,remember_state_names]
attr(out,"immobile") <- rem_attr
return(out)
}
DoSims <- function(pargrid, rtsetup, surrogates, surrFUN, dir) {
nsim <- nrow(pargrid)
cat(":::: Starting", nsim, "simulations\n")
timestamp <- format(Sys.time(), "%Y%m%d_")
save <- FALSE
if (!missing(dir)){
save <- TRUE
dout <- dir
if (!dir.exists(dout)) {
dir.create(dout)
cat(":: Created directory ", dout, "\n")
}
}
## prepare the container
simsur <- vector(mode="list",length=nsim)
timesur <- vector(mode="list",length=nsim)
outbalsur <- vector(mode="list",length=nsim)
for (i in seq(nsim)) {
timesur[[i]] <- system.time(
simsur[[i]] <- ReactTranspBalanceKin(setup=rtsetup$setup, db=rtsetup$db,
step="fix_dt", maxtime=rtsetup$maxtime,
procs=1, maxsim=10, writeout=FALSE,
reduce=FALSE, surrogate=TRUE, surrogate.FUN=surrFUN,
model=surrogates, baleq=rtsetup$baleq,
tol=pargrid$tol[i],
call_pqc=pargrid$call[i]) )
outbalsur[[i]] <- out_bal
}
rawXgb <- lapply(surrogates$xgb, xgb.save.raw)
ret <- list(pargrid=pargrid,
timing=timesur,
sims=simsur,
balances=outbalsur,
FUN=surrFUN,
rtsetup=rtsetup,
xgb=rawXgb,
scale=surrogates$scale)
if (save){
fileout <- paste0(dout, "/", timestamp,"grid", pargrid$grid[1],".rds")
cat("\n\n:: Saving results and surrogates on file",fileout, "\n")
saveRDS(fileout, object=ret)
}
cat("Finished\n\n")
return(ret)
}
mycol <- RColorBrewer::brewer.pal(4, "Dark2")
metric <- "Rel Err / %"
baleq <- list(Calcite=c(C=1, Ca=1), Dolomite=c(C=2, Ca=1, Mg=1))
exp50 <- expand.grid(grid=50, tol=c(1E-5, 1E-6, 1E-7),
call=c(TRUE, FALSE))[1:4,]
exp100 <- expand.grid(grid=100, tol=c(1E-5, 1E-6, 1E-7),
call=c(TRUE, FALSE))[1:4,]
exp200 <- expand.grid(grid=200, tol=c(1E-5, 1E-6, 1E-7),
call=c(TRUE, FALSE))[1:4,]
setup <- list(n=50, len=L, U=U, base=base, first=selout, Cf=Cf, bound=bound, dt=210,
prop=prop, immobile=c(7,8,9), kin=c(8,9), ann=list(O2g=-0.1675), initsim=initsim)
rtset <- list(setup=setup, maxtime=60000, baleq=baleq, db=db)
eval50 <- DoSims(pargrid=exp50, rtsetup=rtset,
surrogates=xgbsurrogate,
surrFUN=MySurrKin,
dir="test_rmse_d200"
)
setup <- list(n=100, len=L, U=U, base=base, first=selout, Cf=Cf, bound=bound, dt=210,
prop=prop, immobile=c(7,8,9), kin=c(8,9), ann=list(O2g=-0.1675), initsim=initsim)
rtset <- list(setup=setup, maxtime=60000, baleq=baleq, db=db)
eval100 <- DoSims(pargrid=exp100, rtsetup=rtset,
surrogates=xgbsurrogate,
surrFUN=MySurrKin,
dir="test_rmse_d200")
setup <- list(n=200, len=L, U=U, base=base, first=selout, Cf=Cf, bound=bound, dt=210,
prop=prop, immobile=c(7,8,9), kin=c(8,9), ann=list(O2g=-0.1675), initsim=initsim)
rtset <- list(setup=setup, maxtime=60000, baleq=baleq, db=db)
eval200 <- DoSims(pargrid=exp200, rtsetup=rtset,
surrogates=xgbsurrogate,
surrFUN=MySurrKin,
dir="test_rmse_d200")
err50 <- EvalSims(eval50, ref=expsim[[1]], timeref=timexpsim[[1]][3])
err100 <- EvalSims(eval100, ref=expsim[[2]], timeref=timexpsim[[2]][3])
err200 <- EvalSims(eval200, ref=expsim[[3]], timeref=timexpsim[[3]][3])
vars <- c("Ca", "Mg", "C", "Calcite", "Dolomite", "pH")
cols <- c("red", "black", "blue", "olivedrab3", "orange", "grey", "light blue")[1:length(vars)]
par(mfrow = c(2,2), cex.main = 1.5)
par(mar=c(4,5,2,1))
MatplotErr(sur = eval50$sims[[2]] , sim = expsim[[1]], sample=101, vars=vars,
colors=cols, xlab="Domain element", ylab="Molalities",
main=paste("Grid 50 - speedup:", round(err50[[2]]$speedup, 2)))
par(mar=c(4,5,2,1))
MatplotErr(sur = eval100$sims[[2]] , sim = expsim[[2]], vars=vars,
colors=cols, xlab="Domain element", ylab="Molalities",
main=paste("Grid 100 - speedup:", round(err100[[1]]$speedup, 2)))
legend("right", c("Reference", "Surrogate"), lty=c("dotted", "solid"), bty = "n",
lwd=3, cex=1.5)
par(mar=c(4,5,2,1))
MatplotErr(sur = eval200$sims[[2]] , sim = expsim[[3]], vars=vars,
colors=cols, xlab="Domain element", ylab="Molalities",
main=paste("Grid 200 - speedup:", round(err200[[2]]$speedup, 2)))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment