Commit fd2b351d authored by Marco De Lucia's avatar Marco De Lucia
Browse files

MDL: bumped to 0.0.4; added demo/demo-gmd2020-445.R

parent 5ffddf6e
Package: RedModRphree
Title: R infrastructure for training and applying geochemical surrogate models to reactive transport.
Version: 0.0.3
Version: 0.0.4
Authors@R: c(person("Marco", "De Lucia", email = "delucia@gfz-potsdam.de", role = c("aut", "cre")),
person("Janis", "Jatnieks", email = "deltaxzz@gmail.com", role = c("ctb")))
Description: In the framework of RedMod project, we develop an R infrastructure for training and applying geochemical surrogate models to reactive transport.
......@@ -8,4 +8,4 @@ Depends: R (>= 3.2.0), doParallel, plyr, data.table, phreeqc, caret, mgcv, rando
License: LGPL-2.1
Encoding: UTF-8
LLazyData: true
RoxygenNote: 6.1.1
RoxygenNote: 7.1.1
......@@ -4,3 +4,4 @@ demo-eq-generate-data Generate the training data set from 3 equilibrium simulati
demo-eq-surr-RF Generate the training data set for equilibrium simulations, train randomForest surrogates and perform Reactive Transport simulations using them
demo-kin-surr-RF Generate the training data set for kinetic simulations, train randomForest surrogates and perform Reactive Transport simulations using them
demo-eq-TrainMultiple Uses the UNSTABLE infrastructure for comparing different trained surrogates
demo-gmd2020-445 Pointer to the actual gitlab repository for the code
## Licence: LGPL version 2.1
## Time-stamp: "Last modified 2021-01-05 17:45:17 delucia"
cat(":: To retrieve and run the code used in the preprint 'gmd2020-445', \n:: please follow the instructions on <https://git.gfz-potsdam.de/delucia/gmd2020-445>")
## Licence: LGPL version 2.1
## Time-stamp: "Last modified 2020-09-21 17:22:31 delucia"
library(RedModRphree)
require(xgboost)
setwd("/home/work/simR/Rphree/trunk/smart_kd/Paper")
db <- RPhreeFile(system.file("extdata", "phreeqc_kin.dat", package="RedModRphree"), is.db=TRUE)
base <- c("SOLUTION 1", "units mol/kgw", "temp 25.0", "water 1",
"pH 9.91 charge", "pe 4.0", "C 1.2279E-04",
"Ca 1.2279E-04", "Mg 0.001", "Cl 0.002", "PURE 1",
"O2g -0.1675 10", "KINETICS 1", "-steps 100",
"-bad_step_max 1000", "Calcite", "-m 0.000207",
"-parms 3.20", "Dolomite", "-m 0.0", "-parms 0.32",
"END")
initsim <- c("SELECTED_OUTPUT", "-high_precision true",
"-reset false", "USER_PUNCH",
"-head C Ca Cl Mg H e O2g Calcite Dolomite",
"10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), ACT(\"H+\"), ACT(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")",
"SOLUTION 1", "units mol/kgw", "temp 25.0", "water 1",
"pH 9.91 charge", "pe 4.0", "C 1.2279E-04",
"Ca 1.2279E-04", "Cl 1E-12", "Mg 1E-12", "PURE 1",
"O2g -0.6788 10.0", "Calcite 0.0 2.07E-3",
"Dolomite 0.0 0.0", "END")
selout <- c("SELECTED_OUTPUT", "-high_precision true", "-reset false",
"-time", "-soln", "-temperature true", "-water true",
"-pH", "-pe", "-totals C Ca Cl Mg",
"-kinetic_reactants Calcite Dolomite", "-equilibrium O2g")
prop <- c("C","Ca","Cl","Mg","pH","pe","O2g", "Calcite","Dolomite")
maxsim <- 20 ## minimum number of single chemical simulations in each time step to spawn parallelization
grid <- 50 ## grid discretization (number of elements)
L <- 0.5 ## total system length [m]
U <- 9.375e-6 ## imposed (constant) Darcy velocity [m3/s]
Cf <- 1 ## Courant number to add some numerical dispersion
## boundary conditions (concentration of the injected fluid entering into the 1D column)
bound <- c(H=1E-7, e=1E-4, Mg=0.001, Cl=0.002)
exper <- c(50,100,200)
expsim <- vector(mode="list",length=length(exper))
timexpsim <- vector(mode="list",length=length(exper))
for (i in seq_along(exper)) {
setup <- list(n=exper[i], len=L, U=U, base=base, first=selout, Cf=Cf, bound=bound, dt=210,
prop=prop, immobile=c(7,8,9), kin=c(8,9), ann=list(O2g=-0.1675), initsim=initsim)
timexpsim[[i]] <- system.time({
expsim[[i]] <- ReactTranspBalanceKin(setup=setup, db=db, step="fix_dt", maxtime=60000,
procs=1, maxsim=maxsim, writeout=FALSE,
reduce=FALSE, surrogate=FALSE)
})
}
saveRDS("demo-kin-xgb-refsim-60k.rds", object=list(time=timexpsim, sims=expsim))
## par(mfrow=c(3,1))
## MatplotSingle(sim=expsim[[1]], xlab="Domain element", ylab="Ca, Mg, Calcite, Dolomite", main="Kinetic simulation, grid 50")
## MatplotSingle(sim=expsim[[2]], xlab="Domain element", ylab="Ca, Mg, Calcite, Dolomite", main="Kinetic simulation, grid 100")
## MatplotSingle(sim=expsim[[3]], xlab="Domain element", ylab="Ca, Mg, Calcite, Dolomite", main="Kinetic simulation, grid 200")
## pack all the "true" PHREEQC inputs/outputs in one dataset
## We extract only the first 21000 s, 101 iterations
refsim <- lapply(expsim, function(x) return(x[1:200]))
sam <- ExtractSamples(refsim,
var_design=c("C","Ca","Cl","Mg", "pH", "Calcite", "Dolomite"),
var_result=c("C","Ca","Cl","Mg", "pH", "Calcite", "Dolomite"))
############################### Train the surrogate
source("RFun_xgboost.R")
source("RFun_Eval.R")
FitXGB <- function(data, sc.factor=1000) {
require(xgboost)
des <- data.matrix(data$design)
res <- data.matrix(data$result)
## remove Cl from res
res <- res[, -which(colnames(res)=="Cl")]
maxes <- apply(res, 2, max)
scaledres <- scale(res, center=FALSE, scale=maxes/sc.factor)
retscale <- list(scale=maxes/sc.factor)
models <- vector(mode="list", length=ncol(res))
names(models) <- colnames(res)
for (i in seq(1, ncol(res))){
models[[i]] <- xgboost(data = des, label = scaledres[, i],
max.depth = 200, eta = 0.4,
nrounds = 1000, early_stopping_rounds = 50,
objective = "reg:tweedie", eval_metric="rmse",
tweedie_variance_power = 1.2, verbose = 2)
}
return(list(xgb=models, scale=retscale))
}
xgbsurrogate <- FitXGB(sam)
## Function called during the coupled simulations
MySurrKin <- function(state, model){
excl <- match(c("O2g","pe"), colnames(state))
remember_state_names <- colnames(state)
des <- state[,-excl]
if (NA %in% des) {
print(des)
stop("Mysurr: NA in data")
}
if (NaN %in% des) {
print(des)
stop("Mysurr: NaN in data")
}
order_for_surrogate <- c("C","Ca","Cl","Mg","pH","Calcite","Dolomite")
rem_attr <- attr(state,"immobile")
ss <- data.matrix(state[, order_for_surrogate])
prunsc <- PredUnscaled(ss, model$xgb)
pred <- ScaleBack(prunsc, model$scale)
colnames(pred) <- names(model$xgb)
## reattach auxiliary variables and Cl
out <- cbind(as.data.frame(pred), state[,excl], state[, "Cl"])
colnames(out) <- c(names(model$xgb),"O2g","pe","Cl")
out$pe <- rep(4.0, nrow(out))
## order back to how it is expected in "state"
out <- out[,remember_state_names]
attr(out,"immobile") <- rem_attr
return(out)
}
DoSims <- function(pargrid, rtsetup, surrogates, surrFUN, dir) {
nsim <- nrow(pargrid)
cat(":::: Starting", nsim, "simulations\n")
timestamp <- format(Sys.time(), "%Y%m%d_")
save <- FALSE
if (!missing(dir)){
save <- TRUE
dout <- dir
if (!dir.exists(dout)) {
dir.create(dout)
cat(":: Created directory ", dout, "\n")
}
}
## prepare the container
simsur <- vector(mode="list",length=nsim)
timesur <- vector(mode="list",length=nsim)
outbalsur <- vector(mode="list",length=nsim)
for (i in seq(nsim)) {
timesur[[i]] <- system.time(
simsur[[i]] <- ReactTranspBalanceKin(setup=rtsetup$setup, db=rtsetup$db,
step="fix_dt", maxtime=rtsetup$maxtime,
procs=1, maxsim=10, writeout=FALSE,
reduce=FALSE, surrogate=TRUE, surrogate.FUN=surrFUN,
model=surrogates, baleq=rtsetup$baleq,
tol=pargrid$tol[i],
call_pqc=pargrid$call[i]) )
outbalsur[[i]] <- out_bal
}
rawXgb <- lapply(surrogates$xgb, xgb.save.raw)
ret <- list(pargrid=pargrid,
timing=timesur,
sims=simsur,
balances=outbalsur,
FUN=surrFUN,
rtsetup=rtsetup,
xgb=rawXgb,
scale=surrogates$scale)
if (save){
fileout <- paste0(dout, "/", timestamp,"grid", pargrid$grid[1],".rds")
cat("\n\n:: Saving results and surrogates on file",fileout, "\n")
saveRDS(fileout, object=ret)
}
cat("Finished\n\n")
return(ret)
}
mycol <- RColorBrewer::brewer.pal(4, "Dark2")
metric <- "Rel Err / %"
baleq <- list(Calcite=c(C=1, Ca=1), Dolomite=c(C=2, Ca=1, Mg=1))
exp50 <- expand.grid(grid=50, tol=c(1E-5, 1E-6, 1E-7),
call=c(TRUE, FALSE))[1:4,]
exp100 <- expand.grid(grid=100, tol=c(1E-5, 1E-6, 1E-7),
call=c(TRUE, FALSE))[1:4,]
exp200 <- expand.grid(grid=200, tol=c(1E-5, 1E-6, 1E-7),
call=c(TRUE, FALSE))[1:4,]
setup <- list(n=50, len=L, U=U, base=base, first=selout, Cf=Cf, bound=bound, dt=210,
prop=prop, immobile=c(7,8,9), kin=c(8,9), ann=list(O2g=-0.1675), initsim=initsim)
rtset <- list(setup=setup, maxtime=60000, baleq=baleq, db=db)
eval50 <- DoSims(pargrid=exp50, rtsetup=rtset,
surrogates=xgbsurrogate,
surrFUN=MySurrKin,
dir="test_rmse_d200"
)
setup <- list(n=100, len=L, U=U, base=base, first=selout, Cf=Cf, bound=bound, dt=210,
prop=prop, immobile=c(7,8,9), kin=c(8,9), ann=list(O2g=-0.1675), initsim=initsim)
rtset <- list(setup=setup, maxtime=60000, baleq=baleq, db=db)
eval100 <- DoSims(pargrid=exp100, rtsetup=rtset,
surrogates=xgbsurrogate,
surrFUN=MySurrKin,
dir="test_rmse_d200")
setup <- list(n=200, len=L, U=U, base=base, first=selout, Cf=Cf, bound=bound, dt=210,
prop=prop, immobile=c(7,8,9), kin=c(8,9), ann=list(O2g=-0.1675), initsim=initsim)
rtset <- list(setup=setup, maxtime=60000, baleq=baleq, db=db)
eval200 <- DoSims(pargrid=exp200, rtsetup=rtset,
surrogates=xgbsurrogate,
surrFUN=MySurrKin,
dir="test_rmse_d200")
err50 <- EvalSims(eval50, ref=expsim[[1]], timeref=timexpsim[[1]][3])
err100 <- EvalSims(eval100, ref=expsim[[2]], timeref=timexpsim[[2]][3])
err200 <- EvalSims(eval200, ref=expsim[[3]], timeref=timexpsim[[3]][3])
vars <- c("Ca", "Mg", "C", "Calcite", "Dolomite", "pH")
cols <- c("red", "black", "blue", "olivedrab3", "orange", "grey", "light blue")[1:length(vars)]
par(mfrow = c(2,2), cex.main = 1.5)
par(mar=c(4,5,2,1))
MatplotErr(sur = eval50$sims[[2]] , sim = expsim[[1]], sample=101, vars=vars,
colors=cols, xlab="Domain element", ylab="Molalities",
main=paste("Grid 50 - speedup:", round(err50[[2]]$speedup, 2)))
par(mar=c(4,5,2,1))
MatplotErr(sur = eval100$sims[[2]] , sim = expsim[[2]], vars=vars,
colors=cols, xlab="Domain element", ylab="Molalities",
main=paste("Grid 100 - speedup:", round(err100[[1]]$speedup, 2)))
legend("right", c("Reference", "Surrogate"), lty=c("dotted", "solid"), bty = "n",
lwd=3, cex=1.5)
par(mar=c(4,5,2,1))
MatplotErr(sur = eval200$sims[[2]] , sim = expsim[[3]], vars=vars,
colors=cols, xlab="Domain element", ylab="Molalities",
main=paste("Grid 200 - speedup:", round(err200[[2]]$speedup, 2)))
......@@ -4,8 +4,7 @@
\alias{Distribute}
\title{Distribute properties in an input buffer.}
\usage{
Distribute(input, prop, values, newname = NULL, first = NULL,
wholeline = TRUE)
Distribute(input, prop, values, newname = NULL, first = NULL, wholeline = TRUE)
}
\arguments{
\item{input}{The initial input buffer.}
......
......@@ -5,8 +5,7 @@
\title{Returns the composition of a mineral in terms of chemical
elements (not of species)}
\usage{
ElementalBalanceMin(pphase, stoichmat, elements = c("C", "Ca", "Mg",
"Cl"))
ElementalBalanceMin(pphase, stoichmat, elements = c("C", "Ca", "Mg", "Cl"))
}
\arguments{
\item{pphase}{name of the mineral to parse}
......
......@@ -5,8 +5,13 @@
\title{Display the profiles of some variables for 2 reactive
transport simulation (one using surrogates)}
\usage{
Matplot(sur, sim, sample = 1L, vars = c("Ca", "Mg", "Calcite",
"Dolomite"), ...)
Matplot(
sur,
sim,
sample = 1L,
vars = c("Ca", "Mg", "Calcite", "Dolomite"),
...
)
}
\arguments{
\item{sur}{The simulation with surrogate}
......
......@@ -5,8 +5,12 @@
\title{Display the profiles of some variables along a 1D reactive
transport simulation}
\usage{
MatplotSingle(sim, sample = 1L, vars = c("Ca", "Mg", "Calcite",
"Dolomite"), ...)
MatplotSingle(
sim,
sample = 1L,
vars = c("Ca", "Mg", "Calcite", "Dolomite"),
...
)
}
\arguments{
\item{sim}{The simulation to display}
......
......@@ -4,8 +4,7 @@
\alias{PlotModsInSample}
\title{Scatter plots of phreeqc simulations vs surrogate response}
\usage{
PlotModsInSample(mod, res, des, sample = seq(1, nrow(res)),
column = TRUE, ...)
PlotModsInSample(mod, res, des, sample = seq(1, nrow(res)), column = TRUE, ...)
}
\arguments{
\item{mod}{list of regression models, one per variable}
......
......@@ -5,11 +5,24 @@
\title{1D Reactive transport simulations using phreeqc or a
surrogate, equilibrium simulations}
\usage{
ReactTranspBalanceEq(setup, init, maxtime, step = c("time", "iter",
"fix_dt"), db = "phreeqc.dat", procs = 1, maxsim = 150,
reduce = TRUE, ebreak = FALSE, writeout = FALSE,
surrogate = FALSE, surrogate.FUN, model, baleq, tol = 1e-09,
call_pqc = TRUE)
ReactTranspBalanceEq(
setup,
init,
maxtime,
step = c("time", "iter", "fix_dt"),
db = "phreeqc.dat",
procs = 1,
maxsim = 150,
reduce = TRUE,
ebreak = FALSE,
writeout = FALSE,
surrogate = FALSE,
surrogate.FUN,
model,
baleq,
tol = 1e-09,
call_pqc = TRUE
)
}
\arguments{
\item{setup}{a list with several input parameters}
......
......@@ -5,11 +5,24 @@
\title{1D Reactive transport simulations using phreeqc or a
surrogate}
\usage{
ReactTranspBalanceKin(setup, init, maxtime, step = c("time", "iter",
"fix_dt"), db = "phreeqc.dat", procs = 1, maxsim = 150,
reduce = TRUE, ebreak = FALSE, writeout = FALSE,
surrogate = FALSE, surrogate.FUN, model, baleq, tol = 1e-09,
call_pqc = TRUE)
ReactTranspBalanceKin(
setup,
init,
maxtime,
step = c("time", "iter", "fix_dt"),
db = "phreeqc.dat",
procs = 1,
maxsim = 150,
reduce = TRUE,
ebreak = FALSE,
writeout = FALSE,
surrogate = FALSE,
surrogate.FUN,
model,
baleq,
tol = 1e-09,
call_pqc = TRUE
)
}
\arguments{
\item{setup}{a list with several input parameters}
......
......@@ -4,8 +4,16 @@
\alias{splitMultiFix}
\title{Splits a data (matrix) in n simulations to run in parallel}
\usage{
splitMultiFix(data, procs, base, first, prop, minerals, nmax = 200,
verbose = FALSE)
splitMultiFix(
data,
procs,
base,
first,
prop,
minerals,
nmax = 200,
verbose = FALSE
)
}
\arguments{
\item{data}{the data matrix}
......
......@@ -4,8 +4,19 @@
\alias{splitMultiKin}
\title{Splits a named matrix of data (a column is a variable) in n simulations to run in parallel}
\usage{
splitMultiKin(data, procs, base, first, prop, minerals, kin, dt, ann,
nmax = 200, verbose = FALSE)
splitMultiKin(
data,
procs,
base,
first,
prop,
minerals,
kin,
dt,
ann,
nmax = 200,
verbose = FALSE
)
}
\arguments{
\item{data}{The matrix or data.frame containing the values to be splitted}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment