Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Marco De Lucia
RedModRphree
Commits
e05e265a
Commit
e05e265a
authored
Jan 04, 2021
by
Marco De Lucia
Browse files
MDL: added demo-kin-surr-xgboost.R
parent
3d961987
Changes
1
Hide whitespace changes
Inline
Side-by-side
demo/demo-kin-surr-xgboost.R
0 → 100644
View file @
e05e265a
## Licence: LGPL version 2.1
## Time-stamp: "Last modified 2020-09-21 17:22:31 delucia"
library
(
RedModRphree
)
require
(
xgboost
)
setwd
(
"/home/work/simR/Rphree/trunk/smart_kd/Paper"
)
db
<-
RPhreeFile
(
system.file
(
"extdata"
,
"phreeqc_kin.dat"
,
package
=
"RedModRphree"
),
is.db
=
TRUE
)
base
<-
c
(
"SOLUTION 1"
,
"units mol/kgw"
,
"temp 25.0"
,
"water 1"
,
"pH 9.91 charge"
,
"pe 4.0"
,
"C 1.2279E-04"
,
"Ca 1.2279E-04"
,
"Mg 0.001"
,
"Cl 0.002"
,
"PURE 1"
,
"O2g -0.1675 10"
,
"KINETICS 1"
,
"-steps 100"
,
"-bad_step_max 1000"
,
"Calcite"
,
"-m 0.000207"
,
"-parms 3.20"
,
"Dolomite"
,
"-m 0.0"
,
"-parms 0.32"
,
"END"
)
initsim
<-
c
(
"SELECTED_OUTPUT"
,
"-high_precision true"
,
"-reset false"
,
"USER_PUNCH"
,
"-head C Ca Cl Mg H e O2g Calcite Dolomite"
,
"10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), ACT(\"H+\"), ACT(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")"
,
"SOLUTION 1"
,
"units mol/kgw"
,
"temp 25.0"
,
"water 1"
,
"pH 9.91 charge"
,
"pe 4.0"
,
"C 1.2279E-04"
,
"Ca 1.2279E-04"
,
"Cl 1E-12"
,
"Mg 1E-12"
,
"PURE 1"
,
"O2g -0.6788 10.0"
,
"Calcite 0.0 2.07E-3"
,
"Dolomite 0.0 0.0"
,
"END"
)
selout
<-
c
(
"SELECTED_OUTPUT"
,
"-high_precision true"
,
"-reset false"
,
"-time"
,
"-soln"
,
"-temperature true"
,
"-water true"
,
"-pH"
,
"-pe"
,
"-totals C Ca Cl Mg"
,
"-kinetic_reactants Calcite Dolomite"
,
"-equilibrium O2g"
)
prop
<-
c
(
"C"
,
"Ca"
,
"Cl"
,
"Mg"
,
"pH"
,
"pe"
,
"O2g"
,
"Calcite"
,
"Dolomite"
)
maxsim
<-
20
## minimum number of single chemical simulations in each time step to spawn parallelization
grid
<-
50
## grid discretization (number of elements)
L
<-
0.5
## total system length [m]
U
<-
9.375e-6
## imposed (constant) Darcy velocity [m3/s]
Cf
<-
1
## Courant number to add some numerical dispersion
## boundary conditions (concentration of the injected fluid entering into the 1D column)
bound
<-
c
(
H
=
1E-7
,
e
=
1E-4
,
Mg
=
0.001
,
Cl
=
0.002
)
exper
<-
c
(
50
,
100
,
200
)
expsim
<-
vector
(
mode
=
"list"
,
length
=
length
(
exper
))
timexpsim
<-
vector
(
mode
=
"list"
,
length
=
length
(
exper
))
for
(
i
in
seq_along
(
exper
))
{
setup
<-
list
(
n
=
exper
[
i
],
len
=
L
,
U
=
U
,
base
=
base
,
first
=
selout
,
Cf
=
Cf
,
bound
=
bound
,
dt
=
210
,
prop
=
prop
,
immobile
=
c
(
7
,
8
,
9
),
kin
=
c
(
8
,
9
),
ann
=
list
(
O2g
=
-0.1675
),
initsim
=
initsim
)
timexpsim
[[
i
]]
<-
system.time
({
expsim
[[
i
]]
<-
ReactTranspBalanceKin
(
setup
=
setup
,
db
=
db
,
step
=
"fix_dt"
,
maxtime
=
60000
,
procs
=
1
,
maxsim
=
maxsim
,
writeout
=
FALSE
,
reduce
=
FALSE
,
surrogate
=
FALSE
)
})
}
saveRDS
(
"demo-kin-xgb-refsim-60k.rds"
,
object
=
list
(
time
=
timexpsim
,
sims
=
expsim
))
## par(mfrow=c(3,1))
## MatplotSingle(sim=expsim[[1]], xlab="Domain element", ylab="Ca, Mg, Calcite, Dolomite", main="Kinetic simulation, grid 50")
## MatplotSingle(sim=expsim[[2]], xlab="Domain element", ylab="Ca, Mg, Calcite, Dolomite", main="Kinetic simulation, grid 100")
## MatplotSingle(sim=expsim[[3]], xlab="Domain element", ylab="Ca, Mg, Calcite, Dolomite", main="Kinetic simulation, grid 200")
## pack all the "true" PHREEQC inputs/outputs in one dataset
## We extract only the first 21000 s, 101 iterations
refsim
<-
lapply
(
expsim
,
function
(
x
)
return
(
x
[
1
:
200
]))
sam
<-
ExtractSamples
(
refsim
,
var_design
=
c
(
"C"
,
"Ca"
,
"Cl"
,
"Mg"
,
"pH"
,
"Calcite"
,
"Dolomite"
),
var_result
=
c
(
"C"
,
"Ca"
,
"Cl"
,
"Mg"
,
"pH"
,
"Calcite"
,
"Dolomite"
))
############################### Train the surrogate
source
(
"RFun_xgboost.R"
)
source
(
"RFun_Eval.R"
)
FitXGB
<-
function
(
data
,
sc.factor
=
1000
)
{
require
(
xgboost
)
des
<-
data.matrix
(
data
$
design
)
res
<-
data.matrix
(
data
$
result
)
## remove Cl from res
res
<-
res
[,
-
which
(
colnames
(
res
)
==
"Cl"
)]
maxes
<-
apply
(
res
,
2
,
max
)
scaledres
<-
scale
(
res
,
center
=
FALSE
,
scale
=
maxes
/
sc.factor
)
retscale
<-
list
(
scale
=
maxes
/
sc.factor
)
models
<-
vector
(
mode
=
"list"
,
length
=
ncol
(
res
))
names
(
models
)
<-
colnames
(
res
)
for
(
i
in
seq
(
1
,
ncol
(
res
))){
models
[[
i
]]
<-
xgboost
(
data
=
des
,
label
=
scaledres
[,
i
],
max.depth
=
200
,
eta
=
0.4
,
nrounds
=
1000
,
early_stopping_rounds
=
50
,
objective
=
"reg:tweedie"
,
eval_metric
=
"rmse"
,
tweedie_variance_power
=
1.2
,
verbose
=
2
)
}
return
(
list
(
xgb
=
models
,
scale
=
retscale
))
}
xgbsurrogate
<-
FitXGB
(
sam
)
## Function called during the coupled simulations
MySurrKin
<-
function
(
state
,
model
){
excl
<-
match
(
c
(
"O2g"
,
"pe"
),
colnames
(
state
))
remember_state_names
<-
colnames
(
state
)
des
<-
state
[,
-
excl
]
if
(
NA
%in%
des
)
{
print
(
des
)
stop
(
"Mysurr: NA in data"
)
}
if
(
NaN
%in%
des
)
{
print
(
des
)
stop
(
"Mysurr: NaN in data"
)
}
order_for_surrogate
<-
c
(
"C"
,
"Ca"
,
"Cl"
,
"Mg"
,
"pH"
,
"Calcite"
,
"Dolomite"
)
rem_attr
<-
attr
(
state
,
"immobile"
)
ss
<-
data.matrix
(
state
[,
order_for_surrogate
])
prunsc
<-
PredUnscaled
(
ss
,
model
$
xgb
)
pred
<-
ScaleBack
(
prunsc
,
model
$
scale
)
colnames
(
pred
)
<-
names
(
model
$
xgb
)
## reattach auxiliary variables and Cl
out
<-
cbind
(
as.data.frame
(
pred
),
state
[,
excl
],
state
[,
"Cl"
])
colnames
(
out
)
<-
c
(
names
(
model
$
xgb
),
"O2g"
,
"pe"
,
"Cl"
)
out
$
pe
<-
rep
(
4.0
,
nrow
(
out
))
## order back to how it is expected in "state"
out
<-
out
[,
remember_state_names
]
attr
(
out
,
"immobile"
)
<-
rem_attr
return
(
out
)
}
DoSims
<-
function
(
pargrid
,
rtsetup
,
surrogates
,
surrFUN
,
dir
)
{
nsim
<-
nrow
(
pargrid
)
cat
(
":::: Starting"
,
nsim
,
"simulations\n"
)
timestamp
<-
format
(
Sys.time
(),
"%Y%m%d_"
)
save
<-
FALSE
if
(
!
missing
(
dir
)){
save
<-
TRUE
dout
<-
dir
if
(
!
dir.exists
(
dout
))
{
dir.create
(
dout
)
cat
(
":: Created directory "
,
dout
,
"\n"
)
}
}
## prepare the container
simsur
<-
vector
(
mode
=
"list"
,
length
=
nsim
)
timesur
<-
vector
(
mode
=
"list"
,
length
=
nsim
)
outbalsur
<-
vector
(
mode
=
"list"
,
length
=
nsim
)
for
(
i
in
seq
(
nsim
))
{
timesur
[[
i
]]
<-
system.time
(
simsur
[[
i
]]
<-
ReactTranspBalanceKin
(
setup
=
rtsetup
$
setup
,
db
=
rtsetup
$
db
,
step
=
"fix_dt"
,
maxtime
=
rtsetup
$
maxtime
,
procs
=
1
,
maxsim
=
10
,
writeout
=
FALSE
,
reduce
=
FALSE
,
surrogate
=
TRUE
,
surrogate.FUN
=
surrFUN
,
model
=
surrogates
,
baleq
=
rtsetup
$
baleq
,
tol
=
pargrid
$
tol
[
i
],
call_pqc
=
pargrid
$
call
[
i
])
)
outbalsur
[[
i
]]
<-
out_bal
}
rawXgb
<-
lapply
(
surrogates
$
xgb
,
xgb.save.raw
)
ret
<-
list
(
pargrid
=
pargrid
,
timing
=
timesur
,
sims
=
simsur
,
balances
=
outbalsur
,
FUN
=
surrFUN
,
rtsetup
=
rtsetup
,
xgb
=
rawXgb
,
scale
=
surrogates
$
scale
)
if
(
save
){
fileout
<-
paste0
(
dout
,
"/"
,
timestamp
,
"grid"
,
pargrid
$
grid
[
1
],
".rds"
)
cat
(
"\n\n:: Saving results and surrogates on file"
,
fileout
,
"\n"
)
saveRDS
(
fileout
,
object
=
ret
)
}
cat
(
"Finished\n\n"
)
return
(
ret
)
}
mycol
<-
RColorBrewer
::
brewer.pal
(
4
,
"Dark2"
)
metric
<-
"Rel Err / %"
baleq
<-
list
(
Calcite
=
c
(
C
=
1
,
Ca
=
1
),
Dolomite
=
c
(
C
=
2
,
Ca
=
1
,
Mg
=
1
))
exp50
<-
expand.grid
(
grid
=
50
,
tol
=
c
(
1E-5
,
1E-6
,
1E-7
),
call
=
c
(
TRUE
,
FALSE
))[
1
:
4
,]
exp100
<-
expand.grid
(
grid
=
100
,
tol
=
c
(
1E-5
,
1E-6
,
1E-7
),
call
=
c
(
TRUE
,
FALSE
))[
1
:
4
,]
exp200
<-
expand.grid
(
grid
=
200
,
tol
=
c
(
1E-5
,
1E-6
,
1E-7
),
call
=
c
(
TRUE
,
FALSE
))[
1
:
4
,]
setup
<-
list
(
n
=
50
,
len
=
L
,
U
=
U
,
base
=
base
,
first
=
selout
,
Cf
=
Cf
,
bound
=
bound
,
dt
=
210
,
prop
=
prop
,
immobile
=
c
(
7
,
8
,
9
),
kin
=
c
(
8
,
9
),
ann
=
list
(
O2g
=
-0.1675
),
initsim
=
initsim
)
rtset
<-
list
(
setup
=
setup
,
maxtime
=
60000
,
baleq
=
baleq
,
db
=
db
)
eval50
<-
DoSims
(
pargrid
=
exp50
,
rtsetup
=
rtset
,
surrogates
=
xgbsurrogate
,
surrFUN
=
MySurrKin
,
dir
=
"test_rmse_d200"
)
setup
<-
list
(
n
=
100
,
len
=
L
,
U
=
U
,
base
=
base
,
first
=
selout
,
Cf
=
Cf
,
bound
=
bound
,
dt
=
210
,
prop
=
prop
,
immobile
=
c
(
7
,
8
,
9
),
kin
=
c
(
8
,
9
),
ann
=
list
(
O2g
=
-0.1675
),
initsim
=
initsim
)
rtset
<-
list
(
setup
=
setup
,
maxtime
=
60000
,
baleq
=
baleq
,
db
=
db
)
eval100
<-
DoSims
(
pargrid
=
exp100
,
rtsetup
=
rtset
,
surrogates
=
xgbsurrogate
,
surrFUN
=
MySurrKin
,
dir
=
"test_rmse_d200"
)
setup
<-
list
(
n
=
200
,
len
=
L
,
U
=
U
,
base
=
base
,
first
=
selout
,
Cf
=
Cf
,
bound
=
bound
,
dt
=
210
,
prop
=
prop
,
immobile
=
c
(
7
,
8
,
9
),
kin
=
c
(
8
,
9
),
ann
=
list
(
O2g
=
-0.1675
),
initsim
=
initsim
)
rtset
<-
list
(
setup
=
setup
,
maxtime
=
60000
,
baleq
=
baleq
,
db
=
db
)
eval200
<-
DoSims
(
pargrid
=
exp200
,
rtsetup
=
rtset
,
surrogates
=
xgbsurrogate
,
surrFUN
=
MySurrKin
,
dir
=
"test_rmse_d200"
)
err50
<-
EvalSims
(
eval50
,
ref
=
expsim
[[
1
]],
timeref
=
timexpsim
[[
1
]][
3
])
err100
<-
EvalSims
(
eval100
,
ref
=
expsim
[[
2
]],
timeref
=
timexpsim
[[
2
]][
3
])
err200
<-
EvalSims
(
eval200
,
ref
=
expsim
[[
3
]],
timeref
=
timexpsim
[[
3
]][
3
])
vars
<-
c
(
"Ca"
,
"Mg"
,
"C"
,
"Calcite"
,
"Dolomite"
,
"pH"
)
cols
<-
c
(
"red"
,
"black"
,
"blue"
,
"olivedrab3"
,
"orange"
,
"grey"
,
"light blue"
)[
1
:
length
(
vars
)]
par
(
mfrow
=
c
(
2
,
2
),
cex.main
=
1.5
)
par
(
mar
=
c
(
4
,
5
,
2
,
1
))
MatplotErr
(
sur
=
eval50
$
sims
[[
2
]]
,
sim
=
expsim
[[
1
]],
sample
=
101
,
vars
=
vars
,
colors
=
cols
,
xlab
=
"Domain element"
,
ylab
=
"Molalities"
,
main
=
paste
(
"Grid 50 - speedup:"
,
round
(
err50
[[
2
]]
$
speedup
,
2
)))
par
(
mar
=
c
(
4
,
5
,
2
,
1
))
MatplotErr
(
sur
=
eval100
$
sims
[[
2
]]
,
sim
=
expsim
[[
2
]],
vars
=
vars
,
colors
=
cols
,
xlab
=
"Domain element"
,
ylab
=
"Molalities"
,
main
=
paste
(
"Grid 100 - speedup:"
,
round
(
err100
[[
1
]]
$
speedup
,
2
)))
legend
(
"right"
,
c
(
"Reference"
,
"Surrogate"
),
lty
=
c
(
"dotted"
,
"solid"
),
bty
=
"n"
,
lwd
=
3
,
cex
=
1.5
)
par
(
mar
=
c
(
4
,
5
,
2
,
1
))
MatplotErr
(
sur
=
eval200
$
sims
[[
2
]]
,
sim
=
expsim
[[
3
]],
vars
=
vars
,
colors
=
cols
,
xlab
=
"Domain element"
,
ylab
=
"Molalities"
,
main
=
paste
(
"Grid 200 - speedup:"
,
round
(
err200
[[
2
]]
$
speedup
,
2
)))
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment