Skip to content
Snippets Groups Projects
Select Git revision
  • 072d3b7371380fc3c5dda3208c7a76de9691c10a
  • master default
  • 3-port-RcppTUG
  • 2-refactor-pourbaix
  • v0.0.4
5 results

demo-kin-surr-RF.R

Blame
  • demo-kin-surr-RF.R 6.61 KiB
    ## Licence: LGPL version 2.1
    ## Time-stamp: "Last modified 2021-04-28 18:27:32 delucia"
    
    
    library(RedModRphree)
    require(e1071)
    require(import)
    
    db <- RPhreeFile(system.file("extdata", "phreeqc_kin.dat", package="RedModRphree"), is.db=TRUE)
    
    base <- c("SOLUTION 1", "units mol/kgw", "temp 25.0", "water 1",
              "pH 9.91 charge", "pe 4.0", "C   1.2279E-04",
              "Ca  1.2279E-04", "Mg 0.001", "Cl 0.002", "PURE 1",
              "O2g -0.1675 10", "KINETICS 1", "-steps 100",
              "-bad_step_max 1000", "Calcite", "-m      0.000207",
              "-parms  3.20", "Dolomite", "-m      0.0", "-parms  0.32",
              "END")
    
    
    initsim <- c("SELECTED_OUTPUT", "-high_precision true",
                 "-reset false", "USER_PUNCH",
                 "-head C Ca Cl Mg H e O2g Calcite Dolomite",
                 "10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), ACT(\"H+\"), ACT(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")",
                 "SOLUTION 1", "units mol/kgw", "temp 25.0", "water 1",
                 "pH 9.91 charge", "pe 4.0", "C   1.2279E-04",
                 "Ca  1.2279E-04", "Cl 1E-12", "Mg 1E-12", "PURE 1",
                 "O2g     -0.6788 10.0", "Calcite  0.0 2.07E-3",
                 "Dolomite 0.0 0.0", "END")
     
    
    selout <- c("SELECTED_OUTPUT", "-high_precision true", "-reset false",
                "-time", "-soln", "-temperature true", "-water true",
                "-pH", "-pe", "-totals C Ca Cl Mg",
                "-kinetic_reactants Calcite Dolomite", "-equilibrium O2g")
    
    prop <- c("C","Ca","Cl","Mg","pH","pe","O2g", "Calcite","Dolomite")
    maxsim <- 20         ## minimum number of single chemical simulations in each time step to spawn parallelization
    grid   <- 50         ## grid discretization (number of elements)
    L      <- 0.5        ## total system length [m]
    U      <- 9.375e-6   ## imposed (constant) Darcy velocity [m3/s]
    Cf     <- 0.9        ## Courant number to add some numerical dispersion
    
    ## boundary conditions (concentration of the injected fluid entering into the 1D column)
    bound <- c(H=1E-7, e=1E-4, Mg=0.001, Cl=0.002)
    
    exper <- c(50,100)
    expsim <- vector(mode="list",length=length(exper))
    timexpsim <- vector(mode="list",length=length(exper))
    
    for (i in seq_along(exper)) {
        setup <- list(n=exper[i], len=L, U=U, base=base, first=selout, Cf=Cf, bound=bound, dt=210,
                      prop=prop, immobile=c(7,8,9), kin=c(8,9), ann=list(O2g=-0.1675), initsim=initsim)
        
        timexpsim[[i]] <- system.time({
            expsim[[i]] <- ReactTranspBalanceKin(setup=setup, db=db, step="fix_dt", maxtime=21000,
                                                 procs=4, maxsim=maxsim, writeout=FALSE, 
                                                 reduce=FALSE, surrogate=FALSE)
        })
    }
    
        
    par(mfrow=c(2,1))
    MatplotSingle(sim=expsim[[1]], xlab="Domain element", ylab="Ca, Mg [molal], Calcite, Dolomite [mol]", main="Kinetic simulation, grid 50")
    MatplotSingle(sim=expsim[[2]], xlab="Domain element", ylab="Ca, Mg [molal], Calcite, Dolomite [mol]", main="Kinetic simulation, grid 100")
    
    sam <- ExtractSamples(expsim,
                          var_design=c("C","Ca","Cl","Mg", "pH", "pe", "Calcite", "Dolomite"),
                          var_result=c("C","Ca","Cl","Mg", "pH", "pe", "Calcite", "Dolomite"))
    
    
    
    
    ## library(data.table)
    
    ## data.table::fwrite(sam$design, file = "RedModRphree/inst/extdata/demo_kinetic_design.csv", append = FALSE, quote = "auto",
    ##   sep = ",",  eol = "\n", na = "NA", dec = ".", row.names = FALSE, col.names = TRUE)
    
    ## data.table::fwrite(sam$result, file = "RedModRphree/inst/extdata/demo_kinetic_result.csv", append = FALSE, quote = "auto",
    ##   sep = ",",  eol = "\n", na = "NA", dec = ".", row.names = FALSE, col.names = TRUE)
    
    set.seed(3560)
    
    library(caret)
    fitControl <- trainControl(method = "none",
                               allowParallel = TRUE,
                               verboseIter=TRUE,
                               returnData = FALSE,
                               returnResamp="none",
                               savePredictions="none",
                               predictionBounds=rep(TRUE,2))
    
    
    doParallel::registerDoParallel(4)
    
    ## modk <- parallel::mclapply(colnames(sam$result), function (x) 
    ##     return(train(x=sam$design, y=sam$result[,x],trControl = fitControl,  ##tuneGrid = tg, 
    ##                  method="parRF")), mc.cores=8)
    
    modk <- foreach(i=colnames(sam$result)) %dopar% caret::train(x=sam$design, y=sam$result[,i], trControl = fitControl, method="parRF")
    names(modk) <- colnames(sam$result)
    
    saveRDS(modk, "models_kinetics_RF.rds")
    
    PlotModsInSample(modk, sam$result, sam$design)
    
    
    baleq <- list(Calcite=c(C=1, Ca=1), Dolomite=c(C=2, Ca=1, Mg=1))
    
    balance <- CheckBalance(baleq, sam$design, sam$result)
    
    
    MySurrKin <- function(state, model){
        excl <- which(colnames(state)=="O2g")
        remember_state_names <- colnames(state)
        des <- state[,-excl]
        if (NA %in% des) {
            print(des)
            stop("Mysurr: NA in data")
        }
        if (NaN %in% des) {
            print(des)
            stop("Mysurr: NaN in data")
        }    
        order_for_surrogate <- c("C","Ca","Cl","Mg","pH","pe","Calcite","Dolomite")
        rem_attr <- attr(state,"immobile")
        ss <- des[, order_for_surrogate]
        pred <- parLapply(ThisRunCluster, names(model), function(x) as.numeric(predict.train(model[[x]], ss)))
        out <- cbind(as.data.frame(pred), state[,excl])
        colnames(out) <- c(names(model),"O2g")
    
        ## order back to how it is expected in "state"
        out <- out[,remember_state_names]
        attr(out,"immobile") <- rem_attr 
        return(out)
    }
    
    exper <- expand.grid(grid=c(50, 100), tol=c(1E-6, 1E-5))
    simsur    <- vector(mode="list",length=nrow(exper))
    timesur   <- vector(mode="list",length=nrow(exper))
    outbalsur <- vector(mode="list",length=nrow(exper))
    
    for (i in seq(nrow(exper))) {
    
        setup <- list(n=exper$grid[i], len=L, U=U, base=base, first=selout, Cf=Cf, bound=bound, dt=210,
                      prop=prop, immobile=c(7,8,9), kin=c(8,9), ann=list(O2g=-0.1675), initsim=initsim)
    
        timesur[[i]] <- system.time( simsur[[i]] <- ReactTranspBalanceKin(setup=setup, db=db, step="fix_dt", maxtime=21000,
                                                                          procs=4, maxsim=maxsim, writeout=FALSE, 
                                                                          reduce=FALSE, surrogate=TRUE, surrogate.FUN=MySurrKin, 
                                                                          model=modk, baleq=baleq, tol=exper$tol[i], call_pqc=TRUE) )
        outbalsur[[i]] <- out_bal
    }
    
    
    par(mfrow=c(2, 2))
    inds <- rep(c(1,2),2)
    for (i in seq(nrow(exper))) {
        Matplot(sur=simsur[[i]], sim=expsim[[ inds[i]  ]],
                xlab="Domain element", ylab="Ca, Mg [molal], Calcite, Dolomite [mol]",
                main=paste("RandomForest surrogate, tol=", exper$tol[i]))
    }