Commit 7a4acb99 authored by Marco De Lucia's avatar Marco De Lucia
Browse files

Initial commit

parents
Package: RedModRphree
Title: R infrastructure for training and applying geochemical surrogate models to reactive transport
Version: 0.0.0.1
Authors@R: c(person("Marco", "De Lucia", email = "delucia@gfz-potsdam.de", role = c("aut", "cre")),
person("Janis", "Jatnieks", email = "deltaxzz@gmail.com", role = c("ctb")))
Description: In the framework of RedMod project, we develop an R infrastructure for training and applying geochemical surrogate models to reactive transport.
Depends: R (>= 3.2.0)
License: LGPL-2.1
Encoding: UTF-8
Imports:
parallel,
methods,
phreeqc,
mgcv,
caret,
randomForest,
graphics
LLazyData: true
RoxygenNote: 6.0.1
# Generated by roxygen2: do not edit by hand
export(Act2pH)
export(AddProp)
export(AdvectionPQC)
export(AnalyticalLogK)
export(BalanceEquations)
export(CheckBalance)
export(Distribute)
export(DistributeKin)
export(DistributeKinMatrix)
export(ElementalBalanceMin)
export(ExtractPphases)
export(ExtractSpecies)
export(ExtractTotals)
export(FindAllMinNames)
export(FindAllSpeciesNames)
export(FindAllTotNames)
export(FindLogK)
export(FindPhase)
export(FlattenList)
export(FormulaFromBal)
export(ParseFormula)
export(PlotModsInSample)
export(RPhreeFile)
export(RPhreeWriteInp)
export(RReadOut)
export(RReadOutKin)
export(ReactTranspBalanceKin)
export(RecomposeState)
export(ReduceState)
export(RepSol)
export(RunPQC)
export(StoichiometricMatrix)
export(SuppressSim)
export(TrailSpaces)
export(TranspAsPert)
export(mae)
export(msg)
export(pH2Act)
export(splitMultiFix)
export(splitMultiKin)
export(stopmsg)
import(caret)
import(graphics)
import(mgcv)
import(phreeqc)
import(randomForest)
## Functions for manipulating PHREEQC databases
### Marco De Lucia, delucia@gfz-potsdam.de, 2009-2018
### Time-stamp: "Last modified 2018-05-03 17:23:09 delucia"
##' @title Finds the expression of a logK of a given compound/mineral
##' in a PHREEQC database
##' @param species name of the species/mineral to search for
##' @param db the buffer containing the database
##' @param type one of c("analytical","logk","equation","lhs","rhs")
##' @return the requested logK
##' @author MDL
##' @export
FindLogK <- function(species, db, type=c("analytical"))
{
## reg <- gsub("\\(","\\\\(",species) ## obsoleted through the fixed=TRUE option
## reg <- gsub("\\)","\\\\)",reg)
args <- c("analytical","logk","equation","lhs","rhs")
type <- match.arg(type, args)
pos <- grep(species,db,fixed=TRUE)
if (length(pos) > 1) pos <- pos[1] ## just the 1st occurrence
if (length(pos) == 0) {
msg("Species \"", species,"\" not found in the DataBase")
return(NA)
}
## check if analytical expression is present!
if (type=="analytical") {
linepos <- grep("anal",db[c(pos:(pos+4))])
if (length(linepos)==1) {
linepos <- pos + linepos - 1
line <- TrailSpaces(db[linepos])
parread <- unlist(strsplit(line," +"))[-1]
len <- length(parread)
if (len != 5) {
parread[seq(len+1,5)] <- 0
msg("Analytical expression for reaction",species,"has only", len, "terms")
}
par.anal <- as.numeric(parread)
attr(par.anal,"line") <- linepos
attr(par.anal,"type") <- "analytical"
return(par.anal)
}
}
if (type=="logk") {
## if we are here, either there is no analytical, or we want to calculate also the new logK
linepos <- grep("log",db[c(pos:(pos+3))])
if (length(linepos)==1) {
linepos <- pos + linepos - 1
line <- TrailSpaces(db[linepos])
parread <- unlist(strsplit(line," +"))[2]
par.logk <- as.numeric(parread)
attr(par.logk,"line") <- linepos
attr(par.logk,"type") <- "logk"
return(par.logk)
}
}
if (type=="equation") {
return(TrailSpaces(db[pos+1]))
}
if (type=="lhs") {
return(ParseFormula(TrailSpaces(db[pos+1]), type="lhs"))
}
if (type=="rhs") {
return(ParseFormula(TrailSpaces(db[pos+1]), type="rhs"))
}
## if we are here, no matching expression found, returning NA
return(NA)
}
##' @title Calculates the logK of reaction for a given temperature
##' @param param the vector containing the coefficients of analytical
##' logK
##' @param Tk temperature in Kelvin
##' @return The logK at wanted temperature
##' @author MDL
##' @export
AnalyticalLogK <- function(param,Tk)
{
return(param[1]+param[2]*Tk+param[3]/Tk+param[4]*log10(Tk)+param[5]/Tk^2)
}
##' @title Parses a formula as it is written in a PHREEQC database
##' @param line the string containing the reaction
##' @param type one of c("all","lhs","rhs")
##' @return a list containing components and coefficients.
##' Coefficients of left hand side are negated.
##' @author MDL
##' @export
ParseFormula <- function(line, type="all")
{
args <- c("all","lhs","rhs")
type <- match.arg(type, args)
tsline <- TrailSpaces(line)
formula <- unlist(strsplit(tsline,"="))
left <- formula[1]
right <- formula[2]
lhs <- unlist(strsplit(left," + ", fixed=TRUE))
rhs <- unlist(strsplit(right," + ", fixed=TRUE))
if (type=="all" || type=="lhs") {
## left hand side
coeffsl <- rep(1,length(lhs))
compl <- rep("",length(lhs))
for (i in seq_along(lhs)) {
lhs[i] <- sub(' +$', '', lhs[i]) ## remove spaces at the end of string
spaces <- grep(" ",lhs[i],fixed=TRUE)
if (length(spaces)==1) {
tmp <- unlist(strsplit(lhs[i]," ", fixed=TRUE))
coeffsl[i] <- as.numeric(tmp[1])
compl[i] <- tmp[2]
} else {
compl[i] <- lhs[i]
}
}
}
if (type=="all" || type=="rhs") {
## right hand side
coeffsr <- rep(1,length(rhs))
compr <- rep("",length(rhs))
for (i in seq_along(rhs)) {
rhs[i] <- sub('^ +', '', rhs[i]) ## remove spaces at the beginning
spaces <- grep(" ",rhs[i],fixed=TRUE)
if (length(spaces)==1) {
tmp <- unlist(strsplit(rhs[i]," ", fixed=TRUE))
coeffsr[i] <- as.numeric(tmp[1])
compr[i] <- tmp[2]
} else {
compr[i] <- rhs[i]
}
}
}
if (type=="all")
out <- list(comp=c(compl,compr), coeff=c(-coeffsl,coeffsr))
if (type=="lhs")
out <- list(comp=compl, coeff=-coeffsl)
if (type=="rhs")
out <- list(comp=compr, coeff=coeffsr)
return(out)
}
##' @title Find a PURE_PHASE in a database and parses its formula
##' @param species name of phase to search for
##' @param db a PHREEQC database (buffer)
##' @return a list with the components of the formula and the
##' stoichiometric coefficients, plus the corresponding line in
##' the db as attr(,"line")
##' @author MDL
##' @export
FindPhase <- function(species,db)
{
phases <- grep("^PHASES",db)
pos <- grep(species, db, fixed=TRUE)
if (length(pos) > 1) pos <- pos[1] ## just the 1st occurrence
line <- db[pos+1]
out <- ParseFormula(TrailSpaces(line))
## substitute the composition with the mineral name, lower case
out$comp[1] <- tolower(species)
attr(out,"line") <- pos+1
return(out)
}
##' This utility function is not meant to be used directly by the user
##' @title Remove spaces at the end and beginning of a line
##' @param line the string to remove trailing spaces from
##' @return reformatted string without blanks at beginning and end
##' @author MDL
##' @export
TrailSpaces <- function(line)
{
tmp <- sub(' +$', '', line) ## remove spaces at the end of the lines
tmp <- sub('^ +', '', tmp) ## remove spaces at the beginning
return(tmp)
}
### File-related utility functions for RedModRphree
### Marco De Lucia, delucia@gfz-potsdam.de, 2009-2018
### Time-stamp: "Last modified 2018-05-03 11:00:16 delucia"
##' Reads a normal PHREEQC input file and prepares it for
##' Rphree
##'
##' content for DETAILS: TODO!
##' @title Read a PHREEQC input file and format it for Rphree
##' @param filein PHREEQC input script to read.
##' @param is.db Logical. Set it to TRUE if reading a database, for
##' performing additional checks and formatting.
##' @param tabs Logical, defaults to TRUE. Should we replace
##' tabulation with spaces?
##' @return A buffer (character vector, one line per element)
##' containing a synthctically valid PHREEQC input
##' @author MDL
##' @examples
##' \dontrun{inp <- RPhreeFile("test.phrq")
##' db <- RPhreeFile("db/phreeqc.dat",is.db=TRUE)}
##' @export
RPhreeFile <- function(filein, is.db=FALSE, tabs=TRUE)
{
## open connection, read, close file
fbuff <- file(filein, "r")
buff <- readLines(fbuff)
close.connection(fbuff)
## regexp to clean up
buff <- sub(' +$', '', buff) ## remove spaces at the end of the lines
buff <- sub("#.*$","",buff) ## remove everything after the #'s
if (tabs)
buff <- gsub('\t', ' ', buff) ## substitute tabs with spaces
if (!is.db) {
buff <- sub('^ +', '', buff) ## remove spaces at the beginning
db <- grep("^DATABASE",buff)
if (length(db) != 0) {
buff <- buff[-db]
}
}
multilines <- grep("\\\\$",buff)
if (length(multilines) != 0) {
for (i in multilines)
buff[i] <- paste(sub("\\\\$", "", buff[i]), buff[i+1])
}
buff[multilines+1] <- ""
buff <- buff[buff!=""] ## remove empty elements
return(buff)
}
##' Write an input buffer to a file on disk.
##'
##' It really just opens a file and uses "writeLines" for writing the
##' input to it.
##' @title RPhreeWriteInp
##' @param ofile designated file the input will be written to.
##' @param input the input buffer.
##' @return NULL
##' @author MDL
##' @export
RPhreeWriteInp <- function(ofile,input)
{
oconn <- file(ofile,"w")
writeLines(input, con = oconn, sep = "\n")
close(oconn)
}
##' Reads a phreeqc output file and forms a results list as if the
##' calculation were made with Rphree.
##'
##' Currently all blocks are read. For simulations with kinetics the
##' function to use is \code{\link{RReadOutKin}}.
##' @title RReadOut, parse a PHREEQC output file
##' @param out The PHREEQC output file.
##' @return An output list, as if the simulation would have being run
##' through Rphree (the same blocks and the same names are returned)
##' @author MDL
##' @export
##' @examples
##' \dontrun{
##' out <- RReadOut("ex.out")
##' }
RReadOut <- function(out)
{
if (length(out)==1) {## is it a buffer or a file?
## note that empty lines are removed!
cat(paste("RReadOut:: opening file",out,"\n"))
tot <- RPhreeFile(out, is.db=FALSE, tabs=TRUE)
} else {
## remove empty lines and tabs as in RPhreeFile, store
## everything in "tot"
cat("RReadOut:: scanning the buffer... \n")
tot <- sub(' +$', '', out) ## remove spaces at the end of the lines
tot <- sub("#.*$","", tot) ## remove everything after the #'s
tot <- gsub('\t', ' ', tot) ## substitute tabs with spaces
tot <- tot[tot!=""] ## remove empty elements
}
## some extra lines in PHREEQC version 3 we need to take care of
toremove <- grep("^\\*\\*For", tot)
if (length(toremove)>0) {
toremove <- sort(c(toremove, toremove+1))
tot <- tot[-toremove]
}
solutions <- grep("Beginning of initial solution calculations", tot)
ntot <- length(solutions)
## This is unique (only for calculated solution)
endsim <- grep('End of simulation.',tot,fixed=TRUE)
cat(paste("RReadOut::",ntot," simulation in the given output"))
## "phase assemblage" is unique, but could not be there
has_pphases <- TRUE
endkin <- grep('-Phase assemblage-',tot,fixed=TRUE)
if (length(endkin)==0) {
## This means we don't have any pure phase in the simulations,
## and that the only solutions to read are the initial ones!
has_pphases <- FALSE
initials <- seq(1,ntot)
} else {
## this index stores the lines marking initial solutions
initials <- -(seq(1,ntot)*2 -1)
}
## for these, don't consider the "initial solution" instead
endpure <- grep('-Solution composition-',tot,fixed=TRUE)[initials]
endcomp <- grep('-Description of solution-',tot,fixed=TRUE)[initials]
enddesc <- grep('-Distribution of species-',tot,fixed=TRUE)[initials]
endspec <- grep('-Saturation indices-',tot,fixed=TRUE)[initials]
## create the container
res <- vector(mode="list",length=ntot)
## loop over all steps
for (n in seq_along(solutions)) {
## cat(paste(":: Reading solution n. ",n,"\n"))
## check if there is a fatal error
error <- grep("^ERROR",tot[ solutions[n] : endsim[n] ])
if (length(error) > 0) {
res[[n]] <- "error"
cat(" ERROR!!\n")
next
}
## next block in output file is the "optional"
## EQUILIBRIUM_PHASES
if (has_pphases) {
startpure <- endkin[n]+3
npure <- endpure[n] - startpure - 1
pure_conn <- textConnection( tot[startpure:(startpure+npure)])
pure <- read.table( pure_conn, row.names=1, fill=TRUE, as.is=TRUE)
close(pure_conn)
indreactants <- which(pure[,2]=="reactant")
if (length(indreactants) > 0)
{
for (ir in indreactants) {
rownames(pure)[ir-1] <- paste(rownames(pure)[ir-1], rownames(pure)[ir],sep="_")
pure[(ir-1),c(4,5,6)] <- pure[ir,c(3,4,5)]
}
pure <- pure[-indreactants,]
}
pure <- pure[,c(5,6)]
colnames(pure) <- c("moles","delta")
## if (verbose)
## cat(paste(":: Read pphases block ", n, "of length",npure+1," \n"))
} else {
pure <- NA
}
## now total solutes
startcomp <- endpure[n] + 2
ncomp <- endcomp[n]-startcomp
comp_conn <- textConnection(tot[startcomp:(startcomp+ncomp-1)])
comp <- read.table(comp_conn,row.names=1,fill=TRUE)
close(comp_conn)
colnames(comp) <- c("molal","moles")
## if (verbose)
## cat(paste(":: Read total solutes block ", n, "\n"))
## desc: pH, pe, ecc
block <- tot[(endcomp[n]+1):(enddesc[n]-1)]
pH <- as.numeric(unlist(strsplit(grep('^pH',block, value=TRUE)," +"))[3])
pe <- as.numeric(unlist(strsplit(grep('^pe',block, value=TRUE)," +"))[3])
temp <- as.numeric(unlist(strsplit(grep('^Temperature ',block,value=TRUE)," = "))[2])
water <- as.numeric(unlist(strsplit(grep('Mass of water',block,fixed=TRUE,value=TRUE)," = "))[2])
WaterActivity <- as.numeric(unlist(strsplit(grep('Activity of water',block,fixed=TRUE,value=TRUE)," = "))[2])
IonStr <- as.numeric(unlist(strsplit(grep('Ionic strength',block,fixed=TRUE,value=TRUE)," = "))[2])
TotAlk <- as.numeric(unlist(strsplit(grep('Total alkalinity',block,fixed=TRUE,value=TRUE)," = "))[2])
Tot_C <- as.numeric(unlist(strsplit(grep('Total carbon',block,fixed=TRUE,value=TRUE)," = "))[2])
Tot_CO2 <- as.numeric(unlist(strsplit(grep('Total CO2',block,fixed=TRUE,value=TRUE)," = "))[2])
ElBal <- as.numeric(unlist(strsplit(grep('Electrical balance',block,fixed=TRUE,value=TRUE)," = "))[2])
Per_Error <- as.numeric(unlist(strsplit(grep('Percent error',block,fixed=TRUE,value=TRUE)," = "))[2])
Iter <- as.numeric(unlist(strsplit(grep('^Iterations',block,value=TRUE)," = "))[2])
Tot_H <- as.numeric(unlist(strsplit(grep('Total H',block,fixed=TRUE,value=TRUE)," = "))[2])
Tot_O <- as.numeric(unlist(strsplit(grep('Total O',block,fixed=TRUE,value=TRUE)," = "))[2])
block <- tot[(endcomp[n]+1):(enddesc[n]-1)]
desc <- data.frame(val=c(pH=pH,pe=pe,temp=temp,water=water,WaterActivity=WaterActivity,
Tot_Alk=TotAlk, IonicStr=IonStr, ElectrBal=ElBal,
Per_Error=Per_Error, iterations=Iter, Tot_CO2=Tot_CO2, Tot_H=Tot_H, Tot_O=Tot_O))
## next block in output is the speciation
block <- tot[(enddesc[n] + 4) :( endspec[n] - 1)]
## find out the short lines
shorts <- nchar(block)
excl <- which(shorts < mean(shorts))
block <- block[-excl]
block_conn <- textConnection(block)
## Now we can read.table it
tmp <- read.table( block_conn, fill=TRUE, as.is=TRUE)[,c(1,2,3)]
close(block_conn)
## remove duplicated
rnames <- tmp$V1
dup <- which(!duplicated(rnames))
species <- tmp[dup,c(2,3)]
rownames(species) <- rnames[dup]
colnames(species) <- c("molal","act")
## Now saturation indexes
block <- tot[(endspec[n] + 2) :( endsim[n] - 4)]
block_conn <- textConnection(block)
SI <- read.table(block_conn, fill=TRUE, row.names=1, as.is=TRUE)
close(block_conn)
names(SI) <- c("SI","IAP","logK","formula")
## finally pack all together
res[[n]] <- list(desc=desc,pphases=pure,tot=comp, SI=SI, species=species)
} ## end of loop over simulations
cat(" OK\n")
return(res)
}
##' Kinetics simulation: reads a phreeqc output file and forms an
##' output list - as if the calculation was made through
##' Rphree.
##'
##'
##' @title RReadOutKin, import the output file of a kinetic simulation
##' into R
##' @param out The PHREEQC output file.
##' @param strip logical. If TRUE, the "ListInfo" element will be
##' appended to the list.
##' @param verbose logical. If TRUE more output is given to the
##' console (for debugging).
##' @return An output list as per Rphree call.
##' @author MDL
##' @export
RReadOutKin <- function(out, strip=TRUE, verbose=FALSE)
{
if (length(out)==1) {## is it a buffer or a file?
## note that empty lines are removed!
cat(paste("RReadOutKin:: opening file",out,"\n"))
tot <- RPhreeFile(out, is.db=FALSE, tabs=TRUE)
} else {
## remove empty lines and tabs as in RPhreeFile, store
## everything in "tot"
cat("RReadOutKin:: scanning the buffer... \n")
tot <- sub(' +$', '', out) ## remove spaces at the end of the lines
tot <- sub("#.*$","", tot) ## remove everything after the #'s
tot <- gsub('\t', ' ', tot) ## substitute tabs with spaces
tot <- tot[tot!=""] ## remove empty elements
}
## some extra lines in PHREEQC version 3 we need to take care of
toremove <- grep("^\\*\\*For", tot)
if (length(toremove)>0) {
toremove <- sort(c(toremove, toremove+1))
tot <- tot[-toremove]
if (verbose) cat("RReadOutKin:: Appears to be PHREEQC version 3\n")
}
times <- grep("Time step:", tot, fixed=TRUE)
ntot <- length(times)
years <- round(as.numeric(sub('\ .*$','',gsub('.*:\ ','',tot[times]))),1)
if (verbose) {
cat(out,":\n")
cat(paste("RReadOutKin:: Found ",ntot,"time steps, the last at time",years[ntot],"\n"))
}
endkin <- grep('-Phase assemblage-',tot,fixed=TRUE)
endpure <- grep('-Solution composition-',tot,fixed=TRUE)[-1]
endcomp <- grep('-Description of solution-',tot,fixed=TRUE)[-1]
enddesc <- grep('-Distribution of species-',tot,fixed=TRUE)[-1]
endspec <- grep('-Saturation indices-',tot,fixed=TRUE)[-1]
## This is unique (only for calculated solution)
endsim <- grep('Reaction step',tot,fixed=TRUE)[-1]
endtot <- grep('End of simulation',tot,fixed=TRUE)
endsim <- c(endsim, endtot-1)
if (strip) ## do you want "ListInfo"?
reslen <- ntot
else
reslen <- ntot + 1
## create the container
res <- vector(mode="list",length=reslen)
## loop over all steps
for (n in seq_along(times)) {
if (verbose)
cat(paste("RReadOutKin:: Reading", n, "solution, time ",years[n],"\n"))
## find the last solution
start <- times[n]+2
## how many kinetics??
nkin <- endkin[n] - start
kconn <- textConnection(tot[(start):(start+nkin-1)])
kin <- read.table(kconn, row.names=1,fill=TRUE)[,c(2,1)]
close(kconn)
colnames(kin) <- c("moles","delta")
if (verbose)
cat(paste("RReadOutKin:: Read kinetic block ", n, "\n"))
## next block in output file is EQUILIBRIUM_PHASES