Skip to content
Snippets Groups Projects
Select Git revision
  • 1ffb5dccb9e5e7155f4401769a35f0fc246a3fa0
  • master default
  • 3-port-RcppTUG
  • 2-refactor-pourbaix
  • v0.0.4
5 results

Rphree_Pourbaix.R

Blame
  • Rphree_Pourbaix.R 18.49 KiB
    ### Licence: LGPL version 2.1
    ## Time-stamp: "Last modified 2021-06-27 16:17:00 delucia"
    
    ##' @title Revert a string
    ##' @param x character vector. All element of the vector will be
    ##'     reverted
    ##' @return the reverted character vector
    ##' @export
    ##' @author MDL
    strrev <- function(x) {
        res <- x
        for (i in seq_along(x)) {
            nc <- nchar(x[i])
            res[i] <- paste0(substring(x[i], nc:1, nc:1), collapse = "")
        }
        res
    }
    
    ##' @title Converts a PHREEQC formula into R expression for nice
    ##'     annotations
    ##' @param ff character vector of formulas which will be translated
    ##' @return character vector of valid R expressions
    ##' @author MDL
    ##' @export
    FormulaToExpression <- function(ff) {
    ### transform PHREEQC formulas into R's expressions
    
        add.sup   <- FALSE
        add.colon <- FALSE
    
        
        ## Body is everything which precedes a "-", a "+" or a ":"
        body <- sub("[\\:\\+\\-].*$", "", ff)
        ##body <- sub("[+-:].*$", "", ff)
    
        ## first we take the exponents for the charge
        has.sup <- grep("[\\+\\-]", ff)
        if (length(has.sup)>0) {
            add.sup <- TRUE
            super <- sub("(^.*)([\\+\\-])", "\\2", ff[has.sup])
            super[nchar(super)==2] <- strrev(super[nchar(super)==2])
            esup <- paste0("^{",super,"phantom()}")
        }
    
        ## treat the ":"
        has.colon <- grep(":", ff, fixed=TRUE)
        if (length(has.colon)>0) {
            add.colon  <- TRUE
            aftercolon <- sub("^.*\\:", ":", ff[has.colon])
            after <- sub("(\\:[0-9]*)H2O", "\\1*H[2]*O", aftercolon)
            after <- sub(":", "*':'*", after)
        }
    
        has.sub <- grep("[0-9]+", body)
        body[has.sub] <- gsub("([0-9]+)", "[\\1]", body[has.sub])
    
        body <- gsub("\\]([A-Z])", "]*\\1" , body)
        formula <- body
        if (add.sup)
            formula[has.sup]   <- paste0(formula[has.sup], esup)
        if (add.colon)
            formula[has.colon] <- paste0(formula[has.colon],after)
        return(formula)
    }
    
    ##' This function takes the string output of a PHREEQC calculation and
    ##' forms the "SELECTED_OUTPUT" for further simulations including all
    ##' totals and the activities and saturation indeces of all species
    ##' and minerals which contain the optional \emph{element}
    ##'
    ##' @title Form SELECTED_OUTPUT from a PHREEQC string output
    ##' @param sim the string output of a previous PHREEQC simulation
    ##' @param element optional, the single element which must be present
    ##'     in all species and minerals included in the SELECTED_OUTPUT
    ##' @return A character vector containing the SELECTED_OUTPUT block
    ##' @author MDL
    ##' @export
    FormSelectedOutput <- function(sim, element) {
        AllSpecies <- rownames(sim$species)
        AllTot     <- rownames(sim$tot)
        AllSI      <- rownames(sim$SI)
        
        if (!missing(element)) {
            ## first we grab the formula from the SI tab
            mins   <- sim$SI$formula
            indmin <- grep(element, mins)
            AllSI  <- AllSI[indmin]
            indsp  <-  grep(element, AllSpecies) 
            AllSpecies  <- AllSpecies[indsp]
        }
        SI      <- paste("    -saturation_indices", paste(AllSI, collapse=" "))
        Totals  <- paste("    -totals", paste(AllTot, collapse=" "))
        Species <- paste("    -activities", paste(AllSpecies, collapse=" "))
        ## make sure there is no "H2O"
        Species <- sub(" H2O ", " ", Species, fixed=TRUE)
        SelOut <- c("SELECTED_OUTPUT",
                    "    -high_precision  true",
                    "    -reset   false",
                    "    -simulation  true",
                    "    -pH      true",
                    "    -pe      true",
                    "    -water   true",
                    "    -charge_balance true",
                    "    -ionic_strength true",
                    "    -percent_error  true",
                    Totals, Species, SI)
    
        return(SelOut)
    }
    
    
    ##' This function uses PHREEQC to calculate and plot a Pourbaix
    ##' diagram for the specified system at a given T and P. The
    ##' geochemical system is specified as a single PHREEQC input
    ##' specifying one "SOLUTION" while the actual values of pe and pH at
    ##' which the predominance shall be calculated must be specified by
    ##' the user at input. To try and prevent numerical instabilities,
    ##' which are quite frequent, the function only actually performs
    ##' PHREEQC calculations inside the water stability region.
    ##'
    ##' @title Pourbaix (pe/pH) diagrams using PHREEQC
    ##' @param element character of length 1. The element for which the
    ##'     Pourbaix diagram is being calculated
    ##' @param pe numeric vector, grid values for redox potential at which
    ##'     predominance is calculated
    ##' @param pH numeric vector, grid values for pH at which predominance
    ##'     is calculated
    ##' @param Tc temperature in Celsius
    ##' @param Patm pressure in atmosphere
    ##' @param base character vector. The template PHREEQC input script,
    ##'     mostly containing only one SOLUTION block
    ##' @param first Optional character vector containing i.e. own PHASES
    ##'     definitions which comes on top of the base input
    ##' @param db PHREEQC chemical database
    ##' @param aqonly logical, defaults to FALSE. If TRUE, only aqueous
    ##'     species are considered
    ##' @param suppress optional character vector. Species and minerals
    ##'     contained here are not considered in the diagram
    ##' @param plot logical, defaults to TRUE. Do you want to plot the
    ##'     diagram?
    ##' @param ann.title Optional string. If specified, it will replace
    ##'     main title of the picture
    ##' @param ann.sub Optional string. If specified, it will replace
    ##'     subtitle of the picture (usually this specifies the
    ##'     concentrations)
    ##' @param colors Optional character vector containing the colors. If
    ##'     missing, grDevices::terrain.colors is used internally for
    ##'     plotting
    ##' @param procs how many CPUs should be used for the computations?
    ##'     Defaults to 1. Parallelization is achieved through DoParallel
    ##'     and foreach, and should work on both *nix and Windows using
    ##'     fork and sockets respectively
    ##' @param ... further graphic parameters passed to the \code{image}
    ##'     for plotting
    ##' @return invisibly returns a list containing all calculated
    ##'     simulations, input script for PHREEQC simulations, formulas,
    ##'     names, etc
    ##' @author MDL
    ##' @export
    ##' @examples
    ##' base <- c("SOLUTION 1",
    ##'           "units mol/kgw",
    ##'           "temp 25.0",
    ##'           "pressure 1",
    ##'           "-water 1",
    ##'           "pH 7",
    ##'           "pe 4.0",
    ##'           "Na 1",
    ##'           "C 0.0125",
    ##'           "Ca 0.025",
    ##'           "Mg 0.025",
    ##'           "Cl 2.002 charge",
    ##'           "END")
    ##' par(mfrow=c(1,2))
    ##' a <- Pourbaix(element="Ca", pe=seq(-15,15, length=31), pH=seq(2,12, length=31), Tc= 25,
    ##'               Patm=1, base=base, first="", ann.title="Custom title", db=llnl.dat)
    ##' b <- Pourbaix(element="Mg", pe=seq(-15,15, length=31), pH=seq(2,12, length=31), Tc= 25,
    ##'               Patm=1, base=base, first="", ann.sub="Custom subtitle", db=llnl.dat)
    ##'
    ##' ## Now we restrict the speciation to a fixed valence state of the element,
    ##' ## here Fe(2) and Cu(2), only aqueous species:
    ##' base <- c("SOLUTION 1",
    ##'           "units mol/kgw",
    ##'           "temp 25.0",
    ##'           "pressure 1",
    ##'           "-water 1",
    ##'           "pH 7",
    ##'           "pe 4.0",
    ##'           "Ca        0.025",
    ##'           "Cu        0.001",
    ##'           "Cl        0.05122 charge",
    ##'           "Fe        3.0E-04",
    ##'           "END")
    ##' par(mfrow=c(1,2))
    ##' a <- Pourbaix(element="Fe(2)", pe=seq(-10,12, length=51), pH=seq(2,10, length=51),
    ##'               Patm=1, base=base, aqonly=TRUE,  db=llnl.dat)
    ##' b <- Pourbaix(element="Cu(2)", pe=seq(-10,12, length=51), pH=seq(2,10, length=51),
    ##'               Patm=1, base=base, aqonly=TRUE,  db=llnl.dat)
    Pourbaix <- function(element,
                         pe=seq(-5, 8, length.out = 31),
                         pH=seq(6, 12, length.out = 31),
                         Tc=25,
                         Patm=1,
                         base,
                         first,
                         db,
                         aqonly=FALSE,
                         suppress,
                         ann.title,
                         ann.sub,
                         colors,
                         procs=1,
                         plot=TRUE,
                         ...)
    {  
        phreeqc::phrLoadDatabaseString(db)
        phreeqc::phrSetOutputStringsOn(TRUE)
    
        ## "first" is an optional argument
        if (missing(first))
            first <- ""
    
        ## some checks in "base" - pe, pH, pressure and temp must be
        ## there, if not, add them to base script
        for (prop in c("temp", "pe", "pH", "pressure")) {
            if (length(grep(prop,base))==0) {
                base <- AddProp(base, name=prop, values = 1, cat="tot")
                msg("Adding ", prop, "to base script")
            }
        }
        
        ## testrun of base script to get the output
        aa  <- phreeqc::phrRunString(c(first, base))
        con <- phreeqc::phrGetOutputStrings()
    
        has_valence <- FALSE
        if (missing(element)) {
            utils::capture.output(res <- ReadOutVal(con)[[1]])
            selout <- FormSelectedOutput(res)
        } else {
            ## first we distinguish if we want total amount of element or a specific valence
            if (length(grep("(", element, fixed=TRUE))>0) {
                ## now we call "valence" the full expression (including the parentheses)
                valence <- element
                has_valence <- TRUE
                ## strip the parentheses from valence to get the element
                element <- sub("\\(.*\\)", "", valence)
                msg("Restricting the calculation to the oxidation state", valence)
                utils::capture.output(res <- ReadOutVal(con, valence)[[1]])
                selout <- FormSelectedOutput(res, element)
           } else {
                utils::capture.output(res <- ReadOutVal(con)[[1]])
                selout <- FormSelectedOutput(res, element)
            }
        }
    
        ## do we only want aqueous species?
        ## if yes, just suppress SIs from selected output
        if (aqonly) {
            selout <- selout[-grep("-saturation_indices", selout, fixed=TRUE)]
        }
    
        if (!missing(suppress)) {
            for (min in suppress) {
                selout <- sub(paste0(" ", min, " "), " ", selout)
                msg("suppressed output of ", paste(suppress, collapse="; "))
            }
        }
        
        phreeqc::phrSetOutputStringsOn(FALSE)
        combs <- expand.grid(pe=pe, pH=pH)
    
        ## eliminate outside water stability region!
        ## TODO: check T/P dependance
        ind_h2 <- which(combs$pe < - combs$pH)
        ind_o2 <- which(combs$pe > 20.75 - combs$pH)
        ## we need to remember which simulation is suppressed to reform
        ## the grid afterwards
        unstable <- union(ind_o2,ind_h2)
        if (length(unstable) >= 1) { 
            suppr <- TRUE
            fcombs <- combs[-unstable, ]
            indeces <- seq(1, nrow(combs))[-unstable]
        } else {
            suppr <- FALSE
            fcombs <- combs
        }
    
        msg("requested", nrow(combs), "evaluations, restricted to", nrow(fcombs), "for stability of water")
    
        Aqnames   <- unlist(strsplit(sub("^.*-activities ", "", grep("activities", selout, value=TRUE))," "))
        SInames    <- unlist(strsplit(sub("^.*-saturation_indices ", "", grep("saturation_indices", selout, value=TRUE))," "))
    
        ## Distribute T and P
        base2 <- Distribute(base, "temp", Tc)
        if (length(grep("pressure", base))>0)
            base2 <- Distribute(base2, "pressure", Patm)
    
    
        msg("running PHREEQC simulations...")
    
        ## Parallelization
        if (procs > 1) {
            if (Sys.info()[["sysname"]]=="Windows") {
                ThisRunCluster <- parallel::makePSOCKcluster(procs)
            } else {
                ThisRunCluster <- parallel::makeForkCluster(procs)
            }
    
            doParallel::registerDoParallel(ThisRunCluster)
            msg("Registered default doParallel cluster with ", procs, "nodes")
            parallel::clusterCall(cl=ThisRunCluster, phreeqc::phrLoadDatabaseString, db)
            msg("Database loaded on each worker")
    
            items <- nrow(fcombs)
            if (items %/% procs > 0)
                itpercpu <- items %/% procs +1
            else
                itpercpu <- items %/% procs
    
            inds <- rep(seq(1,items),each=itpercpu)[1:items]
            ilist <- split(fcombs, inds)
    
            ## distribute pe and pH into each chunk
            biginp <- lapply(ilist, function(x) {
                inp <- Distribute(base2, "pe", x$pe);
                Distribute(inp, "pH", x$pH)
            })
            ## add "first" and "selout" blocks
            biginp <- lapply(biginp, function(x) c(first, selout, x))
            on.exit({
                return(list(input=biginp, db=db, combs=combs, fcombs=fcombs))
            })
            Tm <- system.time(bigres <- RunPQC(biginp, procs=procs, second=FALSE))[3]
    
            parallel::stopCluster(ThisRunCluster)
            msg("Parallelization cluster stopped")
       
        } else {
            ## distribute pe and pH
            biginp <- Distribute(base2, "pe", fcombs$pe)
            biginp <- Distribute(biginp, "pH", fcombs$pH)
            
            on.exit({
                return(list(input=c(first, selout, biginp), db=db, combs=combs, fcombs=fcombs))
            })
            ## Run phreeqc
            Tm <- system.time(bigres <- RunPQC(c(first, selout, biginp), second=FALSE))[3]
            
        }
    
        msg(nrow(fcombs), "simulations computed in", round(Tm,3), "s")
    
        ## Extract the highest acivities
        Aqind <- grep("^la_", colnames(bigres))
        if (length(Aqind)>1) {
            Aqmax <- apply(10^bigres[, Aqind], 1, which.max)
            Aqmaxnam <- Aqnames[Aqmax]
            Aqmaxval <- numeric(nrow(bigres))
            for (i in seq_along(Aqmaxval)) {
                Aqmaxval[i] <- 10^bigres[i, Aqind[Aqmax][i] ]
            }
        } else {
            Aqmaxnam <- rep(Aqnames, nrow(bigres)) ## Aqnames must also have length 1
            Aqmaxval <- 10^bigres[ , Aqind]
        }
        
        ## Extract the highest SIs
        if (!aqonly) {
            SIind <- grep("^si_", colnames(bigres))
            if (length(SIind)>1) {
                SImax <- apply(bigres[, SIind], 1, which.max)
                SImaxnam <- SInames[SImax]
                SImaxval <- numeric(nrow(bigres))
                for (i in seq_along(SImaxval)) {
                    SImaxval[i] <- bigres[i, SIind[SImax][i] ]
                }
            } else {
                SImaxnam <- rep(SInames, nrow(bigres)) ## SInames must also have length 1
                SImaxval <- bigres[ , SIind]
            }
    
            if (suppr) {
                dat <- data.frame(pe=combs$pe, pH=combs$pH, SIname=NA_character_, SIval=NA, Aqname=NA_character_, Aqval=NA, stringsAsFactors = FALSE)
                dat$SIname[indeces] <- SImaxnam
                dat$SIval[indeces]  <- SImaxval
                dat$Aqname[indeces] <- Aqmaxnam
                dat$Aqval[indeces]  <- Aqmaxval
            } else {
                dat <- data.frame(pe=combs$pe, pH=combs$pH, SIname=SImaxnam, SIval=SImaxval, Aqname=Aqmaxnam, Aqval=Aqmaxval, stringsAsFactors = FALSE)
            }
        
            vec <- ifelse(dat$SIval > 0, dat$SIname, dat$Aqname)
        } else {
            if (suppr) {
                dat <- data.frame(pe=combs$pe, pH=combs$pH, Aqname=NA_character_, Aqval=NA, stringsAsFactors = FALSE)
                dat$Aqname[indeces] <- Aqmaxnam
                dat$Aqval[indeces]  <- Aqmaxval
            } else {
                dat <- data.frame(pe=combs$pe, pH=combs$pH, Aqname=Aqmaxnam, Aqval=Aqmaxval, stringsAsFactors = FALSE)
            }
    
            vec <- dat$Aqname
        }
    
        todisplay <- stats::na.omit(unique(vec))
    
        if (!missing(suppress))
            todisplay <- todisplay[! todisplay %in% suppress]
    
        naqueous <- length(aq <- which(todisplay %in% unique(Aqmaxnam)))
        
        ## fonts 1 for eq phases, font 4 for aqueous
        is.aqueous <- rep(1, length(todisplay))
        formulas <- rep("", length(todisplay))
        is.aqueous[aq] <- 4
        
        if (!aqonly) {
            neqphases <- length(eq <- which(todisplay %in% unique(SImaxnam)))
            ## formulas of eqphases
            is.pp <- todisplay[is.aqueous==1]
            formulas[is.aqueous==1] <- res$SI$formula[match(is.pp, rownames(res$SI))]
        } else eq <- NA
    
        
        numvec <- match(vec, todisplay)## so that vec==todisplay[numvec]
        levs <- max(numvec, na.rm=TRUE)
        mat <- matrix(numvec, ncol=length(pe), nrow=length(pH), byrow=TRUE)
        centers <- matrix(0, ncol=2, nrow=length(todisplay))
        for (i in seq_along(todisplay)) {
            centers[i, ] <- floor(colMeans(which(mat == i, arr.ind = TRUE)))
        }
    
        ## Fix the names
        parsednames <- todisplay
        legexpr <- FormulaToExpression(formulas[is.aqueous==1])
        Rformulas <- parse(text = paste0("italic(", legexpr,")"))
        aqlegexpr <- FormulaToExpression(parsednames[is.aqueous==4])
        Raqlegexpr <- parse(text = aqlegexpr)
        parsednames[is.aqueous==4] <- Raqlegexpr
    
        on.exit()
        
        ## plotting
        if (plot) {
            if (missing(colors))
                colors <- grDevices::terrain.colors(levs, alpha = 0.5)
            
            par(mar = c(5,5,5,5), las=1, family="serif")
            
            image(x=pH, y=pe, z=mat, col = colors, useRaster=TRUE, axes=FALSE, ...)
            text(pH[centers[,1]], pe[centers[,2]], parsednames, font=is.aqueous, pos=3)
            if (!aqonly){
                ii <- which(is.aqueous==1)
                ## we need also to check if ii is not empty!
                if (length(ii)>0) {
                    ## we plot these labels
                    text(pH[centers[ii,1]], pe[centers[ii,2]], Rformulas, font=3, pos=1)
                }
            }
            
            
            if (missing(ann.title)) {
                mymain <- paste("Pourbaix diagram for",
                                ifelse(missing(element), "the specified system", ifelse(has_valence, valence, element)),
                                "\n T=", Tc, "\u00B0C  /  P=", Patm, "atm")
            } else
                mymain <- ann.title
            
            if (missing(ann.sub)) {
                mysub <- paste("Molalities of dissolved elements:", paste(rownames(res$tot),res$tot[,1], collapse="; "))
            } else
                mysub <- ann.sub
            
            title(main = mymain,
                  sub  = mysub)
            
            axis(1, at=pretty(pH))
            axis(2, at=pretty(pe))
            
            ## stability lines of water
            abline(0, -1, lty="dashed")
            abline(20.8, -1, lty="dashed")
            
            F <- 96485.3365
            
            Eh <- seq(-1,1,0.25)
            ppe <- Eh/2.3/8.3145/{Tc+273.15}*F
            ## Eh <-  2.3*8.3145*{Tc+273.15}* ppe/F
            ## par(new = TRUE)
            ## plot(0, 0, type = "n", axes = FALSE, bty = "n", xlab = "", ylab = "")
            axis(side=4, at = ppe, labels=Eh)
            ## axis(3, at=1/{Tgrid+273.15}, labels=(Tgrid))
            
            mtext("Eh / V", side=4, line=3, las=0)
            box()
        } ## plotting
        
        invisible(list(mat=mat,
                       displayed=todisplay,
                       formula=parsednames,
                       is.aq=is.aqueous,
                       phreeqc=list(input=c(first, selout, biginp), pqc=bigres, db=db, res=res),
                       dat=dat,
                       vec=vec,
                       aq=aq,
                       eq=eq))
    }