Rphree_Pourbaix.R 18.5 KB
Newer Older
1
### Licence: LGPL version 2.1
2
## Time-stamp: "Last modified 2021-04-29 15:56:47 delucia"
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198

##' @title Revert a string
##' @param x character vector. All element of the vector will be
##'     reverted
##' @return the reverted character vector
##' @export
##' @author MDL
strrev <- function(x) {
    res <- x
    for (i in seq_along(x)) {
        nc <- nchar(x[i])
        res[i] <- paste0(substring(x[i], nc:1, nc:1), collapse = "")
    }
    res
}

##' @title Converts a PHREEQC formula into R expression for nice
##'     annotations
##' @param ff character vector of formulas which will be translated
##' @return character vector of valid R expressions
##' @author MDL
##' @export
FormulaToExpression <- function(ff) {
### transform PHREEQC formulas into R's expressions

    add.sup   <- FALSE
    add.colon <- FALSE

    
    ## Body is everything which precedes a "-", a "+" or a ":"
    body <- sub("[\\:\\+\\-].*$", "", ff)
    ##body <- sub("[+-:].*$", "", ff)

    ## first we take the exponents for the charge
    has.sup <- grep("[\\+\\-]", ff)
    if (length(has.sup)>0) {
        add.sup <- TRUE
        super <- sub("(^.*)([\\+\\-])", "\\2", ff[has.sup])
        super[nchar(super)==2] <- strrev(super[nchar(super)==2])
        esup <- paste0("^{",super,"phantom()}")
    }

    ## treat the ":"
    has.colon <- grep(":", ff, fixed=TRUE)
    if (length(has.colon)>0) {
        add.colon  <- TRUE
        aftercolon <- sub("^.*\\:", ":", ff[has.colon])
        after <- sub("(\\:[0-9]*)H2O", "\\1*H[2]*O", aftercolon)
        after <- sub(":", "*':'*", after)
    }

    has.sub <- grep("[0-9]+", body)
    body[has.sub] <- gsub("([0-9]+)", "[\\1]", body[has.sub])

    body <- gsub("\\]([A-Z])", "]*\\1" , body)
    formula <- body
    if (add.sup)
        formula[has.sup]   <- paste0(formula[has.sup], esup)
    if (add.colon)
        formula[has.colon] <- paste0(formula[has.colon],after)
    return(formula)
}

##' This function takes the string output of a PHREEQC calculation and
##' forms the "SELECTED_OUTPUT" for further simulations including all
##' totals and the activities and saturation indeces of all species
##' and minerals which contain the optional \emph{element}
##'
##' @title Form SELECTED_OUTPUT from a PHREEQC string output
##' @param sim the string output of a previous PHREEQC simulation
##' @param element optional, the single element which must be present
##'     in all species and minerals included in the SELECTED_OUTPUT
##' @return A character vector containing the SELECTED_OUTPUT block
##' @author MDL
##' @export
FormSelectedOutput <- function(sim, element) {
    AllSpecies <- rownames(sim$species)
    AllTot     <- rownames(sim$tot)
    AllSI      <- rownames(sim$SI)
    
    if (!missing(element)) {
        ## first we grab the formula from the SI tab
        mins   <- sim$SI$formula
        indmin <- grep(element, mins)
        AllSI  <- AllSI[indmin]
        indsp  <-  grep(element, AllSpecies) 
        AllSpecies  <- AllSpecies[indsp]
    }
    SI      <- paste("    -saturation_indices", paste(AllSI, collapse=" "))
    Totals  <- paste("    -totals", paste(AllTot, collapse=" "))
    Species <- paste("    -activities", paste(AllSpecies, collapse=" "))
    ## make sure there is no "H2O"
    Species <- sub(" H2O ", " ", Species, fixed=TRUE)
    SelOut <- c("SELECTED_OUTPUT",
                "    -high_precision  true",
                "    -reset   false",
                "    -simulation  true",
                "    -pH      true",
                "    -pe      true",
                "    -water   true",
                "    -charge_balance true",
                "    -ionic_strength true",
                "    -percent_error  true",
                Totals, Species, SI)

    return(SelOut)
}


##' This function uses PHREEQC to calculate and plot a Pourbaix
##' diagram for the specified system at a given T and P. The
##' geochemical system is specified as a single PHREEQC input
##' specifying one "SOLUTION" while the actual values of pe and pH at
##' which the predominance shall be calculated must be specified by
##' the user at input. To try and prevent numerical instabilities,
##' which are quite frequent, the function only actually performs
##' PHREEQC calculations inside the water stability region.
##'
##' @title Pourbaix (pe/pH) diagrams using PHREEQC
##' @param element character of length 1. The element for which the
##'     Pourbaix diagram is being calculated
##' @param pe numeric vector, grid values for redox potential at which
##'     predominance is calculated
##' @param pH numeric vector, grid values for pH at which predominance
##'     is calculated
##' @param Tc temperature in Celsius
##' @param Patm pressure in atmosphere
##' @param base character vector. The template PHREEQC input script,
##'     mostly containing only one SOLUTION block
##' @param first Optional character vector containing i.e. own PHASES
##'     definitions which comes on top of the base input
##' @param db PHREEQC chemical database
##' @param aqonly logical, defaults to FALSE. If TRUE, only aqueous
##'     species are considered
##' @param suppress optional character vector. Species and minerals
##'     contained here are not considered in the diagram
##' @param plot logical, defaults to TRUE. Do you want to plot the
##'     diagram?
##' @param ann.title Optional string. If specified, it will replace
##'     main title of the picture
##' @param ann.sub Optional string. If specified, it will replace
##'     subtitle of the picture (usually this specifies the
##'     concentrations)
##' @param colors Optional character vector containing the colors. If
##'     missing, grDevices::terrain.colors is used internally for
##'     plotting
##' @param procs how many CPUs should be used for the computations?
##'     Defaults to 1. Parallelization is achieved through DoParallel
##'     and foreach, and should work on both *nix and Windows using
##'     fork and sockets respectively
##' @param ... further graphic parameters passed to the \code{image}
##'     for plotting
##' @return invisibly returns a list containing all calculated
##'     simulations, input script for PHREEQC simulations, formulas,
##'     names, etc
##' @author MDL
##' @export
##' @examples
##' base <- c("SOLUTION 1",
##'           "units mol/kgw",
##'           "temp 25.0",
##'           "pressure 1",
##'           "-water 1",
##'           "pH 7",
##'           "pe 4.0",
##'           "Na 1",
##'           "C 0.0125",
##'           "Ca 0.025",
##'           "Mg 0.025",
##'           "Cl 2.002 charge",
##'           "END")
##' par(mfrow=c(1,2))
##' a <- Pourbaix(element="Ca", pe=seq(-15,15, length=31), pH=seq(2,12, length=31), Tc= 25,
##'               Patm=1, base=base, first="", ann.title="Custom title", db=llnl.dat)
##' b <- Pourbaix(element="Mg", pe=seq(-15,15, length=31), pH=seq(2,12, length=31), Tc= 25,
##'               Patm=1, base=base, first="", ann.sub="Custom subtitle", db=llnl.dat)
##'
##' ## Now we restrict the speciation to a fixed valence state of the element,
##' ## here Fe(2) and Cu(2), only aqueous species:
##' base <- c("SOLUTION 1",
##'           "units mol/kgw",
##'           "temp 25.0",
##'           "pressure 1",
##'           "-water 1",
##'           "pH 7",
##'           "pe 4.0",
##'           "Ca        0.025",
##'           "Cu        0.001",
##'           "Cl        0.05122 charge",
##'           "Fe        3.0E-04",
##'           "END")
##' par(mfrow=c(1,2))
##' a <- Pourbaix(element="Fe(2)", pe=seq(-10,12, length=51), pH=seq(2,10, length=51),
##'               Patm=1, base=base, aqonly=TRUE,  db=llnl.dat)
##' b <- Pourbaix(element="Cu(2)", pe=seq(-10,12, length=51), pH=seq(2,10, length=51),
##'               Patm=1, base=base, aqonly=TRUE,  db=llnl.dat)
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
Pourbaix <- function(element,
                     pe=seq(-5, 8, length.out = 31),
                     pH=seq(6, 12, length.out = 31),
                     Tc=25,
                     Patm=1,
                     base,
                     first,
                     db,
                     aqonly=FALSE,
                     suppress,
                     ann.title,
                     ann.sub,
                     colors,
                     procs=1,
                     plot=TRUE,
214
215
216
217
218
                     ...)
{  
    phreeqc::phrLoadDatabaseString(db)
    phreeqc::phrSetOutputStringsOn(TRUE)

219
    ## "first" is an optional argument
220
221
    if (missing(first))
        first <- ""
222
223
224
225
226
227
228
229
230
231

    ## some checks in "base" - pe, pH, pressure and temp must be
    ## there, if not, add them to base script
    for (prop in c("temp", "pe", "pH", "pressure")) {
        if (length(grep(prop,base))==0) {
            base <- AddProp(base, name=prop, values = 1, cat="tot")
            msg("Adding ", prop, "to base script")
        }
    }
    
232
233
234
235
236
237
238
239
240
    ## testrun of base script to get the output
    aa  <- phreeqc::phrRunString(c(first, base))
    con <- phreeqc::phrGetOutputStrings()

    has_valence <- FALSE
    if (missing(element)) {
        utils::capture.output(res <- RReadOutVal(con)[[1]])
        selout <- FormSelectedOutput(res)
    } else {
241
        ## first we distinguish if we want total amount of element or a specific valence
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
        if (length(grep("(", element, fixed=TRUE))>0) {
            ## now we call "valence" the full expression (including the parentheses)
            valence <- element
            has_valence <- TRUE
            ## strip the parentheses from valence to get the element
            element <- sub("\\(.*\\)", "", valence)
            msg("Restricting the calculation to the oxidation state", valence)
            utils::capture.output(res <- RReadOutVal(con, valence)[[1]])
            selout <- FormSelectedOutput(res, element)
       } else {
            utils::capture.output(res <- RReadOutVal(con)[[1]])
            selout <- FormSelectedOutput(res, element)
        }
    }

    ## do we only want aqueous species?
258
    ## if yes, just suppress SIs from selected output
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
    if (aqonly) {
        selout <- selout[-grep("-saturation_indices", selout, fixed=TRUE)]
    }

    if (!missing(suppress)) {
        for (min in suppress) {
            selout <- sub(paste0(" ", min, " "), " ", selout)
            msg("suppressed output of ", paste(suppress, collapse="; "))
        }
    }
    
    phreeqc::phrSetOutputStringsOn(FALSE)
    combs <- expand.grid(pe=pe, pH=pH)

    ## eliminate outside water stability region!
    ## TODO: check T/P dependance
    ind_h2 <- which(combs$pe < - combs$pH)
    ind_o2 <- which(combs$pe > 20.75 - combs$pH)
    ## we need to remember which simulation is suppressed to reform
    ## the grid afterwards
    unstable <- union(ind_o2,ind_h2)
    if (length(unstable) >= 1) { 
        suppr <- TRUE
        fcombs <- combs[-unstable, ]
        indeces <- seq(1, nrow(combs))[-unstable]
    } else {
        suppr <- FALSE
        fcombs <- combs
    }

    msg("requested", nrow(combs), "evaluations, restricted to", nrow(fcombs), "for stability of water")

    Aqnames   <- unlist(strsplit(sub("^.*-activities ", "", grep("activities", selout, value=TRUE))," "))
    SInames    <- unlist(strsplit(sub("^.*-saturation_indices ", "", grep("saturation_indices", selout, value=TRUE))," "))

    ## Distribute T and P
    base2 <- Distribute(base, "temp", Tc)
    if (length(grep("pressure", base))>0)
        base2 <- Distribute(base2, "pressure", Patm)


    msg("running PHREEQC simulations...")

    ## Parallelization
    if (procs > 1) {
        if (Sys.info()[["sysname"]]=="Windows") {
            ThisRunCluster <- parallel::makePSOCKcluster(procs)
        } else {
            ThisRunCluster <- parallel::makeForkCluster(procs)
        }

        doParallel::registerDoParallel(ThisRunCluster)
311
        msg("Registered default doParallel cluster with ", procs, "nodes")
312
        parallel::clusterCall(cl=ThisRunCluster, phreeqc::phrLoadDatabaseString, db)
313
        msg("Database loaded on each worker")
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465

        items <- nrow(fcombs)
        if (items %/% procs > 0)
            itpercpu <- items %/% procs +1
        else
            itpercpu <- items %/% procs

        inds <- rep(seq(1,items),each=itpercpu)[1:items]
        ilist <- split(fcombs, inds)

        ## distribute pe and pH into each chunk
        biginp <- lapply(ilist, function(x) {
            inp <- Distribute(base2, "pe", x$pe);
            Distribute(inp, "pH", x$pH)
        })
        ## add "first" and "selout" blocks
        biginp <- lapply(biginp, function(x) c(first, selout, x))
        on.exit({
            return(list(input=biginp, db=db, combs=combs, fcombs=fcombs))
        })
        Tm <- system.time(bigres <- RunPQC(biginp, procs=procs, second=FALSE))[3]

        parallel::stopCluster(ThisRunCluster)
        msg("Parallelization cluster stopped")
   
    } else {
        ## distribute pe and pH
        biginp <- Distribute(base2, "pe", fcombs$pe)
        biginp <- Distribute(biginp, "pH", fcombs$pH)
        
        on.exit({
            return(list(input=c(first, selout, biginp), db=db, combs=combs, fcombs=fcombs))
        })
        ## Run phreeqc
        Tm <- system.time(bigres <- RunPQC(c(first, selout, biginp), second=FALSE))[3]
        
    }

    msg(nrow(fcombs), "simulations computed in", round(Tm,3), "s")

    ## Extract the highest acivities
    Aqind <- grep("^la_", colnames(bigres))
    if (length(Aqind)>1) {
        Aqmax <- apply(10^bigres[, Aqind], 1, which.max)
        Aqmaxnam <- Aqnames[Aqmax]
        Aqmaxval <- numeric(nrow(bigres))
        for (i in seq_along(Aqmaxval)) {
            Aqmaxval[i] <- 10^bigres[i, Aqind[Aqmax][i] ]
        }
    } else {
        Aqmaxnam <- rep(Aqnames, nrow(bigres)) ## Aqnames must also have length 1
        Aqmaxval <- 10^bigres[ , Aqind]
    }
    
    ## Extract the highest SIs
    if (!aqonly) {
        SIind <- grep("^si_", colnames(bigres))
        if (length(SIind)>1) {
            SImax <- apply(bigres[, SIind], 1, which.max)
            SImaxnam <- SInames[SImax]
            SImaxval <- numeric(nrow(bigres))
            for (i in seq_along(SImaxval)) {
                SImaxval[i] <- bigres[i, SIind[SImax][i] ]
            }
        } else {
            SImaxnam <- rep(SInames, nrow(bigres)) ## SInames must also have length 1
            SImaxval <- bigres[ , SIind]
        }

        if (suppr) {
            dat <- data.frame(pe=combs$pe, pH=combs$pH, SIname=NA_character_, SIval=NA, Aqname=NA_character_, Aqval=NA, stringsAsFactors = FALSE)
            dat$SIname[indeces] <- SImaxnam
            dat$SIval[indeces]  <- SImaxval
            dat$Aqname[indeces] <- Aqmaxnam
            dat$Aqval[indeces]  <- Aqmaxval
        } else {
            dat <- data.frame(pe=combs$pe, pH=combs$pH, SIname=SImaxnam, SIval=SImaxval, Aqname=Aqmaxnam, Aqval=Aqmaxval, stringsAsFactors = FALSE)
        }
    
        vec <- ifelse(dat$SIval > 0, dat$SIname, dat$Aqname)
    } else {
        if (suppr) {
            dat <- data.frame(pe=combs$pe, pH=combs$pH, Aqname=NA_character_, Aqval=NA, stringsAsFactors = FALSE)
            dat$Aqname[indeces] <- Aqmaxnam
            dat$Aqval[indeces]  <- Aqmaxval
        } else {
            dat <- data.frame(pe=combs$pe, pH=combs$pH, Aqname=Aqmaxnam, Aqval=Aqmaxval, stringsAsFactors = FALSE)
        }

        vec <- dat$Aqname
    }

    todisplay <- stats::na.omit(unique(vec))

    if (!missing(suppress))
        todisplay <- todisplay[! todisplay %in% suppress]

    naqueous <- length(aq <- which(todisplay %in% unique(Aqmaxnam)))
    
    ## fonts 1 for eq phases, font 4 for aqueous
    is.aqueous <- rep(1, length(todisplay))
    formulas <- rep("", length(todisplay))
    is.aqueous[aq] <- 4
    
    if (!aqonly) {
        neqphases <- length(eq <- which(todisplay %in% unique(SImaxnam)))
        ## formulas of eqphases
        is.pp <- todisplay[is.aqueous==1]
        formulas[is.aqueous==1] <- res$SI$formula[match(is.pp, rownames(res$SI))]
    } else eq <- NA

    
    numvec <- match(vec, todisplay)## so that vec==todisplay[numvec]
    levs <- max(numvec, na.rm=TRUE)
    mat <- matrix(numvec, ncol=length(pe), nrow=length(pH), byrow=TRUE)
    centers <- matrix(0, ncol=2, nrow=length(todisplay))
    for (i in seq_along(todisplay)) {
        centers[i, ] <- floor(colMeans(which(mat == i, arr.ind = TRUE)))
    }

    ## Fix the names
    parsednames <- todisplay
    legexpr <- FormulaToExpression(formulas[is.aqueous==1])
    Rformulas <- parse(text = paste0("italic(", legexpr,")"))
    aqlegexpr <- FormulaToExpression(parsednames[is.aqueous==4])
    Raqlegexpr <- parse(text = aqlegexpr)
    parsednames[is.aqueous==4] <- Raqlegexpr

    on.exit()
    
    ## plotting
    if (plot) {
        if (missing(colors))
            colors <- grDevices::terrain.colors(levs, alpha = 0.5)
        
        par(mar = c(5,5,5,5), las=1, family="serif")
        
        image(x=pH, y=pe, z=mat, col = colors, useRaster=TRUE, axes=FALSE, ...)
        text(pH[centers[,1]], pe[centers[,2]], parsednames, font=is.aqueous, pos=3)
        if (!aqonly){
            ii <- which(is.aqueous==1)
            ## we need also to check if ii is not empty!
            if (length(ii)>0) {
                ## we plot these labels
                text(pH[centers[ii,1]], pe[centers[ii,2]], Rformulas, font=3, pos=1)
            }
        }
        
        
        if (missing(ann.title)) {
            mymain <- paste("Pourbaix diagram for",
                            ifelse(missing(element), "the specified system", ifelse(has_valence, valence, element)),
466
                            "\n T=", Tc, "\u00B0C  /  P=", Patm, "atm")
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
        } else
            mymain <- ann.title
        
        if (missing(ann.sub)) {
            mysub <- paste("Molalities of dissolved elements:", paste(rownames(res$tot),res$tot[,1], collapse="; "))
        } else
            mysub <- ann.sub
        
        title(main = mymain,
              sub  = mysub)
        
        axis(1, at=pretty(pH))
        axis(2, at=pretty(pe))
        
        ## stability lines of water
        abline(0, -1, lty="dashed")
        abline(20.8, -1, lty="dashed")
        
        F <- 96485.3365
        
        Eh <- seq(-1,1,0.25)
        ppe <- Eh/2.3/8.3145/{Tc+273.15}*F
        ## Eh <-  2.3*8.3145*{Tc+273.15}* ppe/F
        ## par(new = TRUE)
        ## plot(0, 0, type = "n", axes = FALSE, bty = "n", xlab = "", ylab = "")
        axis(side=4, at = ppe, labels=Eh)
        ## axis(3, at=1/{Tgrid+273.15}, labels=(Tgrid))
        
        mtext("Eh / V", side=4, line=3, las=0)
        box()
    } ## plotting
    
499
500
501
502
    invisible(list(mat=mat,
                   displayed=todisplay,
                   formula=parsednames,
                   is.aq=is.aqueous,
503
                   phreeqc=list(input=c(first, selout, biginp), pqc=bigres, db=db, res=res),
504
505
506
507
                   dat=dat,
                   vec=vec,
                   aq=aq,
                   eq=eq))
508
509
}