Rphree_SurrogateUtils.R 11 KB
Newer Older
Marco De Lucia's avatar
Marco De Lucia committed
1
2
##  Functions for dealing with surrogate simulations

Marco De Lucia's avatar
Marco De Lucia committed
3
### Marco De Lucia, delucia@gfz-potsdam.de, 2009-2021
4
### Time-stamp: "Last modified 2021-04-28 15:44:10 delucia"
Marco De Lucia's avatar
Marco De Lucia committed
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28

##' Computes the average of absolute values of a vector
##' @title Average of absolute values
##' @param x numeric vector
##' @return mean(abs(x))
##' @author MDL
##' @export
mae <- function(x) mean(abs(x))

## ##' .. content for \description{} (no empty lines) ..
## ##'
## ##' .. content for \details{} ..
## ##' @title SoftMax transformation
## ##' @param obj numeric vector
## ##' @return 
## ##' @author 
## SoftMax <- function(obj) {
##     eo <- exp(obj-max(obj))
##     ss <- sum(eo)
##     scaled <- eo/ss
##     attr(res,"back") <- ss
##     return(res)
## }

Marco De Lucia's avatar
Marco De Lucia committed
29
## ## SoftMaxBackTransf <- function(vec, back) return(log(vec)+log(back))
Marco De Lucia's avatar
Marco De Lucia committed
30

Marco De Lucia's avatar
Marco De Lucia committed
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
## ##' @title Flatten out a Rphree list
## ##' @param list list of Rphree lists
## ##' @param strip logical, should we delete "ListInfo"? Defaults to
## ##'     true
## ##' @return a list whose elements are single Rphree simulations
## ##' @author MDL
## ##' @export
## FlattenList <- function(list) {
##    tot <- vector(mode="list", length=sum(sapply(stripped, length)))
##    k <- 1
##    for (i in seq_along(stripped)) {
##       for (j in seq_along(stripped[[i]])) {
##          tot[[k]] <- stripped[[i]][[j]]
##          k=k+1
##       }
##    }
##    return(tot)
## }
Marco De Lucia's avatar
Marco De Lucia committed
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92


##' @title Find all species occurring in the ensemble
##' @param flatlist the flatted list of Rphree simulations
##' @return vector with unique occurrences of species names
##' @author MDL
##' @export
FindAllSpeciesNames <- function(flatlist)
    unique(sort(unlist(sapply(flatlist, function(sol) rownames(sol$species)))))


##' @title Find all pphases occurring in the ensemble
##' @param flatlist the flatted list of Rphree simulations
##' @return vector with unique occurrences of pphases names
##' @author MDL
##' @export
FindAllMinNames <- function(flatlist)
    unique(sort(unlist(sapply(flatlist, function(sol) rownames(sol$pphases)))))

##' @title Find all totals occurring in the ensemble
##' @param flatlist the flatted list of Rphree simulations
##' @return vector with unique occurrences of element names
##' @author MDL
##' @export
FindAllTotNames <- function(flatlist)
    unique(sort(unlist(sapply(flatlist, function(sol) rownames(sol$tot)))))


##' @title Extract species from a flatted Rphree list
##' @param flatlist the list as input to search in
##' @param species vector containing the names of the species to
##'     search for
##' @return matix with one species per column
##' @author MDL
##' @export
ExtractSpecies <- function(flatlist, species) {
   tmp <- matrix(NA, len <- length(flatlist), speclen <- length(species))
   colnames(tmp) <- species
   for (i in seq(speclen)) {
      tmp[,i] <- RPinfo(flatlist, "species", species[i], flex=TRUE)
   }
   return(tmp)
}

93
##' @title Extract all equilibrium phases from a flattened output list
Marco De Lucia's avatar
Marco De Lucia committed
94
##' @param flatlist the list as input to search in
95
96
97
##' @param pphases character vector containing the names of the Pure
##'     Phases whose values are to be extracted
##' @return a matrix containing as many columns as specified pphases
98
##' @export
Marco De Lucia's avatar
Marco De Lucia committed
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
##' @author MDL
ExtractPphases <- function(flatlist, pphases) {
   tmp <- matrix(NA, len <- length(flatlist), speclen <- length(pphases))
   colnames(tmp) <- pphases
   for (i in seq(speclen)) {
      tmp[,i] <- RPinfo(flatlist, "pphase", pphases[i], flex=TRUE)
   }
   return(tmp)
}


##' @title Extract total element concentrations from a flatted Rphree
##'     list
##' @param flatlist the list as input to search in
##' @param totals vector containing the names of the elements to
##'     search for
##' @return Matrix with one element per column
##' @author MDL
##' @export
ExtractTotals <- function(flatlist, totals) {
   tmp <- matrix(NA, len <- length(flatlist), speclen <- length(totals))
   colnames(tmp) <- totals
   for (i in seq(speclen)) {
      tmp[,i] <- RPinfo(flatlist, "tot", totals[i], flex=TRUE)
   }
   return(tmp)
}

##' @title Scan a list of Rphree simulations to form stoichiometric
##'     matrix
##' @param flatlist a flatted list of Rphree simulations
##' @param db database (as read by RReadFile(..., is.db=TRUE))
##' @param species vector containing all species to be included in the
##'     stoichiometric matrix. If omitted, the flat list is searched
##'     for all possible species
##' @param pphases vector containing all EQUILIBRIUM minerals to be
##'     included in the stoichiometric matrix. If omitted, the flat
##'     list is searched for all possible minerals.
##' @return A stoichiometric matrix
##' @author MDL
##' @export
StoichiometricMatrix <- function(flatlist, db, species, pphases)
{

    if (missing(species))
        species <- FindAllSpeciesNames(flatlist)
    if(missing(pphases))
        minerals <- FindAllMinNames(flatlist)

    pos_solsp <- grep("SOLUTION_SPECIES", db, fixed=TRUE)
    pos_phases <- grep("PHASES", db, fixed=TRUE)
    dbtmp <- db[{pos_solsp+1}:{pos_phases -1}]
    tots <- lapply(species, function(x) grep(x,dbtmp,fixed=TRUE))

    equations <- dbtmp[grep("^[[:alnum:]]", dbtmp, perl=TRUE)]
    parsed <- lapply(equations, ParseFormula)
    names(parsed) <- paste("e",seq_along(equations), sep="")
    matched <- lapply(parsed, function(x) match(x$comp, species))
    matched2 <- sapply(matched, function(x) NA %in% x)
    indeq <- which(!matched2)
    equations <- equations[indeq]
    matched   <- matched[indeq]
    parsed <- parsed[indeq]

    indeq <- which(sapply(matched, length) > 2)
    equations <- equations[indeq]
    matched   <- matched[indeq]
    parsed <- parsed[indeq]

    parsedmin <- lapply(minerals, FindLogK,db=db, type="rhs")
    names(parsedmin) <- minerals
    matchedmin <- lapply(parsedmin, function(x) match(x$comp, species))

    totparsed <- c(parsed, parsedmin)
    totmatch <- c(matched, matchedmin)
    mat <- matrix(0, ncol=length(species)+length(parsedmin), nrow=length(totmatch))
    colnames(mat) <- c(species,minerals)
    rownames(mat) <- names(totmatch)

    for (i in seq(nrow(mat))) {
        cat(c("Equation: ",i, paste( totparsed[[i]]$comp, collapse=" ")),"\n")
        mat[i,totmatch[[i]]] <- totparsed[[i]]$coeff
    }

    return(list(mat=mat, parsed=totparsed, matched=totmatch, minerals=minerals, species=species))
}

##' @title Returns the composition of a mineral in terms of chemical
##'     elements (not of species)
##' @param pphase name of the mineral to parse
##' @param stoichmat full stoichiometric matrix returned by
##' @param elements elements to include in the mass balance equations,
##'     defaults to c("C","Ca","Mg","Cl")
##' @return named vector with stoichiometric coefficients of chemical
##'     elements for the pphase mineral
##' @author MDL
##' @export
ElementalBalanceMin <- function(pphase, stoichmat, elements = c("C","Ca","Mg","Cl"))
{
    ind <- match(pphase[1], rownames(stoichmat))
    eq <- stoichmat[ ind[1],]
    eq <- eq[eq!=0]
    Coefs <- rep(0, length(elements))
    names(Coefs) <- elements
    for (i in seq_along(elements)) {
        Shift <- nchar(elements[i])-1
        expr <- paste0(elements[i],'[A-Z0-9\\+\\-\\ ]') ## Element followed by UpperCase, number, "+" space or "-"
        indm <- grep(expr, names(eq), perl=TRUE)

        if (length(indm)!=0) {
            ## Now find the coefficient
            SpeciesHavingElement <- names(eq)[indm]
            Multip <- eq[indm]
            Pos <- regexpr(expr, SpeciesHavingElement)
            NextChar <- substr(SpeciesHavingElement, Pos+1+Shift, Pos+1+Shift)
            if (NextChar == "+") 
                Coefs[i] <- Multip
            if (NextChar == "-") 
                Coefs[i] <- Multip 
            if (length(grep('[A-Z]', NextChar))>0) 
                Coefs[i] <- Multip
            if (length(grep('[0-9]', NextChar))>0) 
                Coefs[i] <- as.integer(NextChar)*Multip
        }
    }
    Coefs <- Coefs[Coefs!=0]
    return(Coefs)
}



##' @title Form the balance equations from a full stoichiometric
##'     matrix
##' @param stmat a stoichiometric matrix returned by
##' @param elements elements to include in the mass balance equations,
##'     defaults to c("C","Ca","Mg","Cl")
##' @return a structure (list) containing the balance equations
##' @author MDL
##' @export
BalanceEquations <- function(stmat, elements = c("C","Ca","Mg","Cl"))
{
    balance <- lapply(stmat$minerals, ElementalBalanceMin, stoichmat=stmat$mat, elements = elements)
    names(balance) <- stmat$minerals
    return(balance)
}

##' @title Figures out the formula from a baleq structure
##' @param baleq the data structure expressing the mass balance of
##'     minerals
##' @return a named vector with the stoichiometric formula of the
##'     minerals
##' @author MDL
##' @export
FormulaFromBal <- function(baleq)
{
    sapply(names(baleq), function(x) paste0(baleq[[x]],"*", names(baleq[[x]]), collapse="+" ))

}

##' @title Check mass balance between design and results for
##'     surrogates
##' @param baleq structure (list) containing elemental mass balance
##' @param des design (state_T under our conventions)
##' @param res results of the surrogates (corresponds to state_C)
##' @param elements optional, vector of elements on which to perform
##'     mass balance check
##' @return a matrix containing a mass balance error, one column for
##'     each tested equation (=element)
##' @author MDL
##' @export
CheckBalance <- function(baleq, des, res, elements)
{
    pphases <- names(baleq)
    desm <- data.matrix(des)
    resm <- data.matrix(res)
    ## deltas <- data.matrix(as.data.frame(lapply(pphases, function(x) resm[,x] - desm[,x]),optional = TRUE))
    ## colnames(deltas) <- paste0("D",pphases)

    if (missing(elements))
        elements <- unique(sort(unlist(lapply(baleq, function(x) names(x)))))

    diffs <- matrix(NA, ncol=length(elements), nrow=nrow(desm))
    colnames(diffs) <- elements
    for (i in seq_along(elements)){
        ## lhs == desmign
        ## matchinmin <- sapply(baleq, function(x) match(elements[i], names(x)))
        ## matchinmin <- matchinmin[!is.na(matchinmin)]
        coefs <- sapply(baleq, function(x) return(x[elements[i]]))
        names(coefs) <- names(baleq)
        indcoefs <- which(!is.na(coefs))
        coefs <- coefs[indcoefs]
        minleft <- sapply(names(coefs), function(x) desm[,x]*coefs[x])
        lhs <- desm[,elements[i]] + rowSums(minleft)
        
        minright <- sapply(names(coefs), function(x) resm[,x]*coefs[x])
        rhs <- resm[,elements[i]] + rowSums(minright)
        diffs[,i] <- rhs - lhs
    }
    return(diffs)

}

##' From a list containing results of a Reactive Transport simulation,
##' this function extracts the "transport" as a "perturbation" of
##' chemistry. This simply means computing the differences between the
##' state_T at current time and state_C at previous time step.
##' @title Extract the changes in stateC due to transport
##' @param sim a result of Reactive Transport (for each time step, a T
##'     and a C data.frame)
308
##' @return a list
Marco De Lucia's avatar
Marco De Lucia committed
309
310
311
312
313
314
315
##' @author MDL
##' @export
TranspAsPert <- function (sim) {
    out <- lapply(2:length(sim), function(x) sim[[x]]$T - sim[[x-1]]$C)
    names(out) <- names(sim)[2:length(sim)]
    return(out)
}