core.py 41.8 KB
Newer Older
Sebastian Heimann's avatar
Sebastian Heimann committed
1
2
3
4
5
6
7
8
9
10
import math
import os
import sys
import logging
import time
import copy
import shutil
import os.path as op
import numpy as num

11
12
13
from pyrocko.guts import (load, Object, String, Float, Int, Bool, List,
                          StringChoice)
from pyrocko import orthodrome as od, gf, trace, guts, util
14
from pyrocko import parimap, model, marker as pmarker
Sebastian Heimann's avatar
Sebastian Heimann committed
15

16
from .dataset import DatasetConfig, NotFound
17
18
19
20
from .problems import ProblemConfig, Problem
from .targets import TargetAnalysisResult, TargetConfig
from .meta import (Path, HasPaths, expand_template, xjoin, GrondError,
                   Forbidden)
Sebastian Heimann's avatar
Sebastian Heimann committed
21
22
23
24
25

logger = logging.getLogger('grond.core')
guts_prefix = 'grond'


Sebastian Heimann's avatar
Sebastian Heimann committed
26
27
28
29
30
31
32
def mahalanobis_distance(xs, mx, cov):
    imask = num.diag(cov) != 0.
    icov = num.linalg.inv(cov[imask, :][:, imask])
    temp = xs[:, imask] - mx[imask]
    return num.sqrt(num.sum(temp * num.dot(icov, temp.T).T, axis=1))


Sebastian Heimann's avatar
Sebastian Heimann committed
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class DirectoryAlreadyExists(Exception):
    pass


class AnalyserConfig(Object):
    niter = Int.T(default=1000)


class SamplerDistributionChoice(StringChoice):
    choices = ['multivariate_normal', 'normal']


class SolverConfig(Object):
    niter_uniform = Int.T(default=1000)
Sebastian Heimann's avatar
Sebastian Heimann committed
47
    niter_transition = Int.T(default=0)
Sebastian Heimann's avatar
Sebastian Heimann committed
48
49
50
51
    niter_explorative = Int.T(default=10000)
    niter_non_explorative = Int.T(default=0)
    sampler_distribution = SamplerDistributionChoice.T(
        default='multivariate_normal')
52
    scatter_scale_transition = Float.T(default=2.0)
53
    scatter_scale = Float.T(default=1.0)
54
55
    chain_length_factor = Float.T(default=8.0)
    compensate_excentricity = Bool.T(default=True)
Sebastian Heimann's avatar
Sebastian Heimann committed
56
57
58
59

    def get_solver_kwargs(self):
        return dict(
            niter_uniform=self.niter_uniform,
Sebastian Heimann's avatar
Sebastian Heimann committed
60
            niter_transition=self.niter_transition,
Sebastian Heimann's avatar
Sebastian Heimann committed
61
62
            niter_explorative=self.niter_explorative,
            niter_non_explorative=self.niter_non_explorative,
63
            sampler_distribution=self.sampler_distribution,
64
            scatter_scale_transition=self.scatter_scale_transition,
65
66
67
            scatter_scale=self.scatter_scale,
            chain_length_factor=self.chain_length_factor,
            compensate_excentricity=self.compensate_excentricity)
Sebastian Heimann's avatar
Sebastian Heimann committed
68
69


Sebastian Heimann's avatar
Sebastian Heimann committed
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
class EngineConfig(HasPaths):
    gf_stores_from_pyrocko_config = Bool.T(default=True)
    gf_store_superdirs = List.T(Path.T())
    gf_store_dirs = List.T(Path.T())

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        self._engine = None

    def get_engine(self):
        if self._engine is None:
            fp = self.expand_path
            self._engine = gf.LocalEngine(
                use_config=self.gf_stores_from_pyrocko_config,
                store_superdirs=fp(self.gf_store_superdirs),
                store_dirs=fp(self.gf_store_dirs))

        return self._engine


Sebastian Heimann's avatar
Sebastian Heimann committed
90
91
92
93
94
95
96
class Config(HasPaths):
    rundir_template = Path.T()
    dataset_config = DatasetConfig.T()
    target_configs = List.T(TargetConfig.T())
    problem_config = ProblemConfig.T()
    analyser_config = AnalyserConfig.T(default=AnalyserConfig.D())
    solver_config = SolverConfig.T(default=SolverConfig.D())
Sebastian Heimann's avatar
Sebastian Heimann committed
97
    engine_config = EngineConfig.T(default=EngineConfig.D())
Sebastian Heimann's avatar
Sebastian Heimann committed
98
99
100
101

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)

Sebastian Heimann's avatar
Sebastian Heimann committed
102
103
104
    def get_event_names(self):
        return self.dataset_config.get_event_names()

105
106
    def get_dataset(self, event_name):
        return self.dataset_config.get_dataset(event_name)
Sebastian Heimann's avatar
Sebastian Heimann committed
107
108

    def get_targets(self, event):
109
        ds = self.get_dataset(event.name)
Sebastian Heimann's avatar
Sebastian Heimann committed
110
111
112
113
114
115
116
117

        targets = []
        for igroup, target_config in enumerate(self.target_configs):
            targets.extend(target_config.get_targets(
                ds, event, 'group_%i' % igroup))

        return targets

118
119
120
121
122
123
    def setup_modelling_environment(self, problem):
        problem.set_engine(self.engine_config.get_engine())
        ds = self.get_dataset(problem.base_source.name)
        synt = ds.synthetic_test
        if synt:
            synt.set_problem(problem)
Marius Isken's avatar
Marius Isken committed
124
            problem.base_source = problem.get_source(synt.get_x())
125

Sebastian Heimann's avatar
Sebastian Heimann committed
126
127
    def get_problem(self, event):
        targets = self.get_targets(event)
Sebastian Heimann's avatar
Sebastian Heimann committed
128
        problem = self.problem_config.get_problem(event, targets)
129
        self.setup_modelling_environment(problem)
Sebastian Heimann's avatar
Sebastian Heimann committed
130
        return problem
Sebastian Heimann's avatar
Sebastian Heimann committed
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147


def sarr(a):
    return ' '.join('%15g' % x for x in a)


def load_problem_info_and_data(dirname, subset=None):
    problem = load_problem_info(dirname)
    xs, misfits = load_problem_data(xjoin(dirname, subset), problem)
    return problem, xs, misfits


def load_problem_info(dirname):
    fn = op.join(dirname, 'problem.yaml')
    return guts.load(filename=fn)


148
def load_problem_data(dirname, problem, skip_models=0):
149
    fn = op.join(dirname, 'models')
Sebastian Heimann's avatar
Sebastian Heimann committed
150
151
    with open(fn, 'r') as f:
        nmodels = os.fstat(f.fileno()).st_size / (problem.nparameters * 8)
152
153
        nmodels -= skip_models
        f.seek(skip_models * problem.nparameters * 8)
Sebastian Heimann's avatar
Sebastian Heimann committed
154
155
        data = num.fromfile(
            f, dtype='<f8',
156
157
            count=nmodels * problem.nparameters)\
            .astype(num.float)
Sebastian Heimann's avatar
Sebastian Heimann committed
158

159
    nmodels = data.size/problem.nparameters - skip_models
Sebastian Heimann's avatar
Sebastian Heimann committed
160
161
162
163
    xs = data.reshape((nmodels, problem.nparameters))

    fn = op.join(dirname, 'misfits')
    with open(fn, 'r') as f:
164
        f.seek(skip_models * problem.ntargets * 2 * 8)
Sebastian Heimann's avatar
Sebastian Heimann committed
165
        data = num.fromfile(
166
167
            f, dtype='<f8', count=nmodels*problem.ntargets*2)\
            .astype(num.float)
Sebastian Heimann's avatar
Sebastian Heimann committed
168
169
170
171
172
173
174
175
176
177
178
179

    data = data.reshape((nmodels, problem.ntargets*2))

    combi = num.empty_like(data)
    combi[:, 0::2] = data[:, :problem.ntargets]
    combi[:, 1::2] = data[:, problem.ntargets:]

    misfits = combi.reshape((nmodels, problem.ntargets, 2))

    return xs, misfits


Sebastian Heimann's avatar
Sebastian Heimann committed
180
181
182
183
def get_mean_x(xs):
    return num.mean(xs, axis=0)


184
185
186
187
188
def get_mean_x_and_gm(problem, xs, misfits):
    gms = problem.global_misfits(misfits)
    return num.mean(xs, axis=0), num.mean(gms)


Sebastian Heimann's avatar
Sebastian Heimann committed
189
190
191
192
193
194
def get_best_x(problem, xs, misfits):
    gms = problem.global_misfits(misfits)
    ibest = num.argmin(gms)
    return xs[ibest, :]


195
196
197
198
199
200
def get_best_x_and_gm(problem, xs, misfits):
    gms = problem.global_misfits(misfits)
    ibest = num.argmin(gms)
    return xs[ibest, :], gms[ibest]


Sebastian Heimann's avatar
Sebastian Heimann committed
201
202
def get_mean_source(problem, xs):
    x_mean = get_mean_x(xs)
Marius Isken's avatar
Marius Isken committed
203
    source = problem.get_source(x_mean)
Sebastian Heimann's avatar
Sebastian Heimann committed
204
205
206
207
208
    return source


def get_best_source(problem, xs, misfits):
    x_best = get_best_x(problem, xs, misfits)
Marius Isken's avatar
Marius Isken committed
209
    source = problem.get_source(x_best)
Sebastian Heimann's avatar
Sebastian Heimann committed
210
211
212
    return source


Sebastian Heimann's avatar
Sebastian Heimann committed
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
def mean_latlondist(lats, lons):
    if len(lats) == 0:
        return 0., 0., 1000.
    else:
        ns, es = od.latlon_to_ne_numpy(lats[0], lons[0], lats, lons)
        n, e = num.mean(ns), num.mean(es)
        dists = num.sqrt((ns-n)**2 + (es-e)**2)
        lat, lon = od.ne_to_latlon(lats[0], lons[0], n, e)
        return float(lat), float(lon), float(num.max(dists))


def stations_mean_latlondist(stations):
    lats = num.array([s.lat for s in stations])
    lons = num.array([s.lon for s in stations])
    return mean_latlondist(lats, lons)


def read_config(path):
    config = load(filename=path)
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
232
233
234
    if not isinstance(config, Config):
        raise GrondError('invalid Grond configuration in file "%s"' % path)

Sebastian Heimann's avatar
Sebastian Heimann committed
235
236
237
238
    config.set_basepath(op.dirname(path) or '.')
    return config


Sebastian Heimann's avatar
Sebastian Heimann committed
239
240
241
242
243
244
245
246
def write_config(config, path):
    basepath = config.get_basepath()
    dirname = op.dirname(path) or '.'
    config.change_basepath(dirname)
    guts.dump(config, filename=path)
    config.change_basepath(basepath)


Sebastian Heimann's avatar
Sebastian Heimann committed
247
248
249
250
251
def analyse(problem, niter=1000, show_progress=False):
    if niter == 0:
        return

    wtargets = []
252
253
254
255
    if not problem.has_waveforms:
        return

    for target in problem.waveform_targets:
Sebastian Heimann's avatar
Sebastian Heimann committed
256
257
258
259
260
        wtarget = copy.copy(target)
        wtarget.flip_norm = True
        wtarget.weight = 1.0
        wtargets.append(wtarget)

261
    groups, ngroups = problem.get_group_mask()
262

Sebastian Heimann's avatar
Sebastian Heimann committed
263
264
265
    wproblem = problem.copy()
    wproblem.targets = wtargets

Sebastian Heimann's avatar
wip...    
Sebastian Heimann committed
266
    xbounds = num.array(wproblem.get_parameter_bounds(), dtype=num.float)
Sebastian Heimann's avatar
Sebastian Heimann committed
267
268
    npar = xbounds.shape[0]

269
    mss = num.zeros((niter, wproblem.ntargets))
Sebastian Heimann's avatar
Sebastian Heimann committed
270
271
272
273
274
    rstate = num.random.RandomState(123)

    if show_progress:
        pbar = util.progressbar('analysing problem', niter)

275
    isbad_mask = None
Sebastian Heimann's avatar
Sebastian Heimann committed
276
277
278
    for iiter in xrange(niter):
        while True:
            x = []
279
280
            for ipar in xrange(npar):
                v = rstate.uniform(xbounds[ipar, 0], xbounds[ipar, 1])
Sebastian Heimann's avatar
Sebastian Heimann committed
281
282
283
284
285
286
287
288
289
                x.append(v)

            try:
                x = wproblem.preconstrain(x)
                break

            except Forbidden:
                pass

290
291
292
293
294
        if isbad_mask is not None and num.any(isbad_mask):
            isok_mask = num.logical_not(isbad_mask)
        else:
            isok_mask = None
        _, ms = wproblem.evaluate(x, mask=isok_mask)
Sebastian Heimann's avatar
Sebastian Heimann committed
295
296
        mss[iiter, :] = ms

297
298
        isbad_mask = num.isnan(ms)

Sebastian Heimann's avatar
Sebastian Heimann committed
299
300
301
302
303
304
        if show_progress:
            pbar.update(iiter)

    if show_progress:
        pbar.finish()

305
306
307
    # mean_ms = num.mean(mss, axis=0)
    # weights = 1.0 / mean_ms
    weights = num.ones(wproblem.ntargets)
308
309
310
311
    for igroup in xrange(ngroups):
        weights[groups == igroup] /= (
            num.nansum(weights[groups == igroup]) /
            num.nansum(num.isfinite(weights[groups == igroup])))
Sebastian Heimann's avatar
Sebastian Heimann committed
312
313
314
315
316
317

    for weight, target in zip(weights, problem.targets):
        target.analysis_result = TargetAnalysisResult(
            balancing_weight=float(weight))


318
319
320
321
def excentricity_compensated_choice(xs, sbx, factor):
    inonflat = num.where(sbx != 0.0)[0]
    scale = num.zeros_like(sbx)
    scale[inonflat] = 1.0 / (sbx[inonflat] * (factor if factor != 0. else 1.0))
322
323
324
    # distances_all = math.sqrt(num.sum(
    #     ((xs[num.newaxis, :, :] - xs[:, num.newaxis, :]) *
    #      scale[num.newaxis, num.newaxis, :])**2, axis=2))
325
326
327
328
    distances_sqr_all = num.sum(
        ((xs[num.newaxis, :, :] - xs[:, num.newaxis, :]) *
         scale[num.newaxis, num.newaxis, :])**2, axis=2)
    probabilities = 1.0 / num.sum(distances_sqr_all < 1.0, axis=1)
329
    # print num.sort(num.sum(distances_sqr_all < 1.0, axis=1))
330
331
332
333
334
335
336
337
338
339
340
341
342
343
    probabilities /= num.sum(probabilities)
    r = num.random.random()
    ichoice = num.searchsorted(num.cumsum(probabilities), r)
    ichoice = min(ichoice, xs.shape[0]-1)
    return xs[ichoice]


def select_most_excentric(xcandidates, xs, sbx, factor):
    inonflat = num.where(sbx != 0.0)[0]
    scale = num.zeros_like(sbx)
    scale[inonflat] = 1.0 / (sbx[inonflat] * (factor if factor != 0. else 1.0))
    distances_sqr_all = num.sum(
        ((xcandidates[num.newaxis, :, :] - xs[:, num.newaxis, :]) *
         scale[num.newaxis, num.newaxis, :])**2, axis=2)
344
    # print num.sort(num.sum(distances_sqr_all < 1.0, axis=0))
345
346
347
348
    ichoice = num.argmin(num.sum(distances_sqr_all < 1.0, axis=0))
    return xcandidates[ichoice]


Sebastian Heimann's avatar
Sebastian Heimann committed
349
350
351
352
353
354
def solve(problem,
          rundir=None,
          niter_uniform=1000,
          niter_transition=1000,
          niter_explorative=10000,
          niter_non_explorative=0,
355
          scatter_scale_transition=2.0,
356
          scatter_scale=1.0,
357
          chain_length_factor=8.0,
Sebastian Heimann's avatar
Sebastian Heimann committed
358
359
          xs_inject=None,
          sampler_distribution='multivariate_normal',
360
          compensate_excentricity=False,
Sebastian Heimann's avatar
Sebastian Heimann committed
361
362
          status=()):

Sebastian Heimann's avatar
wip...    
Sebastian Heimann committed
363
    xbounds = num.array(problem.get_parameter_bounds(), dtype=num.float)
364
    npar = problem.nparameters
Sebastian Heimann's avatar
Sebastian Heimann committed
365

366
    nlinks_cap = int(round(chain_length_factor * npar + 1))
Sebastian Heimann's avatar
Sebastian Heimann committed
367
368
369
    chains_m = num.zeros((1 + problem.nbootstrap, nlinks_cap), num.float)
    chains_i = num.zeros((1 + problem.nbootstrap, nlinks_cap), num.int)
    nlinks = 0
Sebastian Heimann's avatar
Sebastian Heimann committed
370
    mbx = None
Sebastian Heimann's avatar
Sebastian Heimann committed
371
372
373
374
375
376

    if xs_inject is not None and xs_inject.size != 0:
        niter_inject = xs_inject.shape[0]
    else:
        niter_inject = 0

377
    niter = niter_inject + niter_uniform + niter_transition + \
378
        niter_explorative + niter_non_explorative
Sebastian Heimann's avatar
Sebastian Heimann committed
379
380

    iiter = 0
Sebastian Heimann's avatar
Sebastian Heimann committed
381
382
    sbx = None
    mxs = None
Sebastian Heimann's avatar
Sebastian Heimann committed
383
384
385
386
    covs = None
    xhist = num.zeros((niter, npar))
    isbad_mask = None
    accept_sum = num.zeros(1 + problem.nbootstrap, dtype=num.int)
387
    accept_hist = num.zeros(niter, dtype=num.int)
388
    pnames = problem.parameter_names
Sebastian Heimann's avatar
Sebastian Heimann committed
389
390
391

    while iiter < niter:

392
393
394
395
396
397
398
399
400
401
402
403
        if iiter < niter_inject:
            phase = 'inject'
        elif iiter < niter_inject + niter_uniform:
            phase = 'uniform'
        elif iiter < niter_inject + niter_uniform + niter_transition:
            phase = 'transition'
        elif iiter < niter_inject + niter_uniform + niter_transition + \
                niter_explorative:
            phase = 'explorative'
        else:
            phase = 'non_explorative'

404
        factor = 0.0
405
        if phase == 'transition':
406
407
408
409
410
411
412
413
414
415
            T = float(niter_transition)
            A = scatter_scale_transition
            B = scatter_scale
            tau = T/(math.log(A) - math.log(B))
            t0 = math.log(A) * T / (math.log(A) - math.log(B))
            t = float(iiter - niter_uniform - niter_inject)
            factor = num.exp(-(t-t0) / tau)

        elif phase in ('explorative', 'non_explorative'):
            factor = scatter_scale
Sebastian Heimann's avatar
Sebastian Heimann committed
416
417
418
419

        ntries_preconstrain = 0
        ntries_sample = 0

420
        if phase == 'inject':
Sebastian Heimann's avatar
Sebastian Heimann committed
421
422
423
424
425
            x = xs_inject[iiter, :]
        else:
            while True:
                ntries_preconstrain += 1

426
                if mbx is None or phase == 'uniform':
427
                    x = problem.random_uniform(xbounds)
Sebastian Heimann's avatar
Sebastian Heimann committed
428
429
430
431
                else:
                    # jchoice = num.random.randint(0, 1 + problem.nbootstrap)
                    jchoice = num.argmin(accept_sum)

432
                    if phase in ('transition', 'explorative'):
433
434
435
436
437
438
439
440

                        if compensate_excentricity:
                            xchoice = excentricity_compensated_choice(
                                xhist[chains_i[jchoice, :], :], sbx, 3.)

                        else:
                            ichoice = num.random.randint(0, nlinks)
                            xchoice = xhist[chains_i[jchoice, ichoice]]
Sebastian Heimann's avatar
Sebastian Heimann committed
441
                    else:
442
                        xchoice = mxs[jchoice]
Sebastian Heimann's avatar
Sebastian Heimann committed
443
444
445
446

                    if sampler_distribution == 'multivariate_normal':
                        ntries_sample = 0

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
447
448
                        ntry = 0
                        ok_mask_sum = num.zeros(npar, dtype=num.int)
Sebastian Heimann's avatar
Sebastian Heimann committed
449
450
451
                        while True:
                            ntries_sample += 1
                            vs = num.random.multivariate_normal(
452
                                xchoice, factor**2 * covs[jchoice])
Sebastian Heimann's avatar
Sebastian Heimann committed
453

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
454
455
456
457
                            ok_mask = num.logical_and(
                                xbounds[:, 0] <= vs, vs <= xbounds[:, 1])

                            if num.all(ok_mask):
Sebastian Heimann's avatar
Sebastian Heimann committed
458
459
                                break

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
460
461
462
463
464
465
466
467
468
469
470
471
                            ok_mask_sum += ok_mask

                            if ntry > 1000:
                                raise GrondError(
                                    'failed to produce a suitable candidate '
                                    'sample from multivariate normal '
                                    'distribution, (%s)' %
                                    ', '.join('%s:%i' % xx for xx in
                                              zip(pnames, ok_mask_sum)))

                            ntry += 1

Sebastian Heimann's avatar
Sebastian Heimann committed
472
473
474
                        x = vs.tolist()

                    if sampler_distribution == 'normal':
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
                        ncandidates = 1
                        xcandidates = num.zeros((ncandidates, npar))
                        for icandidate in xrange(ncandidates):
                            for ipar in xrange(npar):
                                ntry = 0
                                while True:
                                    if sbx[ipar] > 0.:
                                        v = num.random.normal(
                                            xchoice[ipar], factor*sbx[ipar])
                                    else:
                                        v = xchoice[ipar]

                                    if xbounds[ipar, 0] <= v and \
                                            v <= xbounds[ipar, 1]:

                                        break

                                    if ntry > 1000:
                                        raise GrondError(
                                            'failed to produce a suitable '
                                            'candidate sample from normal '
                                            'distribution')

                                    ntry += 1

                                xcandidates[icandidate, ipar] = v

                        x = select_most_excentric(
                            xcandidates,
                            xhist[chains_i[jchoice, :], :],
                            sbx,
                            factor)
Sebastian Heimann's avatar
Sebastian Heimann committed
507
508
509
510
511
512
513
514

                try:
                    x = problem.preconstrain(x)
                    break

                except Forbidden:
                    pass

515
516
517
518
519
520
        if isbad_mask is not None and num.any(isbad_mask):
            isok_mask = num.logical_not(isbad_mask)
        else:
            isok_mask = None

        ms, ns = problem.evaluate(x, mask=isok_mask)
Sebastian Heimann's avatar
Sebastian Heimann committed
521
522
523
524
525
526

        isbad_mask_new = num.isnan(ms)
        if isbad_mask is not None and num.any(isbad_mask != isbad_mask_new):
            logger.error(
                'skipping problem %s: inconsistency in data availability' %
                problem.name)
Sebastian Heimann's avatar
Sebastian Heimann committed
527

Sebastian Heimann's avatar
flake8    
Sebastian Heimann committed
528
529
530
            for target, isbad_new, isbad in zip(
                    problem.targets, isbad_mask_new, isbad_mask):

Sebastian Heimann's avatar
Sebastian Heimann committed
531
532
533
534
                if isbad_new != isbad:
                    logger.error('%s, %s -> %s' % (
                        target.string_id(), isbad, isbad_new))

Sebastian Heimann's avatar
Sebastian Heimann committed
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
            return

        isbad_mask = isbad_mask_new

        if num.all(isbad_mask):
            logger.error(
                'skipping problem %s: all target misfit values are NaN' %
                problem.name)
            return

        if rundir:
            problem.dump_problem_data(rundir, x, ms, ns)

        m = problem.global_misfit(ms, ns)
        ms = problem.bootstrap_misfit(ms, ns)

        chains_m[0, nlinks] = m
        chains_m[1:, nlinks] = ms
        chains_i[:, nlinks] = iiter

        nlinks += 1

        for ichain in xrange(chains_m.shape[0]):
            isort = num.argsort(chains_m[ichain, :nlinks])
            chains_m[ichain, :nlinks] = chains_m[ichain, isort]
            chains_i[ichain, :nlinks] = chains_i[ichain, isort]

        if nlinks == nlinks_cap:
            accept = (chains_i[:, nlinks_cap-1] != iiter).astype(num.int)
            nlinks -= 1
        else:
            accept = num.ones(1 + problem.nbootstrap, dtype=num.int)

        accept_sum += accept
569
        accept_hist[iiter] = num.sum(accept)
Sebastian Heimann's avatar
Sebastian Heimann committed
570
571
572

        lines = []
        if 'state' in status:
573
            lines.append('%s, %i' % (problem.name, iiter))
Sebastian Heimann's avatar
Sebastian Heimann committed
574
575
576
577
578
579
580
581
            lines.append(''.join('-X'[int(acc)] for acc in accept))

        xhist[iiter, :] = x

        bxs = xhist[chains_i[:, :nlinks].ravel(), :]
        gxs = xhist[chains_i[0, :nlinks], :]
        gms = chains_m[0, :nlinks]

Marius Isken's avatar
Marius Isken committed
582
583
584
585
586
        col_width = 15
        col_param_width = max([len(p) for p in problem.parameter_names])+2
        console_output = '{:<{col_param_width}s}'
        console_output += ''.join(['{:>{col_width}{type}}'] * 5)

Sebastian Heimann's avatar
Sebastian Heimann committed
587
        if nlinks > (nlinks_cap-1)/2:
Sebastian Heimann's avatar
Sebastian Heimann committed
588
589
590
591
592
593
594
595
596
597
598
            # mean and std of all bootstrap ensembles together
            mbx = num.mean(bxs, axis=0)
            sbx = num.std(bxs, axis=0)

            # mean and std of global configuration
            mgx = num.mean(gxs, axis=0)
            sgx = num.std(gxs, axis=0)

            # best in global configuration
            bgx = xhist[chains_i[0, 0], :]

Sebastian Heimann's avatar
Sebastian Heimann committed
599
            covs = []
Sebastian Heimann's avatar
Sebastian Heimann committed
600
            mxs = []
Sebastian Heimann's avatar
Sebastian Heimann committed
601
602
            for i in xrange(1 + problem.nbootstrap):
                xs = xhist[chains_i[i, :nlinks], :]
Sebastian Heimann's avatar
Sebastian Heimann committed
603
604
605
606
607
                mx = num.mean(xs, axis=0)
                cov = num.cov(xs.T)

                mxs.append(mx)
                covs.append(cov)
Sebastian Heimann's avatar
Sebastian Heimann committed
608
609
610

            if 'state' in status:
                lines.append(
Marius Isken's avatar
Marius Isken committed
611
612
613
614
615
616
                    console_output.format(
                        'parameter', 'B mean', 'B std', 'G mean', 'G std',
                        'G best',
                        col_param_width=col_param_width,
                        col_width=col_width,
                        type='s'))
Sebastian Heimann's avatar
Sebastian Heimann committed
617

Sebastian Heimann's avatar
Sebastian Heimann committed
618
                for (pname, mbv, sbv, mgv, sgv, bgv) in zip(
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
619
                        pnames, mbx, sbx, mgx, sgx, bgx):
Sebastian Heimann's avatar
Sebastian Heimann committed
620
621

                    lines.append(
Marius Isken's avatar
Marius Isken committed
622
623
624
625
626
                        console_output.format(
                            pname, mbv, sbv, mgv, sgv, bgv,
                            col_param_width=col_param_width,
                            col_width=col_width,
                            type='.4g'))
Sebastian Heimann's avatar
Sebastian Heimann committed
627

Marius Isken's avatar
Marius Isken committed
628
629
630
631
632
633
634
635
636
                lines.append(
                    console_output.format(
                        'misfit', '', '',
                        '%.4g' % num.mean(gms),
                        '%.4g' % num.std(gms),
                        '%.4g' % num.min(gms),
                        col_param_width=col_param_width,
                        col_width=col_width,
                        type='s'))
Sebastian Heimann's avatar
Sebastian Heimann committed
637
638
639

        if 'state' in status:
            lines.append(
Marius Isken's avatar
Marius Isken committed
640
                console_output.format(
641
                    'iteration', iiter+1, '(%s, %g)' % (phase, factor),
Marius Isken's avatar
Marius Isken committed
642
643
644
645
                    ntries_sample, ntries_preconstrain, '',
                    col_param_width=col_param_width,
                    col_width=col_width,
                    type=''))
Sebastian Heimann's avatar
Sebastian Heimann committed
646
647
648
649
650
651
652
653
654
655
656
657
658
659

        if 'matrix' in status:
            matrix = (chains_i[:, :30] % 94 + 32).T
            for row in matrix[::-1]:
                lines.append(''.join(chr(xxx) for xxx in row))

        if status:
            lines[0:0] = ['\033[2J']
            lines.append('')
            print '\n'.join(lines)

        iiter += 1


Sebastian Heimann's avatar
Sebastian Heimann committed
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
def bootstrap_outliers(problem, misfits, std_factor=1.0):
    '''
    Identify bootstrap configurations performing bad in global configuration
    '''

    gms = problem.global_misfits(misfits)

    ibests = []
    for ibootstrap in xrange(problem.nbootstrap):
        bms = problem.bootstrap_misfits(misfits, ibootstrap)
        ibests.append(num.argmin(bms))

    m = num.median(gms[ibests])
    s = num.std(gms[ibests])

    return num.where(gms > m+s)[0]


Sebastian Heimann's avatar
Sebastian Heimann committed
678
679
680
681
def forward(rundir_or_config_path, event_names):

    if not event_names:
        return
Sebastian Heimann's avatar
Sebastian Heimann committed
682

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
683
    if os.path.isdir(rundir_or_config_path):
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
684
685
686
687
        rundir = rundir_or_config_path
        config = guts.load(
            filename=op.join(rundir, 'config.yaml'))

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
688
        config.set_basepath(rundir)
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
689
690
        problem, xs, misfits = load_problem_info_and_data(
            rundir, subset='harvest')
Sebastian Heimann's avatar
Sebastian Heimann committed
691

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
692
693
694
        gms = problem.global_misfits(misfits)
        ibest = num.argmin(gms)
        xbest = xs[ibest, :]
Sebastian Heimann's avatar
Sebastian Heimann committed
695

696
        ds = config.get_dataset(problem.base_source.name)
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
697
        problem.set_engine(config.engine_config.get_engine())
Sebastian Heimann's avatar
Sebastian Heimann committed
698

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
699
700
701
702
703
704
705
706
707
        for target in problem.targets:
            target.set_dataset(ds)

        payload = [(problem, xbest)]

    else:
        config = read_config(rundir_or_config_path)

        payload = []
708
709
710
        for event_name in event_names:
            ds = config.get_dataset(event_name)
            event = ds.get_event()
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
711
712
713
714
            problem = config.get_problem(event)
            xref = problem.preconstrain(
                problem.pack(problem.base_source))
            payload.append((problem, xref))
Sebastian Heimann's avatar
Sebastian Heimann committed
715
716

    all_trs = []
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
717
718
719
    events = []
    for (problem, x) in payload:
        ds.empty_cache()
720
        ms, ns, results = problem.evaluate(x, result_mode='full')
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
721

Marius Isken's avatar
Marius Isken committed
722
        event = problem.get_source(x).pyrocko_event()
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
723
        events.append(event)
Sebastian Heimann's avatar
Sebastian Heimann committed
724
725

        for result in results:
726
            if not isinstance(result, gf.SeismosizerError):
Sebastian Heimann's avatar
Sebastian Heimann committed
727
728
729
730
731
                result.filtered_obs.set_codes(location='ob')
                result.filtered_syn.set_codes(location='sy')
                all_trs.append(result.filtered_obs)
                all_trs.append(result.filtered_syn)

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
732
733
    markers = []
    for ev in events:
734
        markers.append(pmarker.EventMarker(ev))
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
735
736

    trace.snuffle(all_trs, markers=markers, stations=ds.get_stations())
Sebastian Heimann's avatar
Sebastian Heimann committed
737
738


Sebastian Heimann's avatar
Sebastian Heimann committed
739
def harvest(rundir, problem=None, nbest=10, force=False, weed=0):
Sebastian Heimann's avatar
Sebastian Heimann committed
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754

    if problem is None:
        problem, xs, misfits = load_problem_info_and_data(rundir)
    else:
        xs, misfits = load_problem_data(rundir, problem)

    dumpdir = op.join(rundir, 'harvest')
    if op.exists(dumpdir):
        if force:
            shutil.rmtree(dumpdir)
        else:
            raise DirectoryAlreadyExists(dumpdir)

    util.ensuredir(dumpdir)

Sebastian Heimann's avatar
Sebastian Heimann committed
755
756
    ibests_list = []
    ibests = []
Sebastian Heimann's avatar
Sebastian Heimann committed
757
758
759
    gms = problem.global_misfits(misfits)
    isort = num.argsort(gms)

Sebastian Heimann's avatar
Sebastian Heimann committed
760
761
    ibests_list.append(isort[:nbest])

762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
    if weed != 3:
        for ibootstrap in xrange(problem.nbootstrap):
            bms = problem.bootstrap_misfits(misfits, ibootstrap)
            isort = num.argsort(bms)
            ibests_list.append(isort[:nbest])
            ibests.append(isort[0])

        if weed:
            mean_gm_best = num.median(gms[ibests])
            std_gm_best = num.std(gms[ibests])
            ibad = set()

            for ibootstrap, ibest in enumerate(ibests):
                if gms[ibest] > mean_gm_best + std_gm_best:
                    ibad.add(ibootstrap)

            ibests_list = [
                ibests_ for (ibootstrap, ibests_) in enumerate(ibests_list)
                if ibootstrap not in ibad]
Sebastian Heimann's avatar
Sebastian Heimann committed
781

782
    ibests = num.concatenate(ibests_list)
Sebastian Heimann's avatar
Sebastian Heimann committed
783
784
785
786
787
788
789
790

    if weed == 2:
        ibests = ibests[gms[ibests] < mean_gm_best]

    for i in ibests:
        x = xs[i]
        ms = misfits[i, :, 0]
        ns = misfits[i, :, 1]
Sebastian Heimann's avatar
Sebastian Heimann committed
791
792
793
        problem.dump_problem_data(dumpdir, x, ms, ns)


Sebastian Heimann's avatar
Sebastian Heimann committed
794
795
796
797
def get_event_names(config):
    return config.get_event_names()


Sebastian Heimann's avatar
Sebastian Heimann committed
798
def check_problem(problem):
Sebastian Heimann's avatar
Sebastian Heimann committed
799
800
801
802
    if len(problem.targets) == 0:
        raise GrondError('no targets available')


803
804
805
806
807
def check(
        config,
        event_names=None,
        target_string_ids=None,
        show_plot=False,
808
        show_waveforms=False,
809
810
        n_random_synthetics=10):

811
812
813
    if show_plot:
        from matplotlib import pyplot as plt
        from grond.plot import colors
Sebastian Heimann's avatar
Sebastian Heimann committed
814

815
    markers = []
816
817
818
    for ievent, event_name in enumerate(event_names):
        ds = config.get_dataset(event_name)
        event = ds.get_event()
819
        trs_all = []
Sebastian Heimann's avatar
Sebastian Heimann committed
820
821
        try:
            problem = config.get_problem(event)
822

Sebastian Heimann's avatar
Sebastian Heimann committed
823
824
825
826
            _, ngroups = problem.get_group_mask()
            logger.info('number of target supergroups: %i' % ngroups)
            logger.info('number of targets (total): %i' % len(problem.targets))

827
828
829
            if target_string_ids:
                problem.targets = [
                    target for target in problem.targets
Sebastian Heimann's avatar
Sebastian Heimann committed
830
831
                    if util.match_nslc(target_string_ids, target.string_id())]

832
833
            logger.info(
                'number of targets (selected): %i' % len(problem.targets))
834

Sebastian Heimann's avatar
Sebastian Heimann committed
835
836
            check_problem(problem)

837
838
            xbounds = num.array(problem.get_parameter_bounds(),
                                dtype=num.float)
Sebastian Heimann's avatar
Sebastian Heimann committed
839
840

            results_list = []
841

842
            sources = []
843
844
            if n_random_synthetics == 0:
                x = problem.pack(problem.base_source)
845
                sources.append(problem.base_source)
846
                ms, ns, results = problem.evaluate(x, result_mode='full')
Sebastian Heimann's avatar
Sebastian Heimann committed
847
848
                results_list.append(results)

849
850
851
            else:
                for i in xrange(n_random_synthetics):
                    x = problem.random_uniform(xbounds)
Marius Isken's avatar
Marius Isken committed
852
                    sources.append(problem.get_source(x))
853
854
855
                    ms, ns, results = problem.evaluate(x, result_mode='full')
                    results_list.append(results)

856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
            if show_waveforms:
                engine = config.engine_config.get_engine()
                times = []
                tdata = []
                for target in problem.targets:
                    tobs_shift_group = []
                    tcuts = []
                    for source in sources:
                        tmin_fit, tmax_fit, tfade, tfade_taper = \
                            target.get_taper_params(engine, source)

                        times.extend((tmin_fit-tfade*2., tmax_fit+tfade*2.))

                        tobs, tsyn = target.get_pick_shift(engine, source)
                        if None not in (tobs, tsyn):
                            tobs_shift = tobs - tsyn
                        else:
                            tobs_shift = 0.0

                        tcuts.append(target.get_cutout_timespan(
                            tmin_fit+tobs_shift, tmax_fit+tobs_shift, tfade))

                        tobs_shift_group.append(tobs_shift)

                    tcuts = num.array(tcuts, dtype=num.float)

                    tdata.append((
                        tfade,
                        num.mean(tobs_shift_group),
                        (num.min(tcuts[:, 0]), num.max(tcuts[:, 1]))))

                tmin = min(times)
                tmax = max(times)

                tmax += (tmax-tmin)*2

                for (tfade, tobs_shift, tcut), target in zip(
                        tdata, problem.targets):

                    store = engine.get_store(target.store_id)

                    deltat = store.config.deltat

                    freqlimits = list(target.get_freqlimits())
                    freqlimits[2] = 0.45/deltat
                    freqlimits[3] = 0.5/deltat
                    freqlimits = tuple(freqlimits)

904
905
906
907
908
909
910
911
912
                    try:
                        trs_projected, trs_restituted, trs_raw = \
                            ds.get_waveform(
                                target.codes,
                                tmin=tmin+tobs_shift,
                                tmax=tmax+tobs_shift,
                                tfade=tfade,
                                freqlimits=freqlimits,
                                deltat=deltat,
913
914
                                backazimuth=target.
                                get_backazimuth_for_waveform(),
915
                                debug=True)
916
                    except NotFound, e:
917
918
                        logger.warn(str(e))
                        continue
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956

                    trs_projected = copy.deepcopy(trs_projected)
                    trs_restituted = copy.deepcopy(trs_restituted)
                    trs_raw = copy.deepcopy(trs_raw)

                    for trx in trs_projected + trs_restituted + trs_raw:
                        trx.shift(-tobs_shift)
                        trx.set_codes(
                            network='',
                            station=target.string_id(),
                            location='')

                    for trx in trs_projected:
                        trx.set_codes(location=trx.location + '2_proj')

                    for trx in trs_restituted:
                        trx.set_codes(location=trx.location + '1_rest')

                    for trx in trs_raw:
                        trx.set_codes(location=trx.location + '0_raw')

                    trs_all.extend(trs_projected)
                    trs_all.extend(trs_restituted)
                    trs_all.extend(trs_raw)

                    for source in sources:
                        tmin_fit, tmax_fit, tfade, tfade_taper = \
                            target.get_taper_params(engine, source)

                        markers.append(pmarker.Marker(
                            nslc_ids=[('', target.string_id(), '*', '*')],
                            tmin=tmin_fit, tmax=tmax_fit))

                    markers.append(pmarker.Marker(
                        nslc_ids=[('', target.string_id(), '*', '*')],
                        tmin=tcut[0]-tobs_shift, tmax=tcut[1]-tobs_shift,
                        kind=1))

Sebastian Heimann's avatar
Sebastian Heimann committed
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
            if show_plot:
                for itarget, target in enumerate(problem.targets):
                    yabsmaxs = []
                    for results in results_list:
                        result = results[itarget]
                        if not isinstance(result, gf.SeismosizerError):
                            yabsmaxs.append(
                                num.max(num.abs(
                                    result.filtered_obs.get_ydata())))

                    if yabsmaxs:
                        yabsmax = max(yabsmaxs) or 1.0
                    else:
                        yabsmax = None

                    fig = None
                    ii = 0
                    for results in results_list:
                        result = results[itarget]
                        if not isinstance(result, gf.SeismosizerError):
                            if fig is None:
                                fig = plt.figure()
                                axes = fig.add_subplot(1, 1, 1)
                                axes.set_ylim(0., 4.)
                                axes.set_title('%s' % target.string_id())

                            xdata = result.filtered_obs.get_xdata()
                            ydata = result.filtered_obs.get_ydata() / yabsmax
                            axes.plot(xdata, ydata*0.5 + 3.5, color='black')

                            color = colors[ii % len(colors)]

                            xdata = result.filtered_syn.get_xdata()
                            ydata = result.filtered_syn.get_ydata()
                            ydata = ydata / (num.max(num.abs(ydata)) or 1.0)

                            axes.plot(xdata, ydata*0.5 + 2.5, color=color)

                            xdata = result.processed_syn.get_xdata()
                            ydata = result.processed_syn.get_ydata()
                            ydata = ydata / (num.max(num.abs(ydata)) or 1.0)

                            axes.plot(xdata, ydata*0.5 + 1.5, color=color)
                            if result.tsyn_pick: