core.py 45.9 KB
Newer Older
Sebastian Heimann's avatar
Sebastian Heimann committed
1
2
3
4
5
6
7
8
9
10
import math
import os
import sys
import logging
import time
import copy
import shutil
import os.path as op
import numpy as num

11
12
from pyrocko.guts import (load, Object, String, Float, Int, Bool, List,
                          StringChoice)
Sebastian Heimann's avatar
Sebastian Heimann committed
13
from pyrocko import orthodrome as od, gf, trace, guts, util, weeding
14
from pyrocko import parimap, model, marker as pmarker
Sebastian Heimann's avatar
Sebastian Heimann committed
15

16
from .dataset import DatasetConfig, NotFound
17
18
19
20
from .problems import ProblemConfig, Problem
from .targets import TargetAnalysisResult, TargetConfig
from .meta import (Path, HasPaths, expand_template, xjoin, GrondError,
                   Forbidden)
Sebastian Heimann's avatar
Sebastian Heimann committed
21
22
23
24
25

logger = logging.getLogger('grond.core')
guts_prefix = 'grond'


Sebastian Heimann's avatar
Sebastian Heimann committed
26
27
28
29
30
31
32
def mahalanobis_distance(xs, mx, cov):
    imask = num.diag(cov) != 0.
    icov = num.linalg.inv(cov[imask, :][:, imask])
    temp = xs[:, imask] - mx[imask]
    return num.sqrt(num.sum(temp * num.dot(icov, temp.T).T, axis=1))


Sebastian Heimann's avatar
Sebastian Heimann committed
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
class DirectoryAlreadyExists(Exception):
    pass


def weed(origin, targets, limit, neighborhood=3):

    azimuths = num.zeros(len(targets))
    dists = num.zeros(len(targets))
    for i, target in enumerate(targets):
        _, azimuths[i] = target.azibazi_to(origin)
        dists[i] = target.distance_to(origin)

    badnesses = num.ones(len(targets), dtype=float)
    deleted, meandists_kept = weeding.weed(
        azimuths, dists, badnesses,
        nwanted=limit,
        neighborhood=neighborhood)

    targets_weeded = [
        target for (delete, target) in zip(deleted, targets) if not delete]

    return targets_weeded, meandists_kept, deleted


class AnalyserConfig(Object):
    niter = Int.T(default=1000)


class SamplerDistributionChoice(StringChoice):
    choices = ['multivariate_normal', 'normal']


65
66
67
68
69
70
71
class StandardDeviationEstimatorChoice(StringChoice):
    choices = [
        'median_density_single_chain',
        'standard_deviation_all_chains',
        'standard_deviation_single_chain']


Sebastian Heimann's avatar
Sebastian Heimann committed
72
73
class SolverConfig(Object):
    niter_uniform = Int.T(default=1000)
Sebastian Heimann's avatar
Sebastian Heimann committed
74
    niter_transition = Int.T(default=0)
Sebastian Heimann's avatar
Sebastian Heimann committed
75
76
77
78
    niter_explorative = Int.T(default=10000)
    niter_non_explorative = Int.T(default=0)
    sampler_distribution = SamplerDistributionChoice.T(
        default='multivariate_normal')
79
80
    standard_deviation_estimator = StandardDeviationEstimatorChoice.T(
        default='median_density_single_chain')
81
    scatter_scale_transition = Float.T(default=2.0)
82
    scatter_scale = Float.T(default=1.0)
83
84
    chain_length_factor = Float.T(default=8.0)
    compensate_excentricity = Bool.T(default=True)
Sebastian Heimann's avatar
Sebastian Heimann committed
85
86
87
88

    def get_solver_kwargs(self):
        return dict(
            niter_uniform=self.niter_uniform,
Sebastian Heimann's avatar
Sebastian Heimann committed
89
            niter_transition=self.niter_transition,
Sebastian Heimann's avatar
Sebastian Heimann committed
90
91
            niter_explorative=self.niter_explorative,
            niter_non_explorative=self.niter_non_explorative,
92
            sampler_distribution=self.sampler_distribution,
93
            standard_deviation_estimator=self.standard_deviation_estimator,
94
            scatter_scale_transition=self.scatter_scale_transition,
95
96
97
            scatter_scale=self.scatter_scale,
            chain_length_factor=self.chain_length_factor,
            compensate_excentricity=self.compensate_excentricity)
Sebastian Heimann's avatar
Sebastian Heimann committed
98
99


Sebastian Heimann's avatar
Sebastian Heimann committed
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
class EngineConfig(HasPaths):
    gf_stores_from_pyrocko_config = Bool.T(default=True)
    gf_store_superdirs = List.T(Path.T())
    gf_store_dirs = List.T(Path.T())

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        self._engine = None

    def get_engine(self):
        if self._engine is None:
            fp = self.expand_path
            self._engine = gf.LocalEngine(
                use_config=self.gf_stores_from_pyrocko_config,
                store_superdirs=fp(self.gf_store_superdirs),
                store_dirs=fp(self.gf_store_dirs))

        return self._engine


Sebastian Heimann's avatar
Sebastian Heimann committed
120
121
122
123
124
125
126
class Config(HasPaths):
    rundir_template = Path.T()
    dataset_config = DatasetConfig.T()
    target_configs = List.T(TargetConfig.T())
    problem_config = ProblemConfig.T()
    analyser_config = AnalyserConfig.T(default=AnalyserConfig.D())
    solver_config = SolverConfig.T(default=SolverConfig.D())
Sebastian Heimann's avatar
Sebastian Heimann committed
127
    engine_config = EngineConfig.T(default=EngineConfig.D())
Sebastian Heimann's avatar
Sebastian Heimann committed
128
129
130
131

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)

Sebastian Heimann's avatar
Sebastian Heimann committed
132
133
134
    def get_event_names(self):
        return self.dataset_config.get_event_names()

135
136
    def get_dataset(self, event_name):
        return self.dataset_config.get_dataset(event_name)
Sebastian Heimann's avatar
Sebastian Heimann committed
137
138

    def get_targets(self, event):
139
        ds = self.get_dataset(event.name)
Sebastian Heimann's avatar
Sebastian Heimann committed
140
141
142
143
144
145
146
147

        targets = []
        for igroup, target_config in enumerate(self.target_configs):
            targets.extend(target_config.get_targets(
                ds, event, 'group_%i' % igroup))

        return targets

148
149
150
151
152
153
    def setup_modelling_environment(self, problem):
        problem.set_engine(self.engine_config.get_engine())
        ds = self.get_dataset(problem.base_source.name)
        synt = ds.synthetic_test
        if synt:
            synt.set_problem(problem)
Marius Isken's avatar
Marius Isken committed
154
            problem.base_source = problem.get_source(synt.get_x())
155

Sebastian Heimann's avatar
Sebastian Heimann committed
156
157
    def get_problem(self, event):
        targets = self.get_targets(event)
Sebastian Heimann's avatar
Sebastian Heimann committed
158
        problem = self.problem_config.get_problem(event, targets)
159
        self.setup_modelling_environment(problem)
Sebastian Heimann's avatar
Sebastian Heimann committed
160
        return problem
Sebastian Heimann's avatar
Sebastian Heimann committed
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177


def sarr(a):
    return ' '.join('%15g' % x for x in a)


def load_problem_info_and_data(dirname, subset=None):
    problem = load_problem_info(dirname)
    xs, misfits = load_problem_data(xjoin(dirname, subset), problem)
    return problem, xs, misfits


def load_problem_info(dirname):
    fn = op.join(dirname, 'problem.yaml')
    return guts.load(filename=fn)


Sebastian Heimann's avatar
Sebastian Heimann committed
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
def load_optimizer_history(dirname, problem):
    fn = op.join(dirname, 'accepted')
    with open(fn, 'r') as f:
        nmodels = os.fstat(f.fileno()).st_size // (problem.nbootstrap+1)
        data1 = num.fromfile(
            f,
            dtype='<i1',
            count=nmodels*(problem.nbootstrap+1)).astype(num.bool)

    accepted = data1.reshape((nmodels, problem.nbootstrap+1))

    fn = op.join(dirname, 'choices')
    with open(fn, 'r') as f:
        data2 = num.fromfile(
            f,
            dtype='<i8',
            count=nmodels*2).astype(num.int64)

    ibootstrap_choices, imodel_choices = data2.reshape((nmodels, 2)).T
    return ibootstrap_choices, imodel_choices, accepted


200
def load_problem_data(dirname, problem, skip_models=0):
201
    fn = op.join(dirname, 'models')
Sebastian Heimann's avatar
Sebastian Heimann committed
202
    with open(fn, 'r') as f:
Sebastian Heimann's avatar
Sebastian Heimann committed
203
        nmodels = os.fstat(f.fileno()).st_size // (problem.nparameters * 8)
204
205
        nmodels -= skip_models
        f.seek(skip_models * problem.nparameters * 8)
Sebastian Heimann's avatar
Sebastian Heimann committed
206
        data1 = num.fromfile(
Sebastian Heimann's avatar
Sebastian Heimann committed
207
            f, dtype='<f8',
208
209
            count=nmodels * problem.nparameters)\
            .astype(num.float)
Sebastian Heimann's avatar
Sebastian Heimann committed
210

211
    nmodels = data1.size/problem.nparameters - skip_models
Sebastian Heimann's avatar
Sebastian Heimann committed
212
    xs = data1.reshape((nmodels, problem.nparameters))
Sebastian Heimann's avatar
Sebastian Heimann committed
213
214
215

    fn = op.join(dirname, 'misfits')
    with open(fn, 'r') as f:
216
        f.seek(skip_models * problem.ntargets * 2 * 8)
Sebastian Heimann's avatar
Sebastian Heimann committed
217
        data2 = num.fromfile(
Sebastian Heimann's avatar
Sebastian Heimann committed
218
219
            f, dtype='<f8', count=nmodels*problem.ntargets*2).astype(num.float)

Sebastian Heimann's avatar
Sebastian Heimann committed
220
    data2 = data2.reshape((nmodels, problem.ntargets*2))
Sebastian Heimann's avatar
Sebastian Heimann committed
221

Sebastian Heimann's avatar
Sebastian Heimann committed
222
223
224
    combi = num.empty_like(data2)
    combi[:, 0::2] = data2[:, :problem.ntargets]
    combi[:, 1::2] = data2[:, problem.ntargets:]
Sebastian Heimann's avatar
Sebastian Heimann committed
225
226
227
228
229
230

    misfits = combi.reshape((nmodels, problem.ntargets, 2))

    return xs, misfits


Sebastian Heimann's avatar
Sebastian Heimann committed
231
232
233
234
def get_mean_x(xs):
    return num.mean(xs, axis=0)


235
236
237
238
239
def get_mean_x_and_gm(problem, xs, misfits):
    gms = problem.global_misfits(misfits)
    return num.mean(xs, axis=0), num.mean(gms)


Sebastian Heimann's avatar
Sebastian Heimann committed
240
241
242
243
244
245
def get_best_x(problem, xs, misfits):
    gms = problem.global_misfits(misfits)
    ibest = num.argmin(gms)
    return xs[ibest, :]


246
247
248
249
250
251
def get_best_x_and_gm(problem, xs, misfits):
    gms = problem.global_misfits(misfits)
    ibest = num.argmin(gms)
    return xs[ibest, :], gms[ibest]


Sebastian Heimann's avatar
Sebastian Heimann committed
252
253
def get_mean_source(problem, xs):
    x_mean = get_mean_x(xs)
Marius Isken's avatar
Marius Isken committed
254
    source = problem.get_source(x_mean)
Sebastian Heimann's avatar
Sebastian Heimann committed
255
256
257
258
259
    return source


def get_best_source(problem, xs, misfits):
    x_best = get_best_x(problem, xs, misfits)
Marius Isken's avatar
Marius Isken committed
260
    source = problem.get_source(x_best)
Sebastian Heimann's avatar
Sebastian Heimann committed
261
262
263
    return source


Sebastian Heimann's avatar
Sebastian Heimann committed
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
def mean_latlondist(lats, lons):
    if len(lats) == 0:
        return 0., 0., 1000.
    else:
        ns, es = od.latlon_to_ne_numpy(lats[0], lons[0], lats, lons)
        n, e = num.mean(ns), num.mean(es)
        dists = num.sqrt((ns-n)**2 + (es-e)**2)
        lat, lon = od.ne_to_latlon(lats[0], lons[0], n, e)
        return float(lat), float(lon), float(num.max(dists))


def stations_mean_latlondist(stations):
    lats = num.array([s.lat for s in stations])
    lons = num.array([s.lon for s in stations])
    return mean_latlondist(lats, lons)


def read_config(path):
    config = load(filename=path)
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
283
284
285
    if not isinstance(config, Config):
        raise GrondError('invalid Grond configuration in file "%s"' % path)

Sebastian Heimann's avatar
Sebastian Heimann committed
286
287
288
289
    config.set_basepath(op.dirname(path) or '.')
    return config


Sebastian Heimann's avatar
Sebastian Heimann committed
290
291
292
293
294
295
296
297
def write_config(config, path):
    basepath = config.get_basepath()
    dirname = op.dirname(path) or '.'
    config.change_basepath(dirname)
    guts.dump(config, filename=path)
    config.change_basepath(basepath)


Sebastian Heimann's avatar
Sebastian Heimann committed
298
299
300
301
302
def analyse(problem, niter=1000, show_progress=False):
    if niter == 0:
        return

    wtargets = []
303
304
305
306
    if not problem.has_waveforms:
        return

    for target in problem.waveform_targets:
Sebastian Heimann's avatar
Sebastian Heimann committed
307
308
309
310
311
312
313
314
        wtarget = copy.copy(target)
        wtarget.flip_norm = True
        wtarget.weight = 1.0
        wtargets.append(wtarget)

    wproblem = problem.copy()
    wproblem.targets = wtargets

Sebastian Heimann's avatar
wip...    
Sebastian Heimann committed
315
    xbounds = num.array(wproblem.get_parameter_bounds(), dtype=num.float)
Sebastian Heimann's avatar
Sebastian Heimann committed
316
317
    npar = xbounds.shape[0]

318
    mss = num.zeros((niter, wproblem.ntargets))
Sebastian Heimann's avatar
Sebastian Heimann committed
319
320
321
322
323
    rstate = num.random.RandomState(123)

    if show_progress:
        pbar = util.progressbar('analysing problem', niter)

324
    isbad_mask = None
Sebastian Heimann's avatar
Sebastian Heimann committed
325
326
327
    for iiter in xrange(niter):
        while True:
            x = []
328
329
            for ipar in xrange(npar):
                v = rstate.uniform(xbounds[ipar, 0], xbounds[ipar, 1])
Sebastian Heimann's avatar
Sebastian Heimann committed
330
331
332
333
334
335
336
337
338
                x.append(v)

            try:
                x = wproblem.preconstrain(x)
                break

            except Forbidden:
                pass

339
340
341
342
343
        if isbad_mask is not None and num.any(isbad_mask):
            isok_mask = num.logical_not(isbad_mask)
        else:
            isok_mask = None
        _, ms = wproblem.evaluate(x, mask=isok_mask)
Sebastian Heimann's avatar
Sebastian Heimann committed
344
345
        mss[iiter, :] = ms

346
347
        isbad_mask = num.isnan(ms)

Sebastian Heimann's avatar
Sebastian Heimann committed
348
349
350
351
352
353
        if show_progress:
            pbar.update(iiter)

    if show_progress:
        pbar.finish()

354
355
356
357
    mean_ms = num.mean(mss, axis=0)
    weights = 1. / mean_ms
    groups, ngroups = wproblem.get_group_mask()

358
359
360
361
    for igroup in xrange(ngroups):
        weights[groups == igroup] /= (
            num.nansum(weights[groups == igroup]) /
            num.nansum(num.isfinite(weights[groups == igroup])))
Sebastian Heimann's avatar
Sebastian Heimann committed
362

363
    for weight, target in zip(weights, problem.waveform_targets):
Sebastian Heimann's avatar
Sebastian Heimann committed
364
365
366
367
        target.analysis_result = TargetAnalysisResult(
            balancing_weight=float(weight))


368
def excentricity_compensated_probabilities(xs, sbx, factor):
369
370
371
372
373
374
375
    inonflat = num.where(sbx != 0.0)[0]
    scale = num.zeros_like(sbx)
    scale[inonflat] = 1.0 / (sbx[inonflat] * (factor if factor != 0. else 1.0))
    distances_sqr_all = num.sum(
        ((xs[num.newaxis, :, :] - xs[:, num.newaxis, :]) *
         scale[num.newaxis, num.newaxis, :])**2, axis=2)
    probabilities = 1.0 / num.sum(distances_sqr_all < 1.0, axis=1)
Sebastian Heimann's avatar
Sebastian Heimann committed
376
    print num.sort(num.sum(distances_sqr_all < 1.0, axis=1))
377
    probabilities /= num.sum(probabilities)
378
379
380
381
382
383
    return probabilities


def excentricity_compensated_choice(xs, sbx, factor):
    probabilities = excentricity_compensated_probabilities(
        xs, sbx, factor)
384
385
386
    r = num.random.random()
    ichoice = num.searchsorted(num.cumsum(probabilities), r)
    ichoice = min(ichoice, xs.shape[0]-1)
387
    return ichoice
388
389
390
391
392
393
394
395
396


def select_most_excentric(xcandidates, xs, sbx, factor):
    inonflat = num.where(sbx != 0.0)[0]
    scale = num.zeros_like(sbx)
    scale[inonflat] = 1.0 / (sbx[inonflat] * (factor if factor != 0. else 1.0))
    distances_sqr_all = num.sum(
        ((xcandidates[num.newaxis, :, :] - xs[:, num.newaxis, :]) *
         scale[num.newaxis, num.newaxis, :])**2, axis=2)
Sebastian Heimann's avatar
Sebastian Heimann committed
397
    # print num.sort(num.sum(distances_sqr_all < 1.0, axis=0))
398
399
400
401
    ichoice = num.argmin(num.sum(distances_sqr_all < 1.0, axis=0))
    return xcandidates[ichoice]


402
403
404
405
406
407
408
def local_std(xs):
    ssbx = num.sort(xs, axis=0)
    dssbx = num.diff(ssbx, axis=0)
    mdssbx = num.median(dssbx, axis=0)
    return mdssbx * dssbx.shape[0] / 2.6


Sebastian Heimann's avatar
Sebastian Heimann committed
409
410
411
412
413
414
def solve(problem,
          rundir=None,
          niter_uniform=1000,
          niter_transition=1000,
          niter_explorative=10000,
          niter_non_explorative=0,
415
          scatter_scale_transition=2.0,
416
          scatter_scale=1.0,
417
          chain_length_factor=8.0,
Sebastian Heimann's avatar
Sebastian Heimann committed
418
419
          xs_inject=None,
          sampler_distribution='multivariate_normal',
420
          standard_deviation_estimator='median_density_single_chain',
421
          compensate_excentricity=True,
422
          status=(),
Sebastian Heimann's avatar
Sebastian Heimann committed
423
          plot=None):
Sebastian Heimann's avatar
Sebastian Heimann committed
424

Sebastian Heimann's avatar
wip...    
Sebastian Heimann committed
425
    xbounds = num.array(problem.get_parameter_bounds(), dtype=num.float)
426
    npar = problem.nparameters
Sebastian Heimann's avatar
Sebastian Heimann committed
427

428
    nlinks_cap = int(round(chain_length_factor * npar + 1))
Sebastian Heimann's avatar
Sebastian Heimann committed
429
430
431
    chains_m = num.zeros((1 + problem.nbootstrap, nlinks_cap), num.float)
    chains_i = num.zeros((1 + problem.nbootstrap, nlinks_cap), num.int)
    nlinks = 0
Sebastian Heimann's avatar
Sebastian Heimann committed
432
    mbx = None
Sebastian Heimann's avatar
Sebastian Heimann committed
433
434
435
436
437
438

    if xs_inject is not None and xs_inject.size != 0:
        niter_inject = xs_inject.shape[0]
    else:
        niter_inject = 0

439
    niter = niter_inject + niter_uniform + niter_transition + \
440
        niter_explorative + niter_non_explorative
Sebastian Heimann's avatar
Sebastian Heimann committed
441
442

    iiter = 0
Sebastian Heimann's avatar
Sebastian Heimann committed
443
444
    sbx = None
    mxs = None
Sebastian Heimann's avatar
Sebastian Heimann committed
445
    covs = None
446
    local_sxs = None
Sebastian Heimann's avatar
Sebastian Heimann committed
447
448
449
    xhist = num.zeros((niter, npar))
    isbad_mask = None
    accept_sum = num.zeros(1 + problem.nbootstrap, dtype=num.int)
450
    accept_hist = num.zeros(niter, dtype=num.int)
451
    pnames = problem.parameter_names
Sebastian Heimann's avatar
Sebastian Heimann committed
452

453
    if plot:
Sebastian Heimann's avatar
Sebastian Heimann committed
454
        plot.start(problem)
455

Sebastian Heimann's avatar
Sebastian Heimann committed
456
    while iiter < niter:
Sebastian Heimann's avatar
Sebastian Heimann committed
457
        ibootstrap_choice = None
458
        ichoice = None
Sebastian Heimann's avatar
Sebastian Heimann committed
459

460
461
462
463
464
465
466
467
468
469
470
471
        if iiter < niter_inject:
            phase = 'inject'
        elif iiter < niter_inject + niter_uniform:
            phase = 'uniform'
        elif iiter < niter_inject + niter_uniform + niter_transition:
            phase = 'transition'
        elif iiter < niter_inject + niter_uniform + niter_transition + \
                niter_explorative:
            phase = 'explorative'
        else:
            phase = 'non_explorative'

472
        factor = 0.0
473
        if phase == 'transition':
474
475
476
477
478
479
480
481
482
483
            T = float(niter_transition)
            A = scatter_scale_transition
            B = scatter_scale
            tau = T/(math.log(A) - math.log(B))
            t0 = math.log(A) * T / (math.log(A) - math.log(B))
            t = float(iiter - niter_uniform - niter_inject)
            factor = num.exp(-(t-t0) / tau)

        elif phase in ('explorative', 'non_explorative'):
            factor = scatter_scale
Sebastian Heimann's avatar
Sebastian Heimann committed
484
485
486
487

        ntries_preconstrain = 0
        ntries_sample = 0

488
        if phase == 'inject':
Sebastian Heimann's avatar
Sebastian Heimann committed
489
490
491
492
493
            x = xs_inject[iiter, :]
        else:
            while True:
                ntries_preconstrain += 1

494
                if mbx is None or phase == 'uniform':
495
                    x = problem.random_uniform(xbounds)
Sebastian Heimann's avatar
Sebastian Heimann committed
496
                else:
Sebastian Heimann's avatar
Sebastian Heimann committed
497
498
499
                    # ibootstrap_choice = num.random.randint(
                    #     0, 1 + problem.nbootstrap)
                    ibootstrap_choice = num.argmin(accept_sum)
Sebastian Heimann's avatar
Sebastian Heimann committed
500

501
                    if phase in ('transition', 'explorative'):
502
503

                        if compensate_excentricity:
504
                            ichoice = excentricity_compensated_choice(
Sebastian Heimann's avatar
Sebastian Heimann committed
505
506
                                xhist[chains_i[ibootstrap_choice, :], :],
                                local_sxs[ibootstrap_choice], 2.)
507

Sebastian Heimann's avatar
Sebastian Heimann committed
508
509
                            xchoice = xhist[
                                chains_i[ibootstrap_choice, ichoice], :]
510

511
512
                        else:
                            ichoice = num.random.randint(0, nlinks)
Sebastian Heimann's avatar
Sebastian Heimann committed
513
514
                            xchoice = xhist[
                                chains_i[ibootstrap_choice, ichoice], :]
Sebastian Heimann's avatar
Sebastian Heimann committed
515
                    else:
Sebastian Heimann's avatar
Sebastian Heimann committed
516
                        xchoice = mxs[ibootstrap_choice]
Sebastian Heimann's avatar
Sebastian Heimann committed
517
518
519
520

                    if sampler_distribution == 'multivariate_normal':
                        ntries_sample = 0

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
521
522
                        ntry = 0
                        ok_mask_sum = num.zeros(npar, dtype=num.int)
Sebastian Heimann's avatar
Sebastian Heimann committed
523
524
525
                        while True:
                            ntries_sample += 1
                            vs = num.random.multivariate_normal(
Sebastian Heimann's avatar
Sebastian Heimann committed
526
                                xchoice, factor**2 * covs[ibootstrap_choice])
Sebastian Heimann's avatar
Sebastian Heimann committed
527

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
528
529
530
531
                            ok_mask = num.logical_and(
                                xbounds[:, 0] <= vs, vs <= xbounds[:, 1])

                            if num.all(ok_mask):
Sebastian Heimann's avatar
Sebastian Heimann committed
532
533
                                break

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
534
535
536
537
538
539
540
541
542
543
544
545
                            ok_mask_sum += ok_mask

                            if ntry > 1000:
                                raise GrondError(
                                    'failed to produce a suitable candidate '
                                    'sample from multivariate normal '
                                    'distribution, (%s)' %
                                    ', '.join('%s:%i' % xx for xx in
                                              zip(pnames, ok_mask_sum)))

                            ntry += 1

Sebastian Heimann's avatar
Sebastian Heimann committed
546
547
548
                        x = vs.tolist()

                    if sampler_distribution == 'normal':
549
550
551
552
553
554
                        ncandidates = 1
                        xcandidates = num.zeros((ncandidates, npar))
                        for icandidate in xrange(ncandidates):
                            for ipar in xrange(npar):
                                ntry = 0
                                while True:
Sebastian Heimann's avatar
Sebastian Heimann committed
555
                                    if local_sxs[ibootstrap_choice][ipar] > 0.:
556
                                        v = num.random.normal(
Sebastian Heimann's avatar
Sebastian Heimann committed
557
558
559
                                            xchoice[ipar],
                                            factor*local_sxs[
                                                ibootstrap_choice][ipar])
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
                                    else:
                                        v = xchoice[ipar]

                                    if xbounds[ipar, 0] <= v and \
                                            v <= xbounds[ipar, 1]:

                                        break

                                    if ntry > 1000:
                                        raise GrondError(
                                            'failed to produce a suitable '
                                            'candidate sample from normal '
                                            'distribution')

                                    ntry += 1

                                xcandidates[icandidate, ipar] = v

                        x = select_most_excentric(
                            xcandidates,
Sebastian Heimann's avatar
Sebastian Heimann committed
580
581
                            xhist[chains_i[ibootstrap_choice, :], :],
                            local_sxs[ibootstrap_choice],
582
                            factor)
Sebastian Heimann's avatar
Sebastian Heimann committed
583
584
585
586
587
588
589
590

                try:
                    x = problem.preconstrain(x)
                    break

                except Forbidden:
                    pass

591
        ibase = None
Sebastian Heimann's avatar
Sebastian Heimann committed
592
593
        if ichoice is not None and ibootstrap_choice is not None:
            ibase = chains_i[ibootstrap_choice, ichoice]
594

595
596
597
598
599
600
        if isbad_mask is not None and num.any(isbad_mask):
            isok_mask = num.logical_not(isbad_mask)
        else:
            isok_mask = None

        ms, ns = problem.evaluate(x, mask=isok_mask)
Sebastian Heimann's avatar
Sebastian Heimann committed
601
602
603
604
605
606

        isbad_mask_new = num.isnan(ms)
        if isbad_mask is not None and num.any(isbad_mask != isbad_mask_new):
            logger.error(
                'skipping problem %s: inconsistency in data availability' %
                problem.name)
Sebastian Heimann's avatar
Sebastian Heimann committed
607

Sebastian Heimann's avatar
flake8    
Sebastian Heimann committed
608
609
610
            for target, isbad_new, isbad in zip(
                    problem.targets, isbad_mask_new, isbad_mask):

Sebastian Heimann's avatar
Sebastian Heimann committed
611
612
613
614
                if isbad_new != isbad:
                    logger.error('%s, %s -> %s' % (
                        target.string_id(), isbad, isbad_new))

Sebastian Heimann's avatar
Sebastian Heimann committed
615
616
617
618
619
620
621
622
623
624
            return

        isbad_mask = isbad_mask_new

        if num.all(isbad_mask):
            logger.error(
                'skipping problem %s: all target misfit values are NaN' %
                problem.name)
            return

625
626
        gm = problem.global_misfit(ms, ns)
        bms = problem.bootstrap_misfit(ms, ns)
Sebastian Heimann's avatar
Sebastian Heimann committed
627

628
629
        chains_m[0, nlinks] = gm
        chains_m[1:, nlinks] = bms
Sebastian Heimann's avatar
Sebastian Heimann committed
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
        chains_i[:, nlinks] = iiter

        nlinks += 1

        for ichain in xrange(chains_m.shape[0]):
            isort = num.argsort(chains_m[ichain, :nlinks])
            chains_m[ichain, :nlinks] = chains_m[ichain, isort]
            chains_i[ichain, :nlinks] = chains_i[ichain, isort]

        if nlinks == nlinks_cap:
            accept = (chains_i[:, nlinks_cap-1] != iiter).astype(num.int)
            nlinks -= 1
        else:
            accept = num.ones(1 + problem.nbootstrap, dtype=num.int)

Sebastian Heimann's avatar
Sebastian Heimann committed
645
646
647
        if rundir:
            problem.dump_problem_data(
                rundir, x, ms, ns, accept,
648
                ibootstrap_choice if ibootstrap_choice is not None else -1,
Sebastian Heimann's avatar
Sebastian Heimann committed
649
650
                ibase if ibase is not None else -1)

Sebastian Heimann's avatar
Sebastian Heimann committed
651
        accept_sum += accept
652
        accept_hist[iiter] = num.sum(accept)
Sebastian Heimann's avatar
Sebastian Heimann committed
653
654
655

        lines = []
        if 'state' in status:
656
            lines.append('%s, %i' % (problem.name, iiter))
Sebastian Heimann's avatar
Sebastian Heimann committed
657
658
659
660
661
662
663
664
            lines.append(''.join('-X'[int(acc)] for acc in accept))

        xhist[iiter, :] = x

        bxs = xhist[chains_i[:, :nlinks].ravel(), :]
        gxs = xhist[chains_i[0, :nlinks], :]
        gms = chains_m[0, :nlinks]

Marius Isken's avatar
Marius Isken committed
665
666
667
668
669
        col_width = 15
        col_param_width = max([len(p) for p in problem.parameter_names])+2
        console_output = '{:<{col_param_width}s}'
        console_output += ''.join(['{:>{col_width}{type}}'] * 5)

Sebastian Heimann's avatar
Sebastian Heimann committed
670
        if nlinks > (nlinks_cap-1)/2:
Sebastian Heimann's avatar
Sebastian Heimann committed
671
672
673
674
675
676
677
678
679
680
681
            # mean and std of all bootstrap ensembles together
            mbx = num.mean(bxs, axis=0)
            sbx = num.std(bxs, axis=0)

            # mean and std of global configuration
            mgx = num.mean(gxs, axis=0)
            sgx = num.std(gxs, axis=0)

            # best in global configuration
            bgx = xhist[chains_i[0, 0], :]

Sebastian Heimann's avatar
Sebastian Heimann committed
682
            covs = []
Sebastian Heimann's avatar
Sebastian Heimann committed
683
            mxs = []
684
685
            local_sxs = []

Sebastian Heimann's avatar
Sebastian Heimann committed
686
687
            for i in xrange(1 + problem.nbootstrap):
                xs = xhist[chains_i[i, :nlinks], :]
Sebastian Heimann's avatar
Sebastian Heimann committed
688
689
690
691
692
                mx = num.mean(xs, axis=0)
                cov = num.cov(xs.T)

                mxs.append(mx)
                covs.append(cov)
693
694
695
696
697
698
699
700
701
702
703
704
705
                if standard_deviation_estimator == \
                        'median_density_single_chain':
                    local_sx = local_std(xs)
                    local_sxs.append(local_sx)
                elif standard_deviation_estimator == \
                        'standard_deviation_all_chains':
                    local_sxs.append(sbx)
                elif standard_deviation_estimator == \
                        'standard_deviation_single_chain':
                    sx = num.std(xs, axis=0)
                    local_sxs.append(sx)
                else:
                    assert False, 'invalid standard_deviation_estimator choice'
Sebastian Heimann's avatar
Sebastian Heimann committed
706
707
708

            if 'state' in status:
                lines.append(
Marius Isken's avatar
Marius Isken committed
709
710
711
712
713
714
                    console_output.format(
                        'parameter', 'B mean', 'B std', 'G mean', 'G std',
                        'G best',
                        col_param_width=col_param_width,
                        col_width=col_width,
                        type='s'))
Sebastian Heimann's avatar
Sebastian Heimann committed
715

Sebastian Heimann's avatar
Sebastian Heimann committed
716
                for (pname, mbv, sbv, mgv, sgv, bgv) in zip(
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
717
                        pnames, mbx, sbx, mgx, sgx, bgx):
Sebastian Heimann's avatar
Sebastian Heimann committed
718
719

                    lines.append(
Marius Isken's avatar
Marius Isken committed
720
721
722
723
724
                        console_output.format(
                            pname, mbv, sbv, mgv, sgv, bgv,
                            col_param_width=col_param_width,
                            col_width=col_width,
                            type='.4g'))
Sebastian Heimann's avatar
Sebastian Heimann committed
725

Marius Isken's avatar
Marius Isken committed
726
727
728
729
730
731
732
733
734
                lines.append(
                    console_output.format(
                        'misfit', '', '',
                        '%.4g' % num.mean(gms),
                        '%.4g' % num.std(gms),
                        '%.4g' % num.min(gms),
                        col_param_width=col_param_width,
                        col_width=col_width,
                        type='s'))
Sebastian Heimann's avatar
Sebastian Heimann committed
735
736
737

        if 'state' in status:
            lines.append(
Marius Isken's avatar
Marius Isken committed
738
                console_output.format(
739
                    'iteration', iiter+1, '(%s, %g)' % (phase, factor),
Marius Isken's avatar
Marius Isken committed
740
741
742
743
                    ntries_sample, ntries_preconstrain, '',
                    col_param_width=col_param_width,
                    col_width=col_width,
                    type=''))
Sebastian Heimann's avatar
Sebastian Heimann committed
744
745
746
747
748
749
750
751
752
753
754

        if 'matrix' in status:
            matrix = (chains_i[:, :30] % 94 + 32).T
            for row in matrix[::-1]:
                lines.append(''.join(chr(xxx) for xxx in row))

        if status:
            lines[0:0] = ['\033[2J']
            lines.append('')
            print '\n'.join(lines)

Sebastian Heimann's avatar
Sebastian Heimann committed
755
        if plot and plot.want_to_update(iiter):
Sebastian Heimann's avatar
Sebastian Heimann committed
756
            plot.update(
757
758
759
                xhist[:iiter+1, :],
                chains_i[:, :nlinks],
                ibase,
Sebastian Heimann's avatar
Sebastian Heimann committed
760
                ibootstrap_choice,
761
                local_sxs,
762
763
                factor)

Sebastian Heimann's avatar
Sebastian Heimann committed
764
765
        iiter += 1

766
    if plot:
Sebastian Heimann's avatar
Sebastian Heimann committed
767
        plot.finish()
768

Sebastian Heimann's avatar
Sebastian Heimann committed
769

Sebastian Heimann's avatar
Sebastian Heimann committed
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
def bootstrap_outliers(problem, misfits, std_factor=1.0):
    '''
    Identify bootstrap configurations performing bad in global configuration
    '''

    gms = problem.global_misfits(misfits)

    ibests = []
    for ibootstrap in xrange(problem.nbootstrap):
        bms = problem.bootstrap_misfits(misfits, ibootstrap)
        ibests.append(num.argmin(bms))

    m = num.median(gms[ibests])
    s = num.std(gms[ibests])

    return num.where(gms > m+s)[0]


Sebastian Heimann's avatar
Sebastian Heimann committed
788
789
790
791
def forward(rundir_or_config_path, event_names):

    if not event_names:
        return
Sebastian Heimann's avatar
Sebastian Heimann committed
792

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
793
    if os.path.isdir(rundir_or_config_path):
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
794
795
796
797
        rundir = rundir_or_config_path
        config = guts.load(
            filename=op.join(rundir, 'config.yaml'))

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
798
        config.set_basepath(rundir)
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
799
800
        problem, xs, misfits = load_problem_info_and_data(
            rundir, subset='harvest')
Sebastian Heimann's avatar
Sebastian Heimann committed
801

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
802
803
804
        gms = problem.global_misfits(misfits)
        ibest = num.argmin(gms)
        xbest = xs[ibest, :]
Sebastian Heimann's avatar
Sebastian Heimann committed
805

806
        ds = config.get_dataset(problem.base_source.name)
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
807
        problem.set_engine(config.engine_config.get_engine())
Sebastian Heimann's avatar
Sebastian Heimann committed
808

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
809
810
811
812
813
814
815
816
817
        for target in problem.targets:
            target.set_dataset(ds)

        payload = [(problem, xbest)]

    else:
        config = read_config(rundir_or_config_path)

        payload = []
818
819
820
        for event_name in event_names:
            ds = config.get_dataset(event_name)
            event = ds.get_event()
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
821
822
823
824
            problem = config.get_problem(event)
            xref = problem.preconstrain(
                problem.pack(problem.base_source))
            payload.append((problem, xref))
Sebastian Heimann's avatar
Sebastian Heimann committed
825
826

    all_trs = []
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
827
828
829
    events = []
    for (problem, x) in payload:
        ds.empty_cache()
830
        ms, ns, results = problem.evaluate(x, result_mode='full')
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
831

Marius Isken's avatar
Marius Isken committed
832
        event = problem.get_source(x).pyrocko_event()
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
833
        events.append(event)
Sebastian Heimann's avatar
Sebastian Heimann committed
834
835

        for result in results:
836
            if not isinstance(result, gf.SeismosizerError):
Sebastian Heimann's avatar
Sebastian Heimann committed
837
838
839
840
841
                result.filtered_obs.set_codes(location='ob')
                result.filtered_syn.set_codes(location='sy')
                all_trs.append(result.filtered_obs)
                all_trs.append(result.filtered_syn)

Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
842
843
    markers = []
    for ev in events:
844
        markers.append(pmarker.EventMarker(ev))
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
845
846

    trace.snuffle(all_trs, markers=markers, stations=ds.get_stations())
Sebastian Heimann's avatar
Sebastian Heimann committed
847
848


Sebastian Heimann's avatar
Sebastian Heimann committed
849
def harvest(rundir, problem=None, nbest=10, force=False, weed=0):
Sebastian Heimann's avatar
Sebastian Heimann committed
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864

    if problem is None:
        problem, xs, misfits = load_problem_info_and_data(rundir)
    else:
        xs, misfits = load_problem_data(rundir, problem)

    dumpdir = op.join(rundir, 'harvest')
    if op.exists(dumpdir):
        if force:
            shutil.rmtree(dumpdir)
        else:
            raise DirectoryAlreadyExists(dumpdir)

    util.ensuredir(dumpdir)

Sebastian Heimann's avatar
Sebastian Heimann committed
865
866
    ibests_list = []
    ibests = []
Sebastian Heimann's avatar
Sebastian Heimann committed
867
868
869
    gms = problem.global_misfits(misfits)
    isort = num.argsort(gms)

Sebastian Heimann's avatar
Sebastian Heimann committed
870
871
    ibests_list.append(isort[:nbest])

872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
    if weed != 3:
        for ibootstrap in xrange(problem.nbootstrap):
            bms = problem.bootstrap_misfits(misfits, ibootstrap)
            isort = num.argsort(bms)
            ibests_list.append(isort[:nbest])
            ibests.append(isort[0])

        if weed:
            mean_gm_best = num.median(gms[ibests])
            std_gm_best = num.std(gms[ibests])
            ibad = set()

            for ibootstrap, ibest in enumerate(ibests):
                if gms[ibest] > mean_gm_best + std_gm_best:
                    ibad.add(ibootstrap)

            ibests_list = [
                ibests_ for (ibootstrap, ibests_) in enumerate(ibests_list)
                if ibootstrap not in ibad]
Sebastian Heimann's avatar
Sebastian Heimann committed
891

892
    ibests = num.concatenate(ibests_list)
Sebastian Heimann's avatar
Sebastian Heimann committed
893
894
895
896
897
898
899
900

    if weed == 2:
        ibests = ibests[gms[ibests] < mean_gm_best]

    for i in ibests:
        x = xs[i]
        ms = misfits[i, :, 0]
        ns = misfits[i, :, 1]
Sebastian Heimann's avatar
Sebastian Heimann committed
901
902
903
        problem.dump_problem_data(dumpdir, x, ms, ns)


Sebastian Heimann's avatar
Sebastian Heimann committed
904
905
906
907
def get_event_names(config):
    return config.get_event_names()


Sebastian Heimann's avatar
Sebastian Heimann committed
908
def check_problem(problem):
Sebastian Heimann's avatar
Sebastian Heimann committed
909
910
911
912
    if len(problem.targets) == 0:
        raise GrondError('no targets available')


913
914
915
916
917
def check(
        config,
        event_names=None,
        target_string_ids=None,
        show_plot=False,
918
        show_waveforms=False,
919
920
        n_random_synthetics=10):

921
922
923
    if show_plot:
        from matplotlib import pyplot as plt
        from grond.plot import colors
Sebastian Heimann's avatar
Sebastian Heimann committed
924

925
    markers = []
926
927
928
    for ievent, event_name in enumerate(event_names):
        ds = config.get_dataset(event_name)
        event = ds.get_event()
929
        trs_all = []
Sebastian Heimann's avatar
Sebastian Heimann committed
930
931
        try:
            problem = config.get_problem(event)
932

Sebastian Heimann's avatar
Sebastian Heimann committed
933
934
935
936
            _, ngroups = problem.get_group_mask()
            logger.info('number of target supergroups: %i' % ngroups)
            logger.info('number of targets (total): %i' % len(problem.targets))

937
938
939
            if target_string_ids:
                problem.targets = [
                    target for target in problem.targets
Sebastian Heimann's avatar
Sebastian Heimann committed
940
941
                    if util.match_nslc(target_string_ids, target.string_id())]

942
943
            logger.info(
                'number of targets (selected): %i' % len(problem.targets))
944

Sebastian Heimann's avatar
Sebastian Heimann committed
945
946
            check_problem(problem)

947
948
            xbounds = num.array(
                problem.get_parameter_bounds(), dtype=num.float)
Sebastian Heimann's avatar
Sebastian Heimann committed
949
950

            results_list = []
951

952
            sources = []
953
954
            if n_random_synthetics == 0:
                x = problem.pack(problem.base_source)
955
                sources.append(problem.base_source)
956
                ms, ns, results = problem.evaluate(x, result_mode='full')
Sebastian Heimann's avatar
Sebastian Heimann committed
957
958
                results_list.append(results)

959
960
961
            else:
                for i in xrange(n_random_synthetics):
                    x = problem.random_uniform(xbounds)
Marius Isken's avatar
Marius Isken committed
962
                    sources.append(problem.get_source(x))
963
964
965
                    ms, ns, results = problem.evaluate(x, result_mode='full')
                    results_list.append(results)

966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
            if show_waveforms:
                engine = config.engine_config.get_engine()
                times = []
                tdata = []
                for target in problem.targets:
                    tobs_shift_group = []
                    tcuts = []
                    for source in sources:
                        tmin_fit, tmax_fit, tfade, tfade_taper = \
                            target.get_taper_params(engine, source)

                        times.extend((tmin_fit-tfade*2., tmax_fit+tfade*2.))

                        tobs, tsyn = target.get_pick_shift(engine, source)
                        if None not in (tobs, tsyn):
                            tobs_shift = tobs - tsyn
                        else:
                            tobs_shift = 0.0

                        tcuts.append(target.get_cutout_timespan(
                            tmin_fit+tobs_shift, tmax_fit+tobs_shift, tfade))

                        tobs_shift_group.append(tobs_shift)

                    tcuts = num.array(tcuts, dtype=num.float)

                    tdata.append((
                        tfade,
                        num.mean(tobs_shift_group),
                        (num.min(tcuts[:, 0]), num.max(tcuts[:, 1]))))

                tmin = min(times)
                tmax = max(times)

                tmax += (tmax-tmin)*2