core.py 76.6 KB
Newer Older
Sebastian Heimann's avatar
Sebastian Heimann committed
1
2
3
4
5
6
7
import math
import os
import sys
import logging
import time
import copy
import shutil
Sebastian Heimann's avatar
Sebastian Heimann committed
8
import glob
Sebastian Heimann's avatar
Sebastian Heimann committed
9
import os.path as op
10
from string import Template
Sebastian Heimann's avatar
Sebastian Heimann committed
11
12
13
14

import numpy as num

from pyrocko.guts import load, Object, String, Float, Int, Bool, List, \
Sebastian Heimann's avatar
Sebastian Heimann committed
15
    StringChoice, Dict, Timestamp
Sebastian Heimann's avatar
Sebastian Heimann committed
16
from pyrocko import orthodrome as od, gf, trace, guts, util, weeding
17
from pyrocko import parimap, model, marker as pmarker
18
from pyrocko.guts_array import Array
Sebastian Heimann's avatar
Sebastian Heimann committed
19
20
21
22
23
24
25
26

from grond import dataset

logger = logging.getLogger('grond.core')

guts_prefix = 'grond'


27
28
29
30
31
32
33
34
35
36
37
def float_or_none(x):
    if x is None:
        return x
    else:
        return float(x)


class Trace(Object):
    pass


38
39
40
41
42
43
44
45
46
47
48
def backazimuth_for_waveform(azimuth, nslc):
    if nslc[-1] == 'R':
        backazimuth = azimuth + 180.
    elif nslc[-1] == 'T':
        backazimuth = azimuth + 90.
    else:
        backazimuth = None

    return backazimuth


49
50
51
52
53
54
55
56
57
class TraceSpectrum(Object):
    network = String.T()
    station = String.T()
    location = String.T()
    channel = String.T()
    deltaf = Float.T(default=1.0)
    fmin = Float.T(default=0.0)
    ydata = Array.T(shape=(None,), dtype=num.complex, serialize_as='list')

58
59
60
61
62
63
    def get_ydata(self):
        return self.ydata

    def get_xdata(self):
        return self.fmin + num.arange(self.ydata.size) * self.deltaf

64

Sebastian Heimann's avatar
Sebastian Heimann committed
65
66
67
68
69
70
71
def mahalanobis_distance(xs, mx, cov):
    imask = num.diag(cov) != 0.
    icov = num.linalg.inv(cov[imask, :][:, imask])
    temp = xs[:, imask] - mx[imask]
    return num.sqrt(num.sum(temp * num.dot(icov, temp.T).T, axis=1))


Sebastian Heimann's avatar
Sebastian Heimann committed
72
73
74
75
76
class Parameter(Object):
    name = String.T()
    unit = String.T(optional=True)
    scale_factor = Float.T(default=1., optional=True)
    scale_unit = String.T(optional=True)
77
    label = String.T(optional=True)
Sebastian Heimann's avatar
Sebastian Heimann committed
78
79
80
81
82
83
84
85
86

    def __init__(self, *args, **kwargs):
        if len(args) >= 1:
            kwargs['name'] = args[0]
        if len(args) >= 2:
            kwargs['unit'] = args[1]

        Object.__init__(self, **kwargs)

87
88
89
90
91
92
    def get_label(self, with_unit=True):
        l = [self.label or self.name]
        if with_unit:
            unit = self.get_unit_label()
            if unit:
                l.append('[%s]' % unit)
Sebastian Heimann's avatar
Sebastian Heimann committed
93
94
95

        return ' '.join(l)

96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
    def get_value_label(self, value, format='%(value)g%(unit)s'):
        value = self.scaled(value)
        unit = self.get_unit_suffix()
        return format % dict(value=value, unit=unit)

    def get_unit_label(self):
        if self.scale_unit is not None:
            return self.scale_unit
        elif self.unit:
            return self.unit
        else:
            return None

    def get_unit_suffix(self):
        unit = self.get_unit_label()
        if not unit:
            return ''
        else:
            return ' %s' % unit

Sebastian Heimann's avatar
Sebastian Heimann committed
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
    def scaled(self, x):
        if isinstance(x, tuple):
            return tuple(v/self.scale_factor for v in x)
        if isinstance(x, list):
            return list(v/self.scale_factor for v in x)
        else:
            return x/self.scale_factor


class ADict(dict):
    def __getattr__(self, k):
        return self[k]

    def __setattr__(self, k, v):
        self[k] = v


class Problem(Object):
    name = String.T()
    parameters = List.T(Parameter.T())
    dependants = List.T(Parameter.T())
137
    apply_balancing_weights = Bool.T(default=True)
Sebastian Heimann's avatar
Sebastian Heimann committed
138
    norm_exponent = Int.T(default=2)
139
    base_source = gf.Source.T(optional=True)
Sebastian Heimann's avatar
Sebastian Heimann committed
140
141
142
143
144

    def __init__(self, **kwargs):
        Object.__init__(self, **kwargs)
        self._bootstrap_weights = None
        self._target_weights = None
Sebastian Heimann's avatar
Sebastian Heimann committed
145
        self._engine = None
146
        self._group_mask = None
Sebastian Heimann's avatar
Sebastian Heimann committed
147
148
149

    def get_engine(self):
        return self._engine
Sebastian Heimann's avatar
Sebastian Heimann committed
150
151
152
153
154
155
156
157
158
159
160
161
162

    def copy(self):
        o = copy.copy(self)
        o._bootstrap_weights = None
        o._target_weights = None
        return o

    def parameter_dict(self, x):
        return ADict((p.name, v) for (p, v) in zip(self.parameters, x))

    def parameter_array(self, d):
        return num.array([d[p.name] for p in self.parameters], dtype=num.float)

Sebastian Heimann's avatar
Sebastian Heimann committed
163
164
165
166
    @property
    def parameter_names(self):
        return [p.name for p in self.combined]

Sebastian Heimann's avatar
Sebastian Heimann committed
167
168
169
170
171
    def dump_problem_info(self, dirname):
        fn = op.join(dirname, 'problem.yaml')
        util.ensuredirs(fn)
        guts.dump(self, filename=fn)

Sebastian Heimann's avatar
Sebastian Heimann committed
172
173
174
175
    def dump_problem_data(
            self, dirname, x, ms, ns,
            accept=None, ibootstrap_choice=None, ibase=None):

Sebastian Heimann's avatar
Sebastian Heimann committed
176
177
178
179
180
181
182
183
184
        fn = op.join(dirname, 'x')
        with open(fn, 'ab') as f:
            x.astype('<f8').tofile(f)

        fn = op.join(dirname, 'misfits')
        with open(fn, 'ab') as f:
            ms.astype('<f8').tofile(f)
            ns.astype('<f8').tofile(f)

Sebastian Heimann's avatar
Sebastian Heimann committed
185
186
187
188
189
190
191
192
193
194
        if None not in (ibootstrap_choice, ibase):
            fn = op.join(dirname, 'choices')
            with open(fn, 'ab') as f:
                num.array((ibootstrap_choice, ibase), dtype='<i8').tofile(f)

        if accept is not None:
            fn = op.join(dirname, 'accepted')
            with open(fn, 'ab') as f:
                accept.astype('<i1').tofile(f)

Sebastian Heimann's avatar
Sebastian Heimann committed
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
    def name_to_index(self, name):
        pnames = [p.name for p in self.combined]
        return pnames.index(name)

    @property
    def nparameters(self):
        return len(self.parameters)

    @property
    def ntargets(self):
        return len(self.targets)

    @property
    def ndependants(self):
        return len(self.dependants)

    @property
    def ncombined(self):
        return len(self.parameters) + len(self.dependants)

    @property
    def combined(self):
        return self.parameters + self.dependants

    def make_bootstrap_weights(self, nbootstrap):
220
        ntargets = self.ntargets
Sebastian Heimann's avatar
Sebastian Heimann committed
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
        ws = num.zeros((nbootstrap, ntargets))
        rstate = num.random.RandomState(23)
        for ibootstrap in xrange(nbootstrap):
            ii = rstate.randint(0, ntargets, size=self.ntargets)
            ws[ibootstrap, :] = num.histogram(
                ii, ntargets, (-0.5, ntargets - 0.5))[0]

        return ws

    def get_bootstrap_weights(self, ibootstrap=None):
        if self._bootstrap_weights is None:
            self._bootstrap_weights = self.make_bootstrap_weights(
                self.nbootstrap)

        if ibootstrap is None:
            return self._bootstrap_weights
        else:
            return self._bootstrap_weights[ibootstrap, :]

Sebastian Heimann's avatar
Sebastian Heimann committed
240
241
242
    def set_engine(self, engine):
        self._engine = engine

243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
    def make_group_mask(self):
        super_group_names = set()
        groups = num.zeros(len(self.targets), dtype=num.int)
        ngroups = 0
        for itarget, target in enumerate(self.targets):
            if target.super_group not in super_group_names:
                super_group_names.add(target.super_group)
                ngroups += 1

            groups[itarget] = ngroups - 1

        return groups, ngroups

    def get_group_mask(self):
        if self._group_mask is None:
            self._group_mask = self.make_group_mask()

        return self._group_mask

262
263
264
    def xref(self):
        return self.pack(self.base_source)

Sebastian Heimann's avatar
Sebastian Heimann committed
265
266
267
268

class ProblemConfig(Object):
    name_template = String.T()
    apply_balancing_weights = Bool.T(default=True)
Sebastian Heimann's avatar
Sebastian Heimann committed
269
    norm_exponent = Int.T(default=2)
Sebastian Heimann's avatar
Sebastian Heimann committed
270
271
272
273
274
275
276
277
278
279
280
281
282
283


class Forbidden(Exception):
    pass


class DirectoryAlreadyExists(Exception):
    pass


class GrondError(Exception):
    pass


284
285
286
287
288
289
290
291
292
class DomainChoice(StringChoice):
    choices = [
        'time_domain',
        'frequency_domain',
        'envelope',
        'absolute',
        'cc_max_norm']


Sebastian Heimann's avatar
Sebastian Heimann committed
293
294
295
296
class InnerMisfitConfig(Object):
    fmin = Float.T()
    fmax = Float.T()
    ffactor = Float.T(default=1.5)
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
    tmin = gf.Timing.T(
        help='Start of main time window used for waveform fitting.')
    tmax = gf.Timing.T(
        help='End of main time window used for waveform fitting.')
    tfade = Float.T(
        optional=True,
        help='Decay time of taper prepended and appended to main time window '
             'used for waveform fitting [s].')
    pick_synthetic_traveltime = gf.Timing.T(
        optional=True,
        help='Synthetic phase arrival definition for alignment of observed '
             'and synthetic traces.')
    pick_phasename = String.T(
        optional=True,
        help='Name of picked phase for alignment of observed and synthetic '
             'traces.')
    domain = DomainChoice.T(
        default='time_domain',
        help='Type of data characteristic to be fitted.\n\nAvailable choices '
             'are: %s' % ', '.join("``'%s'``" % s
                                   for s in DomainChoice.choices))
Sebastian Heimann's avatar
Sebastian Heimann committed
318
319
320
321
    norm_exponent = Int.T(
        default=2,
        help='Exponent to use in norm (1: L1-norm, 2: L2-norm)')

322
323
324
325
326
327
328
329
330
331
    tautoshift_max = Float.T(
        default=0.0,
        help='If non-zero, allow synthetic and observed traces to be shifted '
             'against each other by up to +/- the given value [s].')
    autoshift_penalty_max = Float.T(
        default=0.0,
        help='If non-zero, a penalty misfit is added for non-zero shift '
             'values.\n\nThe penalty value is computed as '
             '``autoshift_penalty_max * normalization_factor * tautoshift**2 '
             '/ tautoshift_max**2``')
Sebastian Heimann's avatar
Sebastian Heimann committed
332

333
334
335
    def get_full_frequency_range(self):
        return self.fmin / self.ffactor, self.fmax * self.ffactor

Sebastian Heimann's avatar
Sebastian Heimann committed
336
337
338
339
340
341
342
343
344

class TargetAnalysisResult(Object):
    balancing_weight = Float.T()


class NoAnalysisResults(Exception):
    pass


345
346
347
348
349
350
351
352
353
354
355
356
class MisfitResult(gf.Result):
    misfit_value = Float.T()
    misfit_norm = Float.T()
    processed_obs = Trace.T(optional=True)
    processed_syn = Trace.T(optional=True)
    filtered_obs = Trace.T(optional=True)
    filtered_syn = Trace.T(optional=True)
    spectrum_obs = TraceSpectrum.T(optional=True)
    spectrum_syn = TraceSpectrum.T(optional=True)
    taper = trace.Taper.T(optional=True)
    tobs_shift = Float.T(optional=True)
    tsyn_pick = Timestamp.T(optional=True)
357
    tshift = Float.T(optional=True)
358
359
360
    cc = Trace.T(optional=True)


Sebastian Heimann's avatar
Sebastian Heimann committed
361
362
363
364
365
class MisfitTarget(gf.Target):
    misfit_config = InnerMisfitConfig.T()
    flip_norm = Bool.T(default=False)
    manual_weight = Float.T(default=1.0)
    analysis_result = TargetAnalysisResult.T(optional=True)
366
367
    super_group = gf.StringID.T()
    group = gf.StringID.T()
Sebastian Heimann's avatar
Sebastian Heimann committed
368
369
370
371

    def __init__(self, **kwargs):
        gf.Target.__init__(self, **kwargs)
        self._ds = None
372
        self._result_mode = 'sparse'
373
374
375
376

    def string_id(self):
        return '.'.join(x for x in (
            self.super_group, self.group) + self.codes if x)
Sebastian Heimann's avatar
Sebastian Heimann committed
377
378
379
380
381
382
383
384
385
386
387
388

    def get_plain_target(self):
        d = dict(
            (k, getattr(self, k)) for k in gf.Target.T.propnames)
        return gf.Target(**d)

    def get_dataset(self):
        return self._ds

    def set_dataset(self, ds):
        self._ds = ds

389
390
391
    def set_result_mode(self, result_mode):
        self._result_mode = result_mode

Sebastian Heimann's avatar
Sebastian Heimann committed
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
    def get_combined_weight(self, apply_balancing_weights):
        w = self.manual_weight
        if apply_balancing_weights:
            w *= self.get_balancing_weight()

        return w

    def get_balancing_weight(self):
        if not self.analysis_result:
            raise NoAnalysisResults('no balancing weights available')

        return self.analysis_result.balancing_weight

    def get_taper_params(self, engine, source):
        store = engine.get_store(self.store_id)
        config = self.misfit_config
        tmin_fit = source.time + store.t(config.tmin, source, self)
        tmax_fit = source.time + store.t(config.tmax, source, self)
        tfade = 1.0/config.fmin
411
412
413
414
415
416
        if config.tfade is None:
            tfade_taper = tfade
        else:
            tfade_taper = config.tfade

        return tmin_fit, tmax_fit, tfade, tfade_taper
Sebastian Heimann's avatar
Sebastian Heimann committed
417

418
    def get_backazimuth_for_waveform(self):
419
        return backazimuth_for_waveform(self.azimuth, self.codes)
420
421

    def get_freqlimits(self):
Sebastian Heimann's avatar
Sebastian Heimann committed
422
423
        config = self.misfit_config

424
425
426
427
        return (
            config.fmin/config.ffactor,
            config.fmin, config.fmax,
            config.fmax*config.ffactor)
Sebastian Heimann's avatar
Sebastian Heimann committed
428

429
430
431
432
    def get_pick_shift(self, engine, source):
        config = self.misfit_config
        tobs = None
        tsyn = None
Sebastian Heimann's avatar
Sebastian Heimann committed
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
        ds = self.get_dataset()

        if config.pick_synthetic_traveltime and config.pick_phasename:
            store = engine.get_store(self.store_id)
            tsyn = source.time + store.t(
                config.pick_synthetic_traveltime, source, self)

            marker = ds.get_pick(
                source.name,
                self.codes[:3],
                config.pick_phasename)

            if marker:
                tobs = marker.tmin

448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
        return tobs, tsyn

    def get_cutout_timespan(self, tmin, tmax, tfade):
        tinc_obs = 1.0 / self.misfit_config.fmin

        tmin_obs = (math.floor(
            (tmin - tfade) / tinc_obs) - 1.0) * tinc_obs
        tmax_obs = (math.ceil(
            (tmax + tfade) / tinc_obs) + 1.0) * tinc_obs

        return tmin_obs, tmax_obs

    def post_process(self, engine, source, tr_syn):

        tr_syn = tr_syn.pyrocko_trace()
        nslc = self.codes

        config = self.misfit_config

        tmin_fit, tmax_fit, tfade, tfade_taper = \
            self.get_taper_params(engine, source)

        ds = self.get_dataset()
Sebastian Heimann's avatar
Sebastian Heimann committed
471

472
473
474
475
476
        tobs, tsyn = self.get_pick_shift(engine, source)
        if None not in (tobs, tsyn):
            tobs_shift = tobs - tsyn
        else:
            tobs_shift = 0.0
Sebastian Heimann's avatar
Sebastian Heimann committed
477
478
479
480
481
482

        tr_syn.extend(
            tmin_fit - tfade * 2.0,
            tmax_fit + tfade * 2.0,
            fillmethod='repeat')

483
484
        freqlimits = self.get_freqlimits()

Sebastian Heimann's avatar
Sebastian Heimann committed
485
486
487
488
489
490
        tr_syn = tr_syn.transfer(
            freqlimits=freqlimits,
            tfade=tfade)

        tr_syn.chop(tmin_fit - 2*tfade, tmax_fit + 2*tfade)

491
492
        tmin_obs, tmax_obs = self.get_cutout_timespan(
            tmin_fit+tobs_shift, tmax_fit+tobs_shift, tfade)
Sebastian Heimann's avatar
Sebastian Heimann committed
493
494
495
496
497
498
499
500
501
502

        try:
            tr_obs = ds.get_waveform(
                nslc,
                tmin=tmin_obs,
                tmax=tmax_obs,
                tfade=tfade,
                freqlimits=freqlimits,
                deltat=tr_syn.deltat,
                cache=True,
503
                backazimuth=self.get_backazimuth_for_waveform())
Sebastian Heimann's avatar
Sebastian Heimann committed
504

Sebastian Heimann's avatar
Sebastian Heimann committed
505
506
507
508
            if tobs_shift != 0.0:
                tr_obs = tr_obs.copy()
                tr_obs.shift(-tobs_shift)

509
510
            mr = misfit(
                tr_obs, tr_syn,
Sebastian Heimann's avatar
Sebastian Heimann committed
511
                taper=trace.CosTaper(
512
                    tmin_fit - tfade_taper,
Sebastian Heimann's avatar
Sebastian Heimann committed
513
514
                    tmin_fit,
                    tmax_fit,
515
                    tmax_fit + tfade_taper),
516
                domain=config.domain,
Sebastian Heimann's avatar
Sebastian Heimann committed
517
                exponent=config.norm_exponent,
518
                flip=self.flip_norm,
519
520
521
                result_mode=self._result_mode,
                tautoshift_max=config.tautoshift_max,
                autoshift_penalty_max=config.autoshift_penalty_max)
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
522

523
524
            mr.tobs_shift = float(tobs_shift)
            mr.tsyn_pick = float_or_none(tsyn)
Sebastian Heimann's avatar
Sebastian Heimann committed
525

526
            return mr
Sebastian Heimann's avatar
Sebastian Heimann committed
527
528
529
530
531
532

        except dataset.NotFound, e:
            logger.debug(str(e))
            raise gf.SeismosizerError('no waveform data, %s' % str(e))


533
def misfit(
534
535
        tr_obs, tr_syn, taper, domain, exponent, tautoshift_max,
        autoshift_penalty_max, flip, result_mode='sparse'):
Sebastian Heimann's avatar
Sebastian Heimann committed
536

537
538
539
540
541
542
543
544
545
    '''
    Calculate misfit between observed and synthetic trace.

    :param tr_obs: observed trace as :py:class:`pyrocko.trace.Trace`
    :param tr_syn: synthetic trace as :py:class:`pyrocko.trace.Trace`
    :param taper: taper applied in timedomain as
        :py:class:`pyrocko.trace.Taper`
    :param domain: how to calculate difference, see :py:class:`DomainChoice`
    :param exponent: exponent of Lx type norms
546
547
548
549
550
551
    :param tautoshift_max: if non-zero, return lowest misfit when traces are
        allowed to shift against each other by up to +/- ``tautoshift_max``
    :param autoshift_penalty_max: if non-zero, a penalty misfit is added for
        for non-zero shift values. The penalty value is
        ``autoshift_penalty_max * normalization_factor * \
tautoshift**2 / tautoshift_max**2``
552
553
    :param flip: ``bool``, if set to ``True``, normalization factor is
        computed against *tr_syn* rather than *tr_obs*
554
555
    :param result_mode: ``'full'``, include traces and spectra or ``'sparse'``,
        include only misfit and normalization factor in result
556
557
558

    :returns: object of type :py:class:`MisfitResult`
    '''
Sebastian Heimann's avatar
Sebastian Heimann committed
559

560
    trace.assert_same_sampling_rate(tr_obs, tr_syn)
561
    deltat = tr_obs.deltat
562
563
564
565
566
    tmin, tmax = taper.time_span()

    tr_proc_obs, trspec_proc_obs = _process(tr_obs, tmin, tmax, taper, domain)
    tr_proc_syn, trspec_proc_syn = _process(tr_syn, tmin, tmax, taper, domain)

567
    tshift = None
568
    ctr = None
569
    deltat = tr_proc_obs.deltat
570
571
572
573
574
    if domain in ('time_domain', 'envelope', 'absolute'):
        a, b = tr_proc_syn.ydata, tr_proc_obs.ydata
        if flip:
            b, a = a, b

575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
        nshift_max = max(0, min(a.size-1,
                                int(math.floor(tautoshift_max / deltat))))

        if nshift_max == 0:
            m, n = trace.Lx_norm(a, b, norm=exponent)
        else:
            mns = []
            for ishift in xrange(-nshift_max, nshift_max+1):
                if ishift < 0:
                    a_cut = a[-ishift:]
                    b_cut = b[:ishift]
                elif ishift == 0:
                    a_cut = a
                    b_cut = b
                elif ishift > 0:
                    a_cut = a[:-ishift]
                    b_cut = b[ishift:]

                mns.append(trace.Lx_norm(a_cut, b_cut, norm=exponent))

            ms, ns = num.array(mns).T

            iarg = num.argmin(ms)
            tshift = (iarg-nshift_max)*deltat

            m, n = ms[iarg], ns[iarg]
            m += autoshift_penalty_max * n * tshift**2 / tautoshift_max**2
602
603
604
605
606
607
608
609
610

    elif domain == 'cc_max_norm':

        ctr = trace.correlate(
            tr_proc_syn,
            tr_proc_obs,
            mode='same',
            normalization='normal')

611
        tshift, cc_max = ctr.max()
612
613
614
615
616
617
618
619
620
621
        m = 0.5 - 0.5 * cc_max
        n = 0.5

    elif domain == 'frequency_domain':
        a, b = trspec_proc_syn.ydata, trspec_proc_obs.ydata
        if flip:
            b, a = a, b

        m, n = trace.Lx_norm(num.abs(a), num.abs(b), norm=exponent)

622
623
624
625
626
627
628
629
630
631
632
    if result_mode == 'full':
        result = MisfitResult(
            misfit_value=m,
            misfit_norm=n,
            processed_obs=tr_proc_obs,
            processed_syn=tr_proc_syn,
            filtered_obs=tr_obs.copy(),
            filtered_syn=tr_syn,
            spectrum_obs=trspec_proc_obs,
            spectrum_syn=trspec_proc_syn,
            taper=taper,
633
            tshift=tshift,
634
            cc=ctr)
635

636
637
638
639
640
641
    elif result_mode == 'sparse':
        result = MisfitResult(
            misfit_value=m,
            misfit_norm=n)
    else:
        assert False
642
643
644
645
646
647
648
649
650

    return result


def _process(tr, tmin, tmax, taper, domain):
    tr_proc = _extend_extract(tr, tmin, tmax)
    tr_proc.taper(taper)

    df = None
651
    trspec_proc = None
652
653
654

    if domain == 'envelope':
        tr_proc = tr_proc.envelope(inplace=False)
Sebastian Heimann's avatar
Sebastian Heimann committed
655
        tr_proc.set_ydata(num.abs(tr_proc.get_ydata()))
656
657
658
659
660
661
662
663
664
665
666
667

    elif domain == 'absolute':
        tr_proc.set_ydata(num.abs(tr_proc.get_ydata()))

    elif domain == 'frequency_domain':
        ndata = tr_proc.ydata.size
        nfft = trace.nextpow2(ndata)
        padded = num.zeros(nfft, dtype=num.float)
        padded[:ndata] = tr_proc.ydata
        spectrum = num.fft.rfft(padded)
        df = 1.0 / (tr_proc.deltat * nfft)

668
669
670
671
672
673
674
675
        trspec_proc = TraceSpectrum(
            network=tr_proc.network,
            station=tr_proc.station,
            location=tr_proc.location,
            channel=tr_proc.channel,
            deltaf=df,
            fmin=0.0,
            ydata=spectrum)
676
677
678
679
680
681
682
683

    return tr_proc, trspec_proc


def _extend_extract(tr, tmin, tmax):
    deltat = tr.deltat
    itmin_frame = int(math.floor(tmin/deltat))
    itmax_frame = int(math.ceil(tmax/deltat))
684
    nframe = itmax_frame - itmin_frame + 1
685
686
687
688
689
690
691
692
693
694
695
696
    n = tr.data_len()
    a = num.empty(nframe, dtype=num.float)
    itmin_tr = int(round(tr.tmin / deltat))
    itmax_tr = itmin_tr + n
    icut1 = min(max(0, itmin_tr - itmin_frame), nframe)
    icut2 = min(max(0, itmax_tr - itmin_frame), nframe)
    icut1_tr = min(max(0, icut1 + itmin_frame - itmin_tr), n)
    icut2_tr = min(max(0, icut2 + itmin_frame - itmin_tr), n)
    a[:icut1] = tr.ydata[0]
    a[icut1:icut2] = tr.ydata[icut1_tr:icut2_tr]
    a[icut2:] = tr.ydata[-1]
    tr = tr.copy(data=False)
697
    tr.tmin = itmin_frame * deltat
698
699
    tr.set_ydata(a)
    return tr
Sebastian Heimann's avatar
Sebastian Heimann committed
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737


def xjoin(basepath, path):
    if path is None and basepath is not None:
        return basepath
    elif op.isabs(path) or basepath is None:
        return path
    else:
        return op.join(basepath, path)


def xrelpath(path, start):
    if op.isabs(path):
        return path
    else:
        return op.relpath(path, start)


class Path(String):
    pass


class HasPaths(Object):
    path_prefix = Path.T(optional=True)

    def __init__(self, *args, **kwargs):
        Object.__init__(self, *args, **kwargs)
        self._basepath = None
        self._parent_path_prefix = None

    def set_basepath(self, basepath, parent_path_prefix=None):
        self._basepath = basepath
        self._parent_path_prefix = parent_path_prefix
        for (prop, val) in self.T.ipropvals(self):
            if isinstance(val, HasPaths):
                val.set_basepath(
                    basepath, self.path_prefix or self._parent_path_prefix)

Sebastian Heimann's avatar
Sebastian Heimann committed
738
739
740
741
    def get_basepath(self):
        assert self._basepath is not None
        return self._basepath

Sebastian Heimann's avatar
Sebastian Heimann committed
742
743
744
745
746
747
    def change_basepath(self, new_basepath, parent_path_prefix=None):
        assert self._basepath is not None

        self._parent_path_prefix = parent_path_prefix
        if self.path_prefix or not self._parent_path_prefix:

Sebastian Heimann's avatar
Sebastian Heimann committed
748
749
            self.path_prefix = op.normpath(xjoin(xrelpath(
                self._basepath, new_basepath), self.path_prefix))
Sebastian Heimann's avatar
Sebastian Heimann committed
750
751
752
753
754
755
756
757

        for val in self.T.ivals(self):
            if isinstance(val, HasPaths):
                val.change_basepath(
                    new_basepath, self.path_prefix or self._parent_path_prefix)

        self._basepath = new_basepath

758
    def expand_path(self, path, extra=None):
Sebastian Heimann's avatar
Sebastian Heimann committed
759
760
        assert self._basepath is not None

761
762
763
764
        if extra is None:
            def extra(path):
                return path

Sebastian Heimann's avatar
Sebastian Heimann committed
765
766
767
768
769
        path_prefix = self.path_prefix or self._parent_path_prefix

        if path is None:
            return None
        elif isinstance(path, basestring):
770
771
            return extra(
                op.normpath(xjoin(self._basepath, xjoin(path_prefix, path))))
Sebastian Heimann's avatar
Sebastian Heimann committed
772
773
        else:
            return [
774
775
                extra(
                    op.normpath(xjoin(self._basepath, xjoin(path_prefix, p))))
Sebastian Heimann's avatar
Sebastian Heimann committed
776
777
778
                for p in path]


Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
779
780
781
782
783
784
785
786
787
class RandomResponse(trace.FrequencyResponse):

    scale = Float.T(default=0.0)

    def set_random_state(self, rstate):
        self._rstate = rstate

    def evaluate(self, freqs):
        n = freqs.size
788
        return 1.0 + (
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
789
            self._rstate.normal(scale=self.scale, size=n) +
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
790
791
792
793
794
795
796
            0.0J * self._rstate.normal(scale=self.scale, size=n))


class SyntheticWaveformNotAvailable(Exception):
    pass


Sebastian Heimann's avatar
Sebastian Heimann committed
797
798
class SyntheticTest(Object):
    inject_solution = Bool.T(default=False)
799
    respect_data_availability = Bool.T(default=False)
800
801
    real_noise_scale = Float.T(default=0.0)
    white_noise_scale = Float.T(default=0.0)
802
    relative_white_noise_scale = Float.T(default=0.0)
803
    random_response_scale = Float.T(default=0.0)
804
805
    real_noise_toffset = Float.T(default=-3600.)
    random_seed = Int.T(optional=True)
Sebastian Heimann's avatar
Sebastian Heimann committed
806
807
808
809
    x = Dict.T(String.T(), Float.T())

    def __init__(self, **kwargs):
        Object.__init__(self, **kwargs)
810
        self._problem = None
Sebastian Heimann's avatar
Sebastian Heimann committed
811
812
        self._synthetics = None

813
814
815
    def set_problem(self, problem):
        self._problem = problem
        self._synthetics = None
Sebastian Heimann's avatar
Sebastian Heimann committed
816
817

    def get_problem(self):
818
819
820
        if self._problem is None:
            raise SyntheticWaveformNotAvailable(
                'SyntheticTest.set_problem() has not been called yet')
Sebastian Heimann's avatar
Sebastian Heimann committed
821

822
        return self._problem
Sebastian Heimann's avatar
Sebastian Heimann committed
823
824
825
826
827
828
829
830

    def get_x(self):
        problem = self.get_problem()
        if self.x:
            x = problem.preconstrain(
                problem.parameter_array(self.x))

        else:
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
831
832
833
834
            x = problem.preconstrain(
                problem.pack(
                    problem.base_source))

Sebastian Heimann's avatar
Sebastian Heimann committed
835
836
837
        return x

    def get_synthetics(self):
838
        problem = self.get_problem()
Sebastian Heimann's avatar
Sebastian Heimann committed
839
840
841
        if self._synthetics is None:
            x = self.get_x()
            results = problem.forward(x)
842
843
844
845
            synthetics = {}
            for iresult, result in enumerate(results):
                tr = result.trace.pyrocko_trace()
                tfade = tr.tmax - tr.tmin
846
                tr_orig = tr.copy()
847
                tr.extend(tr.tmin - tfade, tr.tmax + tfade)
848
849
                rstate = num.random.RandomState(
                    (self.random_seed or 0) + iresult)
850
851
852
853
854
855
856
857

                if self.random_response_scale != 0:
                    tf = RandomResponse(scale=self.random_response_scale)
                    tf.set_random_state(rstate)
                    tr = tr.transfer(
                        tfade=tfade,
                        transfer_function=tf)

858
859
860
                if self.white_noise_scale != 0.0:
                    u = rstate.normal(
                        scale=self.white_noise_scale,
861
862
863
864
                        size=tr.data_len())

                    tr.ydata += u

865
866
867
868
869
870
871
872
                if self.relative_white_noise_scale != 0.0:
                    u = rstate.normal(
                        scale=self.relative_white_noise_scale * num.std(
                            tr_orig.ydata),
                        size=tr.data_len())

                    tr.ydata += u

873
874
875
                synthetics[result.trace.codes] = tr

            self._synthetics = synthetics
Sebastian Heimann's avatar
Sebastian Heimann committed
876
877
878
879
880

        return self._synthetics

    def get_waveform(self, nslc, tmin, tmax, tfade=0., freqlimits=None):
        synthetics = self.get_synthetics()
Sebastian Heimann's avatar
wip    
Sebastian Heimann committed
881

882
883
884
885
886
887
888
889
890
891
892
893
        if nslc not in synthetics:
            return None

        tr = synthetics[nslc]
        tr.extend(tmin - tfade * 2.0, tmax + tfade * 2.0)

        tr = tr.transfer(
            tfade=tfade,
            freqlimits=freqlimits)

        tr.chop(tmin, tmax)
        return tr
Sebastian Heimann's avatar
Sebastian Heimann committed
894
895
896
897


class DatasetConfig(HasPaths):

Sebastian Heimann's avatar
Sebastian Heimann committed
898
899
    stations_path = Path.T(optional=True)
    stations_stationxml_paths = List.T(Path.T())
Sebastian Heimann's avatar
Sebastian Heimann committed
900
901
902
903
904
905
906
907
    events_path = Path.T()
    waveform_paths = List.T(Path.T())
    clippings_path = Path.T(optional=True)
    responses_sacpz_path = Path.T(optional=True)
    responses_stationxml_paths = List.T(Path.T())
    station_corrections_path = Path.T(optional=True)
    apply_correction_factors = Bool.T(default=True)
    apply_correction_delays = Bool.T(default=True)
908
    extend_incomplete = Bool.T(default=False)
Sebastian Heimann's avatar
Sebastian Heimann committed
909
    picks_paths = List.T(Path.T())
910
    blacklist_paths = List.T(Path.T())
911
912
913
914
    blacklist = List.T(
        String.T(),
        help='stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
915
    whitelist_paths = List.T(Path.T())
Sebastian Heimann's avatar
flake8    
Sebastian Heimann committed
916
917
918
    whitelist = List.T(
        String.T(),
        optional=True,
Sebastian Heimann's avatar
Sebastian Heimann committed
919
        help='if not None, list of stations/components to include according '
920
921
922
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: ''when whitelisting on channel level, both, the raw and '
             'the processed channel codes have to be listed.')
Sebastian Heimann's avatar
Sebastian Heimann committed
923
924
925
926
    synthetic_test = SyntheticTest.T(optional=True)

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
927
        self._ds = {}
Sebastian Heimann's avatar
Sebastian Heimann committed
928

Sebastian Heimann's avatar
Sebastian Heimann committed
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
    def get_event_names(self):
        def extra(path):
            return expand_template(path, dict(
                event_name='*'))

        def fp(path):
            return self.expand_path(path, extra=extra)

        events = []
        for fn in glob.glob(fp(self.events_path)):
            events.extend(model.load_events(filename=fn))

        event_names = [ev.name for ev in events]
        return event_names

944
945
    def get_dataset(self, event_name):
        if event_name not in self._ds:
946
947
948
949
950
951
952
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                return self.expand_path(path, extra=extra)

953
            ds = dataset.Dataset(event_name)
Sebastian Heimann's avatar
Sebastian Heimann committed
954
955
956
957
            ds.add_stations(
                pyrocko_stations_filename=fp(self.stations_path),
                stationxml_filenames=fp(self.stations_stationxml_paths))

Sebastian Heimann's avatar
Sebastian Heimann committed
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
            ds.add_events(filename=fp(self.events_path))
            ds.add_waveforms(paths=fp(self.waveform_paths))
            if self.clippings_path:
                ds.add_clippings(markers_filename=fp(self.clippings_path))

            if self.responses_sacpz_path:
                ds.add_responses(
                    sacpz_dirname=fp(self.responses_sacpz_path))

            if self.responses_stationxml_paths:
                ds.add_responses(
                    stationxml_filenames=fp(self.responses_stationxml_paths))

            if self.station_corrections_path:
                ds.add_station_corrections(
                    filename=fp(self.station_corrections_path))

            ds.apply_correction_factors = self.apply_correction_factors
            ds.apply_correction_delays = self.apply_correction_delays
977
            ds.extend_incomplete = self.extend_incomplete
Sebastian Heimann's avatar
Sebastian Heimann committed
978

Sebastian Heimann's avatar
Sebastian Heimann committed
979
980
981
982
            for picks_path in self.picks_paths:
                ds.add_picks(
                    filename=fp(picks_path))

Sebastian Heimann's avatar
Sebastian Heimann committed
983
            ds.add_blacklist(self.blacklist)
984
            ds.add_blacklist(filenames=fp(self.blacklist_paths))
Sebastian Heimann's avatar
Sebastian Heimann committed
985
986
            if self.whitelist:
                ds.add_whitelist(self.whitelist)
987
            if self.whitelist_paths:
988
                ds.add_whitelist(filenames=fp(self.whitelist_paths))
Sebastian Heimann's avatar
Sebastian Heimann committed
989

990
991
            ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
            self._ds[event_name] = ds
Sebastian Heimann's avatar
Sebastian Heimann committed
992

993
        return self._ds[event_name]
Sebastian Heimann's avatar
Sebastian Heimann committed
994
995
996
997
998
999
1000


def weed(origin, targets, limit, neighborhood=3):

    azimuths = num.zeros(len(targets))
    dists = num.zeros(len(targets))
    for i, target in enumerate(targets):