Commit 0ead9fe5 authored by Sebastian Heimann

rewrite/improved optimizer with history container (ProblemData)

parent 092f75f3
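The central change is a shared history container: models and misfits are appended once, and registered listeners are notified of each addition (the Chains class further below registers itself via history.add_listener(self) and receives append(iiter, model, misfits) callbacks). A minimal sketch of that listener protocol, using a hypothetical simplification of the ModelHistory that the diff imports from grond.problems.base:

class ModelHistory(object):
    # Hypothetical stand-in for grond's history container; it only
    # demonstrates the listener protocol that Chains relies on below.

    def __init__(self):
        self.models = []
        self.misfits = []
        self.listeners = []

    def add_listener(self, listener):
        # Listeners are notified whenever a model/misfit pair is appended.
        self.listeners.append(listener)

    def append(self, model, misfits):
        iiter = len(self.models)
        self.models.append(model)
        self.misfits.append(misfits)
        for listener in self.listeners:
            listener.append(iiter, model, misfits)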
@@ -15,7 +15,7 @@ setup(
    version='0.1',
    author='Sebastian Heimann',
    author_email='sebastian.heimann@gfz-potsdam.de',
    packages=['grond', 'grond.baraddur', 'grond.problems', 'grond.solvers',
    packages=['grond', 'grond.baraddur', 'grond.problems', 'grond.optimizers',
              'grond.analysers', 'grond.listeners', 'grond.targets'],
    python_requires='>=3.5',
    scripts=['apps/grond'],
......
@@ -5,6 +5,6 @@ from .targets import * # noqa
from .meta import * # noqa
from .synthetic_tests import * # noqa
from .solvers import * # noqa
from .optimizers import * # noqa
__version__ = '0.2'
from .base import *
from .base import * # noqa
@@ -57,7 +57,7 @@ class Analyser(object):
                isok_mask = num.logical_not(isbad_mask)
            else:
                isok_mask = None

            _, ms = wproblem.evaluate(x, mask=isok_mask)
            ms = wproblem.evaluate(x, mask=isok_mask)[:, 1]
            mss[iiter, :] = ms

            isbad_mask = num.isnan(ms)
@@ -80,10 +80,10 @@ class Analyser(object):
class AnalyserConfig(Object):
    niter = Int.T(default=1000)
    niterations = Int.T(default=1000)

    def get_analyser(self):
        return Analyser(niter=self.niter)
        return Analyser(niter=self.niterations)


__all__ = '''
......
@@ -14,13 +14,15 @@ from pyrocko import orthodrome as od, gf, trace, guts, util, weeding
from pyrocko import parimap, model, marker as pmarker

from .dataset import DatasetConfig, NotFound
from .problems.base import ProblemConfig, Problem
from .solvers.base import SolverConfig
from .problems.base import ProblemConfig, Problem, \
    load_problem_info_and_data, load_problem_data
from .optimizers.base import OptimizerConfig, BadProblem
from .targets.base import TargetGroup
from .analysers.base import AnalyserConfig
from .listeners import TerminalListener
from .meta import Path, HasPaths, expand_template, xjoin, GrondError, \
    Notifier, Forbidden
from .meta import Path, HasPaths, expand_template, GrondError, Notifier, \
    Forbidden

logger = logging.getLogger('grond.core')

guts_prefix = 'grond'
@@ -57,10 +59,6 @@ def weed(origin, targets, limit, neighborhood=3):
    return targets_weeded, meandists_kept, deleted


class BadProblem(Exception):
    pass


class EngineConfig(HasPaths):
    gf_stores_from_pyrocko_config = Bool.T(default=True)
    gf_store_superdirs = List.T(Path.T())
@@ -87,7 +85,7 @@ class Config(HasPaths):
    target_groups = List.T(TargetGroup.T())
    problem_config = ProblemConfig.T()
    analyser_config = AnalyserConfig.T(default=AnalyserConfig.D())
    solver_config = SolverConfig.T(default=SolverConfig.D())
    optimizer_config = OptimizerConfig.T()
    engine_config = EngineConfig.T(default=EngineConfig.D())

    def __init__(self, *args, **kwargs):
@@ -128,75 +126,6 @@ def sarr(a):
    return ' '.join('%15g' % x for x in a)


def load_config(dirname):
    fn = op.join(dirname, 'config.yaml')
    return guts.load(filename=fn)


def load_problem_info_and_data(dirname, subset=None):
    problem = load_problem_info(dirname)
    xs, misfits = load_problem_data(xjoin(dirname, subset), problem)
    return problem, xs, misfits


def load_problem_info(dirname):
    fn = op.join(dirname, 'problem.yaml')
    return guts.load(filename=fn)


def load_optimizer_history(dirname, problem):
    fn = op.join(dirname, 'accepted')
    with open(fn, 'r') as f:
        nmodels = os.fstat(f.fileno()).st_size // (problem.nbootstrap+1)
        data1 = num.fromfile(
            f,
            dtype='<i1',
            count=nmodels*(problem.nbootstrap+1)).astype(num.bool)

    accepted = data1.reshape((nmodels, problem.nbootstrap+1))

    fn = op.join(dirname, 'choices')
    with open(fn, 'r') as f:
        data2 = num.fromfile(
            f,
            dtype='<i8',
            count=nmodels*2).astype(num.int64)

    ibootstrap_choices, imodel_choices = data2.reshape((nmodels, 2)).T

    return ibootstrap_choices, imodel_choices, accepted


def load_problem_data(dirname, problem, skip_models=0):
    fn = op.join(dirname, 'models')
    with open(fn, 'r') as f:
        nmodels = os.fstat(f.fileno()).st_size // (problem.nparameters * 8)
        nmodels -= skip_models
        f.seek(skip_models * problem.nparameters * 8)
        data1 = num.fromfile(
            f, dtype='<f8',
            count=nmodels * problem.nparameters)\
            .astype(num.float)

    nmodels = data1.size//problem.nparameters - skip_models
    xs = data1.reshape((nmodels, problem.nparameters))

    fn = op.join(dirname, 'misfits')
    with open(fn, 'r') as f:
        f.seek(skip_models * problem.ntargets * 2 * 8)
        data2 = num.fromfile(
            f, dtype='<f8', count=nmodels*problem.ntargets*2).astype(num.float)

    data2 = data2.reshape((nmodels, problem.ntargets*2))

    combi = num.empty_like(data2)
    combi[:, 0::2] = data2[:, :problem.ntargets]
    combi[:, 1::2] = data2[:, problem.ntargets:]

    misfits = combi.reshape((nmodels, problem.ntargets, 2))
    return xs, misfits


def get_mean_x(xs):
    return num.mean(xs, axis=0)
@@ -264,26 +193,6 @@ def write_config(config, path):
    config.change_basepath(basepath)


def bootstrap_outliers(problem, misfits, std_factor=1.0):
    '''
    Identify bootstrap configurations performing badly in the global
    configuration.
    '''

    raise Exception('this function is broken')

    gms = problem.global_misfits(misfits)
    ibests = []
    for ibootstrap in range(problem.nbootstrap):
        bms = problem.bootstrap_misfits(misfits, ibootstrap)
        ibests.append(num.argmin(bms))

    m = num.median(gms[ibests])
    s = num.std(gms[ibests])

    return num.where(gms > m+s)[0]


def forward(rundir_or_config_path, event_names):
    if not event_names:
@@ -326,7 +235,7 @@ def forward(rundir_or_config_path, event_names):
    events = []
    for (problem, x) in payload:
        ds.empty_cache()
        ms, ns, results = problem.evaluate(x, result_mode='full')
        _, results = problem.evaluate(x, result_mode='full')

        event = problem.get_source(x).pyrocko_event()
        events.append(event)
@@ -352,7 +261,9 @@ def harvest(rundir, problem=None, nbest=10, force=False, weed=0):
    else:
        xs, misfits = load_problem_data(rundir, problem)

    config = load_config(rundir)
    optimizer_fn = op.join(rundir, 'optimizer.yaml')
    optimizer = guts.load(filename=optimizer_fn)
    nbootstrap = optimizer.nbootstrap

    dumpdir = op.join(rundir, 'harvest')
    if op.exists(dumpdir):
@@ -371,9 +282,8 @@ def harvest(rundir, problem=None, nbest=10, force=False, weed=0):
    ibests_list.append(isort[:nbest])

    if weed != 3:
        for ibootstrap in range(config.solver_config.nbootstrap):
            bms = problem.bootstrap_misfits(
                misfits, config.solver_config.nbootstrap, ibootstrap)
        for ibootstrap in range(nbootstrap):
            bms = problem.bootstrap_misfits(misfits, nbootstrap, ibootstrap)
            isort = num.argsort(bms)
            ibests_list.append(isort[:nbest])
            ibests.append(isort[0])
@@ -397,10 +307,7 @@ def harvest(rundir, problem=None, nbest=10, force=False, weed=0):
        ibests = ibests[gms[ibests] < mean_gm_best]

    for i in ibests:
        x = xs[i]
        ms = misfits[i, :, 0]
        ns = misfits[i, :, 1]
        problem.dump_problem_data(dumpdir, x, ms, ns)
        problem.dump_problem_data(dumpdir, xs[i], misfits[i, :, :])


def get_event_names(config):
@@ -455,7 +362,7 @@ def check(
    if n_random_synthetics == 0:
        x = problem.pack(problem.base_source)
        sources.append(problem.base_source)
        ms, ns, results = problem.evaluate(x, result_mode='full')
        _, results = problem.evaluate(x, result_mode='full')
        results_list.append(results)

    else:
@@ -470,7 +377,7 @@ def check(
                    pass

            sources.append(problem.get_source(x))
            ms, ns, results = problem.evaluate(x, result_mode='full')
            _, results = problem.evaluate(x, result_mode='full')
            results_list.append(results)

    if show_waveforms:
@@ -691,9 +598,6 @@ def process_event(ievent, g_data_id):
    config, force, status, nparallel, event_names = g_state[g_data_id]

    if nparallel > 1:
        status = ()

    event_name = event_names[ievent]

    ds = config.get_dataset(event_name)
@@ -756,14 +660,19 @@ def process_event(ievent, g_data_id):
    #     movie_filename='grond_opt_time_magnitude.mp4')

    try:
        solver = config.solver_config.get_solver()
        solver.solve(
        optimizer = config.optimizer_config.get_optimizer()

        if xs_inject is not None:
            from .optimizers import highscore
            if not isinstance(optimizer, highscore.HighScoreOptimizer):
                raise GrondError(
                    'optimizer does not support injections')

            optimizer.sampler_phases[0:0] = [
                highscore.InjectionSamplerPhase(xs_inject=xs_inject)]

        optimizer.optimize(
            problem,
            rundir=rundir,
            status=status,
            # plot=splot,
            xs_inject=xs_inject,
            notifier=notifier)
            rundir=rundir)

        harvest(rundir, problem, force=True)
@@ -947,9 +856,6 @@ def export(what, rundirs, type=None, pnames=None, filename=None):
__all__ = '''
    EngineConfig
    Config
    load_problem_info
    load_problem_info_and_data
    load_optimizer_history
    read_config
    write_config
    forward
......
from __future__ import print_function
import glob
import copy
import logging
......
@@ -6,11 +6,22 @@ from collections import OrderedDict
from pyrocko.guts import Object

from ..meta import GrondError

guts_prefix = 'grond'

logger = logging.getLogger('grond.solver')


class BadProblem(GrondError):
    pass


class SimpleTimedelta(timedelta):
    def __str__(self):
        return timedelta.__str__(self).split('.')[0]


class RingBuffer(num.ndarray):
    def __new__(cls, *args, **kwargs):
        cls = num.ndarray.__new__(cls, *args, **kwargs)
@@ -70,23 +81,18 @@ class SolverState(object):
        return len(self.parameter_names)


class Solver(object):
    state = SolverState()
class Optimizer(Object):

    def solve(
            self, problem, rundir=None, status=(), plot=None, xs_inject=None,
            notifier=None):
    def optimize(self, problem):
        raise NotImplementedError()


class SolverConfig(Object):
    def get_solver(self):
        return Solver()
class OptimizerConfig(Object):
    pass
__all__ = '''
    Solver
    SolverState
    SolverConfig
    BadProblem
    Optimizer
    OptimizerConfig
'''.split()
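For orientation, a toy subclass sketching how a concrete optimizer is expected to plug into the Optimizer/OptimizerConfig split above. The names RandomSearchOptimizer and RandomSearchOptimizerConfig are hypothetical; in the commit the real implementation is the high-score optimizer in the next file. Only members visible in this diff are assumed: Optimizer, OptimizerConfig, problem.get_parameter_bounds(), problem.random_uniform() and problem.evaluate().

from pyrocko.guts import Int

class RandomSearchOptimizer(Optimizer):
    # Hypothetical toy optimizer: draws uniform random models and
    # evaluates them; it only illustrates the base API, not a real method.
    niterations = Int.T(default=1000)

    def optimize(self, problem, rundir=None):
        xbounds = problem.get_parameter_bounds()
        for _ in range(self.niterations):
            x = problem.random_uniform(xbounds)
            problem.evaluate(x)


class RandomSearchOptimizerConfig(OptimizerConfig):
    niterations = Int.T(default=1000)

    def get_optimizer(self):
        return RandomSearchOptimizer(niterations=self.niterations)

A config object of this shape is what Config.optimizer_config holds and what process_event() consumes via config.optimizer_config.get_optimizer().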
from __future__ import print_function

import math
import os.path as op
import os
import logging

import numpy as num

from pyrocko.guts import StringChoice, Int, Float, Object, List
from pyrocko.guts_array import Array

from ..meta import GrondError, Forbidden
from .base import Optimizer, OptimizerConfig, BadProblem
from ..problems.base import ModelHistory

guts_prefix = 'grond'

logger = logging.getLogger('grond.optimizers.highscore')
def excentricity_compensated_probabilities(xs, sbx, factor):
    # Favor models in sparsely sampled regions: weight each model by the
    # inverse of the number of neighbours within a scaled unit distance,
    # then normalize into a probability distribution.
    inonflat = num.where(sbx != 0.0)[0]
    scale = num.zeros_like(sbx)
    scale[inonflat] = 1.0 / (sbx[inonflat] * (factor if factor != 0. else 1.0))
    distances_sqr_all = num.sum(
        ((xs[num.newaxis, :, :] - xs[:, num.newaxis, :]) *
         scale[num.newaxis, num.newaxis, :])**2, axis=2)
    probabilities = 1.0 / num.sum(distances_sqr_all < 1.0, axis=1)
    probabilities /= num.sum(probabilities)
    return probabilities
def excentricity_compensated_choice(xs, sbx, factor):
    probabilities = excentricity_compensated_probabilities(
        xs, sbx, factor)
    r = num.random.random()
    ichoice = num.searchsorted(num.cumsum(probabilities), r)
    ichoice = min(ichoice, xs.shape[0]-1)
    return ichoice
def local_std(xs):
    # Robust per-parameter spread estimate: median spacing of the sorted
    # samples, scaled up to be comparable to a standard deviation.
    ssbx = num.sort(xs, axis=0)
    dssbx = num.diff(ssbx, axis=0)
    mdssbx = num.median(dssbx, axis=0)
    return mdssbx * dssbx.shape[0] / 2.6
class SamplerDistributionChoice(StringChoice):
    choices = ['multivariate_normal', 'normal']


class StandardDeviationEstimatorChoice(StringChoice):
    choices = [
        'median_density_single_chain',
        'standard_deviation_all_chains',
        'standard_deviation_single_chain']


class SamplerStartingPointChoice(StringChoice):
    choices = ['excentricity_compensated', 'random', 'mean']
class SamplerPhase(Object):
    niterations = Int.T()
    ntries_preconstrain_limit = Int.T(default=1000)

    def get_sample(self, problem, iiter, chains):
        assert 0 <= iiter < self.niterations

        ntries_preconstrain = 0
        for ntries_preconstrain in range(self.ntries_preconstrain_limit):
            try:
                return problem.preconstrain(
                    self.get_raw_sample(problem, iiter, chains))
            except Forbidden:
                pass

        raise GrondError(
            'could not find any suitable candidate sample within %i tries' % (
                self.ntries_preconstrain_limit))
class InjectionSamplerPhase(SamplerPhase):
    xs_inject = Array.T(dtype=num.float, shape=(None, None))

    def get_raw_sample(self, problem, iiter, chains):
        return self.xs_inject[iiter, :]


class UniformSamplerPhase(SamplerPhase):

    def get_raw_sample(self, problem, iiter, chains):
        xbounds = problem.get_parameter_bounds()
        return problem.random_uniform(xbounds)
class DirectedSamplerPhase(SamplerPhase):
    scatter_scale = Float.T(optional=True)
    scatter_scale_begin = Float.T(optional=True)
    scatter_scale_end = Float.T(optional=True)
    starting_point = SamplerStartingPointChoice.T(
        default='excentricity_compensated')

    sampler_distribution = SamplerDistributionChoice.T(
        default='normal')

    standard_deviation_estimator = StandardDeviationEstimatorChoice.T(
        default='median_density_single_chain')

    ntries_sample_limit = Int.T(default=1000)

    def get_scatter_scale_factor(self, iiter):
        # Either a fixed scatter_scale is given, or the scale decays
        # geometrically from scatter_scale_begin to scatter_scale_end
        # over the phase's iterations.
        s = self.scatter_scale
        sa = self.scatter_scale_begin
        sb = self.scatter_scale_end

        assert s is None or (sa is None and sb is None)

        if sa != sb:
            tb = float(self.niterations-1)
            tau = tb/(math.log(sa) - math.log(sb))
            t0 = math.log(sa) * tau
            t = float(iiter)
            return num.exp(-(t-t0) / tau)
        else:
            return s or 1.0
    def get_raw_sample(self, problem, iiter, chains):
        factor = self.get_scatter_scale_factor(iiter)
        npar = problem.nparameters
        pnames = problem.parameter_names
        xbounds = problem.get_parameter_bounds()

        ichain_choice = num.argmin(chains.accept_sum)

        if self.starting_point == 'excentricity_compensated':
            models = chains.models(ichain_choice)
            ilink_choice = excentricity_compensated_choice(
                models,
                chains.standard_deviation_models(
                    ichain_choice, self.standard_deviation_estimator),
                2.)

            xchoice = chains.model(ichain_choice, ilink_choice)

        elif self.starting_point == 'random':
            ilink_choice = num.random.randint(0, chains.nlinks)
            xchoice = chains.model(ichain_choice, ilink_choice)

        elif self.starting_point == 'mean':
            xchoice = chains.mean_model(ichain_choice)

        else:
            assert False, 'invalid starting_point choice: %s' % (
                self.starting_point)

        ntries_sample = 0
        if self.sampler_distribution == 'normal':
            x = num.zeros(npar, dtype=num.float)
            sx = chains.standard_deviation_models(
                ichain_choice, self.standard_deviation_estimator)

            for ipar in range(npar):
                ntries = 0
                while True:
                    if sx[ipar] > 0.:
                        v = num.random.normal(
                            xchoice[ipar],
                            factor*sx[ipar])
                    else:
                        v = xchoice[ipar]

                    if xbounds[ipar, 0] <= v and \
                            v <= xbounds[ipar, 1]:
                        break

                    if ntries > self.ntries_sample_limit:
                        raise GrondError(
                            'failed to produce a suitable '
                            'candidate sample from normal '
                            'distribution')

                    ntries += 1

                x[ipar] = v

        elif self.sampler_distribution == 'multivariate_normal':
            ok_mask_sum = num.zeros(npar, dtype=num.int)
            while True:
                ntries_sample += 1
                xcandi = num.random.multivariate_normal(
                    xchoice, factor**2 * chains.cov(ichain_choice))

                ok_mask = num.logical_and(
                    xbounds[:, 0] <= xcandi, xcandi <= xbounds[:, 1])

                if num.all(ok_mask):
                    break

                ok_mask_sum += ok_mask

                if ntries_sample > self.ntries_sample_limit:
                    raise GrondError(
                        'failed to produce a suitable candidate '
                        'sample from multivariate normal '
                        'distribution, (%s)' %
                        ', '.join('%s:%i' % xx for xx in
                                  zip(pnames, ok_mask_sum)))

            x = xcandi

        return x
class Chains(object):
    def __init__(self, problem, history, nchains, nlinks_cap):
        self.problem = problem
        self.history = history
        self.nchains = nchains
        self.nlinks_cap = nlinks_cap
        self.chains_m = num.zeros(
            (nchains, nlinks_cap), num.float)
        self.chains_i = num.zeros(
            (nchains, nlinks_cap), num.int)
        self.nlinks = 0
        self.accept_sum = num.zeros(nchains, dtype=num.int)

        history.add_listener(self)

    def append(self, iiter, model, misfits):
        gm = self.problem.global_misfit(misfits)
        bms = self.problem.bootstrap_misfit(misfits, self.nchains - 1)

        gbms = num.concatenate(([gm], bms))

        self.chains_m[:, self.nlinks] = gbms
        self.chains_i[:, self.nlinks] = iiter
        self.nlinks += 1

        chains_m = self.chains_m
        chains_i = self.chains_i

        for ichain in range(chains_m.shape[0]):
            isort = num.argsort(chains_m[ichain, :self.nlinks])
            chains_m[ichain, :self.nlinks] = chains_m[ichain, isort]
            chains_i[ichain, :self.nlinks] = chains_i[ichain, isort]

        if self.nlinks == self.nlinks_cap: