Commit af8f227c authored by Maximilian Schanner

Initial code push.

parent 55849603
from pathlib import Path
path = Path(__file__).parent # the directory containing this parameter file
# a prefix for this parameter file; it serves as an identifier for the output
# as well as a path to the output locations
prefix = '../out/default'
prefix = Path(path, prefix)
# specify the data file
data_filename = './archeo_1000-2000AD.csv'
data_filename = Path(path, data_filename) # Relative to the parfile
# read the data
# set up binning
# optional: specify initial and final year for the binning
# initial = 1000
# final = 2000
binlength = 100 # Specify (equal) bin width
# specify other parameters for the algorithm
r_ref = 3200. # reference radius
# the exploration space
lam_min = 100 # minimal legendre variance
lam_max = 1.e5 # maximal legendre variance
eps_min = 0.1 # minimal error scaling
eps_max = 3. # maximal error scaling
rho_min = 1000 # minimal residual
rho_max = 6500 # maximal residual
# number of gridpoints per dimension...
n_ex = 8 # ...in the exploration step
n_fine = 4 # ...in the integration step
# parameters for prediction
n_desi = 10 # number of field design points
l_max = 15 # maximal SH degree
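# For orientation, a sketch of the grid these bounds describe (the actual
# construction happens in src.exploration, and whether an axis is sampled
# linearly or logarithmically is decided there, so treat this as illustrative
# only):
#   import numpy as np
#   lams = np.linspace(lam_min, lam_max, n_ex)  # legendre variance axis
#   epss = np.linspace(eps_min, eps_max, n_ex)  # error scaling axis
#   rhos = np.linspace(rho_min, rho_max, n_ex)  # residual axis
# giving n_ex**3 = 512 candidate parameter triples for the exploration step.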
import sys
import numpy as np
from src.utils import load, newer
from src.exploration import explore, ExplorationResult
from src.integration import get_integ_bounds, integrate, IntegrationResult
from src.evaluation import coeffs
suffix_coeffs = '_coeffs.txt'
class OutOfDateError(Exception):
pass
pars = load(sys.argv)
par_file = sys.argv[1]
for bin_name, data in pars.data:
# =========================================================================
# Step 1: Exploration
# =========================================================================
# check for the exploration output
try:
expl_fname = (f'{pars.bin_fname(bin_name)}'
f'{ExplorationResult.suffix}')
# check whether the output is up to date
if newer(par_file, expl_fname):
raise OutOfDateError
# if there is no output or it is out of date, run explore and write the
# results
except (FileNotFoundError, OutOfDateError):
print(f"\nExploring {bin_name}, this may take a while...")
rslt = explore(pars.bin_fname(bin_name), data, pars.n_ex, pars.bounds,
r_ref=pars.r_ref)
rslt.write()
# =========================================================================
# Step 2: Integration
# =========================================================================
# check for integration output
try:
# this is the full coefficient output for evaluation
coeffs_fname = (f'{pars.bin_fname(bin_name)}'
f'{IntegrationResult.suffix_coeffs}')
if newer(expl_fname, coeffs_fname) or newer(par_file, coeffs_fname):
raise OutOfDateError
fields_fname = (f'{pars.bin_fname(bin_name)}'
f'{IntegrationResult.suffix_fields}')
if newer(expl_fname, fields_fname) or newer(par_file, fields_fname):
raise OutOfDateError
cmb_fname = (f'{pars.bin_fname(bin_name)}'
f'{IntegrationResult.suffix_cmb}')
if newer(expl_fname, cmb_fname) or newer(par_file, cmb_fname):
raise OutOfDateError
# if there is no output or it is out of date, run integrate and write the
# results
except (FileNotFoundError, OutOfDateError):
print(f"Integrating {bin_name}, this may take a while...")
fh = np.load(f'{pars.bin_fname(bin_name)}'
f'{ExplorationResult.suffix}')
bounds = get_integ_bounds(pars.bounds, fh['posterior'])
rslt = integrate(pars.bin_fname(bin_name), data, pars.n_fine,
bounds, r_ref=pars.r_ref, N_desi=pars.n_desi,
l_max=pars.l_max, scale=fh['scale'])
rslt.write()
fh.close()
# finally check for the coefficient.txt output
try:
# this is the coefficient txt output
coeffs_fname = (f'{pars.bin_fname(bin_name)}'
f'{suffix_coeffs}')
if newer(par_file, coeffs_fname):
raise OutOfDateError
except (FileNotFoundError, OutOfDateError):
fh = np.load(f'{pars.bin_fname(bin_name)}'
f'{IntegrationResult.suffix_coeffs}')
out_coeffs = \
coeffs(fh['posterior'], fh['mu_coeffs'], fh['cov_coeffs'],
pars.r_ref)
header = (f'# Gauss coefficients data\n'
f'# The column dg represents one standard deviation. The '
f'column g_16 (g_84) represents the 16th (84th) percentile.'
f'\n'
f'l m g dg g_16 g_84')
np.savetxt(f'{pars.bin_fname(bin_name)}{suffix_coeffs}',
np.array(out_coeffs).T, header=header, comments='',
fmt=['%i', '%i', '%.6e', '%.6e', '%.6e', '%.6e'])
fh.close()
from . import utils
from . import inversion
from . import integration
from . import exploration
from . import evaluation
__all__ = ['evaluation', 'exploration', 'integration', 'inversion', 'utils']
import sys
if __name__ == '__main__':
# make relative imports work
sys.path.append('../')
import numpy as np
from scipy.stats import multivariate_normal as mvn, gaussian_kde
from scipy.special import erf
from pyfield import i2lm_l, i2lm_m, equi_sph, REARTH
from src.utils import scaling
def power_spectrum(ls, mu_gs, sd_gs=0):
""" For given ls, mean and standard deviation calculate the power spectrum
"""
lmax = np.max(ls)
ps = np.full((lmax, ) + mu_gs.shape[1:], np.nan)
gs = (mu_gs**2 + sd_gs**2)
for l in range(1, lmax + 1):
mask = np.equal(ls, l)
ps[l-1, ...] = (l+1)*gs[mask].sum(axis=0)
return ps
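# Quick sanity check (hypothetical values, not from the original): for a pure
# dipole with ls = [1, 1, 1] and mu_gs = np.array([-30., 2., 1.]), the
# Lowes-Mauersberger spectrum has the single entry
#   R_1 = (1 + 1) * ((-30.)**2 + 2.**2 + 1.**2) = 1810.0
# so power_spectrum([1, 1, 1], np.array([-30., 2., 1.])) returns
# array([1810.]).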
def xyz2rpt(x, y, z):
""" Transform cartesian coordinates x,y,z to spherical coordinates. """
r = np.sqrt(x**2 + y**2 + z**2)
return r, np.arctan2(y, x), np.arcsin(z / r)
def intensity(posterior, mu_dips, cov_dips, r_ref, r_at=REARTH,
n_par_samps=1000, n_g_samps=100, n_points=1001,
ret_samps=False):
""" Calculate the pdf of the dipole intensity, by sampling and
kde-smoothing.
Parameters
----------
posterior : array-like of shape (N, N, N)
The posterior from the integration
mu_dips : array-like of shape (N**3, 3)
The dipole coefficients from the integration
cov_dips : array-like of shape (N**3, 3, 3)
The corresponding covariance matrices
r_ref : float
The reference radius for the input dipole coefficients
n_par_samps : int, default 1000
The number of parameter sets to be sampled
n_g_samps : int, default 100
The number of samples per set of parameters
ret_samps : bool, default False
Whether to also return the raw samples
n_points : int, default 1001
Number of evaluation points for the smoothed pdf
r_at : float, default pyfield.REARTH
The reference radius for the output intensity
Returns
-------
points : array-like of length n_points
The array on which the pdf is evaluated
pdf : array-like of length n_points
The dipole intensity pdf, evaluated on 'points'
ens_inte : array-like, optional
The raw intensity samples; returned only if ret_samps is True
"""
mu_dips, cov_dips = rescale_coeff_results(r_ref, r_at, mu_dips,
cov_dips)
gm_weights = posterior / posterior.sum()
ens_dip = sample_GM(gm_weights.flatten(), mu_dips, cov_dips,
n_par_samps=n_par_samps, n_g_samps=n_g_samps)
# transform to spherical coordinates, keep only intensity
# the correspondence is g_1_0=z, g_1_1=x, g_1_-1=y
ens_inte, _, _ = xyz2rpt(ens_dip[1], ens_dip[2], ens_dip[0])
smooth = gaussian_kde(ens_inte)
points = np.linspace(ens_inte.min(), ens_inte.max(), n_points)
pdf = smooth(points)
if ret_samps:
return points, pdf, ens_inte
else:
return points, pdf
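# Illustrative call (shapes assumed from the integration output, compare the
# __main__ block below): with posterior of shape (N, N, N) and the dipole
# blocks mu_dips of shape (N**3, 3) and cov_dips of shape (N**3, 3, 3),
#   points, pdf = intensity(posterior, mu_dips, cov_dips, r_ref=3200.)
# evaluates the KDE-smoothed intensity pdf at Earth's surface.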
def location(posterior, mu_dips, cov_dips, n_points=10000,
bounds=[[0., np.deg2rad(30)], [0, 2*np.pi]]):
""" From the posterior and corresponding dipole coefficients, calculate
the pdf of the dipole location
Parameters
----------
posterior : array-like of shape (N, N, N)
The posterior from the integration
mu_dips : array-like of shape (N**3, 3)
The dipole coefficients from the integration
cov_dips : array-like of shape (N**3, 3, 3)
The corresponding covariance matrices
n_points : int, default 10000
The (approximate) number of gridpoints to evaluate the location on
bounds : 2x2 array-like, default [[0., np.deg2rad(30)], [0., 2*np.pi]]
The colatitude and longitude intervals over which the density is
evaluated, in rad. This is useful, since in most cases the dipole will
be close to the geographic north pole; the default interval covers only
the region within 30 deg of the pole.
Returns
-------
lat : array-like
The latitudes of the gridpoints
lon : array-like
The longitudes of the gridpoints
dens : array-like
The dipole location pdf, evaluated at the gridpoints
"""
# set up grid to evaluate on
theta, phi = zip(*equi_sph(n_points, bounds=bounds))
phi = np.asarray(phi)
theta = np.pi/2. - np.asarray(theta)
# allocate array for the density
dens = np.zeros_like(theta)
n = len(posterior)
# calculate the density
for it in range(n**3):
prc = np.linalg.inv(cov_dips[it])
zv = np.array([np.sin(theta),
np.cos(theta) * np.cos(phi),
np.cos(theta) * np.sin(phi)])
zalpha = np.einsum('i..., ij, j...->...', zv, prc, zv)
zr0 = np.einsum('i, ij, j...->...', -mu_dips[it], prc, zv)
zr0 /= zalpha
zk = np.exp((zr0**2 * zalpha - np.einsum('i,ij,j->',
mu_dips[it],
prc,
mu_dips[it])) / 2.)
za = zr0*np.sqrt(zalpha)
dummy = np.sqrt(np.pi/2.) * (za**2 + 1) * (erf(za / np.sqrt(2)) + 1) \
+ za * np.exp(-za**2 / 2)
dens += posterior[np.unravel_index(it, posterior.shape)] \
/ np.sqrt((2*np.pi)**3 * np.linalg.det(cov_dips[it])) \
* np.cos(theta) * zk / np.sqrt(zalpha**3) * dummy
# at this point theta holds latitudes and phi longitudes
lat = np.rad2deg(theta)
lon = np.rad2deg(phi)
return lat, lon, dens
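# Annotation (my reading of the loop above, not from the original): for each
# mixture component the trivariate normal N(-mu, C) is integrated analytically
# in the radial direction,
#   int_0^inf r**2 N(r * z(theta, phi); -mu, C) dr,
# which after completing the square yields the erf/exp closed form in `dummy`;
# the cos(theta) factor is the area element in latitude, and the minus sign on
# mu makes the density refer to the direction opposite the dipole moment
# vector, i.e. the north dipole pole. Summing the weighted components gives
# the angular density of the dipole location.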
def sample_GM(weights, mus, covs,
n_par_samps=1000, n_g_samps=100):
""" Sample from a Gaussian mixture distribution
Parameters
----------
weights : array-like
The weights of the mixture, the sum should be one
mus : array-like
The means of the mixture components
covs : array-like
The covariance-matrices of the mixture components
n_par_samps: int, default 1000
The number of mixture-component samples
n_g_samps : int, default 100
The number of samples per mixture component
Returns
-------
ens : numpy.ndarray
n_par_samps*n_g_samps samples from the Gaussian mixture
"""
if not np.isclose(sum(weights), 1.):
raise ValueError("Sum of the weights has to be one!")
if len(np.asarray(mus).shape) == 1:
dim = len(mus)
else:
dim = len(mus[0])
n = len(weights)
# sample indices, corresponding to parameter pairs
# weights are given by the posterior
par_samps = np.random.choice(n, size=n_par_samps, replace=True,
p=weights)
# allocate ensemble array
ens = np.empty((dim, n_g_samps*n_par_samps))
for it in range(n_par_samps):
# get the index
ind_it = par_samps[it]
# sample from the corresponding Gaussian
samp = mvn.rvs(mean=mus[ind_it],
cov=covs[ind_it],
size=n_g_samps)
# and write the samples to the ensemble
ens[:, it*n_g_samps:(it+1)*n_g_samps] = samp.T
return ens
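# Minimal example (hypothetical numbers): an equal-weight mixture of two
# bivariate Gaussians,
#   ens = sample_GM([0.5, 0.5], mus=[[0., 0.], [5., 5.]],
#                   covs=[np.eye(2), np.eye(2)],
#                   n_par_samps=10, n_g_samps=100)
# returns an ensemble of shape (2, 1000).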
def rescale_coeff_results(r_from, r_to, mu_coeffs, cov_coeffs):
""" rescale an array of coefficient means and covariances from r_from
to r_to
Parameters
----------
r_from : float
The reference radius for the input coefficients
r_to : float
The reference radius for the output coefficients
mu_coeffs : array-like
The coefficient array
cov_coeffs : array-like
The corresponding covariances
Returns
-------
mu_coeffs_resc : array-like
The rescaled coefficients
cov_coeffs_resc : array-like
The rescaled covariances
"""
scale = scaling(r_from, r_to, i2lm_l(len(mu_coeffs[0])-1))
mu_coeffs_resc = scale[None, :]*mu_coeffs
cov_coeffs_resc = scale[None, :]*cov_coeffs*scale[:, None]
return mu_coeffs_resc, cov_coeffs_resc
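# Note (assumption, since `scaling` is defined in src.utils and not shown
# here): for Gauss coefficients the factor is presumably the usual geometric
# continuation (r_from / r_to)**(l + 2) per degree l, applied once to the
# means and from both sides to the covariance, as done above.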
def coeffs(posterior, mu_coeffs, cov_coeffs, r_ref, r_at=REARTH):
""" Calculate the coefficient means and percentiles to output the model
coefficients
Parameters
----------
posterior : array-like
The posterior from the integration
mu_coeffs : array-like
The coefficients from the integration
cov_coeffs : array-like
The corresponding covariance matrices
r_ref : float
The reference radius for the input coefficients
r_at : float, default pyfield.REARTH
The reference radius for the output coefficients
Returns
-------
ls : array-like
The coefficient degrees
ms : array-like
The coefficient orders
coeffs : array-like
The Gauss coefficient means
coeffs_sd : array-like
The corresponding standard deviations
coeffs_16 : array-like
The 16th percentiles
coeffs_84 : array-like
The 84th percentiles
"""
ls = [i2lm_l(it) for it in range(len(mu_coeffs[0]))]
ms = [i2lm_m(it) for it in range(len(mu_coeffs[0]))]
mu_coeffs, cov_coeffs = rescale_coeff_results(r_ref, r_at, mu_coeffs,
cov_coeffs)
gm_weights = posterior / posterior.sum()
ens = sample_GM(gm_weights.flatten(), mu_coeffs, cov_coeffs,
n_par_samps=1000, n_g_samps=1000)
err_16, err_84 = np.percentile(ens, (16, 84), axis=1)
mean = (mu_coeffs.T * gm_weights.flatten()).sum(axis=1)
# mixture variance accumulator; must start from zero, not np.empty_like
var = np.zeros_like(mean)
for it in range(gm_weights.size):
var += cov_coeffs[it].diagonal() \
* gm_weights[np.unravel_index(it, gm_weights.shape)]
var += mu_coeffs[it]**2 \
* gm_weights[np.unravel_index(it, gm_weights.shape)]
var -= mean**2
return ls, ms, mean, np.sqrt(var), err_16, err_84
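# Annotation (not from the original): the loop above applies the standard
# mixture-moment identity
#   Var[g] = sum_i w_i * (diag(C_i) + mu_i**2) - mean**2,
# i.e. the weighted second moments of the components minus the squared
# mixture mean.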
def spectrum(posterior, mu_coeffs, cov_coeffs, r_ref, r_at=REARTH,
n_par_samps=1000, n_g_samps=1000):
""" Calculate the power spectrum resulting from the mixture and find the
68% confidence-interval by sampling
Parameters
----------
posterior : array-like
The posterior from the integration
mu_coeffs : array-like
The coefficients from the integration
cov_coeffs : array-like
The corresponding covariance matrices
r_ref : float
The reference radius for the input coefficients
r_at : float, default pyfield.REARTH
The reference radius for the output coefficients
n_par_samps: int, default 1000
The number of parameter sets to be sampled
n_g_samps : int, default 1000
The number of samples per set of parameters
Returns
-------
ls_out : array-like
The Gauss coefficient degrees
mean_ps : array-like
The corresponding mean of the power spectrum
a16_ps : array-like
The 16th percentiles
a84_ps : array-like
The 84th percentiles
"""
mu_coeffs, cov_coeffs = rescale_coeff_results(r_ref, r_at, mu_coeffs,
cov_coeffs)
gm_weights = posterior / posterior.sum()
ens = sample_GM(gm_weights.flatten(), mu_coeffs, cov_coeffs,
n_par_samps=n_par_samps, n_g_samps=n_g_samps)
# 1st two moments of mixture
mean = np.sum(mu_coeffs * gm_weights.flatten()[:, None], axis=0)
# mixture covariance accumulator; must start from zero, not np.empty_like
cov = np.zeros_like(cov_coeffs[0])
for it in range(gm_weights.size):
cov += cov_coeffs[it] \
* gm_weights[np.unravel_index(it, gm_weights.shape)]
cov += np.outer(mu_coeffs[it] - mean,
mu_coeffs[it] - mean) \
* gm_weights[np.unravel_index(it, gm_weights.shape)]
ls = [i2lm_l(it) for it in range(len(mu_coeffs[0]))]
mean_ps = power_spectrum(ls, mean, np.sqrt(cov.diagonal()))
samp_ps = power_spectrum(ls, ens)
a16_ps, a84_ps = np.percentile(samp_ps, (16, 84), axis=1)
ls_out = np.arange(1, max(ls) + 1)
return ls_out, mean_ps, a16_ps, a84_ps
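# Annotation (not from the original): the loop in `spectrum` assembles the
# full mixture covariance via
#   Cov = sum_i w_i * (C_i + outer(mu_i - mean, mu_i - mean)),
# while the percentile band is estimated from the sampled ensemble instead.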
if __name__ == '__main__':
from matplotlib import pyplot as plt
import cartopy.crs as ccrs
from utils import load
from integration import IntegrationResult
pars = load(sys.argv)
n_bins = len(pars.data)
# set up figures
int_fig, int_ax = plt.subplots(n_bins, 1, figsize=(4, 3*n_bins),
squeeze=False)
int_fig.canvas.set_window_title('PDF of dipole intensity')
cff_fig, cff_ax = plt.subplots(n_bins, 1, figsize=(4, 3*n_bins),
squeeze=False)
cff_fig.canvas.set_window_title('Dipole coefficients')
loc_fig = plt.figure(figsize=(4, 3*n_bins))
loc_fig.canvas.set_window_title('PDF of dipole location')
spc_fig, spc_ax = plt.subplots(n_bins, 1, figsize=(4, 3*n_bins),
squeeze=False)
spc_fig.canvas.set_window_title('Power spectrum')
# global projection for locations
ax_proj = ccrs.Stereographic(central_latitude=90., scale_factor=20)
cntr = 0 # a counter
for bin_name, data in pars.data:
try:
fh = np.load(f'{pars.bin_fname(bin_name)}'
f'{IntegrationResult.suffix_coeffs}')
except FileNotFoundError:
print(f"No integration results for bin {bin_name} found. Skipping "
f"this bin. The plot will be empty.")
cntr += 1
continue
else:
# calculate and plot the coefficients
ls, ms, mean, var, err_16, err_84 = \
coeffs(fh['posterior'], fh['mu_coeffs'], fh['cov_coeffs'],
r_ref=pars.r_ref)
cff_ax[cntr, 0].errorbar(ms[0:3], mean[0:3],
yerr=[mean[0:3]-err_16[0:3],
err_84[0:3]-mean[0:3]],
marker='x', ls='')
cff_ax[cntr, 0].set_xticks(ms[0:3])
# calculate and plot the dipole intensity
mu_dips = fh['mu_coeffs'][:, 0:3]
cov_dips = fh['cov_coeffs'][:, 0:3, 0:3]
arr, pdf, samps = intensity(fh['posterior'], mu_dips, cov_dips,
r_ref=pars.r_ref, ret_samps=True)
int_ax[cntr, 0].hist(samps, bins=100, density=True)
int_ax[cntr, 0].plot(arr, pdf, lw=3, alpha=0.8)
# calculate and plot the dipole location
lat, lon, dens = location(fh['posterior'], mu_dips, cov_dips)
sph_ax = loc_fig.add_subplot(n_bins, 1, 1 + cntr,
projection=ax_proj)
sph_ax.coastlines(zorder=1)
sph_ax.gridlines()
sph_ax.set_global()
spltx, splty, _ = ax_proj.transform_points(ccrs.Geodetic(),
lon,
lat).T
sph_ax.tripcolor(spltx, splty, dens, zorder=0,
cmap='Purples')
# calculate and plot the power spectrum
plt_l, mu_ps, a16_ps, a84_ps = spectrum(fh['posterior'],
fh['mu_coeffs'],
fh['cov_coeffs'],
pars.r_ref,
r_at=pars.r_ref)
spc_ax[cntr, 0].errorbar(plt_l, mu_ps,
yerr=[mu_ps-a16_ps, a84_ps-mu_ps],
marker='x')
spc_ax[cntr, 0].set_yscale('log')
spc_ax[cntr, 0].set_xticks(plt_l)
fh.close()
cff_ax[cntr, 0].set_title(str(bin_name))
int_ax[cntr, 0].set_title(str(bin_name))
int_ax[cntr, 0].set_yticks([])
sph_ax.set_title(str(bin_name))
spc_ax[cntr, 0].set_title(str(bin_name))
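# Not in the original snippet: on non-interactive backends the figures are
# never displayed without this, so showing them explicitly seems intended.
plt.show()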