Commit b496dd5e authored by Stefan Mauerberger's avatar Stefan Mauerberger

Comparison of error density and Gaussian proxy

parent 19a5ee51
%% Cell type:code id: tags:
``` python
# Imports
import sys
import os
# make the parent directory importable (relative import)
sys.path.append(os.path.abspath('') + '/../')
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.stats import uniform, gamma, norm
import pyfield
from corbass.utils import load, nez2dif, dif2nez
```
%% Cell type:code id: tags:
``` python
# seed for reproducibility
seed = 161
np.random.seed(seed)
```
%% Cell type:markdown id: tags:
# Generate synthetic data for tests
This notebook generates synthetic data for testing the `CORBASS` algorithm. We generate data from a given set of Gauss coefficients and add synthetic errors drawn from the von Mises-Fisher and gamma distributions.
%% Cell type:code id: tags:
``` python
# some basic parameters
# the name of the output file
out = '../dat/synth_data_clean_complete.csv'
# switch for using locations and incompleteness structure from the example data file
real_locs = False
# the number of records to be generated, is only used if real_locs is False
n_points = 412
# the fraction of incomplete records; if real_locs is True it merely acts as an on/off switch
r_inc = 0.
# switch for corrupting the data by noise
noise = False
# the error levels to be stored
ddec = 4.5
dinc = 4.5
dint = 8250
# the average concentration parameter from GEOMAGIA for the interval [750, today] is 650
kappa = 650
header = f"# This file was produced using the notebook Gen_Data.ipynb with the following parameters:\n" \
+ f"# real_locs={real_locs}, n_points={n_points:d}, r_inc={r_inc:.2f}, noise={noise}, ddec={ddec:.2f}, " \
+ f"dinc={dinc:.2f}, dint={dint:d}, kappa={kappa:.1f}, seed={seed:d}\n"
```
%% Cell type:markdown id: tags:
## Sampling from the von Mises-Fisher distribution
%% Cell type:code id: tags:
``` python
# The sampling process involves some rotations, so we first define some convenience functions
def angles(vec):
    """ Return azimuth and colatitude of a vector. """
    return np.arctan2(vec[1], vec[0]), \
        np.pi/2 - np.arctan2(vec[2], np.sqrt(vec[0]**2 + vec[1]**2))

def rot_z(ang):
    """ Rotation matrix about the z-axis. """
    return np.array([[np.cos(ang), np.sin(ang), 0],
                     [-np.sin(ang), np.cos(ang), 0],
                     [0, 0, 1]])

def rot_y(ang):
    """ Rotation matrix about the y-axis. """
    return np.array([[np.cos(ang), 0, np.sin(ang)],
                     [0, 1, 0],
                     [-np.sin(ang), 0, np.cos(ang)]])

def rotator(vec):
    """ Matrix whose transpose maps the z-axis onto the direction of vec. """
    vec = np.asarray(vec)
    p, t = angles(vec)
    return np.dot(rot_y(t).T, rot_z(p))
```
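%% Cell type:markdown id: tags:
As a quick sanity check (not part of the original workflow), the transpose of `rotator(vec)` should map the z-axis onto the unit vector along `vec`:
%% Cell type:code id: tags:
``` python
# Illustrative check only: rotator(mu).T applied to the z-axis should
# reproduce the normalized direction of mu.
mu_test = np.array([-1., 2., 0.5])
z_mapped = rotator(mu_test).T @ np.array([0., 0., 1.])
print(np.allclose(z_mapped, mu_test/np.linalg.norm(mu_test)))  # expect True
```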
%% Cell type:code id: tags:
``` python
# This cell contains the sampling procedure
def sample_Fisher(n, mu=(0, 0, 1), kappa=20):
    """ Generate samples from the von Mises-Fisher distribution

    Parameters
    ----------
    n : int
        The number of samples to be generated
    mu : array-like of length 3, optional
        A vector pointing towards the center of the distribution. Its length is ignored.
    kappa : float, optional
        The concentration parameter.

    Returns
    -------
    numpy array of shape (3, n) containing the sampled vectors

    References
    ----------
    [1] W. Jakob, "Numerically stable sampling of the von Mises-Fisher
        distribution on S^2 (and other tricks)",
        http://www.mitsuba-renderer.org/~wenzel/files/vmf.pdf, 2015
    """
    if kappa <= 0:
        raise ValueError(f"The concentration parameter has to be positive, but kappa={kappa} was given.\n"
                         f"For kappa=0 use a uniform sampler on the sphere.")
    trafo_mat = rotator(mu)
    # sample from the uniform circle, V in [1]
    angs = uniform.rvs(scale=2*np.pi, size=n)
    vs = np.array([np.cos(angs),
                   np.sin(angs)])
    # sample W in [1] via inverse cdf sampling
    def inv_cdf(x):
        return 1 + np.log(x + (1-x)*np.exp(-2*kappa))/kappa
    unis = uniform.rvs(size=n)
    ws = inv_cdf(unis)
    # combine V and W and rotate the samples towards mu
    ret = np.sqrt(1-ws**2)*vs
    res = np.einsum("i...,ij->j...", np.array([ret[0], ret[1], ws]), trafo_mat)
    if n == 1:
        return res.flatten()
    return res
```
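%% Cell type:markdown id: tags:
For reference, the inverse CDF used above follows from [1]: on $S^2$, the component $w$ along $\mu$ has density $p(w) = \frac{\kappa e^{\kappa w}}{2\sinh\kappa}$ on $[-1, 1]$. Its CDF is $F(w) = \frac{e^{\kappa w} - e^{-\kappa}}{e^{\kappa} - e^{-\kappa}}$, and solving $F(w) = x$ for $w$ gives
$$ w = 1 + \frac{1}{\kappa}\log\!\left(x + (1 - x)\,e^{-2\kappa}\right), $$
which is exactly `inv_cdf`.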
%% Cell type:markdown id: tags:
We draw some samples and plot the result to eye-check whether the procedure works. In addition, we plot contour lines of the Gaussian proxy and the vMF density to illustrate how far off the proxy error model is.
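The comparison relies on two ingredients. The vMF density on the sphere reads
$$ f(\mathbf{x}) = \frac{\kappa}{4\pi\sinh\kappa}\, e^{\kappa\,\mu^\top\mathbf{x}}, $$
which picks up a Jacobian factor $\cos I$ when expressed in declination and inclination. The Gaussian proxy's standard deviations are derived from Fisher's 95% confidence cone,
$$ \alpha_{95} \approx \frac{140^\circ}{\sqrt{\kappa}}, \qquad \sigma_I \approx \frac{57.3^\circ}{140^\circ}\,\alpha_{95} = \frac{57.3^\circ}{\sqrt{\kappa}}, \qquad \sigma_D = \frac{\sigma_I}{\cos I}. $$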
%% Cell type:code id: tags:
``` python
DIF_pdm = ( 4, 67, 49) # Decl, incl and F for Potsdam in 2015
DIF_rky = (-14, 75, 52) # Reykjavik
DIF_bkg = ( -1, 13, 42) # Bangkok
# List of sites
sites = (DIF_rky, DIF_pdm, DIF_bkg)
# Subplot axes
fig, axs = plt.subplots(1, len(sites), figsize=(10, 5))
axs[0].set_ylabel('I [deg.]') # Y-label only on the left
# Iterate
for (decl, incl, _), ax in zip(sites, axs):
    # Location parameter
    mu = dif2nez(decl, incl, 1)  # w.r.t. a Cartesian reference frame
    # Draw samples from the vMF distribution
    SF = sample_Fisher(5000, mu=mu, kappa=kappa)
    # Transform drawn samples to decl and incl (intensity == 1)
    D, I, _ = nez2dif(*SF)
    # 2d histogram
    counts, Dedges, Iedges, img = ax.hist2d(D, I, bins=20, cmap='hot_r')
    ax.set_xlabel('D [deg.]')
    # Grid to plot the densities on
    Ds, Is = np.mgrid[Dedges.min():Dedges.max():101j,
                      Iedges.min():Iedges.max():51j]
    Xs, Ys, Zs = dif2nez(d=Ds, i=Is, f=1)  # Transform to the Cartesian frame
    # vMF density, normalized w.r.t. [rad.]
    p = np.exp(kappa*(Xs*mu[0] + Ys*mu[1] + Zs*mu[2]))*np.cos(np.deg2rad(Is))
    p *= kappa/(4*np.pi*np.sinh(kappa))
    # Add contour plot
    levels = ax.contour(Ds, Is, p, colors='blue').levels
    # Standard deviations for decl and incl
    a95 = 140/np.sqrt(kappa)  # 95% confidence cone
    sd_I = 57.3/140*a95  # standard deviation incl
    sd_D = sd_I/np.cos(np.deg2rad(incl))  # standard deviation decl
    # Gaussian proxy density
    p = np.exp(-(((decl - Ds)/sd_D)**2 + ((incl - Is)/sd_I)**2)/2)
    # Normalization and conversion to rad to share identical level values
    p /= (2*np.pi*np.deg2rad(sd_D)*np.deg2rad(sd_I))
    ax.contour(Ds, Is, p, levels, colors='green')
```
%% Output
%% Cell type:code id: tags:
``` python
fig, axs = plt.subplots(1, len(sites), figsize=(10, 5))
Fs = np.linspace(15, 80, 101)
# Iterate
for (_, _, inte), ax in zip(sites, axs):
    # Gaussian proxy density for the intensity
    ax.plot(Fs, norm(inte, dint/1000).pdf(Fs))
    # Gamma parameters are chosen such that
    # - the mean coincides with the intensity
    # - the standard deviation is 8.25 µT
    b = inte/(dint/1000)**2
    a = inte*b
    ax.plot(Fs, gamma(a=a, scale=1/b).pdf(Fs))
    ax.set_xlabel('F [µT]')
```
%% Output
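%% Cell type:markdown id: tags:
The moment matching behind the gamma parameters: with shape $a$ and rate $b$ (i.e. scale $1/b$), the mean is $a/b$ and the variance is $a/b^2$. Requiring mean $F$ and standard deviation $\sigma_F$ yields
$$ b = \frac{F}{\sigma_F^2}, \qquad a = F\,b, $$
with $\sigma_F = 8.25$ µT in the cell above.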
%% Cell type:markdown id: tags:
## Generate synthetic data
We first need a set of coefficients for the field. At the time of writing, IGRF-13 [1] had just been released, so we download the reported coefficients directly as a reference model.
%% Cell type:code id: tags:
``` python
IGRF = pd.read_csv('https://www.ngdc.noaa.gov/IAGA/vmod/coeffs/igrf13coeffs.txt', header=0, delim_whitespace=True, skiprows=3)
coeffs = IGRF[['2020.0']].to_numpy().flatten()
# retrieve the maximal degree using pyfield and the index of the last entry in coeffs
l_max = pyfield.i2lm_l(len(coeffs)-1)
```
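%% Cell type:markdown id: tags:
As a cross-check (assuming the usual storage scheme with $l_\mathrm{max}(l_\mathrm{max}+2)$ real coefficients up to degree $l_\mathrm{max}$), the 195 IGRF-13 coefficients should correspond to degree 13:
%% Cell type:code id: tags:
``` python
# Illustrative consistency check of the retrieved maximal degree
print(l_max, len(coeffs) == l_max*(l_max + 2))  # expect: 13 True
```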
%% Cell type:markdown id: tags:
Then we generate random points as points of observation:
%% Cell type:code id: tags:
``` python
def ran_sph(npoints, r=1., ndim=3, nocluster=True):
    """ Sample random points on the ndim-sphere.

    Parameters
    ----------
    npoints : int
        The number of points to sample
    r : float, optional
        The radius of the sphere to be sampled on
    ndim : int, optional
        The dimension of the sphere
    nocluster : bool, optional
        Whether to discard points sampled outside of the unit ball, i.e.
        whether to sample uniformly distributed points

    Returns
    -------
    Array of shape (ndim, npoints) containing the sampled points.

    References
    ----------
    [1] http://mathworld.wolfram.com/SpherePointPicking.html
    [2] http://mathworld.wolfram.com/HyperspherePointPicking.html
    """
    vec = np.random.randn(ndim, npoints)
    if nocluster:
        # rejection step: keep only points inside the unit ball and redraw
        # until enough samples are collected
        n = np.linalg.norm(vec, axis=0)
        vec = vec[:, n <= 1.]
        while vec.shape[1] < npoints:
            ad = np.random.randn(ndim, npoints)
            n = np.linalg.norm(ad, axis=0)
            ad = ad[:, n <= 1.]
            vec = np.concatenate((vec, ad), axis=1)
        if vec.shape[1] > npoints:
            vec = vec[:, 0:npoints]
    # project onto the sphere
    vec /= np.linalg.norm(vec, axis=0)
    if npoints == 1:
        vec = vec.flatten()
    return vec*r
```
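%% Cell type:markdown id: tags:
A minimal check (illustrative only) that all sampled points lie on the sphere of the requested radius; note that it consumes random numbers, so we re-seed afterwards to keep the data below reproducible:
%% Cell type:code id: tags:
``` python
pts = ran_sph(100, r=pyfield.REARTH)
print(pts.shape, np.allclose(np.linalg.norm(pts, axis=0), pyfield.REARTH))
np.random.seed(seed)  # restore the seeded stream
```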
%% Cell type:code id: tags:
``` python
if real_locs:
    # load reference data
    pars = load('../examples/Example_Parfile.py')
    # ungroup the data
    ref_data = pars.data.filter(lambda x: True)
    ref_data.reset_index(inplace=True)
    n_points = len(ref_data)
    x_obs = np.zeros((3, n_points), order='F')
    x_obs[0] = ref_data['co-lat']
    x_obs[1] = ref_data['lon']
    x_obs[2] = ref_data['rad']
else:
    x_obs = ran_sph(n_points, r=pyfield.REARTH)
    # transform x_obs from Cartesian coordinates to co-lat, lon, rad;
    # mapLoc works in place and expects a Fortran-ordered array
    x_obs = np.asfortranarray(x_obs)
    pyfield.mapLoc(fromSys=pyfield.SYS_GEO,
                   fromForm=pyfield.COOR_CAR,
                   toSys=pyfield.SYS_GEO,
                   toForm=pyfield.COOR_CLR,
                   t=0,
                   x=x_obs)
```
%% Cell type:markdown id: tags:
Next we use the `pyfield` library to get the basis functions and generate a field from the coefficients.
%% Cell type:code id: tags:
``` python
dspharm = np.empty((len(coeffs), 3*n_points), order='F')
pyfield.dspharm(src=pyfield.SOURCE_INTERNAL,
gSys=pyfield.SYS_GEO,
atSys=pyfield.SYS_GEO,
atForm=pyfield.COOR_CLR,
bSys=pyfield.SYS_GEO,
bForm=pyfield.FIELD_NED,
lmax=l_max,
R=pyfield.REARTH,
t=0.,
at=x_obs,
B=dspharm)
```
%% Cell type:markdown id: tags:
The field's $N, E, Z$ components are then easily calculated via a dot product with the coefficients.
%% Cell type:code id: tags:
``` python
field_obs = np.dot(coeffs, dspharm).reshape(-1, 3).T
```
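%% Cell type:markdown id: tags:
A quick shape check (illustrative): the dot product contracts the coefficient index and the reshape/transpose arranges the result as one $N, E, Z$ triple per site.
%% Cell type:code id: tags:
``` python
# dspharm: (len(coeffs), 3*n_points)  ->  field_obs: (3, n_points)
print(dspharm.shape, field_obs.shape)
```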
%% Cell type:markdown id: tags:
Using a utility function from `CORBASS`, we can transform these to $D,I,F$.
%% Cell type:code id: tags:
``` python
decs, incs, ints = nez2dif(*field_obs)
```
%% Cell type:markdown id: tags:
Now we set up error levels for the components and add some synthetic noise.
%% Cell type:code id: tags:
``` python
if noise:
    mus = np.array([np.cos(np.deg2rad(incs))*np.cos(np.deg2rad(decs)),
                    np.cos(np.deg2rad(incs))*np.sin(np.deg2rad(decs)),
                    np.sin(np.deg2rad(incs))])
    # the angular components receive an error from the von Mises-Fisher distribution,
    # the intensities receive an error from the gamma distribution
    for it, mu in zip(np.arange(n_points), mus.T):
        samp = sample_Fisher(1, mu=mu, kappa=kappa)
        decs[it] = np.rad2deg(np.arctan2(samp[1], samp[0]))
        incs[it] = np.rad2deg(np.arctan2(samp[2], np.sqrt(samp[0]**2 + samp[1]**2)))
        # gamma parameters via moment matching, see above
        b = ints[it]/dint**2
        a = ints[it]*b
        ints[it] = gamma.rvs(a=a, scale=1./b)
# mimic some incompleteness in the data
if r_inc != 0.:
    if real_locs:
        # adopt the incompleteness pattern of the reference data
        decs[ref_data.query('not (D==D)').index] = np.nan
        incs[ref_data.query('not (I==I)').index] = np.nan
        ints[ref_data.query('not (F==F)').index] = np.nan
    else:
        # generate indices of missing points
        ind = np.arange(n_points)
        np.random.shuffle(ind)
        mnum = int(r_inc*n_points)
        mind = ind[0:mnum]
        # generate a boolean array of triples of True and False for missing
        # values at each point
        mdist = np.zeros((3, n_points), dtype=bool)
        bools = [True, True, False, False]
        for j in mind:
            np.random.shuffle(bools)
            mdist[:, j] = bools[0:3]
        # set the missing values to nan
        decs[mdist[0]] = np.nan
        incs[mdist[1]] = np.nan
        ints[mdist[2]] = np.nan
```
%% Cell type:markdown id: tags:
Finally we build a pandas `DataFrame` from the synthetic data and store it.
%% Cell type:code id: tags:
``` python
synth_data = pd.DataFrame({'co-lat': x_obs[0],
'lat': 90-x_obs[0],
'lon': x_obs[1],
'rad': x_obs[2],
't': 2020,
'dt': 0,
'D': decs,
'I': incs,
'F': ints,
'dD': ddec,
'dI': dinc,
'dF': dint})
out_frame = synth_data.to_csv()
with open(out, 'w') as outfile:
    outfile.write(header + out_frame)
```
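%% Cell type:markdown id: tags:
To read the file back in (a sketch, assuming plain pandas), the leading header lines can be skipped via the `comment` argument:
%% Cell type:code id: tags:
``` python
# Illustrative round-trip check of the stored file
check = pd.read_csv(out, comment='#', index_col=0)
print(check.shape)
```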
%% Cell type:markdown id: tags:
## References
[1] P. Alken, E. Thébault, C. Beggan, H. Amit, J. Aubert, J. Baerenzung, T. N. Bondar,
W. Brown, S. Califf, A. Chambodut, A. Chulliat, G. Cox, C. C. Finlay, A. Fournier,
N. Gillet, A. Grayver, M. Hammer, M. Holschneider, L. Huder, G. Hulot, T. Jager,
C. Kloss, M. Korte, W. Kuang, A. Kuvshinov, B. Langlais, J.-M. Léger, V. Lesur,
P. W. Livermore, F. J. Lowes, S. Macmillan, W. Magnes, M. Mandea, S. Marsal,
J. Matzka, M. C. Metman, T. Minami, A. Morschhauser, J. E. Mound, M. Nair,
S. Nakano, N. Olsen, F. J. Pavón-Carrasco, V. G. Petrov, G. Ropp, M. Rother,
T. J. Sabaka, S. Sanchez, D. Saturnino, N. Schnepf, X. Shen, C. Stolle,
A. Tangborn, L. Tøffner-Clausen, H. Toh, J. M. Torta, J. Varner, P. Vigneron,
F. Vervelidou, I. Wardinski, J. Wicht, A. Woods, Y. Yang, Z. Zeren and B. Zhou,
"International Geomagnetic Reference Field: the thirteenth generation",
submitted to Earth, Planets and Space, see also:
https://www.ngdc.noaa.gov/IAGA/vmod/igrf.html