diff --git a/tutorials/bayesian.py b/tutorials/bayesian.py
index 42e16a6f..653d6b3d 100755
--- a/tutorials/bayesian.py
+++ b/tutorials/bayesian.py
@@ -21,19 +21,26 @@
 
 Based on the above definition, we construct some prior models in the frequency
 domain, convert each of them to the time domain and use such an ensemble
-to estimate the prior mean :math:`\mu_\mathbf{x}` and model
-covariance :math:`\mathbf{C_x}`.
+to estimate the prior mean :math:`\mathbf{x}_0` and model
+covariance :math:`\mathbf{C}_{x_0}`.
 
-We then create our data by sampling the true signal at certain locations
+We then create our data by sampling the true signal at certain locations
 and solve the reconstruction problem within a Bayesian framework. Since we are
 assuming gaussianity in our priors, the equation to obtain the posterior mean
-can be derived analytically:
+and covariance can be derived analytically:
 
 .. math::
-    \mathbf{x} = \mathbf{x_0} + \mathbf{C}_x \mathbf{R}^T
-    (\mathbf{R} \mathbf{C}_x \mathbf{R}^T + \mathbf{C}_y)^{-1} (\mathbf{y} -
+    \mathbf{x} = \mathbf{x_0} + \mathbf{C}_{x_0} \mathbf{R}^T
+    (\mathbf{R} \mathbf{C}_{x_0} \mathbf{R}^T + \mathbf{C}_y)^{-1} (\mathbf{y} -
     \mathbf{R} \mathbf{x_0})
 
+and
+
+.. math::
+    \mathbf{C}_x = \mathbf{C}_{x_0} - \mathbf{C}_{x_0} \mathbf{R}^T
+    (\mathbf{R} \mathbf{C}_{x_0} \mathbf{R}^T + \mathbf{C}_y)^{-1}
+    \mathbf{R} \mathbf{C}_{x_0}
+
 """
 
 import matplotlib.pyplot as plt
@@ -80,14 +87,15 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
 sigmaa = [0.1, 0.5, 0.6]
 phi0 = [-90.0, 0.0, 0.0]
 sigmaphi = [0.1, 0.2, 0.4]
-sigmad = 1e-2
+sigmad = 1
+scaling = 100  # Scale by a factor to allow noise std=1
 
 # Prior models
 nt = 200
 nfft = 2**11
 dt = 0.004
 t = np.arange(nt) * dt
-xs = np.array(
+xs = scaling * np.array(
     [
         prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft)
         for _ in range(nreals)
@@ -95,7 +103,10 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
 )
 
 # True model (taken as one possible realization)
-x = prior_realization(f0, a0, phi0, [0, 0, 0], [0, 0, 0], [0, 0, 0], dt, nt, nfft)
+x = scaling * prior_realization(
+    f0, a0, phi0, [0, 0, 0], [0, 0, 0], [0, 0, 0], dt, nt, nfft
+)
+
 
 ###############################################################################
 # We have now a set of prior models in time domain. We can easily use sample
@@ -110,7 +121,7 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
 N = 30  # length of decorrelation
 diags = np.array([Cm[i, i - N : i + N + 1] for i in range(N, nt - N)])
 diag_ave = np.average(diags, axis=0)
-# add a taper at the end to avoid edge effects
+# add a taper at the start and end to avoid edge effects
 diag_ave *= np.hamming(2 * N + 1)
 
 fig, ax = plt.subplots(1, 1, figsize=(12, 4))
@@ -157,65 +168,107 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
 ynmask = Rop.mask(x + n)
 
 ###############################################################################
-# First we apply the Bayesian inversion equation
-xbayes = x0 + Cm_op * Rop.H * (
+# First, since the problem is rather small, we construct the dense versions of
+# all our matrices and compute the analytical posterior mean and covariance
+
+Cm = Cm_op.todense()
+Cd = Cd_op.todense()
+R = Rop.todense()
+
+# Bayesian analytical solution
+xpost_ana = x0 + Cm @ R.T @ (np.linalg.solve(R @ Cm @ R.T + Cd, yn - R @ x0))
+Cmpost_ana = Cm - Cm @ R.T @ (np.linalg.solve(R @ Cm @ R.T + Cd, R @ Cm))
+
+###############################################################################
+# Next we solve the same Bayesian inversion equation iteratively. We will see
+# that, provided we use enough iterations, we can retrieve the same values as
+# the analytical posterior mean
+xpost_iter = x0 + Cm_op * Rop.H * (
     lsqr(Rop * Cm_op * Rop.H + Cd_op, yn - Rop * x0, iter_lim=400)[0]
 )
 
-# Visualize
+###############################################################################
+# But what if the problem did not allow creating dense matrices for both the
+# operator and the input covariance matrices? In this case, we can resort to the
+# Randomize-Then-Optimize algorithm of Bardsley et al., 2014, which repeatedly
+# solves the same problem that we solved to find the MAP solution, each time
+# adding random noise to the data. It can be shown that the sample mean and
+# covariance of the solutions of the different perturbed problems provide a good
+# approximation of the true posterior mean and covariance.
+
+# RTO number of solutions
+nreals = 1000
+
+xrto = []
+for ireal in range(nreals):
+    yreal = yn + Rop * np.random.normal(0, sigmad, nt)
+    xrto.append(
+        x0
+        + Cm_op
+        * Rop.H
+        * (lsqr(Rop * Cm_op * Rop.H + Cd_op, yreal - Rop * x0, iter_lim=400))[0]
+    )
+
+xrto = np.array(xrto)
+xpost_rto = np.average(xrto, axis=0)
+Cmpost_rto = ((xrto - xpost_rto).T @ (xrto - xpost_rto)) / nreals
+
+###############################################################################
+# Finally, we visualize the different results
+
+# Means
 fig, ax = plt.subplots(1, 1, figsize=(12, 5))
 ax.plot(t, x, "k", lw=6, label="true")
+ax.plot(t, xpost_ana, "r", lw=7, label="bayesian inverse (ana)")
+ax.plot(t, xpost_iter, "g", lw=5, label="bayesian inverse (iter)")
+ax.plot(t, xpost_rto, "b", lw=3, label="bayesian inverse (rto)")
 ax.plot(t, ymask, ".k", ms=25, label="available samples")
 ax.plot(t, ynmask, ".r", ms=25, label="available noisy samples")
-ax.plot(t, xbayes, "r", lw=3, label="bayesian inverse")
 ax.legend()
-ax.set_title("Signal")
+ax.set_title("Mean reconstruction")
 ax.set_xlim(0, 0.8)
-plt.tight_layout()
 
-###############################################################################
-# So far we have been able to estimate our posterion mean. What about its
-# uncertainties (i.e., posterion covariance)?
-#
-# In real-life applications it is very difficult (if not impossible)
-# to directly compute the posterior covariance matrix. It is much more
-# useful to create a set of models that sample the posterion probability.
-# We can do that by solving our problem several times using different prior
-# realizations as starting guesses:
-
-xpost = [
-    x0
-    + Cm_op
-    * Rop.H
-    * (lsqr(Rop * Cm_op * Rop.H + Cd_op, yn - Rop * x0, iter_lim=400)[0])
-    for x0 in xs[:30]
-]
-xpost = np.array(xpost)
-
-x0post = np.average(xpost, axis=0)
-Cm_post = ((xpost - x0post).T @ (xpost - x0post)) / nreals
-
-# Visualize
+# RTO realizations
 fig, ax = plt.subplots(1, 1, figsize=(12, 5))
 ax.plot(t, x, "k", lw=6, label="true")
-ax.plot(t, xpost.T, "--r", lw=1)
-ax.plot(t, x0post, "r", lw=3, label="bayesian inverse")
+ax.plot(t, xrto[::10].T, "--b", lw=0.5)
+ax.plot(t, xpost_rto, "b", lw=3, label="bayesian inverse (rto)")
 ax.plot(t, ymask, ".k", ms=25, label="available samples")
 ax.plot(t, ynmask, ".r", ms=25, label="available noisy samples")
 ax.legend()
-ax.set_title("Signal")
+ax.set_title("RTO realizations")
 ax.set_xlim(0, 0.8)
 
-fig, ax = plt.subplots(1, 1, figsize=(5, 4))
-im = ax.imshow(
-    Cm_post, interpolation="nearest", cmap="seismic", extent=(t[0], t[-1], t[-1], t[0])
+# Covariances
+fig, axs = plt.subplots(1, 2, figsize=(12, 4))
+axs[0].imshow(
+    Cmpost_ana,
+    interpolation="nearest",
+    cmap="seismic",
+    vmin=-5e-1,
+    vmax=2,
+    extent=(t[0], t[-1], t[-1], t[0]),
+)
+axs[0].set_title(r"$\mathbf{C}_m^{post,ANA}$")
+axs[0].axis("tight")
+
+axs[1].imshow(
+    Cmpost_rto,
+    interpolation="nearest",
+    cmap="seismic",
+    vmin=-5e-1,
+    vmax=2,
+    extent=(t[0], t[-1], t[-1], t[0]),
 )
-ax.set_title(r"$\mathbf{C}_m^{posterior}$")
-ax.axis("tight")
+axs[1].set_title(r"$\mathbf{C}_m^{post,RTO}$")
+axs[1].axis("tight")
 plt.tight_layout()
 
 ###############################################################################
 # Note that here we have been able to compute a sample posterior covariance
-# from its estimated samples. By displaying it we can see how both the overall
+# from its estimated samples. By displaying it we can see how both the overall
 # variances and the correlation between different parameters have become
-# narrower compared to their prior counterparts.
+# narrower compared to their prior counterparts. Moreover, whilst the RTO
+# covariance seems to be slightly under-estimated, this represents an appealing
+# alternative to the closed-form solution for large-scale problems under
+# Gaussian assumptions.