diff --git a/chemotools/utils/_finite_differences.py b/chemotools/utils/_finite_differences.py index 0ba05a0..c3a558b 100644 --- a/chemotools/utils/_finite_differences.py +++ b/chemotools/utils/_finite_differences.py @@ -1,9 +1,29 @@ -from math import comb -from numbers import Integral +""" +This utility submodule provides functions for the computation of forward finite +differences, namely + +- the kernel for forward and central finite differences, +- computation of related kernel matrices +- estimation of the noise standard deviation of a series + +""" + +### Imports ### + +from math import comb, factorial +from numbers import Integral, Real +from typing import Any, Callable, Literal, Optional, Tuple, Union import numpy as np +from scipy.ndimage import median_filter from sklearn.utils import check_scalar +### Constants ### + +_MAD_PREFACTOR = 1.482602 + +### Functions ### + def calc_forward_diff_kernel( *, @@ -36,7 +56,7 @@ def calc_forward_diff_kernel( Raises ------ ValueError - If the difference order is below 1. + If ``differences`` is below 1. """ # the input is validated @@ -58,6 +78,95 @@ def calc_forward_diff_kernel( ) +def calc_central_diff_kernel(*, differences: int, accuracy: int = 2) -> np.ndarray: + """ + Computes the kernel for central finite differences which can be applied to a + series by means of a convolution, e.g., + + ```python + kernel = calc_central_fin_diff_kernel(differences=2, accuracy=2) + differences = np.convolve(series, np.flip(kernel), mode="valid") + # NOTE: NumPy flips the kernel internally due to the definition of convolution + ``` + + Parameters + ---------- + differences : int + The order of the differences starting from 0 for the original curve, 1 for the + first order, 2 for the second order, ..., and ``m`` for the ``m``-th order + differences. + Values below 1 are not allowed. + accuracy : int, default=2 + The accuracy of the finite difference approximation, which has to be an even + integer ``>= 2``. + The higher the accuracy, the better the approximation. + + Returns + ------- + fin_diff_kernel : ndarray of shape (kernel_size,) + A NumPy-1D-vector resembling the kernel from the code example above. Since the + elements are not necessarily integers, the data type is ``np.float64``. + Its size is given by ``2 * floor((differences + 1) / 2) - 1 + accuracy`` where + ``floor`` returns the next lower integer. + + Raises + ------ + ValueError + If ``differences`` is below 1. + ValueError + If ``accuracy`` is not an even integer ``>= 2``. + + References + ---------- + The computation is based on the description in [1]_. + + .. [1] Wikipedia, "Finite difference coefficient - Central finite difference", + URL: https://en.wikipedia.org/wiki/Finite_difference_coefficient#Central_finite_difference + + """ # noqa: E501 + + ### Input Validation ### + + # first, difference order and accuracy are validated + check_scalar( + differences, + name="differences", + target_type=Integral, + min_val=1, + include_boundaries="left", + ) + + check_scalar( + accuracy, + name="accuracy", + target_type=Integral, + min_val=2, + include_boundaries="left", + ) + if accuracy % 2 == 1: + raise ValueError("Got accuracy = {accuracy}, expected an even integer.") + + ### Central Difference Kernel Computation ### + + # first, the size of the kernel is computed + kernel_size = 2 * ((differences + 1) // 2) - 1 + accuracy + half_kernel_size = kernel_size // 2 + + # then, the linear system to solve for the coefficients is set up + grid_vect = np.arange( + start=-half_kernel_size, + stop=half_kernel_size + 1, + step=1, + dtype=np.int64, + ) + lhs_mat = np.vander(grid_vect, N=kernel_size, increasing=True).transpose() + rhs_vect = np.zeros(shape=(kernel_size,), dtype=np.int64) + rhs_vect[differences] = factorial(differences) + + # the coefficients are computed by solving the linear system + return np.linalg.solve(lhs_mat, rhs_vect) + + def _gen_squ_fw_fin_diff_mat_cho_banded_transp_first( *, n_data: int, @@ -199,7 +308,7 @@ def gen_squ_fw_fin_diff_mat_cho_banded( If ``n_data`` is below ``differences + 1``, i.e., the kernel does not fit into the data at least once. ValueError - If the difference order is below 1. + If ``differences`` is below 1. Notes ----- @@ -262,3 +371,223 @@ def gen_squ_fw_fin_diff_mat_cho_banded( n_data=n_data, differences=differences, ) + + +def estimate_noise_stddev( + series: np.ndarray, + differences: int = 6, + diff_accuracy: int = 2, + window_size: Optional[int] = None, + extrapolator: Callable[..., np.ndarray] = np.pad, + extrapolator_args: Tuple[Any, ...] = ("reflect",), + power: Literal[-2, -1, 1, 2] = 1, + stddev_min: Union[float, int] = 1e-10, +) -> np.ndarray: + """ + EXPERIMENTAL FEATURE + + Estimates the local/global noise standard deviation of a series even in the presence + of trends, like baselines and peaks, as well as outliers by using forward finite + differences. + Please see the Notes section for further details. + + Parameters + ---------- + series : ndarray of shape (n_data,) + The series for which the noise standard deviation is estimated. + differences : int, default=6 + The order of the differences starting from 0 for the original curve, 1 for the + first order, 2 for the second order, ..., and ``m`` for the ``m``-th order + differences. + Empirically, 5-6 was found as a sweet spot, but even numbers work better with + the default ``extrapolator``. + Values below 1 are not allowed. + diff_accuracy : int, default=2 + The accuracy of the finite difference approximation, which has to be an even + integer ``>= 2``. + window_size : int or None, default=None + The odd window size around a datapoint to estimate its local noise standard + deviation. + Higher values will lead to a smoother noise standard deviation estimate by + sacrificing the local resolution. At the same time, edge effects start blurring + in if the ``extrapolator`` does not provide a good extrapolation. + If provided, it has to be at least 1. + If ``None``, the global noise standard deviation is estimated, i.e., it will + be the same for each data point. + extrapolator : callable, default=np.pad + The extrapolator function that is used to pad the series before the finite + differences and the median filter are applied. It will pad the signal with + ``pad_width = (diff_kernel_size // 2) + (window_size // 2)`` elements on each + side where ``diff_kernel_size`` is the size of the central finite differences + kernel (see the Notes for details). + It has to be a callable with the following signature: + + ```python + series_extrap = extrapolator(series, pad_width, *extrapolator_args) + ``` + + If ``window_size`` is ``None``, only the central finite differences kernel is + considered. + By default, the signal is padded by reflecting ``series`` at the edges on either + side, but of course the quality of the noise estimation can be improved by using + a more sophisticated extrapolation method. + extrapolator_args : tuple, default=("reflect",) + Additional arguments that are passed to the extrapolator function as described + for ``extrapolator``. + power : {-2, -1, 1, 2}, default=1 + The power to which the noise standard deviation is raised. + This can be used to compute the: + + - original noise standard deviation (``power=1``), + - the noise variance (``power=2``), + - the inverse noise standard deviation (``power=-1``), or + - the inverse noise variance (``power=-2``; typically used as weights). + + stddev_min : float or int, default=1e-10 + The minimum noise standard deviation that is allowed. + Any estimated noise standard deviation below this value will be set to this + value. + It must be at least ``1e-15``. + + Returns + ------- + noise_stddev : ndarray of shape (n_data,) + The estimated noise standard deviation raised to ``power`` for each data point + in the series. + + Raises + ------ + ValueError + If ``series.size`` is below less than the kernel or window size (see Notes for + details). + ValueError + If ``differences`` is below 1. + ValueError + If ``diff_accuracy`` is not an even integer ``>= 2``. + ValueError + If ``window_size`` is below 1. + + + References + ---------- + The estimation algorithm is an adaption of the global estimation logic applied for + the "DER SNR" proposed in [1]_ (see the Notes for further details). + + .. [1] Stoehr F., et al., "DER SNR: A Simple & General Spectroscopic Signal-to-Noise + Measurement Algorithm", Astronomical Data Analysis Software and Systems XVII P5.4 + ASP Conference Series, Vol. XXX, 2008 + + Notes + ----- + The "DER SNR" algorithm estimates a global noise level in a robust fashion by + applying a modified version of the Median Absolute Deviation (MAD) to the + derivative/differences of the signal. By using a moving MAD filter, the local noise + level can be estimated as well. + The algorithms does not work well for signals that are perfectly noise-free. + + The kernel size for the central finite difference kernel is given by + ``2 * floor((differences + 1) / 2) - 1 + diff_accuracy``. + + """ + + ### Input Validation ### + + # first, the window size, power, and minimum standard deviation are validated + # NOTE: the difference order and accuracy are by the central finite differences + # kernel function + # window size + if window_size is not None: + check_scalar( + window_size, + name="window_size", + target_type=Integral, + min_val=1, + include_boundaries="left", + ) + if window_size % 2 == 0: + raise ValueError( + "Got window_size = {window_size}, expected an odd integer." + ) + + # power + if power not in {-2, -1, 1, 2}: + raise ValueError(f"Got power = {power}, expected -2, -1, 1, or 2.") + + # minimum standard deviation + check_scalar( + stddev_min, + name="stddev_min", + target_type=Real, + min_val=1e-15, + include_boundaries="left", + ) + + # for validation of the series, the central finite differences kernel has to be + # computed + diff_kernel = calc_central_diff_kernel( + differences=differences, + accuracy=diff_accuracy, + ) + + # afterwards, the series is validated + if series.size < diff_kernel.size: + raise ValueError( + f"Got series.size = {series.size}, must be >= {diff_kernel.size} (kernel " + f"size)." + ) + + if window_size is not None: + if series.size < window_size: + raise ValueError( + f"Got series.size = {series.size}, must be >= {window_size} (window " + "size)." + ) + + ### Noise Standard Deviation Estimation ### + + # the signal is extrapolated to avoid edge effects + pad_width = diff_kernel.size // 2 + pad_width += 0 if window_size is None else window_size // 2 + series_extrap = extrapolator( + series, + pad_width, + *extrapolator_args, + ) + + # the absolute forward finite differences are computed ... + abs_diff_series = np.abs( + np.convolve(series_extrap, np.flip(diff_kernel), mode="valid") + ) + size_after_diff = abs_diff_series.size + + # ... and the median filter is applied to theses differences + prefactor = _MAD_PREFACTOR / np.linalg.norm(diff_kernel) + # Case 1: the global noise standard deviation is estimated + if window_size is None: + noise_stddev = np.full_like( + series, + fill_value=prefactor * np.median(abs_diff_series), + ) + + # Case 2: the local noise standard deviation is estimated + else: + half_window_size = window_size // 2 + noise_stddev = ( + prefactor + * median_filter( + abs_diff_series, + size=window_size, + mode="constant", + )[half_window_size : size_after_diff - half_window_size] + ) + + # the minimum-bounded noise standard deviation is raised to the power + noise_stddev = np.maximum(noise_stddev, stddev_min) + + if power in {-2, 2}: + noise_stddev = np.square(noise_stddev) + + if power in {-2, -1}: + noise_stddev = np.reciprocal(noise_stddev) + + return noise_stddev diff --git a/tests/fixtures.py b/tests/fixtures.py index 9176d3b..29fe6ed 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1,13 +1,24 @@ +### Imports ### + import os from typing import List import numpy as np import pytest +from tests.test_for_utils.utils_models import ( + NoiseEstimationReference, + RefDifferenceKernel, +) + +### Constants ### + test_directory = os.path.dirname(os.path.abspath(__file__)) path_to_resources = os.path.join(test_directory, "resources") +### Fixtures ### + @pytest.fixture def spectrum() -> List[np.ndarray]: @@ -122,12 +133,11 @@ def noise_level_whittaker_auto_lambda() -> np.ndarray: @pytest.fixture -def reference_finite_differences() -> List[tuple[int, int, np.ndarray]]: +def reference_forward_finite_differences() -> List[RefDifferenceKernel]: fin_diff_table = np.genfromtxt( os.path.join(path_to_resources, "reference_finite_differences.csv"), skip_header=2, delimiter=",", - missing_values="#N/A", filling_values=np.nan, dtype=np.float64, ) @@ -138,11 +148,73 @@ def reference_finite_differences() -> List[tuple[int, int, np.ndarray]]: # removed row = fin_diff_table[row_idx, ::] fin_diff_ordered_coeffs.append( - ( - int(row[0]), - int(row[1]), - row[2:][~np.isnan(row[2:])], + RefDifferenceKernel( + differences=round(row[0]), + accuracy=round(row[1]), + kernel=row[2:][~np.isnan(row[2:])], ) ) return fin_diff_ordered_coeffs + + +@pytest.fixture +def noise_level_estimation_signal() -> np.ndarray: + fpath = os.path.join( + path_to_resources, + "noise_level_estimation/noise_estimation_refs.csv", + ) + data = np.genfromtxt( + fpath, + delimiter=",", + skip_header=1, + filling_values=np.nan, + dtype=np.float64, + ) + + # the original signal is indicated by the first 4 columns with metadata being NaN + metadata = data[::, 0:4] + signal_idx = np.where(np.isnan(metadata).all(axis=1))[0][0] + + return data[signal_idx, 4:] + + +@pytest.fixture +def noise_level_estimation_refs() -> List[NoiseEstimationReference]: + fpath = os.path.join( + path_to_resources, + "noise_level_estimation/noise_estimation_refs.csv", + ) + data = np.genfromtxt( + fpath, + delimiter=",", + skip_header=1, + filling_values=np.nan, + dtype=np.float64, + ) + + # the original signal is indicated by the first 4 columns with metadata being NaN + # it has to be excluded from the references + metadata = data[::, 0:4] + signal_idx = np.where(np.isnan(metadata).all(axis=1))[0][0] + data = np.delete(data, obj=signal_idx, axis=0) + + # then, all the references are extracted + noise_level_refs = [] + for row_idx in range(0, data.shape[0]): + row = data[row_idx, ::] + # if the window size is 0, it is set to None because this indicates that the + # global noise level is to be estimated rather than a local one + window_size = int(row[0]) + window_size = window_size if window_size > 0 else None + noise_level_refs.append( + NoiseEstimationReference( + window_size=window_size, + min_noise_level=row[1], + differences=round(row[2]), + accuracy=round(row[3]), + noise_level=row[4:], + ) + ) + + return noise_level_refs diff --git a/tests/resources/noise_level_estimation/noise_estimation_refs.csv b/tests/resources/noise_level_estimation/noise_estimation_refs.csv new file mode 100644 index 0000000..65db6a8 --- /dev/null +++ b/tests/resources/noise_level_estimation/noise_estimation_refs.csv @@ -0,0 +1,8 @@ +Window Size,Min Noise Level,Difference Order,Accuracy,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100 +,,,,-0.0590418038387701,0.0943535834281746,0.1996106967074170,0.2076665665445250,0.2565976378911490,0.3130822235037050,0.4272820436955330,0.4336807688473590,0.5529790761949000,0.6205116979795420,0.6367459615029690,0.6402323907080600,0.7612810390503280,0.7352360002255730,0.7677988900730690,0.8308269827744960,0.8643816497026570,0.8600246311281250,0.9150360580810840,0.8493913653842080,0.9063026644138470,0.9989087664664360,1.0102321479428500,0.9726077451254480,1.0229378673527500,1.0037443170690300,0.9506403941093230,0.8659025013506060,1.0425258859932300,1.0269948414240000,1.0210347447476800,0.9869294147835280,0.9694273518552160,0.8751237973313290,0.8520166643388930,0.8161569956410880,0.7678425167804110,0.7700632777094800,0.7446175573498970,0.6553331368003540,0.7449736813899550,0.6302711853278030,0.6799413262506640,0.7227251574958850,0.8517863323804210,0.8181644158567000,0.9213256152994750,1.2810814194972500,0.8946588499365640,1.3736047892705300,1.2260807298006400,1.0611294804157300,0.9219014587530430,0.5680112835139860,0.5377392990469220,0.3298106843898140,-0.1719686918509980,-0.2819466900079930,-0.1992173465552170,-0.3814389372698620,-0.5068395639674160,-0.5440866677749160,-0.7586247980167080,-0.7157705137539000,-0.7071693108577800,-0.7763499947312900,-0.8502087885332560,-0.9487019031392380,-0.8849128283116610,-1.0185413995359800,-0.9063415914629020,-0.8742798194799050,-1.0850527768695800,-0.9846158086303880,-0.9157723830506820,-0.9821058771097520,-0.9963697347184110,-0.9650347191718880,-1.0039196559098500,-1.0209341512160900,-0.9892918059416240,-0.9840537215786370,-0.8851893179125000,-0.8769338218316060,-0.8940188473463740,-0.8425875126869280,-0.8289408469012870,-0.7532063497603390,-0.7700985647336680,-0.6656819958289950,-0.5902129175975290,-0.5184668194250220,-0.4741418132576440,-0.4164773414535820,-0.3324609887739230,-0.2674577016845110,-0.3231561578354770,-0.1671549465881780,-0.0991688934094777,0.0184878102069366,0.0593230959994008 +0,0.02,5,2,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795 +1,0.02,5,2,0.0200000000000000,0.1451029282433400,0.0772047120710864,0.0200000000000000,0.0200000000000000,0.0489717431487879,0.0274495861433574,0.0357637508810199,0.0684993097853416,0.0200000000000000,0.0700522575202647,0.0612439573762328,0.0260638611495695,0.0812309192143508,0.0461588271225360,0.0222450189854631,0.0531891974716099,0.0513633900333628,0.0200000000000000,0.0733367134797685,0.0775479046939276,0.0217871289061783,0.0717740549983404,0.0200000000000000,0.0422277561726533,0.0200000000000000,0.1153667013297750,0.0589669911798556,0.0820261518839881,0.0926552550358730,0.0200000000000000,0.0257106262844677,0.0200000000000000,0.0254223605983094,0.0476623506657087,0.0364680830983335,0.0200000000000000,0.0679620090761974,0.1080690093938710,0.0765226435820176,0.0200000000000000,0.0788703870443346,0.0320053196940122,0.0459419537572654,0.0262818803884840,0.2703113465235740,0.4519676897481760,0.3204245391241220,0.0830564581735304,0.4104779108823870,0.3857155691067090,0.2175026323541390,0.1222975788331630,0.0636683385614897,0.2978745616108900,0.2054137210245730,0.1715374549314800,0.2692057206749080,0.0200000000000000,0.1740928219363430,0.1602392629955660,0.0694865118001325,0.0731196713058297,0.1363617883550300,0.0551904931965811,0.0200000000000000,0.0733696078640926,0.0964238550462272,0.0360082885289664,0.1034773002961070,0.2403755230857390,0.1714901119416400,0.0772462931068471,0.1921249073584610,0.0700341809602307,0.0647348495205131,0.0649691733350237,0.0200000000000000,0.0501078392212950,0.0421229430929182,0.0327442257339355,0.0200000000000000,0.0538442229886795,0.0727395594456153,0.0287270678498866,0.0200000000000000,0.0262830354573370,0.0200000000000000,0.0312480515132131,0.0509995579399268,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0796640661166812,0.0931431553133826,0.0200000000000000,0.0595196003015288,0.0510785917754346,0.0443036193627499,0.0200000000000000 +11,0.02,5,2,0.0489717431487881,0.0274495861433573,0.0357637508810200,0.0489717431487881,0.0357637508810200,0.0357637508810200,0.0489717431487881,0.0357637508810200,0.0357637508810200,0.0461588271225360,0.0461588271225360,0.0461588271225360,0.0513633900333629,0.0513633900333629,0.0513633900333629,0.0531891974716106,0.0513633900333629,0.0513633900333629,0.0513633900333629,0.0461588271225360,0.0422277561726497,0.0513633900333629,0.0513633900333629,0.0589669911798476,0.0717740549983399,0.0589669911798476,0.0422277561726497,0.0422277561726497,0.0257106262844691,0.0422277561726497,0.0364680830983336,0.0364680830983336,0.0364680830983336,0.0364680830983336,0.0364680830983336,0.0257106262844691,0.0364680830983336,0.0364680830983336,0.0459419537572652,0.0459419537572652,0.0459419537572652,0.0679620090761976,0.0765226435820174,0.0788703870443343,0.0788703870443343,0.0830564581735283,0.2175026323541430,0.2175026323541430,0.2175026323541430,0.2703113465235760,0.2703113465235760,0.2175026323541430,0.2175026323541430,0.2054137210245730,0.2054137210245730,0.1740928219363430,0.1715374549314810,0.1602392629955660,0.1602392629955660,0.1602392629955660,0.1363617883550290,0.0733696078640913,0.0733696078640913,0.0731196713058293,0.0733696078640913,0.0733696078640913,0.0733696078640913,0.0772462931068475,0.0964238550462242,0.0772462931068475,0.0772462931068475,0.0772462931068475,0.0772462931068475,0.0700341809602293,0.0700341809602293,0.0649691733350243,0.0647348495205135,0.0538442229886796,0.0538442229886796,0.0501078392212965,0.0421229430929192,0.0327442257339349,0.0287270678498867,0.0312480515132135,0.0312480515132135,0.0287270678498867,0.0262830354573363,0.0262830354573363,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0312480515132135,0.0443036193627497,0.0200000000000000,0.0443036193627497,0.0443036193627497,0.0510785917754344,0.0510785917754344,0.0510785917754344 +25,0.02,5,2,0.0357637508810199,0.0489717431487883,0.0461588271225360,0.0357637508810199,0.0461588271225360,0.0461588271225360,0.0461588271225360,0.0489717431487883,0.0489717431487883,0.0489717431487883,0.0513633900333628,0.0489717431487883,0.0461588271225360,0.0461588271225360,0.0461588271225360,0.0461588271225360,0.0489717431487883,0.0513633900333628,0.0513633900333628,0.0513633900333628,0.0513633900333628,0.0461588271225360,0.0476623506657080,0.0461588271225360,0.0422277561726498,0.0461588271225360,0.0461588271225360,0.0476623506657080,0.0476623506657080,0.0476623506657080,0.0422277561726498,0.0459419537572654,0.0422277561726498,0.0422277561726498,0.0459419537572654,0.0459419537572654,0.0476623506657080,0.0589669911798475,0.0679620090761978,0.0679620090761978,0.0765226435820176,0.0679620090761978,0.0679620090761978,0.0765226435820176,0.0788703870443342,0.0830564581735285,0.0830564581735285,0.1080690093938710,0.1222975788331700,0.1222975788331700,0.1222975788331700,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1222975788331700,0.1363617883550290,0.1222975788331700,0.1034773002961070,0.0964238550462243,0.0772462931068477,0.0772462931068477,0.0733696078640913,0.0731196713058293,0.0700341809602294,0.0694865118001325,0.0700341809602294,0.0694865118001325,0.0649691733350244,0.0647348495205134,0.0551904931965811,0.0538442229886798,0.0509995579399262,0.0509995579399262,0.0501078392212966,0.0421229430929192,0.0421229430929192,0.0421229430929192,0.0421229430929192,0.0327442257339349,0.0327442257339349,0.0327442257339349,0.0327442257339349,0.0312480515132135,0.0312480515132135,0.0327442257339349,0.0327442257339349,0.0312480515132135,0.0312480515132135,0.0443036193627499,0.0312480515132135,0.0287270678498868,0.0262830354573363,0.0262830354573363,0.0312480515132135,0.0312480515132135 +11,0.02,5,6,0.0423226649461898,0.0213382604260963,0.0345832777894842,0.0423226649461898,0.0345832777894842,0.0345832777894842,0.0423226649461898,0.0345832777894842,0.0345832777894842,0.0423226649461898,0.0423226649461898,0.0466425139275356,0.0503726990070914,0.0503726990070914,0.0503726990070914,0.0522689341160636,0.0503726990070914,0.0503726990070914,0.0503726990070914,0.0466425139275356,0.0306469700143294,0.0503726990070914,0.0486302113596101,0.0486302113596101,0.0573742018717883,0.0486302113596101,0.0306469700143294,0.0306469700143294,0.0294598485057315,0.0306469700143294,0.0356909784143028,0.0356909784143028,0.0356909784143028,0.0356909784143028,0.0356909784143028,0.0294598485057315,0.0356909784143028,0.0356909784143028,0.0356909784143028,0.0486731792847783,0.0647624501115769,0.0767373878041499,0.0775063075538756,0.0796945243644425,0.0796945243644425,0.0855298076266875,0.2870710151500930,0.2870710151500930,0.2870710151500930,0.2870710151500930,0.2870710151500930,0.2717040293160000,0.2268032219320890,0.1736214700293550,0.1736214700293550,0.1713455406124420,0.1574551250434120,0.1317389301256910,0.1317389301256910,0.1317389301256910,0.1279514761457010,0.0821466649254990,0.0821466649254990,0.0796827022609885,0.0821466649254990,0.0821466649254990,0.0821466649254990,0.0821466649254990,0.0932762768253730,0.0821466649254990,0.0821466649254990,0.0821466649254990,0.0811093220566633,0.0573316881612044,0.0573316881612044,0.0563031203635952,0.0504775230475304,0.0504775230475304,0.0504775230475304,0.0471399118656961,0.0422763918127036,0.0422763918127036,0.0356466128786073,0.0356466128786073,0.0356466128786073,0.0303400136608894,0.0272555144285884,0.0272555144285884,0.0253196032708275,0.0253196032708275,0.0253196032708275,0.0253196032708275,0.0272555144285884,0.0303400136608894,0.0447277571118028,0.0272555144285884,0.0447277571118028,0.0447277571118028,0.0538429638790465,0.0538429638790465,0.0538429638790465 +11,0.02,6,2,0.0299189178180329,0.0299189178180329,0.0439481529019891,0.0439481529019891,0.0439481529019891,0.0330467400508825,0.0330467400508825,0.0330467400508825,0.0439481529019891,0.0439481529019891,0.0439481529019891,0.0330467400508825,0.0330467400508825,0.0330467400508825,0.0442014748656228,0.0442014748656228,0.0442014748656228,0.0213260087479265,0.0213260087479265,0.0213260087479265,0.0268493269616991,0.0348197958494532,0.0432342655432602,0.0432342655432602,0.0432342655432602,0.0348197958494532,0.0348197958494532,0.0415996328057206,0.0415996328057206,0.0385091946216850,0.0385091946216850,0.0385091946216850,0.0292728953624526,0.0292728953624526,0.0292728953624526,0.0292728953624526,0.0385091946216850,0.0415996328057206,0.0556937879041626,0.0688174135868027,0.0688174135868027,0.0730194668621024,0.0789472020180212,0.1022476970064910,0.1113502653907550,0.1302253619997060,0.1022476970064910,0.1404897947406030,0.1438409548185280,0.1438409548185280,0.1438409548185280,0.1438409548185280,0.1404897947406030,0.1128033154936850,0.1028090077284430,0.1025892369546820,0.1025892369546820,0.1028090077284430,0.1025892369546820,0.0539406445041053,0.0539406445041053,0.0518727238279349,0.0518727238279349,0.0539406445041053,0.0539406445041053,0.0539406445041053,0.1025742752639520,0.1025742752639520,0.0539406445041053,0.0528933997260198,0.0528933997260198,0.0528933997260198,0.0528933997260198,0.0287996226521029,0.0277483480339819,0.0277483480339819,0.0287996226521029,0.0287996226521029,0.0277483480339819,0.0277483480339819,0.0287996226521029,0.0309753027350675,0.0377844829340734,0.0388385937007103,0.0388385937007103,0.0388385937007103,0.0377844829340734,0.0372670600985638,0.0309753027350675,0.0309753027350675,0.0372670600985638,0.0372670600985638,0.0372670600985638,0.0372670600985638,0.0242780219496502,0.0238192920369165,0.0242780219496502,0.0242780219496502,0.0530368406787244,0.0530368406787244,0.0759226297230423 diff --git a/tests/resources/noise_level_estimation/noise_estimation_signal_refcalc.ods b/tests/resources/noise_level_estimation/noise_estimation_signal_refcalc.ods new file mode 100644 index 0000000..b684017 Binary files /dev/null and b/tests/resources/noise_level_estimation/noise_estimation_signal_refcalc.ods differ diff --git a/tests/test_for_utils/test_finite_differences.py b/tests/test_for_utils/test_finite_differences.py index 5818548..06f8786 100644 --- a/tests/test_for_utils/test_finite_differences.py +++ b/tests/test_for_utils/test_finite_differences.py @@ -6,39 +6,46 @@ ### Imports ### -from typing import List, Tuple +from typing import List, Optional import numpy as np import pytest from chemotools.utils._finite_differences import ( calc_forward_diff_kernel, + estimate_noise_stddev, gen_squ_fw_fin_diff_mat_cho_banded, ) -from tests.fixtures import reference_finite_differences # noqa: F401 +from tests.fixtures import noise_level_estimation_refs # noqa: F401 +from tests.fixtures import noise_level_estimation_signal # noqa: F401 +from tests.fixtures import reference_forward_finite_differences # noqa: F401 from tests.test_for_utils.utils_funcs import ( conv_upper_cho_banded_storage_to_sparse, multiply_vect_with_squ_fw_fin_diff_orig_first, multiply_vect_with_squ_fw_fin_diff_transpose_first, ) +from tests.test_for_utils.utils_models import ( + NoiseEstimationReference, + RefDifferenceKernel, +) ### Test Suite ### def test_forward_diff_kernel( - reference_finite_differences: List[Tuple[int, int, np.ndarray]] # noqa: F811 + reference_forward_finite_differences: List[RefDifferenceKernel], # noqa: F811 ) -> None: # each kernel is calculated and compared to the reference - for differences, _, reference in reference_finite_differences: - kernel = calc_forward_diff_kernel(differences=differences) + for ref_diff_kernel in reference_forward_finite_differences: + kernel = calc_forward_diff_kernel(differences=ref_diff_kernel.differences) - assert kernel.size == reference.size, ( - f"Difference order {differences} with accuracy 1 expected kernel size " - f"{reference.size} but got {kernel.size}" + assert kernel.size == ref_diff_kernel.size, ( + f"Difference order {ref_diff_kernel.differences} with accuracy 1 expected " + f"kernel size {ref_diff_kernel.size} but got {kernel.size}" ) - assert np.allclose(kernel, reference, atol=1e-8), ( - f"Difference order {differences} with accuracy 1 expected kernel " - f"{reference.tolist()} but got {kernel.tolist()}" + assert np.allclose(kernel, ref_diff_kernel.kernel, atol=1e-8), ( + f"Difference order {ref_diff_kernel.differences} with accuracy 1 expected " + f"kernel {ref_diff_kernel.kernel.tolist()} but got {kernel.tolist()}" ) @@ -142,3 +149,191 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( # NOTE: the following check has to be fairly strict when it comes to equivalence # since the NumPy and Chemotools are basically doing the same under the hood assert np.allclose(result, result_conv, atol=1e-10, rtol=1e-10) + + +@pytest.mark.parametrize( + "series, differences, accuracy, window_size, stddev_min", + [ + ( # Number 0 series is too small for difference kernel + np.arange(start=0, stop=5), + 10, + 2, + 3, + 1e-10, + ), + ( # Number 1 series is too small for difference kernel + np.arange(start=0, stop=5), + 10, + 2, + None, + 1e-10, + ), + ( # Number 2 series is too small for window size + np.arange(start=0, stop=5), + 1, + 2, + 11, + 1e-10, + ), + ( # Number 3 the difference order is 0 + np.arange(start=0, stop=10), + 0, + 2, + 3, + 1e-10, + ), + ( # Number 4 the difference order is negative + np.arange(start=0, stop=10), + -1, + 2, + 3, + 1e-10, + ), + ( # Number 5 the accuracy is odd + np.arange(start=0, stop=10), + 2, + 3, + 3, + 1e-10, + ), + ( # Number 6 the accuracy is odd + np.arange(start=0, stop=10), + 2, + 5, + 3, + 1e-10, + ), + ( # Number 7 the accuracy is 1 + np.arange(start=0, stop=10), + 2, + 1, + 3, + 1e-10, + ), + ( # Number 8 the accuracy is 0 + np.arange(start=0, stop=10), + 2, + 0, + 3, + 1e-10, + ), + ( # Number 9 the accuracy is negative + np.arange(start=0, stop=10), + 2, + -1, + 3, + 1e-10, + ), + ( # Number 10 the window size is even + np.arange(start=0, stop=10), + 1, + 2, + 6, + 1e-10, + ), + ( # Number 11 the window size is 0 + np.arange(start=0, stop=10), + 1, + 2, + 0, + 1e-10, + ), + ( # Number 12 the window size is negative + np.arange(start=0, stop=10), + 1, + 2, + -1, + 1e-10, + ), + ( # Number 13 the minimum standard deviation is zero + np.arange(start=0, stop=5), + 1, + 2, + 3, + 0.0, + ), + ( # Number 14 the minimum standard deviation is negative + np.arange(start=0, stop=5), + 1, + 2, + 3, + -10.0, + ), + ], +) +def test_estimate_noise_stddev_invalid_input( + series: np.ndarray, + differences: int, + accuracy: int, + window_size: Optional[int], + stddev_min: float, +) -> None: + """ + Tests the input validation of the function :func:`estimate_noise_stddev`. + + The combinations of + + - the series length, + - the difference order, + - the accuracy, + - the window size, and + - the minimum standard deviation + + are chosen such that the input is invalid. + + """ + + with pytest.raises(ValueError): + estimate_noise_stddev( + series=series, + differences=differences, + diff_accuracy=accuracy, + window_size=window_size, + stddev_min=stddev_min, + ) + + return + + +def test_noise_level_estimation( + noise_level_estimation_signal: np.ndarray, # noqa: F811 + noise_level_estimation_refs: List[NoiseEstimationReference], # noqa: F811 +) -> None: + """ + Tests the noise level estimation function :func:`estimate_noise_stddev`. + + The function is tested for all the reference noise levels. + + """ + + for ref in noise_level_estimation_refs: + # the noise level is estimated + noise_level = estimate_noise_stddev( + series=noise_level_estimation_signal, + differences=ref.differences, + diff_accuracy=ref.accuracy, + window_size=ref.window_size, + stddev_min=ref.min_noise_level, + ) + # then, the noise level itself is compared to the reference in a quite strict + # way because both results were computed in the same way with the only + # difference being that Chemotools uses Python and the reference uses + # LibreOffice Calc + assert np.allclose(noise_level, ref.noise_level, rtol=1e-12) + + # then, all the available powers to which the noise level can be raised are + # compared to the reference + for power, raised_noise_level_ref in ref.raised_noise_levels.items(): + raised_noise_level = estimate_noise_stddev( + series=noise_level_estimation_signal, + differences=ref.differences, + diff_accuracy=ref.accuracy, + window_size=ref.window_size, + stddev_min=ref.min_noise_level, + power=power, + ) + + # again, the comparison is quite strict + assert np.allclose(raised_noise_level, raised_noise_level_ref, atol=1e-12) + + return diff --git a/tests/test_for_utils/utils_models.py b/tests/test_for_utils/utils_models.py index 30b8a28..1a4c85b 100644 --- a/tests/test_for_utils/utils_models.py +++ b/tests/test_for_utils/utils_models.py @@ -6,8 +6,10 @@ ### Imports ### -from dataclasses import dataclass -from typing import Tuple +from dataclasses import dataclass, field +from typing import Dict, Literal, Optional, Tuple + +import numpy as np from chemotools.utils import _models from tests.test_for_utils.utils_funcs import float_is_bit_equal @@ -15,6 +17,44 @@ ### Dataclasses ### +@dataclass +class RefDifferenceKernel: + """ + Dataclass for storing the reference for the difference kernel validity check. + + """ + + differences: int + accuracy: int + kernel: np.ndarray + + size: int = field(init=False) + + def __post_init__(self) -> None: + self.size = self.kernel.size + + +@dataclass +class NoiseEstimationReference: + """ + Dataclass for storing the reference for the noise estimation validity check. + + """ + + window_size: Optional[int] + min_noise_level: float + differences: int + accuracy: int + noise_level: np.ndarray + + raised_noise_levels: Dict[Literal[-2, -1, 1, 2], np.ndarray] = field(init=False) + + def __post_init__(self) -> None: + self.raised_noise_levels = { + power: self.noise_level**power for power in (-2, -1, 1, 2) + } + + @dataclass class ExpectedWhittakerSmoothLambda: """