From 61a89adbd26ba97627574ffc18dd37204457eb6b Mon Sep 17 00:00:00 2001 From: georg Date: Mon, 2 Dec 2024 18:41:28 +0000 Subject: [PATCH 01/10] completely removed pynapple --- src/iblphotometry/bleach_corrections.py | 30 +-- src/iblphotometry/helpers.py | 20 +- src/iblphotometry/io.py | 17 +- src/iblphotometry/loaders.py | 54 +---- src/iblphotometry/metrics.py | 73 ++++--- src/iblphotometry/outlier_detection.py | 17 +- src/iblphotometry/pipelines.py | 250 ++++++++++++------------ src/iblphotometry/qc.py | 24 +-- src/iblphotometry/sliding_operations.py | 20 +- src/iblphotometry/synthetic.py | 21 +- src/tests/test_metrics.py | 6 +- src/tests/test_pipelines.py | 16 +- 12 files changed, 245 insertions(+), 303 deletions(-) diff --git a/src/iblphotometry/bleach_corrections.py b/src/iblphotometry/bleach_corrections.py index ceb84e0..a64cad9 100644 --- a/src/iblphotometry/bleach_corrections.py +++ b/src/iblphotometry/bleach_corrections.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod import warnings import numpy as np -import pynapple as nap +import pandas as pd from scipy.optimize import minimize from scipy.stats.distributions import norm from scipy.stats import gaussian_kde @@ -11,14 +11,14 @@ from inspect import signature -def correct(signal: nap.Tsd, reference: nap.Tsd, mode: str = 'subtract') -> nap.Tsd: +def correct(signal: pd.Series, reference: pd.Series, mode: str = 'subtract') -> pd.Series: if mode == 'subtract': signal_corrected = signal.values - reference.values if mode == 'divide': signal_corrected = signal.values / reference.values if mode == 'subtract-divide': signal_corrected = (signal.values - reference.values) / reference.values - return nap.Tsd(t=signal.times(), d=signal_corrected) + return pd.Series(signal_corrected, index=signal.index.values) def mse_loss(p, x, y, fun): @@ -240,8 +240,8 @@ def predict(self, x: np.ndarray, return_type='numpy'): y_hat = self.model.eq(x, *self.popt) if return_type == 'numpy': return y_hat - if return_type == 'pynapple': - return nap.Tsd(t=x, d=y_hat) + if return_type == 'pandas': + return pd.Series(y_hat, index=x) class BleachCorrection: @@ -258,9 +258,9 @@ def __init__( ) self.correction_method = correction_method - def correct(self, F: nap.Tsd): - self.regression.fit(F.times(), F.values) - ref = self.regression.predict(F.times(), return_type='pynapple') + def correct(self, F: pd.Series): + self.regression.fit(F.index.values, F.values) + ref = self.regression.predict(F.index.values, return_type='pandas') return correct(F, ref, mode=self.correction_method) @@ -282,14 +282,14 @@ def __init__( def correct( self, - F_ca: nap.Tsd, - F_iso: nap.Tsd, + F_ca: pd.Series, + F_iso: pd.Series, ): if self.lowpass_isosbestic is not None: F_iso = filt(F_iso, **self.lowpass_isosbestic) self.reg.fit(F_iso.values, F_ca.values) - F_iso_fit = self.reg.predict(F_iso.values, return_type='pynapple') + F_iso_fit = self.reg.predict(F_iso.values, return_type='pandas') return correct(F_ca, F_iso_fit, mode=self.correction_method) @@ -303,23 +303,23 @@ def __init__( self.filter_params = filter_params self.correction_method = correction_method - def correct(self, F: nap.Tsd): + def correct(self, F: pd.Series): F_filt = filt(F, **self.filter_params) return correct(F, F_filt, mode=self.correction_method) # convenience functions for pipelines -def lowpass_bleachcorrect(F: nap.Tsd, **kwargs): +def lowpass_bleachcorrect(F: pd.Series, **kwargs): bc = LowpassBleachCorrection(**kwargs) return bc.correct(F) -def exponential_bleachcorrect(F: nap.Tsd, **kwargs): +def 
exponential_bleachcorrect(F: pd.Series, **kwargs): model = DoubleExponDecay() ec = BleachCorrection(model, **kwargs) return ec.correct(F) -def isosbestic_correct(F_sig: nap.TsdFrame, F_ref: nap.TsdFrame, **kwargs): +def isosbestic_correct(F_sig: pd.DataFrame, F_ref: pd.DataFrame, **kwargs): ic = IsosbesticCorrection(**kwargs) return ic.correct(F_sig, F_ref) diff --git a/src/iblphotometry/helpers.py b/src/iblphotometry/helpers.py index 54fda65..16f9e1d 100644 --- a/src/iblphotometry/helpers.py +++ b/src/iblphotometry/helpers.py @@ -1,6 +1,6 @@ import numpy as np from scipy import signal -import pynapple as nap +import pandas as pd from ibldsp.utils import WindowGenerator from iblutil.numerical import rcoeff @@ -37,13 +37,13 @@ def mad(A: np.array): # TODO move to processing -def madscore(F: nap.Tsd): - y, t = F.values, F.times() - return nap.Tsd(t=t, d=mad(y)) +def madscore(F: pd.Series): + y, t = F.values, F.index.values + return pd.Series(mad(y), index=t) # TODO move to processing -def zscore(F: nap.Tsd, mode='classic'): +def zscore(F: pd.Series, mode='classic'): """pynapple friendly zscore Args: @@ -52,13 +52,13 @@ def zscore(F: nap.Tsd, mode='classic'): Returns: _type_: z-scored nap.Tsd """ - y, t = F.values, F.times() + y, t = F.values, F.index.values # mu, sig = np.average(y), np.std(y) - return nap.Tsd(t=t, d=z(y, mode=mode)) + return pd.Series(z(y, mode=mode), index=t) # TODO move to processing -def filt(F: nap.Tsd, N: int, Wn: float, fs: float = None, btype='low'): +def filt(F: pd.Series, N: int, Wn: float, fs: float = None, btype='low'): """a pynapple friendly wrapper for scipy.signal.butter and sosfiltfilt Args: @@ -71,12 +71,12 @@ def filt(F: nap.Tsd, N: int, Wn: float, fs: float = None, btype='low'): Returns: _type_: _description_ """ - y, t = F.values, F.times() + y, t = F.values, F.index.values if fs is None: fs = 1 / np.median(np.diff(t)) sos = signal.butter(N, Wn, btype, fs=fs, output='sos') y_filt = signal.sosfiltfilt(sos, y) - return nap.Tsd(t=t, d=y_filt) + return pd.Series(y_filt, index=t) def sliding_rcoeff(signal_a, signal_b, nswin, overlap=0): diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py index 21f3564..eefc875 100644 --- a/src/iblphotometry/io.py +++ b/src/iblphotometry/io.py @@ -1,7 +1,6 @@ # %% import numpy as np import pandas as pd -import pynapple as nap from pathlib import Path import warnings import pandera @@ -14,8 +13,8 @@ def from_array( times: np.array, data: np.array, channel_names: list[str] = None -) -> nap.TsdFrame: - return nap.TsdFrame(t=times, d=data, columns=channel_names) +) -> pd.DataFrame: + return pd.DataFrame(data, index=times, columns=channel_names) def from_dataframe( @@ -62,23 +61,17 @@ def from_dataframe( to_drop = ['None', ''] channel_names = [ch for ch in channel_names if ch not in to_drop] - raw_tfs = {} + raw_dfs = {} for channel in channel_names: - # TODO include the use of raw_df['include'] to set the time_support of the pynapple object - # requires conversion of boolen to nap.IntervalSet (have done this somewhere already. 
find code) - - # TODO check pynappe PR#343 https://github.com/pynapple-org/pynapple/pull/343 for future - # inclusion of locations_df as metadata - # get the data for the band df = raw_df.groupby(channel_column).get_group(channel) # if rename dict is passed, rename Region0X to the corresponding brain region if rename is not None: df = df.rename(columns=rename) data_columns = rename.values() - raw_tfs[channel] = nap.TsdFrame(df.set_index(time_column)[data_columns]) + raw_dfs[channel] = df.set_index(time_column)[data_columns] - return raw_tfs + return raw_dfs def from_pqt( diff --git a/src/iblphotometry/loaders.py b/src/iblphotometry/loaders.py index 30bb0bb..6786598 100644 --- a/src/iblphotometry/loaders.py +++ b/src/iblphotometry/loaders.py @@ -1,4 +1,3 @@ -import pynapple as nap import pandas as pd from pathlib import Path from iblphotometry import io @@ -28,31 +27,15 @@ def _load_data_from_eid(self, eid, rename=True) -> nap.TsdFrame: data_columns=list(locations_df.index), rename=locations_df['brain_region'].to_dict() if rename else None, ) - raw_tfs = io.from_dataframe(raw_photometry_df, **read_config) + raw_dfs = io.from_dataframe(raw_photometry_df, **read_config) - signal_band_names = list(raw_tfs.keys()) - col_names = list(raw_tfs[signal_band_names[0]].columns) + signal_band_names = list(raw_dfs.keys()) + col_names = list(raw_dfs[signal_band_names[0]].columns) if self.verbose: print(f'available signal bands: {signal_band_names}') print(f'available brain regions: {col_names}') - return raw_tfs - # if return_regions: - # return raw_tfs, cols - # else: - # return raw_tfs - - # def _load_data_from_pid(self, pid=None, signal=None) -> nap.Tsd: - # eid, pname = self.one.pid2eid(pid) - # locations = self._load_locations(eid) - # roi_name = dict(zip(locations['fiber'], locations.index))[pname] - # return self._load_data_from_eid(eid, signal=signal)[roi_name] - - # def pid2eid(self, pid: str) -> tuple[str, str]: - # return self.one.pid2eid(pid) - - # def eid2pid(self, eid: str): - # return self.one.eid2pid(eid) + return raw_dfs class KceniaLoader(PhotometryLoader): @@ -69,39 +52,20 @@ def _load_data_from_eid(self, eid: str, rename=True): signal_bands = ['raw_calcium', 'raw_isosbestic'] # HARDCODED but fine # flipping the data representation - raw_tfs = {} + raw_dfs = {} for band in signal_bands: df = pd.DataFrame([raw_dfs[pname][band].values for pname in pnames]).T df.columns = pnames df.index = raw_dfs[pname][band].index - raw_tfs[band] = nap.TsdFrame(df) + raw_dfs[band] = df if self.verbose: - print(f'available signal bands: {list(raw_tfs.keys())}') - cols = list(raw_tfs[list(raw_tfs.keys())[0]].columns) + print(f'available signal bands: {list(raw_dfs.keys())}') + cols = list(raw_dfs[list(raw_dfs.keys())[0]].columns) print(f'available brain regions: {cols}') - # if return_regions: - # return raw_tfs, pnames - # else: - return raw_tfs - - # def _load_data_from_eid(self, eid, signal=None): - # raise NotImplementedError - - # def get_mappable(self, eid): - # raise NotImplementedError - - # def get_mapping(self, eid, key=None, value=None): - # raise NotImplementedError - - # def pid2eid(self, pid: str) -> tuple[str, str]: - # return pid.split('_') + return raw_dfs - # def eid2pid(self, eid): - # pnames = self._eid2pnames(eid) - # pids = [f'{eid}_{pname}' for pname in pnames] - # return (pids, pnames) def _eid2pnames(self, eid: str): session_path = self.one.eid2path(eid) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index ce9950e..d227592 100644 --- 
a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -1,6 +1,5 @@ import numpy as np import pandas as pd -import pynapple as nap from scipy import stats from scipy.stats import ttest_ind @@ -27,99 +26,99 @@ # return P[1] - P[0] -def percentile_dist(A: nap.Tsd | np.ndarray, pc: tuple = (50, 95), axis=-1) -> float: +def percentile_dist(A: pd.Series | np.ndarray, pc: tuple = (50, 95), axis=-1) -> float: """the distance between two percentiles in units of z. Captures the magnitude of transients. Args: - A (nap.Tsd | np.ndarray): the input data, np.ndarray for stride tricks sliding windows + A (pd.Series | np.ndarray): the input data, np.ndarray for stride tricks sliding windows pc (tuple, optional): percentiles to be computed. Defaults to (50, 95). axis (int, optional): only for arrays, the axis to be computed. Defaults to -1. Returns: float: the value of the metric """ - if isinstance(A, nap.Tsd): # "overloading" + if isinstance(A, pd.Series): # "overloading" P = np.percentile(z(A.values), pc) elif isinstance(A, np.ndarray): P = np.percentile(z(A), pc, axis=axis) else: - raise TypeError('A must be nap.Tsd or np.ndarray.') + raise TypeError('A must be pd.Series or np.ndarray.') return P[1] - P[0] -def signal_asymmetry(A: nap.Tsd | np.ndarray, pc_comp: int = 95, axis=-1) -> float: +def signal_asymmetry(A: pd.Series | np.ndarray, pc_comp: int = 95, axis=-1) -> float: """the ratio between the distance of two percentiles to the median. Proportional to the the signal to noise. Args: - A (nap.Tsd | np.ndarray): _description_ + A (pd.Series | np.ndarray): _description_ pc_comp (int, optional): _description_. Defaults to 95. axis (int, optional): _description_. Defaults to -1. Returns: float: _description_ """ - if not (isinstance(A, nap.Tsd) or isinstance(A, np.ndarray)): - raise TypeError('A must be nap.Tsd or np.ndarray.') + if not (isinstance(A, pd.Series) or isinstance(A, np.ndarray)): + raise TypeError('A must be pd.Series or np.ndarray.') a = np.absolute(percentile_dist(A, (50, pc_comp), axis=axis)) b = np.absolute(percentile_dist(A, (100 - pc_comp, 50), axis=axis)) return a / b -def signal_skew(A: nap.Tsd | np.ndarray, axis=-1) -> float: - if isinstance(A, nap.Tsd): +def signal_skew(A: pd.Series | np.ndarray, axis=-1) -> float: + if isinstance(A, pd.Series): P = stats.skew(A.values) elif isinstance(A, np.ndarray): P = stats.skew(A, axis=axis) else: - raise TypeError('A must be nap.Tsd or np.ndarray.') + raise TypeError('A must be pd.Series or np.ndarray.') return P -def n_unique_samples(A: nap.Tsd | np.ndarray) -> int: +def n_unique_samples(A: pd.Series | np.ndarray) -> int: """number of unique samples in the signal. 
Low values indicate that the signal during acquisition was not within the range of the digitizer.""" - if isinstance(A, nap.Tsd): + if isinstance(A, pd.Series): return np.unique(A.values).shape[0] elif isinstance(A, np.ndarray): return A.shape[0] else: - raise TypeError('A must be nap.Tsd or np.ndarray.') + raise TypeError('A must be pd.Series or np.ndarray.') -def n_spikes(A: nap.Tsd | np.ndarray, sd: int = 5): +def n_spikes(A: pd.Series | np.ndarray, sd: int = 5): """count the number of spike artifacts in the recording.""" - if isinstance(A, nap.Tsd): + if isinstance(A, pd.Series): return detect_spikes(A.values, sd=sd).shape[0] elif isinstance(A, np.ndarray): return detect_spikes(A, sd=sd).shape[0] else: - raise TypeError('A must be nap.Tsd or np.ndarray.') + raise TypeError('A must be pd.Series or np.ndarray.') def n_outliers( - A: nap.Tsd | np.ndarray, w_size: int = 1000, alpha: float = 0.0005 + A: pd.Series | np.ndarray, w_size: int = 1000, alpha: float = 0.0005 ) -> int: """counts the number of outliers as detected by grubbs test for outliers. int: _description_ """ - if isinstance(A, nap.Tsd): + if isinstance(A, pd.Series): return detect_outliers(A.values, w_size=w_size, alpha=alpha).shape[0] elif isinstance(A, np.ndarray): return detect_outliers(A, w_size=w_size, alpha=alpha).shape[0] else: - raise TypeError('A must be nap.Tsd or np.ndarray.') + raise TypeError('A must be pd.Series or np.ndarray.') -def bleaching_tau(A: nap.Tsd) -> float: +def bleaching_tau(A: pd.Series) -> float: """overall tau of bleaching.""" - y, t = A.values, A.t + y, t = A.values, A.index.values reg = Regression(model=ExponDecay()) reg.fit(y, t) return reg.popt[1] def ttest_pre_post( - A: nap.Tsd, + A: pd.Series, trials: pd.DataFrame, event_name: str, fs=None, @@ -137,7 +136,7 @@ def ttest_pre_post( :param confid: float, confidence level (alpha) :return: boolean, True if metric passes """ - y, t = A.values, A.t + y, t = A.values, A.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs t_events = trials[event_name].values @@ -155,8 +154,8 @@ def ttest_pre_post( def has_response_to_event( - A: nap.Tsd, - event_times: nap.Ts, + A: pd.Series, + event_times: np.array, fs: float = None, window: tuple = (-1, 1), alpha: float = 0.005, @@ -165,14 +164,9 @@ def has_response_to_event( # checks if there is a significant response to an event # ibldsb way - y, t = A.values, A.t + y, t = A.values, A.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs - P = psth(y, t, event_times.t, fs=fs, event_window=window)[0] - - # or: pynapple style - # in the long run, this will be the preferred way as this will - # respect the .time_support of the pynapple object. # TODO verify this - # P = nap.compute_perievent_continuous(A, event_times, window).values + P, psth_ix = psth(y, t, event_times, fs=fs, event_window=window) # temporally averages the samples in the window. Sensitive to window size! 
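+    # (for mode == 'max', the per-event peak is taken instead, penalized by
+    # the global standard deviation of the signal)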
if mode == 'mean': @@ -182,28 +176,27 @@ def has_response_to_event( sig_samples = np.max(P, axis=0) - np.std(y) # baseline is all samples that are not part of the response - ts = event_times.t - gaps = nap.IntervalSet(start=ts + window[0], end=ts + window[1]) - base_samples = A.restrict(A.time_support.set_diff(gaps)).values + base_ix = np.setdiff1d(np.arange(y.shape[0]), psth_ix.flatten()) + base_samples = y[base_ix] res = ttest_ind(sig_samples, base_samples) return res.pvalue < alpha def has_responses( - A: nap.Tsd, + A: pd.Series, trials: pd.DataFrame, event_names: list = None, fs: float = None, window: tuple = (-1, 1), alpha: float = 0.005, ) -> bool: - t = A.t + t = A.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs res = [] for event_name in event_names: - event_times = nap.Ts(t=trials[event_name].values) + event_times = trials[event_name] res.append( has_response_to_event(A, event_times, fs=fs, window=window, alpha=alpha) ) diff --git a/src/iblphotometry/outlier_detection.py b/src/iblphotometry/outlier_detection.py index 3978187..e31cd82 100644 --- a/src/iblphotometry/outlier_detection.py +++ b/src/iblphotometry/outlier_detection.py @@ -1,6 +1,5 @@ import numpy as np from scipy.stats import t -import pynapple as nap from ibldsp.utils import WindowGenerator from copy import copy import pandas as pd @@ -8,7 +7,7 @@ import warnings -def _grubbs_single(y, alpha=0.005, mode='median'): +def _grubbs_single(y: np.array, alpha: float =0.005, mode: str='median') -> bool: # to apply a single pass of grubbs outlier detection # see https://en.wikipedia.org/wiki/Grubbs%27s_test @@ -28,7 +27,7 @@ def _grubbs_single(y, alpha=0.005, mode='median'): return False -def grubbs_test(y, alpha=0.005, mode='median'): +def grubbs_test(y: np.array, alpha: float =0.005, mode:str='median'): # apply grubbs test iteratively until no more outliers are found outliers = [] while _grubbs_single(y, alpha=alpha): @@ -88,15 +87,15 @@ def fillnan_kde(y: np.array, w: int = 25): return y -def remove_outliers(F: nap.Tsd, w_size: int = 1000, alpha: float = 0.005, w: int = 25): - y, t = F.values, F.t +def remove_outliers(F: pd.Series, w_size: int = 1000, alpha: float = 0.005, w: int = 25): + y, t = F.values, F.index.values y = copy(y) outliers = detect_outliers(y, w_size=w_size, alpha=alpha) while len(outliers) > 0: y[outliers] = np.nan y = fillnan_kde(y, w=w) outliers = detect_outliers(y, w_size=w_size, alpha=alpha) - return nap.Tsd(t=t, d=y) + return pd.Series(y, index=t) def detect_spikes(t: np.array, sd: int = 5): @@ -105,8 +104,8 @@ def detect_spikes(t: np.array, sd: int = 5): return np.where(bad_inds)[0] -def remove_spikes(F: nap.Tsd, sd: int = 5, w: int = 25): - y, t = F.values, F.t +def remove_spikes(F: pd.Series, sd: int = 5, w: int = 25): + y, t = F.values, F.index.values y = copy(y) outliers = detect_spikes(y, sd=sd) y[outliers] = np.nan @@ -120,4 +119,4 @@ def remove_spikes(F: nap.Tsd, sd: int = 5, w: int = 25): y[outliers] = np.nanmedian(y) warnings.warn('KDE fillnan failed, using global median') # TODO logger - return nap.Tsd(t=t, d=y) + return pd.Series(y, index=t) diff --git a/src/iblphotometry/pipelines.py b/src/iblphotometry/pipelines.py index 6a95040..72bcfa4 100644 --- a/src/iblphotometry/pipelines.py +++ b/src/iblphotometry/pipelines.py @@ -1,7 +1,7 @@ """this module holds a collection of processing pipelines for fiber photometry data""" import numpy as np -import pynapple as nap +import pandas as pd from iblphotometry.helpers import z, filt # from ibldsp.utils import WindowGenerator @@ 
-22,17 +22,17 @@ def run_pipeline( pipeline, - F_signal: nap.TsdFrame, - F_reference: nap.TsdFrame = None, -) -> nap.TsdFrame: + F_signal: pd.DataFrame, + F_reference: pd.DataFrame = None, +) -> pd.DataFrame: # copy - Fc = copy(F_signal) + Fc = F_signal.copy() if F_reference is not None: - Fc_ref = copy(F_reference) + Fc_ref = F_reference.copy() - if isinstance(F_signal, nap.Tsd): + if isinstance(F_signal, pd.Series): raise TypeError( - 'F_signal can not be nap.Tsd, is now required to be nap.TsdFrame' + 'F_signal can not be pd.Series, is now required to be pd.DataFrame' ) # iterate over the individual processing steps of the pipeline @@ -41,17 +41,17 @@ def run_pipeline( if 'needs_reference' in pipe_args: _pipe_args = {k: v for k, v in pipe_args.items() if k != 'needs_reference'} # check if F_ref is not None - _d = np.zeros_like(Fc.d) + _d = np.zeros_like(Fc.values) # _Fcd_ref = np.zeros_like(Fc_ref.d) for i, col in enumerate(Fc.columns): _d[:, i] = pipe_func(Fc[col], Fc_ref[col], **_pipe_args) # this step consumes the reference! - Fc = nap.TsdFrame(t=Fc.t, d=_d, columns=Fc.columns) + Fc = pd.DataFrame(_d, index=Fc.index.values, columns=Fc.columns) else: - _d = np.zeros_like(Fc.d) + _d = np.zeros_like(Fc.values) for i, col in enumerate(Fc.columns): _d[:, i] = pipe_func(Fc[col], **pipe_args) - Fc = nap.TsdFrame(t=Fc.t, d=_d, columns=Fc.columns) + Fc = pd.DataFrame(_d, index=Fc.index.values, columns=Fc.columns) return Fc @@ -86,116 +86,116 @@ def run_pipeline( # TODO this is not following the definition of a pipeline anymore # to be reconstructed -def bc_lp_sliding_mad( - F: nap.Tsd | nap.TsdFrame, - w_len: float = 120, - overlap: int = 90, - butterworth_lowpass=dict(N=3, Wn=0.01, btype='lowpass'), - signal_name: str = 'raw_calcium', -): - """_summary_ - - Args: - F (nap.Tsd): _description_ - w_len (float, optional): _description_. Defaults to 120. - overlap (int, optional): _description_. Defaults to 90. - butterworth_lowpass (_type_, optional): _description_. Defaults to dict(N=3, Wn=0.01, btype="lowpass"). - - Returns: - _type_: _description_ - """ - - if isinstance( - F, nap.TsdFrame - ): # if F is as TsdFrame, then use signal name to get the correct column - this is needed for the pipeline functionality in run_qc - if signal_name is None: - logger.critical('no signal name is provided for the pipeline') - else: - F = F[signal_name] - - bleach_correction = bleach_corrections.LowpassBleachCorrection( - correction_method='subtract-divide', - filter_params=butterworth_lowpass, - ) - F_bc = bleach_correction.correct(F) - F_res = sliding_operations.sliding_mad(F_bc, w_len=w_len, overlap=overlap) - return F_res - - -# TODO this is not following the definition of a pipeline anymore -def jove2019( - F: nap.TsdFrame, - ca_signal_name: str = 'raw_calcium', - isosbestic_signal_name: str = 'raw_isosbestic', - **params, -): - """ - Martianova, Ekaterina, Sage Aronson, and Christophe D. Proulx. "Multi-fiber photometry to record neural activity in freely-moving animals." JoVE (Journal of Visualized Experiments) 152 (2019): e60278. 
- :param raw_calcium: - :param raw_isosbestic: - :param params: - :return: - """ - raw_calcium = F[ca_signal_name] - raw_isosbestic = F[isosbestic_signal_name] - - # replace this with a low pass corrector - # remove photobleaching - bleach_correction = bleach_corrections.LowpassBleachCorrection( - correction_method='subtract-divide', - filter_params=dict(N=3, Wn=0.01, btype='lowpass'), - ) - calcium = bleach_correction.correct( - raw_calcium, - mode='subtract', - ).values - isosbestic = bleach_correction.correct( - raw_isosbestic, - mode='subtract', - ).values - - # zscoring using median instead of mean - calcium = z(calcium, mode='median') - isosbestic = z(isosbestic, mode='median') - - # regular regression - m = np.polyfit(isosbestic, calcium, 1) - ref = isosbestic * m[0] + m[1] - ph = (calcium - ref) / 100 - return nap.Tsd(t=raw_calcium.times(), d=ph) - - -# TODO this is not following the definition of a pipeline anymore -def isosbestic_regression( - F: nap.TsdFrame, - ca_signal_name: str = 'raw_calcium', - isosbestic_signal_name: str = 'raw_isosbestic', - fs: float = None, - regression_method: str = 'irls', - correction_method: str = 'subtract-divide', - **params, -): - raw_calcium = F[ca_signal_name] - raw_isosbestic = F[isosbestic_signal_name] - - t = F.times() - fs = 1 / np.median(np.diff(t)) if fs is None else fs - - isosbestic_correction = bleach_corrections.IsosbesticCorrection( - regression_method=regression_method, - correction_method=correction_method, - lowpass_isosbestic=dict(N=3, Wn=0.01, btype='lowpass'), - ) - - F_corr = isosbestic_correction.correct( - raw_calcium, - raw_isosbestic, - ) - - butterworth_signal = params.get( - 'butterworth_signal', - dict(N=3, Wn=7, btype='lowpass', fs=fs), # changed from 10 to 7 - ) - - F_corr = filt(F_corr, **butterworth_signal) - return F_corr +# def bc_lp_sliding_mad( +# F: nap.Tsd | nap.TsdFrame, +# w_len: float = 120, +# overlap: int = 90, +# butterworth_lowpass=dict(N=3, Wn=0.01, btype='lowpass'), +# signal_name: str = 'raw_calcium', +# ): +# """_summary_ + +# Args: +# F (nap.Tsd): _description_ +# w_len (float, optional): _description_. Defaults to 120. +# overlap (int, optional): _description_. Defaults to 90. +# butterworth_lowpass (_type_, optional): _description_. Defaults to dict(N=3, Wn=0.01, btype="lowpass"). + +# Returns: +# _type_: _description_ +# """ + +# if isinstance( +# F, nap.TsdFrame +# ): # if F is as TsdFrame, then use signal name to get the correct column - this is needed for the pipeline functionality in run_qc +# if signal_name is None: +# logger.critical('no signal name is provided for the pipeline') +# else: +# F = F[signal_name] + +# bleach_correction = bleach_corrections.LowpassBleachCorrection( +# correction_method='subtract-divide', +# filter_params=butterworth_lowpass, +# ) +# F_bc = bleach_correction.correct(F) +# F_res = sliding_operations.sliding_mad(F_bc, w_len=w_len, overlap=overlap) +# return F_res + + +# # TODO this is not following the definition of a pipeline anymore +# def jove2019( +# F: nap.TsdFrame, +# ca_signal_name: str = 'raw_calcium', +# isosbestic_signal_name: str = 'raw_isosbestic', +# **params, +# ): +# """ +# Martianova, Ekaterina, Sage Aronson, and Christophe D. Proulx. "Multi-fiber photometry to record neural activity in freely-moving animals." JoVE (Journal of Visualized Experiments) 152 (2019): e60278. 
+# :param raw_calcium: +# :param raw_isosbestic: +# :param params: +# :return: +# """ +# raw_calcium = F[ca_signal_name] +# raw_isosbestic = F[isosbestic_signal_name] + +# # replace this with a low pass corrector +# # remove photobleaching +# bleach_correction = bleach_corrections.LowpassBleachCorrection( +# correction_method='subtract-divide', +# filter_params=dict(N=3, Wn=0.01, btype='lowpass'), +# ) +# calcium = bleach_correction.correct( +# raw_calcium, +# mode='subtract', +# ).values +# isosbestic = bleach_correction.correct( +# raw_isosbestic, +# mode='subtract', +# ).values + +# # zscoring using median instead of mean +# calcium = z(calcium, mode='median') +# isosbestic = z(isosbestic, mode='median') + +# # regular regression +# m = np.polyfit(isosbestic, calcium, 1) +# ref = isosbestic * m[0] + m[1] +# ph = (calcium - ref) / 100 +# return nap.Tsd(t=raw_calcium.times(), d=ph) + + +# # TODO this is not following the definition of a pipeline anymore +# def isosbestic_regression( +# F: nap.TsdFrame, +# ca_signal_name: str = 'raw_calcium', +# isosbestic_signal_name: str = 'raw_isosbestic', +# fs: float = None, +# regression_method: str = 'irls', +# correction_method: str = 'subtract-divide', +# **params, +# ): +# raw_calcium = F[ca_signal_name] +# raw_isosbestic = F[isosbestic_signal_name] + +# t = F.times() +# fs = 1 / np.median(np.diff(t)) if fs is None else fs + +# isosbestic_correction = bleach_corrections.IsosbesticCorrection( +# regression_method=regression_method, +# correction_method=correction_method, +# lowpass_isosbestic=dict(N=3, Wn=0.01, btype='lowpass'), +# ) + +# F_corr = isosbestic_correction.correct( +# raw_calcium, +# raw_isosbestic, +# ) + +# butterworth_signal = params.get( +# 'butterworth_signal', +# dict(N=3, Wn=7, btype='lowpass', fs=fs), # changed from 10 to 7 +# ) + +# F_corr = filt(F_corr, **butterworth_signal) +# return F_corr diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py index 794fecd..908b3c7 100644 --- a/src/iblphotometry/qc.py +++ b/src/iblphotometry/qc.py @@ -1,7 +1,7 @@ # %% import numpy as np from scipy.stats import linregress -import pynapple as nap +import pandas as pd from tqdm import tqdm import logging @@ -21,7 +21,7 @@ # %% # those could be in metrics def sliding_metric( - F: nap.Tsd, + F: pd.Series, w_len: float, fs: float = None, metric: callable = None, @@ -39,7 +39,7 @@ def sliding_metric( Returns: _type_: _description_ """ - y, t = F.values, F.times() + y, t = F.values, F.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs w_size = int(w_len * fs) @@ -53,12 +53,12 @@ def sliding_metric( m = metric(yw, **metric_kwargs) if metric_kwargs is not None else metric(yw) - return nap.Tsd(t=t[inds + int(w_size / 2)], d=m) + return pd.Series(m, index=t[inds + int(w_size / 2)]) # eval pipleline will be here def eval_metric( - F: nap.Tsd, + F: pd.Series, metric: callable = None, metric_kwargs: dict = None, sliding_kwargs: dict = None, @@ -77,16 +77,16 @@ def eval_metric( return dict(value=m, rval=r, pval=p) -def qc_Tsd( - F: nap.Tsd, +def qc_series( + F: pd.Series, qc_metrics: dict, sliding_kwargs=None, # if present, calculate everything in a sliding manner trials=None, # if present, put trials into params eid: str = None, # FIXME but left as is for now just to keep the logger happy brain_region: str = None, # FIXME but left as is for now just to keep the logger happy ) -> dict: - if isinstance(F, nap.TsdFrame): - raise TypeError('F can not be nap.TsdFrame') + if isinstance(F, pd.DataFrame): + raise TypeError('F can not be a 
dataframe') # should cover all cases qc_results = {} @@ -137,7 +137,7 @@ def run_qc( for band in signal_bands: raw_tf = raw_tfs[band] for region in brain_regions: - qc_result = qc_Tsd( + qc_result = qc_series( raw_tf[region], qc_metrics['raw'], sliding_kwargs=None, eid=eid ) qc_results.append( @@ -161,7 +161,7 @@ def run_qc( for region in brain_regions: # sliding qc of the processed data - qc_proc = qc_Tsd( + qc_proc = qc_series( proc_tf[region], qc_metrics=qc_metrics['processed'], sliding_kwargs=qc_metrics['sliding_kwargs'], @@ -170,7 +170,7 @@ def run_qc( ) # qc with metrics that use behavior - qc_resp = qc_Tsd( + qc_resp = qc_series( proc_tf[region], qc_metrics['response'], trials=trials, diff --git a/src/iblphotometry/sliding_operations.py b/src/iblphotometry/sliding_operations.py index f22ade5..f0baaf4 100644 --- a/src/iblphotometry/sliding_operations.py +++ b/src/iblphotometry/sliding_operations.py @@ -1,7 +1,7 @@ """this module holds a collection of processing pipelines for fiber photometry data""" import numpy as np -import pynapple as nap +import pandas as pd from iblphotometry.helpers import z from iblphotometry.behavior import psth from ibldsp.utils import WindowGenerator @@ -49,8 +49,8 @@ def make_sliding_window( return B -def sliding_dFF(F: nap.Tsd, w_len: float, fs=None, weights=None): - y, t = F.values, F.times() +def sliding_dFF(F: pd.Series, w_len: float, fs=None, weights=None): + y, t = F.values, F.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs w_size = int(w_len * fs) @@ -71,10 +71,10 @@ def _dFF(A: np.array): B = make_sliding_window(y, w_size) mus = np.average(B, axis=1) d = (y - mus) / mus - return nap.Tsd(t=t, d=d) + return pd.Series(d, index=t) -def sliding_z(F: nap.Tsd, w_len: float, fs=None, weights=None): +def sliding_z(F: pd.Series, w_len: float, fs=None, weights=None): """sliding window z-score of a pynapple time series with data with optional weighting Args: @@ -85,7 +85,7 @@ def sliding_z(F: nap.Tsd, w_len: float, fs=None, weights=None): Returns: _type_: _description_ """ - y, t = F.values, F.times() + y, t = F.values, F.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs w_size = int(w_len * fs) @@ -102,11 +102,11 @@ def sliding_z(F: nap.Tsd, w_len: float, fs=None, weights=None): B = make_sliding_window(y, w_size) mus, sds = np.average(B, axis=1), np.std(B, axis=1) d = (y - mus) / sds - return nap.Tsd(t=t, d=d) + return pd.Series(d, index=t) -def sliding_mad(F: nap.Tsd, w_len: float = None, fs=None, overlap=90): - y, t = F.values, F.times() +def sliding_mad(F: pd.Series, w_len: float = None, fs=None, overlap=90): + y, t = F.values, F.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs w_size = int(w_len * fs) @@ -117,7 +117,7 @@ def sliding_mad(F: nap.Tsd, w_len: float = None, fs=None, overlap=90): rmswin, _ = psth(y, t, t_events=trms, fs=fs, event_window=[0, w_len]) gain = np.nanmedian(np.abs(y)) / np.nanmedian(np.abs(rmswin), axis=0) gain = np.interp(t, trms, gain) - return nap.Tsd(t=t, d=y * gain) + return pd.Series(y * gain, index=t) # def sliding_mad_new(F: nap.Tsd, w_len: float = None, fs=None, overlap=90): diff --git a/src/iblphotometry/synthetic.py b/src/iblphotometry/synthetic.py index b41c2d1..2c44060 100644 --- a/src/iblphotometry/synthetic.py +++ b/src/iblphotometry/synthetic.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd import scipy.signal -import pynapple as nap + def synthetic101(fs=30, rl=1000, event_rate=0.2): @@ -37,7 +37,7 @@ def synthetic101(fs=30, rl=1000, event_rate=0.2): ), 
event_times -def generate_TsdFrame(sigma: float = 0.01): +def generate_dataframe(sigma: float = 0.01): # to generate synthetic data in the internal data representation in dict of nap.TsdFrames df, _ = synthetic101(30, 1000, 0.2) df = df.set_index('times') @@ -46,16 +46,9 @@ def generate_TsdFrame(sigma: float = 0.01): if sigma is not None: df[['raw_calcium', 'raw_isosbestic']] += np.random.randn(*df.shape) * sigma - raw_tfs = dict( - signal=nap.TsdFrame( - t=df.index, - d=df['raw_calcium'], - columns=['Region01'], - ), - reference=nap.TsdFrame( - t=df.index, - d=df['raw_isosbestic'], - columns=['Region01'], - ), + raw_dfs = dict( + raw_calcium=pd.DataFrame(df['raw_calcium'].values, index=df.index, columns=['Region01']), + raw_isosbestic=pd.DataFrame(df['raw_isosbestic'].values, index=df.index, columns=['Region01']), ) - return raw_tfs + + return raw_dfs diff --git a/src/tests/test_metrics.py b/src/tests/test_metrics.py index 75e63cc..f94b164 100644 --- a/src/tests/test_metrics.py +++ b/src/tests/test_metrics.py @@ -11,14 +11,14 @@ class TestMetrics(tests.base_tests.PhotometryDataTestCase): def test_metrics(self): # get data - raw_tfs = fio.from_pqt( + raw_dfs = fio.from_pqt( self.paths['photometry_signal_pqt'], self.paths['photometryROI_locations_pqt'], ) trials = pd.read_parquet(self.paths['trials_table_pqt']) # testing metrics with nap.Tsd - raw_tsd = raw_tfs['GCaMP']['DMS'] + raw_tsd = raw_dfs['GCaMP']['DMS'] metrics.bleaching_tau(raw_tsd) metrics.n_spikes(raw_tsd) @@ -43,7 +43,7 @@ def test_metrics(self): metrics.has_responses(raw_tsd, trials, BEHAV_EVENTS) # testing metrics with np.array - raw_array = raw_tfs['GCaMP']['DMS'].d + raw_array = raw_dfs['GCaMP']['DMS'].values # metrics.bleaching_tau(raw_tsd) metrics.n_spikes(raw_array) metrics.detect_spikes(raw_array) diff --git a/src/tests/test_pipelines.py b/src/tests/test_pipelines.py index eda5800..277df91 100644 --- a/src/tests/test_pipelines.py +++ b/src/tests/test_pipelines.py @@ -6,30 +6,30 @@ sliding_mad_pipeline, isosbestic_correction_pipeline, ) -from iblphotometry.synthetic import generate_TsdFrame +from iblphotometry.synthetic import generate_dataframe import tests.base_tests class TestPipelines(tests.base_tests.PhotometryDataTestCase): def test_single_band_pipeline(self): # on synthetic data - raw_tfs = generate_TsdFrame() - run_pipeline(sliding_mad_pipeline, raw_tfs['signal']) + raw_dfs = generate_dataframe() + run_pipeline(sliding_mad_pipeline, raw_dfs['raw_calcium']) Path(__file__).parent.joinpath() # on real data - raw_tfs = fio.from_pqt( + raw_dfs = fio.from_pqt( self.paths['photometry_signal_pqt'], self.paths['photometryROI_locations_pqt'], ) - signal_bands = list(raw_tfs.keys()) - run_pipeline(sliding_mad_pipeline, raw_tfs[signal_bands[0]]) + signal_bands = list(raw_dfs.keys()) + run_pipeline(sliding_mad_pipeline, raw_dfs[signal_bands[0]]) def test_isosbestic_pipeline(self): # on synthetic data - raw_tfs = generate_TsdFrame() + raw_dfs = generate_dataframe() # run pipeline run_pipeline( - isosbestic_correction_pipeline, raw_tfs['signal'], raw_tfs['reference'] + isosbestic_correction_pipeline, raw_dfs['raw_calcium'], raw_dfs['raw_isosbestic'] ) From bac70fbb9a5508471f0e104af8a274f9fc92a408 Mon Sep 17 00:00:00 2001 From: georg Date: Mon, 2 Dec 2024 18:47:38 +0000 Subject: [PATCH 02/10] and removed pynapple from the requirements as well --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 085e653..1313c5b 100644 --- a/requirements.txt +++ 
b/requirements.txt
@@ -3,5 +3,4 @@ numpy
 matplotlib
 pytest
 scipy
-pandera
-pynapple
\ No newline at end of file
+pandera
\ No newline at end of file

From b83ba91bba812f788b524dd7e0838ec478c53fa5 Mon Sep 17 00:00:00 2001
From: georg
Date: Tue, 3 Dec 2024 11:47:25 +0000
Subject: [PATCH 03/10] neurophotometrics.py contains LED-state constants and io.py imports them

---
 src/iblphotometry/io.py                | 16 +++++++++++--
 src/iblphotometry/neurophotometrics.py | 31 ++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)
 create mode 100644 src/iblphotometry/neurophotometrics.py

diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py
index eefc875..18ba56c 100644
--- a/src/iblphotometry/io.py
+++ b/src/iblphotometry/io.py
@@ -5,7 +5,7 @@
 import warnings
 import pandera
 
-from ibllib.pipes.neurophotometrics import (
+from iblphotometry.neurophotometrics import (
     LIGHT_SOURCE_MAP,
     LED_STATES,
 )
@@ -214,7 +214,6 @@ def from_raw_neurophotometrics(
     return from_dataframe(df, **read_config)
 
 
-# TODO externalize the validator in iblrig as a seperate function and reuse here
 def _validate_dataframe(
     df: pd.DataFrame,
     data_columns=None,
@@ -233,3 +232,16 @@
     )
 
     return schema_raw_data.validate(df)
+
+def _validate_neurophotometrics_digital_inputs(df: pd.DataFrame) -> pd.DataFrame:
+    schema_digital_inputs = pandera.DataFrameSchema(
+        columns=dict(
+            ChannelName=pandera.Column(str, coerce=True),
+            Channel=pandera.Column(pandera.Int8, coerce=True),
+            AlwaysTrue=pandera.Column(bool, coerce=True),
+            SystemTimestamp=pandera.Column(pandera.Float64),
+            ComputerTimestamp=pandera.Column(pandera.Float64),
+        )
+    )
+    return schema_digital_inputs.validate(df)
+
diff --git a/src/iblphotometry/neurophotometrics.py b/src/iblphotometry/neurophotometrics.py
new file mode 100644
index 0000000..4fb0050
--- /dev/null
+++ b/src/iblphotometry/neurophotometrics.py
@@ -0,0 +1,31 @@
+"""
+Neurophotometrics FP3002 specific information.
+The light source map refers to the available LEDs on the system.
+The flags refer to the byte encoding of LED states in the system.
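+
+A minimal decoding sketch (illustrative only; `flag_to_channel` is a
+hypothetical helper, not part of this module):
+
+    >>> from iblphotometry.neurophotometrics import LED_STATES
+    >>> flag_to_channel = {
+    ...     flag: channel
+    ...     for channel, flags in LED_STATES.items()
+    ...     if channel != 'Condition'
+    ...     for flag in flags.values()
+    ... }
+    >>> flag_to_channel[18]  # flag 18 decodes to L470 under 'Output 0 signal HIGH'
+    'L470'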
+""" +LIGHT_SOURCE_MAP = { + 'color': ['None', 'Violet', 'Blue', 'Green'], + 'wavelength': [0, 415, 470, 560], + 'name': ['None', 'Isosbestic', 'GCaMP', 'RCaMP'], +} + +LED_STATES = { + 'Condition': { + 0: 'No additional signal', + 1: 'Output 1 signal HIGH', + 2: 'Output 0 signal HIGH', + 3: 'Stimulation ON', + 4: 'GPIO Line 2 HIGH', + 5: 'GPIO Line 3 HIGH', + 6: 'Input 1 HIGH', + 7: 'Input 0 HIGH', + 8: 'Output 0 signal HIGH + Stimulation', + 9: 'Output 0 signal HIGH + Input 0 signal HIGH', + 10: 'Input 0 signal HIGH + Stimulation', + 11: 'Output 0 HIGH + Input 0 HIGH + Stimulation', + }, + 'No LED ON': {0: 0, 1: 8, 2: 16, 3: 32, 4: 64, 5: 128, 6: 256, 7: 512, 8: 48, 9: 528, 10: 544, 11: 560}, + 'L415': {0: 1, 1: 9, 2: 17, 3: 33, 4: 65, 5: 129, 6: 257, 7: 513, 8: 49, 9: 529, 10: 545, 11: 561}, + 'L470': {0: 2, 1: 10, 2: 18, 3: 34, 4: 66, 5: 130, 6: 258, 7: 514, 8: 50, 9: 530, 10: 546, 11: 562}, + 'L560': {0: 4, 1: 12, 2: 20, 3: 36, 4: 68, 5: 132, 6: 260, 7: 516, 8: 52, 9: 532, 10: 548, 11: 564} +} \ No newline at end of file From d77ac23139f89cbfe91193e08b0a5465a3279f4d Mon Sep 17 00:00:00 2001 From: georg Date: Tue, 3 Dec 2024 12:35:28 +0000 Subject: [PATCH 04/10] processing.py created and functions moved there --- src/iblphotometry/bleach_corrections.py | 325 ----------- src/iblphotometry/helpers.py | 98 ---- src/iblphotometry/metrics.py | 8 +- src/iblphotometry/outlier_detection.py | 122 ---- src/iblphotometry/pipelines.py | 15 +- src/iblphotometry/processing.py | 730 ++++++++++++++++++++++++ src/iblphotometry/qc.py | 2 +- src/iblphotometry/sliding_operations.py | 138 ----- 8 files changed, 734 insertions(+), 704 deletions(-) delete mode 100644 src/iblphotometry/bleach_corrections.py delete mode 100644 src/iblphotometry/helpers.py delete mode 100644 src/iblphotometry/outlier_detection.py create mode 100644 src/iblphotometry/processing.py delete mode 100644 src/iblphotometry/sliding_operations.py diff --git a/src/iblphotometry/bleach_corrections.py b/src/iblphotometry/bleach_corrections.py deleted file mode 100644 index a64cad9..0000000 --- a/src/iblphotometry/bleach_corrections.py +++ /dev/null @@ -1,325 +0,0 @@ -from abc import ABC, abstractmethod -import warnings -import numpy as np -import pandas as pd -from scipy.optimize import minimize -from scipy.stats.distributions import norm -from scipy.stats import gaussian_kde -from scipy.special import pseudo_huber - -from iblphotometry.helpers import filt -from inspect import signature - - -def correct(signal: pd.Series, reference: pd.Series, mode: str = 'subtract') -> pd.Series: - if mode == 'subtract': - signal_corrected = signal.values - reference.values - if mode == 'divide': - signal_corrected = signal.values / reference.values - if mode == 'subtract-divide': - signal_corrected = (signal.values - reference.values) / reference.values - return pd.Series(signal_corrected, index=signal.index.values) - - -def mse_loss(p, x, y, fun): - # mean squared error - y_hat = fun(x, *p) - return np.sum((y - y_hat) ** 2) / y.shape[0] - - -def mae_loss(p, x, y, fun): - # mean absolute error - y_hat = fun(x, *p) - return np.sum(np.absolute(y - y_hat)) / y.shape[0] - - -def huber_loss(p, x, y, fun, d): - # huber loss function - y_hat = fun(x, *p) - r = y - y_hat - return np.sum(pseudo_huber(d, r)) / y.shape[0] - - -def irls_loss(p, x, y, fun, d=1e-7): - # iteratively reweighted least squares - loss function - y_hat = fun(x, *p) - a = y - y_hat - # irls weight - f = np.max(np.stack([np.abs(a), np.ones(a.shape[0]) * d]), axis=0) - w = 1 / f - return 
np.sum(w * np.abs(a) ** 2) / y.shape[0] - - -class AbstractModel(ABC): - @abstractmethod - def eq(): - # the model equation - ... - - @abstractmethod - def est_p0(): - # given data, estimate model parameters - ... - - def _calc_r_squared(self, y, y_hat): - r = 1 - np.sum((y - y_hat) ** 2) / np.sum((y - np.average(y)) ** 2) - return r - - def _calc_likelihood(self, y, y_hat, n_samples=-1, use_kde=False): - rs = y - y_hat - if n_samples > 0: - inds = np.random.randint(0, y.shape[0], size=n_samples) - rs = rs[inds] - if use_kde is True: - # explicit estimation of the distribution of residuals - if n_samples == -1: - warnings.warn( - f'calculating KDE on {y.values.shape[0]} samples. This might be slow' - ) - dist = gaussian_kde(rs) - else: - # using RSME - sig = np.sqrt(np.average((y - y_hat) ** 2)) - dist = norm(0, sig) - ll = np.sum(np.log(dist.pdf(rs))) - return ll - - def _calc_aic(self, k, ll): - aic = 2 * k - 2 * ll # np.log(ll) - return aic - - def calc_model_stats(self, y, y_hat, n_samples: int = -1, use_kde: bool = False): - r_sq = self._calc_r_squared(y, y_hat) - ll = self._calc_likelihood(y, y_hat, n_samples, use_kde) - k = len(signature(self.eq).parameters) - 1 - aic = self._calc_aic(ll, k) - return dict(r_sq=r_sq, ll=ll, aic=aic) - - -### MODELS -eps = np.finfo(np.float64).eps - - -class LinearModel(AbstractModel): - def eq(self, x, m, b): - return x * m + b - - def est_p0(self, x: np.array, y: np.array): - return tuple(np.polyfit(x, y, 1)) - # x, y = np.sort(x), np.sort(y) - # dx = x[-1] - x[0] - # dy = y[-1] - y[0] - # m = dy / dx - # b = np.average(y - (m * x)) - # return (m, b) - - -class ExponDecay(AbstractModel): - bounds = ((0, np.inf), (eps, np.inf), (-np.inf, np.inf)) - - def eq(self, t, A, tau, b): - return A * np.exp(-t / tau) + b - - def est_p0(self, t: np.array, y: np.array): - return (y[0], t[int(t.shape[0] / 3)], y[-1]) - - -class DoubleExponDecay(AbstractModel): - bounds = ( - (0, np.inf), - (eps, np.inf), - (0, np.inf), - (eps, np.inf), - (-np.inf, np.inf), - ) - - def eq(self, t, A1, tau1, A2, tau2, b): - return A1 * np.exp(-t / tau1) + A2 * np.exp(-t / tau2) + b - - def est_p0(self, t: np.array, y: np.array): - A_est = y[0] - tau_est = t[int(t.shape[0] / 3)] - b_est = y[-1] - return (A_est, tau_est, A_est / 2, tau_est / 2, b_est) - - -class TripleExponDecay(AbstractModel): - bounds = ( - (0, np.inf), - (eps, np.inf), - (0, np.inf), - (eps, np.inf), - (0, np.inf), - (eps, np.inf), - (-np.inf, np.inf), - ) - - def eq(self, t, A1, tau1, A2, tau2, A3, tau3, b): - return ( - A1 * np.exp(-t / tau1) + A2 * np.exp(-t / tau2) + A3 * np.exp(-t / tau3) + b - ) - - def est_p0(self, t: np.array, y: np.array): - A_est = y[0] - tau_est = t[int(t.shape[0] / 3)] - b_est = y[-1] - return ( - A_est, - tau_est, - A_est * 0.66, - tau_est * 0.66, - A_est * 0.33, - tau_est * 0.33, - b_est, - ) - - -class Regression: - def __init__(self, model=None, method: str = 'mse', method_params=None): - self.model = model - self.method = method - self.params = method_params if method_params is not None else {} - - def fit(self, x: np.ndarray, y: np.ndarray, algorithm='L-BFGS-B') -> tuple: - p0 = self.model.est_p0(x, y) - bounds = self.model.bounds if hasattr(self.model, 'bounds') else None - if self.method == 'mse': - minimize_result = minimize( - mse_loss, - p0, - args=(x, y, self.model.eq), - bounds=bounds, - method=algorithm, - ) - if self.method == 'mae': - minimize_result = minimize( - mae_loss, - p0, - args=(x, y, self.model.eq), - bounds=bounds, - method=algorithm, - ) - if self.method == 
'huber': - d = self.params.get('d', 3) - minimize_result = minimize( - huber_loss, - p0, - args=(x, y, self.model.eq, d), - bounds=bounds, - method=algorithm, - ) - if self.method == 'irls': - d = self.params.get('d', 1e-7) - minimize_result = minimize( - irls_loss, - p0, - args=(x, y, self.model.eq, d), - bounds=bounds, - method=algorithm, - ) - if not minimize_result.success: - raise Exception(f'Fitting failed. {minimize_result.message}') - else: - self.popt = minimize_result.x - - # if self.method == 'sklearn-RANSAC': - # self.reg = RANSACRegressor(random_state=42) - # xr = x.reshape(-1, 1) - # self.reg.fit(xr, y) - # self.popt = (self.reg.estimator_.coef_, self.reg.estimator_.intercept_) - - # if self.method == 'sklearn-linear': - # self.reg = LinearRegression() - # xr = x.reshape(-1, 1) - # self.reg.fit(xr, y) - # self.popt = (self.reg.coef_, self.reg.intercept_) - # print(self.popt) - - def predict(self, x: np.ndarray, return_type='numpy'): - x = np.sort(x) # just in case - y_hat = self.model.eq(x, *self.popt) - if return_type == 'numpy': - return y_hat - if return_type == 'pandas': - return pd.Series(y_hat, index=x) - - -class BleachCorrection: - def __init__( - self, - model: AbstractModel = None, - regression_method: str = 'mse', - regression_params: dict = None, - correction_method: str = 'subtract', - ): - self.model = model - self.regression = Regression( - model=model, method=regression_method, method_params=regression_params - ) - self.correction_method = correction_method - - def correct(self, F: pd.Series): - self.regression.fit(F.index.values, F.values) - ref = self.regression.predict(F.index.values, return_type='pandas') - return correct(F, ref, mode=self.correction_method) - - -class IsosbesticCorrection: - def __init__( - self, - regression_method: str = 'mse', - regression_params: dict = None, - correction_method: str = 'subtract-divide', - lowpass_isosbestic: dict = None, - ): - self.reg = Regression( - model=LinearModel(), - method=regression_method, - method_params=regression_params, - ) - self.lowpass_isosbestic = lowpass_isosbestic - self.correction_method = correction_method - - def correct( - self, - F_ca: pd.Series, - F_iso: pd.Series, - ): - if self.lowpass_isosbestic is not None: - F_iso = filt(F_iso, **self.lowpass_isosbestic) - - self.reg.fit(F_iso.values, F_ca.values) - F_iso_fit = self.reg.predict(F_iso.values, return_type='pandas') - - return correct(F_ca, F_iso_fit, mode=self.correction_method) - - -class LowpassBleachCorrection: - def __init__( - self, - filter_params=dict(N=3, Wn=0.01, btype='lowpass'), - correction_method='subtract-divide', - ): - self.filter_params = filter_params - self.correction_method = correction_method - - def correct(self, F: pd.Series): - F_filt = filt(F, **self.filter_params) - return correct(F, F_filt, mode=self.correction_method) - - -# convenience functions for pipelines -def lowpass_bleachcorrect(F: pd.Series, **kwargs): - bc = LowpassBleachCorrection(**kwargs) - return bc.correct(F) - - -def exponential_bleachcorrect(F: pd.Series, **kwargs): - model = DoubleExponDecay() - ec = BleachCorrection(model, **kwargs) - return ec.correct(F) - - -def isosbestic_correct(F_sig: pd.DataFrame, F_ref: pd.DataFrame, **kwargs): - ic = IsosbesticCorrection(**kwargs) - return ic.correct(F_sig, F_ref) diff --git a/src/iblphotometry/helpers.py b/src/iblphotometry/helpers.py deleted file mode 100644 index 16f9e1d..0000000 --- a/src/iblphotometry/helpers.py +++ /dev/null @@ -1,98 +0,0 @@ -import numpy as np -from scipy import signal -import 
pandas as pd -from ibldsp.utils import WindowGenerator -from iblutil.numerical import rcoeff - - -# TODO move to processing -def z(A: np.array, mode='classic'): - """classic z-score. Deviation from sample mean in units of sd - - Args: - A (np.array): _description_ - - Returns: - _type_: _description_ - """ - if mode == 'classic': - return (A - np.average(A)) / np.std(A) - if mode == 'median': - return (A - np.median(A)) / np.std(A) - - -# TODO move to processing -def mad(A: np.array): - """ - https://en.wikipedia.org/wiki/Median_absolute_deviation - :the MAD is defined as the median of the absolute deviations from the data's median - - Args: - A (np.array): _description_ - - Returns: - _type_: _description_ - """ - return np.median(np.absolute(A - np.median(A)), axis=-1) - - -# TODO move to processing -def madscore(F: pd.Series): - y, t = F.values, F.index.values - return pd.Series(mad(y), index=t) - - -# TODO move to processing -def zscore(F: pd.Series, mode='classic'): - """pynapple friendly zscore - - Args: - F (nap.Tsd): signal to be z-scored - - Returns: - _type_: z-scored nap.Tsd - """ - y, t = F.values, F.index.values - # mu, sig = np.average(y), np.std(y) - return pd.Series(z(y, mode=mode), index=t) - - -# TODO move to processing -def filt(F: pd.Series, N: int, Wn: float, fs: float = None, btype='low'): - """a pynapple friendly wrapper for scipy.signal.butter and sosfiltfilt - - Args: - F (nap.Tsd): _description_ - N (int): _description_ - Wn (float): _description_ - fs (float, optional): _description_. Defaults to None. - btype (str, optional): _description_. Defaults to "low". - - Returns: - _type_: _description_ - """ - y, t = F.values, F.index.values - if fs is None: - fs = 1 / np.median(np.diff(t)) - sos = signal.butter(N, Wn, btype, fs=fs, output='sos') - y_filt = signal.sosfiltfilt(sos, y) - return pd.Series(y_filt, index=t) - - -def sliding_rcoeff(signal_a, signal_b, nswin, overlap=0): - """ - Computes the local correlation coefficient between two signals in sliding windows - :param signal_a: - :param signal_b: - :param nswin: window size in samples - :param overlap: overlap of successiv windows in samples - :return: ix: indices of the center of the windows, r: correlation coefficients - """ - wg = WindowGenerator(ns=signal_a.size, nswin=nswin, overlap=overlap) - first_samples = np.array([fl[0] for fl in wg.firstlast]) - iwin = np.zeros([wg.nwin, wg.nswin], dtype=np.int32) + np.arange(wg.nswin) - iwin += first_samples[:, np.newaxis] - iwin[iwin >= signal_a.size] = signal_a.size - 1 - r = rcoeff(signal_a[iwin], signal_b[iwin]) - ix = first_samples + nswin // 2 - return ix, r diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index d227592..4c8d7d7 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -1,13 +1,9 @@ import numpy as np import pandas as pd from scipy import stats -from scipy.stats import ttest_ind -from iblphotometry.helpers import z +from iblphotometry.processing import z, Regression, ExponDecay, detect_spikes, detect_outliers from iblphotometry.behavior import psth -from iblphotometry.bleach_corrections import Regression, ExponDecay -from iblphotometry.outlier_detection import detect_spikes, detect_outliers - ## this approach works as well # from functools import singledispatch @@ -179,7 +175,7 @@ def has_response_to_event( base_ix = np.setdiff1d(np.arange(y.shape[0]), psth_ix.flatten()) base_samples = y[base_ix] - res = ttest_ind(sig_samples, base_samples) + res = stats.ttest_ind(sig_samples, base_samples) 
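+    # note: stats.ttest_ind is two-sided with independent samples by default;
+    # sig_samples holds one value per event, base_samples every sample outside
+    # the event windows, so the two group sizes can differ substantially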
return res.pvalue < alpha diff --git a/src/iblphotometry/outlier_detection.py b/src/iblphotometry/outlier_detection.py deleted file mode 100644 index e31cd82..0000000 --- a/src/iblphotometry/outlier_detection.py +++ /dev/null @@ -1,122 +0,0 @@ -import numpy as np -from scipy.stats import t -from ibldsp.utils import WindowGenerator -from copy import copy -import pandas as pd -from scipy.stats import gaussian_kde as kde -import warnings - - -def _grubbs_single(y: np.array, alpha: float =0.005, mode: str='median') -> bool: - # to apply a single pass of grubbs outlier detection - # see https://en.wikipedia.org/wiki/Grubbs%27s_test - - N = y.shape[0] - if mode == 'classic': - # this is the formulation as from wikipedia - G = np.max(np.absolute(y - np.average(y))) / np.std(y) - if mode == 'median': - # this is more robust against outliers - G = np.max(np.absolute(y - np.median(y))) / np.std(y) - tsq = t.ppf(1 - alpha / (2 * N), df=N - 2) ** 2 - g = (N - 1) / np.sqrt(N) * np.sqrt(tsq / (N - 2 + tsq)) - - if G > g: # if G > g, reject null hypothesis (of no outliers) - return True - else: - return False - - -def grubbs_test(y: np.array, alpha: float =0.005, mode:str='median'): - # apply grubbs test iteratively until no more outliers are found - outliers = [] - while _grubbs_single(y, alpha=alpha): - # get the outlier index - if mode == 'classic': - ix = np.argmax(np.absolute(y - np.average(y))) - if mode == 'median': - ix = np.argmax(np.absolute(y - np.median(y))) - outliers.append(ix) - y[ix] = np.median(y) - return np.sort(outliers) - - -def detect_outliers(y: np.array, w_size: int = 1000, alpha: float = 0.005): - # sliding grubbs test for a np.array - n_samples = y.shape[0] - wg = WindowGenerator(n_samples - (n_samples % w_size), w_size, 0) - dtype = np.dtype((np.float64, w_size)) - B = np.fromiter(wg.slice_array(y), dtype=dtype) - - # sliding window outlier detection - outlier_ix = [] - for i in range(B.shape[0]): - outlier_ix.append(grubbs_test(B[i, :], alpha=alpha) + i * w_size) - - # explicitly taking care of the remaining samples if they exist - if y.shape[0] > np.prod(B.shape): - outlier_ix.append( - grubbs_test(y[np.prod(B.shape) :], alpha=alpha) + B.shape[0] * w_size - ) - - outlier_ix = np.concatenate(outlier_ix).astype('int64') - return np.unique(outlier_ix) - - -def remove_nans(y: np.array): - y = y[~pd.isna(y)] # nanfilter - if y.shape[0] == 0: - warnings.warn('y was all NaN and is now empty') - return y - - -def fillnan_kde(y: np.array, w: int = 25): - # fill nans with KDE from edges - inds = np.where(pd.isna(y))[0] - if inds.shape[0] > 0: - for ix in inds: - ix_start = np.clip(ix - w, 0, y.shape[0] - 1) - ix_stop = np.clip(ix + w, 0, y.shape[0] - 1) - y_ = y[ix_start:ix_stop] - y_ = remove_nans(y_) - y[ix] = kde(y_).resample(1)[0][0] - - return y - else: - # no NaNs present, doing nothing - return y - - -def remove_outliers(F: pd.Series, w_size: int = 1000, alpha: float = 0.005, w: int = 25): - y, t = F.values, F.index.values - y = copy(y) - outliers = detect_outliers(y, w_size=w_size, alpha=alpha) - while len(outliers) > 0: - y[outliers] = np.nan - y = fillnan_kde(y, w=w) - outliers = detect_outliers(y, w_size=w_size, alpha=alpha) - return pd.Series(y, index=t) - - -def detect_spikes(t: np.array, sd: int = 5): - dt = np.diff(t) - bad_inds = dt < np.average(dt) - sd * np.std(dt) - return np.where(bad_inds)[0] - - -def remove_spikes(F: pd.Series, sd: int = 5, w: int = 25): - y, t = F.values, F.index.values - y = copy(y) - outliers = detect_spikes(y, sd=sd) - y[outliers] = np.nan - 
try:
-        y = fillnan_kde(y, w=w)
-    except np.linalg.LinAlgError:
-        if np.all(pd.isna(y[outliers])):  # all are NaN!
-            y[:] = 0
-            warnings.warn('all values NaN, setting to zeros')  # TODO logger
-        else:
-            y[outliers] = np.nanmedian(y)
-            warnings.warn('KDE fillnan failed, using global median')  # TODO logger
-
-    return pd.Series(y, index=t)
diff --git a/src/iblphotometry/pipelines.py b/src/iblphotometry/pipelines.py
index 72bcfa4..159adc7 100644
--- a/src/iblphotometry/pipelines.py
+++ b/src/iblphotometry/pipelines.py
@@ -1,25 +1,12 @@
-"""this module holds a collection of processing pipelines for fiber photometry data"""
-
 import numpy as np
 import pandas as pd
-from iblphotometry.helpers import z, filt
-
-# from ibldsp.utils import WindowGenerator
-from iblphotometry import sliding_operations
-from iblphotometry import bleach_corrections
-from iblphotometry.outlier_detection import remove_spikes
-from iblphotometry.bleach_corrections import lowpass_bleachcorrect, isosbestic_correct
-from iblphotometry.sliding_operations import sliding_mad
-
-# TODO this will probably be refactored to to processing
-from iblphotometry.helpers import zscore
+from iblphotometry.processing import remove_spikes, lowpass_bleachcorrect, isosbestic_correct, sliding_mad, zscore
 
 import logging
 from copy import copy
 
 logger = logging.getLogger()
 
-
 def run_pipeline(
diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py
new file mode 100644
index 0000000..c13dce9
--- /dev/null
+++ b/src/iblphotometry/processing.py
@@ -0,0 +1,730 @@
+from abc import ABC, abstractmethod
+import warnings
+
+import numpy as np
+import scipy as sp
+from scipy import signal, stats
+import pandas as pd
+
+from iblutil.numerical import rcoeff
+from ibldsp.utils import WindowGenerator
+
+from scipy.optimize import minimize
+from scipy.stats.distributions import norm
+from scipy.stats import gaussian_kde
+from scipy.stats import gaussian_kde as kde
+from scipy.stats import t
+from scipy.special import pseudo_huber
+
+from iblphotometry.behavior import psth
+from inspect import signature
+from copy import copy
+
+
+# machine resolution
+eps = np.finfo(np.float64).eps
+
+
+"""
+######## ## ## ## ## ###### ######## #### ####### ## ## ######
+## ## ## ### ## ## ## ## ## ## ## ### ## ## ##
+## ## ## #### ## ## ## ## ## ## #### ## ##
+###### ## ## ## ## ## ## ## ## ## ## ## ## ## ######
+## ## ## ## #### ## ## ## ## ## ## #### ##
+## ## ## ## ### ## ## ## ## ## ## ## ### ## ##
+## ####### ## ## ###### ## #### ####### ## ## ######
+"""
+
+def z(A: np.array, mode='classic'):
+    """classic z-score. Deviation from sample mean in units of sd
+
+    :param A: input array
+    :type A: np.array
+    :param mode: 'classic' centers on the mean, 'median' on the median, defaults to 'classic'
+    :type mode: str, optional
+    :return: the z-scored array
+    :rtype: np.array
+    """
+    if mode == 'classic':
+        return (A - np.average(A)) / np.std(A)
+    if mode == 'median':
+        return (A - np.median(A)) / np.std(A)
+
+def mad(A: np.array):
+    """the MAD is defined as the median of the absolute deviations from the data's median,
+    see https://en.wikipedia.org/wiki/Median_absolute_deviation
+
+    :param A: input array
+    :type A: np.array
+    :return: the median absolute deviation
+    :rtype: np.float64
+    """
+    return np.median(np.absolute(A - np.median(A)), axis=-1)
+
+def madscore(F: pd.Series):
+    # TODO overloading of mad?
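+    # note: mad() reduces a 1d input to a scalar, so the Series built below
+    # broadcasts a single value over the whole time index rather than
+    # returning a per-sample score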
+ y, t = F.values, F.index.values + return pd.Series(mad(y), index=t) + +def zscore(F: pd.Series, mode='classic'): + y, t = F.values, F.index.values + # mu, sig = np.average(y), np.std(y) + return pd.Series(z(y, mode=mode), index=t) + + +def filt(F: pd.Series, N: int, Wn: float, fs: float = None, btype='low'): + """ a wrapper for scipy.signal.butter and sosfiltfilt + """ + y, t = F.values, F.index.values + if fs is None: + fs = 1 / np.median(np.diff(t)) + sos = signal.butter(N, Wn, btype, fs=fs, output='sos') + y_filt = signal.sosfiltfilt(sos, y) + return pd.Series(y_filt, index=t) + +def sliding_rcoeff(signal_a, signal_b, nswin, overlap=0): + """ + Computes the local correlation coefficient between two signals in sliding windows + :param signal_a: + :param signal_b: + :param nswin: window size in samples + :param overlap: overlap of successiv windows in samples + :return: ix: indices of the center of the windows, r: correlation coefficients + """ + wg = WindowGenerator(ns=signal_a.size, nswin=nswin, overlap=overlap) + first_samples = np.array([fl[0] for fl in wg.firstlast]) + iwin = np.zeros([wg.nwin, wg.nswin], dtype=np.int32) + np.arange(wg.nswin) + iwin += first_samples[:, np.newaxis] + iwin[iwin >= signal_a.size] = signal_a.size - 1 + r = rcoeff(signal_a[iwin], signal_b[iwin]) + ix = first_samples + nswin // 2 + return ix, r + + +""" +## ####### ###### ###### ######## ## ## ## ## ###### ######## #### ####### ## ## ###### +## ## ## ## ## ## ## ## ## ## ### ## ## ## ## ## ## ## ### ## ## ## +## ## ## ## ## ## ## ## #### ## ## ## ## ## ## #### ## ## +## ## ## ###### ###### ###### ## ## ## ## ## ## ## ## ## ## ## ## ## ###### +## ## ## ## ## ## ## ## ## #### ## ## ## ## ## ## #### ## +## ## ## ## ## ## ## ## ## ## ## ### ## ## ## ## ## ## ## ### ## ## +######## ####### ###### ###### ## ####### ## ## ###### ## #### ####### ## ## ###### +""" + +def mse_loss(p, x, y, fun): + # mean squared error + y_hat = fun(x, *p) + return np.sum((y - y_hat) ** 2) / y.shape[0] + + +def mae_loss(p, x, y, fun): + # mean absolute error + y_hat = fun(x, *p) + return np.sum(np.absolute(y - y_hat)) / y.shape[0] + + +def huber_loss(p, x, y, fun, d): + # huber loss function + y_hat = fun(x, *p) + r = y - y_hat + return np.sum(pseudo_huber(d, r)) / y.shape[0] + + +def irls_loss(p, x, y, fun, d=1e-7): + # iteratively reweighted least squares - loss function + y_hat = fun(x, *p) + a = y - y_hat + # irls weight + f = np.max(np.stack([np.abs(a), np.ones(a.shape[0]) * d]), axis=0) + w = 1 / f + return np.sum(w * np.abs(a) ** 2) / y.shape[0] + +""" +######## ######## ###### ######## ######## ###### ###### #### ####### ## ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## +## ## ## ## ## ## ## ## ## ## ## ## #### ## +######## ###### ## #### ######## ###### ###### ###### ## ## ## ## ## ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## #### +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### +## ## ######## ###### ## ## ######## ###### ###### #### ####### ## ## +""" + +# wrapper class for regressions with different loss functions +# following a sklearn style of .fit() and .predict() + +class Regression: + def __init__(self, model=None, method: str = 'mse', method_params=None): + self.model = model + self.method = method + self.params = method_params if method_params is not None else {} + + def fit(self, x: np.ndarray, y: np.ndarray, algorithm='L-BFGS-B') -> tuple: + p0 = self.model.est_p0(x, y) + bounds = self.model.bounds if hasattr(self.model, 'bounds') else None + if self.method == 'mse': + minimize_result = minimize( 
+ mse_loss, + p0, + args=(x, y, self.model.eq), + bounds=bounds, + method=algorithm, + ) + if self.method == 'mae': + minimize_result = minimize( + mae_loss, + p0, + args=(x, y, self.model.eq), + bounds=bounds, + method=algorithm, + ) + if self.method == 'huber': + d = self.params.get('d', 3) + minimize_result = minimize( + huber_loss, + p0, + args=(x, y, self.model.eq, d), + bounds=bounds, + method=algorithm, + ) + if self.method == 'irls': + d = self.params.get('d', 1e-7) + minimize_result = minimize( + irls_loss, + p0, + args=(x, y, self.model.eq, d), + bounds=bounds, + method=algorithm, + ) + if not minimize_result.success: + raise Exception(f'Fitting failed. {minimize_result.message}') + else: + self.popt = minimize_result.x + + # if self.method == 'sklearn-RANSAC': + # self.reg = RANSACRegressor(random_state=42) + # xr = x.reshape(-1, 1) + # self.reg.fit(xr, y) + # self.popt = (self.reg.estimator_.coef_, self.reg.estimator_.intercept_) + + # if self.method == 'sklearn-linear': + # self.reg = LinearRegression() + # xr = x.reshape(-1, 1) + # self.reg.fit(xr, y) + # self.popt = (self.reg.coef_, self.reg.intercept_) + # print(self.popt) + + def predict(self, x: np.ndarray, return_type='numpy'): + x = np.sort(x) # just in case + y_hat = self.model.eq(x, *self.popt) + if return_type == 'numpy': + return y_hat + if return_type == 'pandas': + return pd.Series(y_hat, index=x) + +""" +######## ## ######## ### ###### ## ## ###### ####### ######## ######## ######## ###### ######## #### ####### ## ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## #### ## +######## ## ###### ## ## ## ######### ## ## ## ######## ######## ###### ## ## ## ## ## ## ## ## +## ## ## ## ######### ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## #### +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### +######## ######## ######## ## ## ###### ## ## ###### ####### ## ## ## ## ######## ###### ## #### ####### ## ## +""" + +class BleachCorrection: + def __init__( + self, + model: AbstractModel = None, + regression_method: str = 'mse', + regression_params: dict = None, + correction_method: str = 'subtract', + ): + self.model = model + self.regression = Regression( + model=model, method=regression_method, method_params=regression_params + ) + self.correction_method = correction_method + + def correct(self, F: pd.Series): + self.regression.fit(F.index.values, F.values) + ref = self.regression.predict(F.index.values, return_type='pandas') + return correct(F, ref, mode=self.correction_method) + + +class LowpassBleachCorrection: + def __init__( + self, + filter_params=dict(N=3, Wn=0.01, btype='lowpass'), + correction_method='subtract-divide', + ): + self.filter_params = filter_params + self.correction_method = correction_method + + def correct(self, F: pd.Series): + F_filt = filt(F, **self.filter_params) + return correct(F, F_filt, mode=self.correction_method) + + +class IsosbesticCorrection: + def __init__( + self, + regression_method: str = 'mse', + regression_params: dict = None, + correction_method: str = 'subtract-divide', + lowpass_isosbestic: dict = None, + ): + self.reg = Regression( + model=LinearModel(), + method=regression_method, + method_params=regression_params, + ) + self.lowpass_isosbestic = lowpass_isosbestic + self.correction_method = correction_method + + def correct( + self, + F_ca: pd.Series, + F_iso: pd.Series, + ): + if self.lowpass_isosbestic is not None: + F_iso = 
filt(F_iso, **self.lowpass_isosbestic) + + self.reg.fit(F_iso.values, F_ca.values) + F_iso_fit = self.reg.predict(F_iso.values, return_type='pandas') + + return correct(F_ca, F_iso_fit, mode=self.correction_method) + +def correct(signal: pd.Series, reference: pd.Series, mode: str = 'subtract') -> pd.Series: + """the main function that applies the correction of a signal with a reference. Correcions can be applied in 3 principle ways: + - The reference can be subtracted from the signal + - the signal can be divided by the reference + - both of the above (first subtraction, then division) - this is akin to df/f + + :param signal: _description_ + :type signal: pd.Series + :param reference: _description_ + :type reference: pd.Series + :param mode: _description_, defaults to 'subtract' + :type mode: str, optional + :return: _description_ + :rtype: pd.Series + """ + if mode == 'subtract': + signal_corrected = signal.values - reference.values + if mode == 'divide': + signal_corrected = signal.values / reference.values + if mode == 'subtract-divide': + signal_corrected = (signal.values - reference.values) / reference.values + return pd.Series(signal_corrected, index=signal.index.values) + + +""" +## ## ####### ######## ######## ## ###### +### ### ## ## ## ## ## ## ## ## +#### #### ## ## ## ## ## ## ## +## ### ## ## ## ## ## ###### ## ###### +## ## ## ## ## ## ## ## ## +## ## ## ## ## ## ## ## ## ## +## ## ####### ######## ######## ######## ###### +""" + + +class AbstractModel(ABC): + + @abstractmethod + def eq(): + # the model equation + ... + + @abstractmethod + def est_p0(): + # given data, estimate model parameters + ... + + def _calc_r_squared(self, y, y_hat): + r = 1 - np.sum((y - y_hat) ** 2) / np.sum((y - np.average(y)) ** 2) + return r + + def _calc_likelihood(self, y, y_hat, n_samples=-1, use_kde=False): + rs = y - y_hat + if n_samples > 0: + inds = np.random.randint(0, y.shape[0], size=n_samples) + rs = rs[inds] + if use_kde is True: + # explicit estimation of the distribution of residuals + if n_samples == -1: + warnings.warn( + f'calculating KDE on {y.values.shape[0]} samples. 
This might be slow' + ) + dist = gaussian_kde(rs) + else: + # using RSME + sig = np.sqrt(np.average((y - y_hat) ** 2)) + dist = norm(0, sig) + ll = np.sum(np.log(dist.pdf(rs))) + return ll + + def _calc_aic(self, k, ll): + aic = 2 * k - 2 * ll # np.log(ll) + return aic + + def calc_model_stats(self, y, y_hat, n_samples: int = -1, use_kde: bool = False): + r_sq = self._calc_r_squared(y, y_hat) + ll = self._calc_likelihood(y, y_hat, n_samples, use_kde) + k = len(signature(self.eq).parameters) - 1 + aic = self._calc_aic(ll, k) + return dict(r_sq=r_sq, ll=ll, aic=aic) + +# the actual models + +class LinearModel(AbstractModel): + def eq(self, x, m, b): + return x * m + b + + def est_p0(self, x: np.array, y: np.array): + return tuple(np.polyfit(x, y, 1)) + # x, y = np.sort(x), np.sort(y) + # dx = x[-1] - x[0] + # dy = y[-1] - y[0] + # m = dy / dx + # b = np.average(y - (m * x)) + # return (m, b) + + +class ExponDecay(AbstractModel): + bounds = ((0, np.inf), (eps, np.inf), (-np.inf, np.inf)) + + def eq(self, t, A, tau, b): + return A * np.exp(-t / tau) + b + + def est_p0(self, t: np.array, y: np.array): + return (y[0], t[int(t.shape[0] / 3)], y[-1]) + + +class DoubleExponDecay(AbstractModel): + bounds = ( + (0, np.inf), + (eps, np.inf), + (0, np.inf), + (eps, np.inf), + (-np.inf, np.inf), + ) + + def eq(self, t, A1, tau1, A2, tau2, b): + return A1 * np.exp(-t / tau1) + A2 * np.exp(-t / tau2) + b + + def est_p0(self, t: np.array, y: np.array): + A_est = y[0] + tau_est = t[int(t.shape[0] / 3)] + b_est = y[-1] + return (A_est, tau_est, A_est / 2, tau_est / 2, b_est) + + +class TripleExponDecay(AbstractModel): + bounds = ( + (0, np.inf), + (eps, np.inf), + (0, np.inf), + (eps, np.inf), + (0, np.inf), + (eps, np.inf), + (-np.inf, np.inf), + ) + + def eq(self, t, A1, tau1, A2, tau2, A3, tau3, b): + return ( + A1 * np.exp(-t / tau1) + A2 * np.exp(-t / tau2) + A3 * np.exp(-t / tau3) + b + ) + + def est_p0(self, t: np.array, y: np.array): + A_est = y[0] + tau_est = t[int(t.shape[0] / 3)] + b_est = y[-1] + return ( + A_est, + tau_est, + A_est * 0.66, + tau_est * 0.66, + A_est * 0.33, + tau_est * 0.33, + b_est, + ) + +""" + ###### ####### ######## ######## ######## ###### ######## #### ####### ## ## ######## ## ## ## ## ###### ######## #### ####### ## ## ###### +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## ## ## ## ### ## ## ## ## ## ## ## ### ## ## ## +## ## ## ## ## ## ## ## ## ## ## ## ## #### ## ## ## ## #### ## ## ## ## ## ## #### ## ## +## ## ## ######## ######## ###### ## ## ## ## ## ## ## ## ###### ## ## ## ## ## ## ## ## ## ## ## ## ## ###### +## ## ## ## ## ## ## ## ## ## ## ## ## ## #### ## ## ## ## #### ## ## ## ## ## ## #### ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## ## ## ## ### ## ## ## ## ## ## ## ### ## ## + ###### ####### ## ## ## ## ######## ###### ## #### ####### ## ## ## ####### ## ## ###### ## #### ####### ## ## ###### +""" + +# these are the convenience functions that are called in pipelines + +def lowpass_bleachcorrect(F: pd.Series, **kwargs): + bc = LowpassBleachCorrection(**kwargs) + return bc.correct(F) + +def exponential_bleachcorrect(F: pd.Series, **kwargs): + model = DoubleExponDecay() + ec = BleachCorrection(model, **kwargs) + return ec.correct(F) + +def isosbestic_correct(F_sig: pd.DataFrame, F_ref: pd.DataFrame, **kwargs): + ic = IsosbesticCorrection(**kwargs) + return ic.correct(F_sig, F_ref) + + +""" + ####### ## ## ######## ## #### ######## ######## ######## ######## ######## ######## ###### ######## #### ####### ## ## +## ## ## ## ## ## ## ## ## ## 
## ## ## ## ## ## ## ## ## ## ## ### ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## #### ## +## ## ## ## ## ## ## ###### ######## ## ## ###### ## ###### ## ## ## ## ## ## ## ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## #### +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### + ####### ####### ## ######## #### ######## ## ## ######## ######## ## ######## ###### ## #### ####### ## ## +""" + +def _grubbs_single(y: np.array, alpha: float =0.005, mode: str='median') -> bool: + # to apply a single pass of grubbs outlier detection + # see https://en.wikipedia.org/wiki/Grubbs%27s_test + + N = y.shape[0] + if mode == 'classic': + # this is the formulation as from wikipedia + G = np.max(np.absolute(y - np.average(y))) / np.std(y) + if mode == 'median': + # this is more robust against outliers + G = np.max(np.absolute(y - np.median(y))) / np.std(y) + tsq = t.ppf(1 - alpha / (2 * N), df=N - 2) ** 2 + g = (N - 1) / np.sqrt(N) * np.sqrt(tsq / (N - 2 + tsq)) + + if G > g: # if G > g, reject null hypothesis (of no outliers) + return True + else: + return False + + +def grubbs_test(y: np.array, alpha: float =0.005, mode:str='median'): + # apply grubbs test iteratively until no more outliers are found + outliers = [] + while _grubbs_single(y, alpha=alpha): + # get the outlier index + if mode == 'classic': + ix = np.argmax(np.absolute(y - np.average(y))) + if mode == 'median': + ix = np.argmax(np.absolute(y - np.median(y))) + outliers.append(ix) + y[ix] = np.median(y) + return np.sort(outliers) + + +def detect_outliers(y: np.array, w_size: int = 1000, alpha: float = 0.005): + # sliding grubbs test for a np.array + n_samples = y.shape[0] + wg = WindowGenerator(n_samples - (n_samples % w_size), w_size, 0) + dtype = np.dtype((np.float64, w_size)) + B = np.fromiter(wg.slice_array(y), dtype=dtype) + + # sliding window outlier detection + outlier_ix = [] + for i in range(B.shape[0]): + outlier_ix.append(grubbs_test(B[i, :], alpha=alpha) + i * w_size) + + # explicitly taking care of the remaining samples if they exist + if y.shape[0] > np.prod(B.shape): + outlier_ix.append( + grubbs_test(y[np.prod(B.shape) :], alpha=alpha) + B.shape[0] * w_size + ) + + outlier_ix = np.concatenate(outlier_ix).astype('int64') + return np.unique(outlier_ix) + + +def remove_nans(y: np.array): + y = y[~pd.isna(y)] # nanfilter + if y.shape[0] == 0: + warnings.warn('y was all NaN and is now empty') + return y + + +def fillnan_kde(y: np.array, w: int = 25): + # fill nans with KDE from edges + inds = np.where(pd.isna(y))[0] + if inds.shape[0] > 0: + for ix in inds: + ix_start = np.clip(ix - w, 0, y.shape[0] - 1) + ix_stop = np.clip(ix + w, 0, y.shape[0] - 1) + y_ = y[ix_start:ix_stop] + y_ = remove_nans(y_) + y[ix] = kde(y_).resample(1)[0][0] + + return y + else: + # no NaNs present, doing nothing + return y + + +def remove_outliers(F: pd.Series, w_size: int = 1000, alpha: float = 0.005, w: int = 25): + y, t = F.values, F.index.values + y = copy(y) + outliers = detect_outliers(y, w_size=w_size, alpha=alpha) + while len(outliers) > 0: + y[outliers] = np.nan + y = fillnan_kde(y, w=w) + outliers = detect_outliers(y, w_size=w_size, alpha=alpha) + return pd.Series(y, index=t) + + +def detect_spikes(t: np.array, sd: int = 5): + dt = np.diff(t) + bad_inds = dt < np.average(dt) - sd * np.std(dt) + return np.where(bad_inds)[0] + + +def remove_spikes(F: pd.Series, sd: int = 5, w: int = 25): + y, t = F.values, F.index.values + y = copy(y) + outliers = detect_spikes(y, sd=sd) + y[outliers] = 
np.nan + try: + y = fillnan_kde(y, w=w) + except np.linalg.LinAlgError: + if np.all(pd.isna(y[outliers])): # all are NaN! + y[:] = 0 + warnings.warn('all values NaN, setting to zeros') # TODO logger + else: + y[outliers] = np.nanmedian(y) + warnings.warn('KDE fillnan failed, using global median') # TODO logger + + return pd.Series(y, index=t) + + +""" + ###### ## #### ######## #### ## ## ###### ####### ######## ######## ######## ### ######## #### ####### ## ## ###### +## ## ## ## ## ## ## ### ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## ## ## +## ## ## ## ## ## #### ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## #### ## ## + ###### ## ## ## ## ## ## ## ## ## #### ## ## ######## ###### ######## ## ## ## ## ## ## ## ## ## ###### + ## ## ## ## ## ## ## #### ## ## ## ## ## ## ## ## ######### ## ## ## ## ## #### ## +## ## ## ## ## ## ## ## ### ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## ## + ###### ######## #### ######## #### ## ## ###### ####### ## ######## ## ## ## ## ## #### ####### ## ## ###### +""" + +"""this module holds a collection of processing pipelines for fiber photometry data""" + + +def make_sliding_window( + A: np.ndarray, + w_size: int, + pad_mode='edge', + method='stride_tricks', + warning=None, +): + + """use np.stride_tricks to make a sliding window view of a 1-d np.array A + full overlap, step size 1 + assumes 8 byte numbers (to be exposed? but needs to be tested first) + pads beginning and end of array with edge values + + Args: + A (np.ndarray): Array to be windowed + w_size (int): window size in samples + + Raises: + ValueError: is raised if w_size is not an even number + + Returns: + _type_: The view on the array, with shape (A.shape[0], w_size) + """ + n_samples = A.shape[0] + + if method == 'stride_tricks': + if w_size % 2 != 0: + if warning == 'raise_exception': + raise ValueError('w_size needs to be an even number') + else: + w_size += 1 # dangerous + B = np.lib.stride_tricks.as_strided(A, ((n_samples - w_size), w_size), (8, 8)) + + if method == 'window_generator': + wg = WindowGenerator(n_samples - 1, w_size, w_size - 1) + dtype = np.dtype((np.float64, w_size)) + B = np.fromiter(wg.slice_array(A), dtype=dtype) + + if pad_mode is not None: + B = np.pad(B, ((int(w_size / 2), int(w_size / 2)), (0, 0)), mode=pad_mode) + return B + + +def sliding_dFF(F: pd.Series, w_len: float, fs=None, weights=None): + y, t = F.values, F.index.values + fs = 1 / np.median(np.diff(t)) if fs is None else fs + w_size = int(w_len * fs) + + def _dFF(A: np.array): + return (A - np.average(A)) / np.average(A) + + if weights is not None: + # note: passing weights makes the stride trick not possible, or only with allocating a matrix of shape (n_samples * w_size) + # true sliding operation implemented, much slower + weights /= weights.sum() + n_samples = y.shape[0] + wg = WindowGenerator(n_samples - 1, w_size, w_size - 1) + d = [ + np.sum(_dFF(F[first:last].values) * weights) + for (first, last) in wg.firstlast + ] + else: + B = make_sliding_window(y, w_size) + mus = np.average(B, axis=1) + d = (y - mus) / mus + return pd.Series(d, index=t) + + +def sliding_z(F: pd.Series, w_len: float, fs=None, weights=None): + """sliding window z-score of a pynapple time series with data with optional weighting + + Args: + F (nap.Tsd): Signal to be zscored + w_size (int): window size + w_weights (_type_, optional): weights of the window . Defaults to None. 
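        Example (a sketch, assuming a ~30 Hz pd.Series F; not a fixture of this repo):
            F_z = sliding_z(F, w_len=60.0)  # z-score within 60 s windows, step size 1 sample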
+ + Returns: + _type_: _description_ + """ + y, t = F.values, F.index.values + fs = 1 / np.median(np.diff(t)) if fs is None else fs + w_size = int(w_len * fs) + + if weights is not None: + # note: passing weights makes the stride trick not possible, or only with allocating a matrix of shape (n_samples * w_size) + # true sliding operation implemented, much slower + weights /= weights.sum() + n_samples = y.shape[0] + wg = WindowGenerator(n_samples - 1, w_size, w_size - 1) + d = [ + np.sum(z(F[first:last].values) * weights) for (first, last) in wg.firstlast + ] + else: + B = make_sliding_window(y, w_size) + mus, sds = np.average(B, axis=1), np.std(B, axis=1) + d = (y - mus) / sds + return pd.Series(d, index=t) + + +def sliding_mad(F: pd.Series, w_len: float = None, fs=None, overlap=90): + y, t = F.values, F.index.values + fs = 1 / np.median(np.diff(t)) if fs is None else fs + w_size = int(w_len * fs) + + n_samples = y.shape[0] + wg = WindowGenerator(ns=n_samples, nswin=w_size, overlap=overlap) + trms = np.array([first for first, last in wg.firstlast]) / fs + t[0] + + rmswin, _ = psth(y, t, t_events=trms, fs=fs, event_window=[0, w_len]) + gain = np.nanmedian(np.abs(y)) / np.nanmedian(np.abs(rmswin), axis=0) + gain = np.interp(t, trms, gain) + return pd.Series(y * gain, index=t) diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py index 908b3c7..242a2f5 100644 --- a/src/iblphotometry/qc.py +++ b/src/iblphotometry/qc.py @@ -8,7 +8,7 @@ import gc -from iblphotometry.sliding_operations import make_sliding_window +from iblphotometry.processing import make_sliding_window from pipelines import run_pipeline import warnings diff --git a/src/iblphotometry/sliding_operations.py b/src/iblphotometry/sliding_operations.py deleted file mode 100644 index f0baaf4..0000000 --- a/src/iblphotometry/sliding_operations.py +++ /dev/null @@ -1,138 +0,0 @@ -"""this module holds a collection of processing pipelines for fiber photometry data""" - -import numpy as np -import pandas as pd -from iblphotometry.helpers import z -from iblphotometry.behavior import psth -from ibldsp.utils import WindowGenerator - - -def make_sliding_window( - A: np.ndarray, - w_size: int, - pad_mode='edge', - method='stride_tricks', - warning=None, -): - """use np.stride_tricks to make a sliding window view of a 1-d np.array A - full overlap, step size 1 - assumes 8 byte numbers (to be exposed? 
but needs to be tested first) - pads beginning and end of array with edge values - - Args: - A (np.ndarray): Array to be windowed - w_size (int): window size in samples - - Raises: - ValueError: is raised if w_size is not an even number - - Returns: - _type_: The view on the array, with shape (A.shape[0], w_size) - """ - n_samples = A.shape[0] - - if method == 'stride_tricks': - if w_size % 2 != 0: - if warning == 'raise_exception': - raise ValueError('w_size needs to be an even number') - else: - w_size += 1 # dangerous - B = np.lib.stride_tricks.as_strided(A, ((n_samples - w_size), w_size), (8, 8)) - - if method == 'window_generator': - wg = WindowGenerator(n_samples - 1, w_size, w_size - 1) - dtype = np.dtype((np.float64, w_size)) - B = np.fromiter(wg.slice_array(A), dtype=dtype) - - if pad_mode is not None: - B = np.pad(B, ((int(w_size / 2), int(w_size / 2)), (0, 0)), mode=pad_mode) - return B - - -def sliding_dFF(F: pd.Series, w_len: float, fs=None, weights=None): - y, t = F.values, F.index.values - fs = 1 / np.median(np.diff(t)) if fs is None else fs - w_size = int(w_len * fs) - - def _dFF(A: np.array): - return (A - np.average(A)) / np.average(A) - - if weights is not None: - # note: passing weights makes the stride trick not possible, or only with allocating a matrix of shape (n_samples * w_size) - # true sliding operation implemented, much slower - weights /= weights.sum() - n_samples = y.shape[0] - wg = WindowGenerator(n_samples - 1, w_size, w_size - 1) - d = [ - np.sum(_dFF(F[first:last].values) * weights) - for (first, last) in wg.firstlast - ] - else: - B = make_sliding_window(y, w_size) - mus = np.average(B, axis=1) - d = (y - mus) / mus - return pd.Series(d, index=t) - - -def sliding_z(F: pd.Series, w_len: float, fs=None, weights=None): - """sliding window z-score of a pynapple time series with data with optional weighting - - Args: - F (nap.Tsd): Signal to be zscored - w_size (int): window size - w_weights (_type_, optional): weights of the window . Defaults to None. 
- - Returns: - _type_: _description_ - """ - y, t = F.values, F.index.values - fs = 1 / np.median(np.diff(t)) if fs is None else fs - w_size = int(w_len * fs) - - if weights is not None: - # note: passing weights makes the stride trick not possible, or only with allocating a matrix of shape (n_samples * w_size) - # true sliding operation implemented, much slower - weights /= weights.sum() - n_samples = y.shape[0] - wg = WindowGenerator(n_samples - 1, w_size, w_size - 1) - d = [ - np.sum(z(F[first:last].values) * weights) for (first, last) in wg.firstlast - ] - else: - B = make_sliding_window(y, w_size) - mus, sds = np.average(B, axis=1), np.std(B, axis=1) - d = (y - mus) / sds - return pd.Series(d, index=t) - - -def sliding_mad(F: pd.Series, w_len: float = None, fs=None, overlap=90): - y, t = F.values, F.index.values - fs = 1 / np.median(np.diff(t)) if fs is None else fs - w_size = int(w_len * fs) - - n_samples = y.shape[0] - wg = WindowGenerator(ns=n_samples, nswin=w_size, overlap=overlap) - trms = np.array([first for first, last in wg.firstlast]) / fs + t[0] - - rmswin, _ = psth(y, t, t_events=trms, fs=fs, event_window=[0, w_len]) - gain = np.nanmedian(np.abs(y)) / np.nanmedian(np.abs(rmswin), axis=0) - gain = np.interp(t, trms, gain) - return pd.Series(y * gain, index=t) - - -# def sliding_mad_new(F: nap.Tsd, w_len: float = None, fs=None, overlap=90): -# y, t = F.values, F.times() -# fs = 1 / np.median(np.diff(t)) if fs is None else fs -# w_size = int(w_len * fs) - -# B = make_sliding_window(y, w_size) -# gain = np.nanmedian(y) / np.nanmedian(B, axis=1) -# return nap.Tsd(t=t, d=y * gain) - -# wg = WindowGenerator(ns=n_samples, nswin=w_size, overlap=overlap) -# trms = np.array([first for first, last in wg.firstlast]) / fs + t[0] - -# rmswin, _ = psth(y, t, t_events=trms, fs=fs, event_window=[0, w_len]) -# gain = np.nanmedian(np.abs(y)) / np.nanmedian(np.abs(rmswin), axis=0) -# gain = np.interp(t, trms, gain) -# return nap.Tsd(t=t, d=y * gain) From e7aedf758822d2bcb155632407809621a3212868 Mon Sep 17 00:00:00 2001 From: georg Date: Tue, 3 Dec 2024 14:33:12 +0000 Subject: [PATCH 05/10] type hinting --- src/iblphotometry/io.py | 20 ++++++------- src/iblphotometry/metrics.py | 52 ++++++++------------------------- src/iblphotometry/pipelines.py | 2 +- src/iblphotometry/processing.py | 51 +++++++++++++++----------------- src/iblphotometry/qc.py | 13 +++++---- 5 files changed, 54 insertions(+), 84 deletions(-) diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py index 18ba56c..85ba4c1 100644 --- a/src/iblphotometry/io.py +++ b/src/iblphotometry/io.py @@ -1,9 +1,9 @@ -# %% import numpy as np import pandas as pd from pathlib import Path import warnings import pandera +from typing import Optional from iblphotometry.neurophotometrics import ( LIGHT_SOURCE_MAP, @@ -12,18 +12,18 @@ def from_array( - times: np.array, data: np.array, channel_names: list[str] = None + times: np.ndarray, data: np.ndarray, channel_names: list[str] | None = None ) -> pd.DataFrame: return pd.DataFrame(data, index=times, columns=channel_names) def from_dataframe( raw_df: pd.DataFrame, - data_columns: list[str] = None, - time_column: str = None, + data_columns: list[str] | None = None, + time_column: str | None = None, channel_column: str = 'name', - channel_names: list[str] = None, - rename: dict = None, + channel_names: list[str] | None = None, + rename: dict | None = None, ) -> dict: """reads in a pandas.DataFrame and converts it into nap.TsdDataframes. Performs the time demultiplexing operation. 
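For orientation, a minimal sketch of the demultiplexing call; the column and
channel names below are illustrative assumptions, not fixtures of this repo:

    import numpy as np
    import pandas as pd
    from iblphotometry.io import from_dataframe

    # two multiplexed channels acquired at 60 Hz, interleaved frame by frame
    raw_df = pd.DataFrame(
        dict(
            times=np.arange(6) / 60,
            name=['GCaMP', 'Isosbestic'] * 3,
            Region01=np.random.randn(6),
        )
    )
    dfs = from_dataframe(raw_df, data_columns=['Region01'], time_column='times')
    # expected: dfs['GCaMP'] and dfs['Isosbestic'], one 30 Hz DataFrame per channel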
@@ -49,9 +49,9 @@ def from_dataframe( # infer name of time column if not provided if time_column is None: - time_column = [col for col in raw_df.columns if 'time' in col.lower()] - assert len(time_column) == 1 - time_column = time_column[0] + time_columns = [col for col in raw_df.columns if 'time' in col.lower()] + assert len(time_columns) == 1 + time_column = time_columns[0] # infer channel names if they are not explicitly provided if channel_names is None: @@ -76,7 +76,7 @@ def from_dataframe( def from_pqt( signal_pqt_path: str | Path, - locations_pqt_path: str | Path = None, + locations_pqt_path: Optional[str | Path] = None, ): """reads in a photometry.signal.pqt files as they are registered in alyx. diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 4c8d7d7..ac34fde 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -1,3 +1,4 @@ +from typing import Optional import numpy as np import pandas as pd from scipy import stats @@ -5,23 +6,6 @@ from iblphotometry.processing import z, Regression, ExponDecay, detect_spikes, detect_outliers from iblphotometry.behavior import psth -## this approach works as well -# from functools import singledispatch -# @singledispatch -# def percentile_dist(A: nap.Tsd, pc: tuple = (50, 95), axis: None = None) -> float: -# """the distance between two percentiles in units of z -# should be proportional to SNR, assuming the signal is -# in the positive 5th percentile -# """ -# P = np.percentile(z(A.values), pc) -# return P[1] - P[0] - -# @percentile_dist.register -# def _(A: np.ndarray, pc: tuple = (50, 95), axis: int = -1) -> float: -# P = np.percentile(z(A), pc, axis=axis) -# return P[1] - P[0] - - def percentile_dist(A: pd.Series | np.ndarray, pc: tuple = (50, 95), axis=-1) -> float: """the distance between two percentiles in units of z. Captures the magnitude of transients. @@ -73,23 +57,15 @@ def signal_skew(A: pd.Series | np.ndarray, axis=-1) -> float: def n_unique_samples(A: pd.Series | np.ndarray) -> int: """number of unique samples in the signal. Low values indicate that the signal during acquisition was not within the range of the digitizer.""" - if isinstance(A, pd.Series): - return np.unique(A.values).shape[0] - elif isinstance(A, np.ndarray): - return A.shape[0] - else: - raise TypeError('A must be pd.Series or np.ndarray.') + a = A.values if isinstance(A, pd.Series) else A + return np.unique(a).shape[0] def n_spikes(A: pd.Series | np.ndarray, sd: int = 5): """count the number of spike artifacts in the recording.""" - if isinstance(A, pd.Series): - return detect_spikes(A.values, sd=sd).shape[0] - elif isinstance(A, np.ndarray): - return detect_spikes(A, sd=sd).shape[0] - else: - raise TypeError('A must be pd.Series or np.ndarray.') - + a = A.values if isinstance(A, pd.Series) else A + return detect_spikes(a, sd=sd).shape[0] + def n_outliers( A: pd.Series | np.ndarray, w_size: int = 1000, alpha: float = 0.0005 @@ -97,12 +73,8 @@ def n_outliers( """counts the number of outliers as detected by grubbs test for outliers. 
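    A sliding Grubbs test (see detect_outliers) is run in windows of w_size samples.
    Hypothetical call: n_bad = n_outliers(F, w_size=1000, alpha=0.0005)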
int: _description_ """ - if isinstance(A, pd.Series): - return detect_outliers(A.values, w_size=w_size, alpha=alpha).shape[0] - elif isinstance(A, np.ndarray): - return detect_outliers(A, w_size=w_size, alpha=alpha).shape[0] - else: - raise TypeError('A must be pd.Series or np.ndarray.') + a = A.values if isinstance(A, pd.Series) else A + return detect_outliers(a, w_size=w_size, alpha=alpha).shape[0] def bleaching_tau(A: pd.Series) -> float: @@ -151,8 +123,8 @@ def ttest_pre_post( def has_response_to_event( A: pd.Series, - event_times: np.array, - fs: float = None, + event_times: np.ndarray, + fs: Optional[float] = None, window: tuple = (-1, 1), alpha: float = 0.005, mode='peak', @@ -182,8 +154,8 @@ def has_response_to_event( def has_responses( A: pd.Series, trials: pd.DataFrame, - event_names: list = None, - fs: float = None, + event_names: list, + fs: Optional[float] = None, window: tuple = (-1, 1), alpha: float = 0.005, ) -> bool: diff --git a/src/iblphotometry/pipelines.py b/src/iblphotometry/pipelines.py index 159adc7..f51487c 100644 --- a/src/iblphotometry/pipelines.py +++ b/src/iblphotometry/pipelines.py @@ -10,7 +10,7 @@ def run_pipeline( pipeline, F_signal: pd.DataFrame, - F_reference: pd.DataFrame = None, + F_reference: pd.DataFrame | None = None, ) -> pd.DataFrame: # copy Fc = F_signal.copy() diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py index c13dce9..abd1e88 100644 --- a/src/iblphotometry/processing.py +++ b/src/iblphotometry/processing.py @@ -3,18 +3,15 @@ import numpy as np import scipy as sp -from scipy import signal, stats +from scipy import signal import pandas as pd from iblutil.numerical import rcoeff from ibldsp.utils import WindowGenerator -from ibldsp.utils import WindowGenerator from scipy.optimize import minimize from scipy.stats.distributions import norm -from scipy.stats import gaussian_kde -from scipy.stats import gaussian_kde as kde -from scipy.stats import t +from scipy.stats import gaussian_kde, t from scipy.special import pseudo_huber from iblphotometry.behavior import psth @@ -37,11 +34,11 @@ ## ####### ## ## ###### ## #### ####### ## ## ###### """ -def z(A: np.array, mode='classic'): +def z(A: np.ndarray, mode='classic'): """classic z-score. 
Deviation from sample mean in units of sd :param A: _description_ - :type A: np.array + :type A: np.ndarray :param mode: _description_, defaults to 'classic' :type mode: str, optional :return: _description_ @@ -52,12 +49,12 @@ def z(A: np.array, mode='classic'): if mode == 'median': return (A - np.median(A)) / np.std(A) -def mad(A: np.array): +def mad(A: np.ndarray): """ the MAD is defined as the median of the absolute deviations from the data's median see https://en.wikipedia.org/wiki/Median_absolute_deviation :param A: _description_ - :type A: np.array + :type A: np.ndarray :return: _description_ :rtype: _type_ """ @@ -74,7 +71,7 @@ def zscore(F: pd.Series, mode='classic'): return pd.Series(z(y, mode=mode), index=t) -def filt(F: pd.Series, N: int, Wn: float, fs: float = None, btype='low'): +def filt(F: pd.Series, N: int, Wn: float, fs: float | None = None, btype='low'): """ a wrapper for scipy.signal.butter and sosfiltfilt """ y, t = F.values, F.index.values @@ -236,7 +233,7 @@ def predict(self, x: np.ndarray, return_type='numpy'): class BleachCorrection: def __init__( self, - model: AbstractModel = None, + model = None, # TODO bring back type checking regression_method: str = 'mse', regression_params: dict = None, correction_method: str = 'subtract', @@ -271,9 +268,9 @@ class IsosbesticCorrection: def __init__( self, regression_method: str = 'mse', - regression_params: dict = None, + regression_params: dict | None = None, correction_method: str = 'subtract-divide', - lowpass_isosbestic: dict = None, + lowpass_isosbestic: dict | None = None, ): self.reg = Regression( model=LinearModel(), @@ -383,7 +380,7 @@ class LinearModel(AbstractModel): def eq(self, x, m, b): return x * m + b - def est_p0(self, x: np.array, y: np.array): + def est_p0(self, x: np.ndarray, y: np.ndarray): return tuple(np.polyfit(x, y, 1)) # x, y = np.sort(x), np.sort(y) # dx = x[-1] - x[0] @@ -399,7 +396,7 @@ class ExponDecay(AbstractModel): def eq(self, t, A, tau, b): return A * np.exp(-t / tau) + b - def est_p0(self, t: np.array, y: np.array): + def est_p0(self, t: np.ndarray, y: np.ndarray): return (y[0], t[int(t.shape[0] / 3)], y[-1]) @@ -415,7 +412,7 @@ class DoubleExponDecay(AbstractModel): def eq(self, t, A1, tau1, A2, tau2, b): return A1 * np.exp(-t / tau1) + A2 * np.exp(-t / tau2) + b - def est_p0(self, t: np.array, y: np.array): + def est_p0(self, t: np.ndarray, y: np.ndarray): A_est = y[0] tau_est = t[int(t.shape[0] / 3)] b_est = y[-1] @@ -438,7 +435,7 @@ def eq(self, t, A1, tau1, A2, tau2, A3, tau3, b): A1 * np.exp(-t / tau1) + A2 * np.exp(-t / tau2) + A3 * np.exp(-t / tau3) + b ) - def est_p0(self, t: np.array, y: np.array): + def est_p0(self, t: np.ndarray, y: np.ndarray): A_est = y[0] tau_est = t[int(t.shape[0] / 3)] b_est = y[-1] @@ -488,7 +485,7 @@ def isosbestic_correct(F_sig: pd.DataFrame, F_ref: pd.DataFrame, **kwargs): ####### ####### ## ######## #### ######## ## ## ######## ######## ## ######## ###### ## #### ####### ## ## """ -def _grubbs_single(y: np.array, alpha: float =0.005, mode: str='median') -> bool: +def _grubbs_single(y: np.ndarray, alpha: float =0.005, mode: str='median') -> bool: # to apply a single pass of grubbs outlier detection # see https://en.wikipedia.org/wiki/Grubbs%27s_test @@ -508,7 +505,7 @@ def _grubbs_single(y: np.array, alpha: float =0.005, mode: str='median') -> bool return False -def grubbs_test(y: np.array, alpha: float =0.005, mode:str='median'): +def grubbs_test(y: np.ndarray, alpha: float =0.005, mode:str='median'): # apply grubbs test iteratively until no more 
outliers are found outliers = [] while _grubbs_single(y, alpha=alpha): @@ -522,8 +519,8 @@ def grubbs_test(y: np.array, alpha: float =0.005, mode:str='median'): return np.sort(outliers) -def detect_outliers(y: np.array, w_size: int = 1000, alpha: float = 0.005): - # sliding grubbs test for a np.array +def detect_outliers(y: np.ndarray, w_size: int = 1000, alpha: float = 0.005): + # sliding grubbs test for a np.ndarray n_samples = y.shape[0] wg = WindowGenerator(n_samples - (n_samples % w_size), w_size, 0) dtype = np.dtype((np.float64, w_size)) @@ -544,14 +541,14 @@ def detect_outliers(y: np.array, w_size: int = 1000, alpha: float = 0.005): return np.unique(outlier_ix) -def remove_nans(y: np.array): +def remove_nans(y: np.ndarray): y = y[~pd.isna(y)] # nanfilter if y.shape[0] == 0: warnings.warn('y was all NaN and is now empty') return y -def fillnan_kde(y: np.array, w: int = 25): +def fillnan_kde(y: np.ndarray, w: int = 25): # fill nans with KDE from edges inds = np.where(pd.isna(y))[0] if inds.shape[0] > 0: @@ -560,7 +557,7 @@ def fillnan_kde(y: np.array, w: int = 25): ix_stop = np.clip(ix + w, 0, y.shape[0] - 1) y_ = y[ix_start:ix_stop] y_ = remove_nans(y_) - y[ix] = kde(y_).resample(1)[0][0] + y[ix] = gaussian_kde(y_).resample(1)[0][0] return y else: @@ -579,7 +576,7 @@ def remove_outliers(F: pd.Series, w_size: int = 1000, alpha: float = 0.005, w: i return pd.Series(y, index=t) -def detect_spikes(t: np.array, sd: int = 5): +def detect_spikes(t: np.ndarray, sd: int = 5): dt = np.diff(t) bad_inds = dt < np.average(dt) - sd * np.std(dt) return np.where(bad_inds)[0] @@ -624,7 +621,7 @@ def make_sliding_window( warning=None, ): - """use np.stride_tricks to make a sliding window view of a 1-d np.array A + """use np.stride_tricks to make a sliding window view of a 1-d np.ndarray A full overlap, step size 1 assumes 8 byte numbers (to be exposed? but needs to be tested first) pads beginning and end of array with edge values @@ -664,7 +661,7 @@ def sliding_dFF(F: pd.Series, w_len: float, fs=None, weights=None): fs = 1 / np.median(np.diff(t)) if fs is None else fs w_size = int(w_len * fs) - def _dFF(A: np.array): + def _dFF(A: np.ndarray): return (A - np.average(A)) / np.average(A) if weights is not None: diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py index 242a2f5..088be9f 100644 --- a/src/iblphotometry/qc.py +++ b/src/iblphotometry/qc.py @@ -18,15 +18,16 @@ logger = logging.getLogger() +from collections.abc import Callable # %% # those could be in metrics def sliding_metric( F: pd.Series, w_len: float, - fs: float = None, - metric: callable = None, + metric: Callable, + fs: float | None = None, n_wins: int = -1, - metric_kwargs: dict = None, + metric_kwargs: dict | None = None, ): """applies a metric along time. 
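For orientation, a usage sketch of the windowed metric evaluation; F is assumed
to be a pd.Series holding a single fiber's trace, and percentile_dist comes from
the metrics module refactored above:

    from iblphotometry.metrics import percentile_dist
    from iblphotometry.qc import sliding_metric

    # transient magnitude across the session, evaluated in 60 s windows,
    # subsampled to at most 100 windows
    m = sliding_metric(F, w_len=60.0, metric=percentile_dist, n_wins=100)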
@@ -59,9 +60,9 @@ def sliding_metric( # eval pipleline will be here def eval_metric( F: pd.Series, - metric: callable = None, - metric_kwargs: dict = None, - sliding_kwargs: dict = None, + metric: Callable, + metric_kwargs: dict | None = None, + sliding_kwargs: dict | None = None, ): m = metric(F, **metric_kwargs) if metric_kwargs is not None else metric(F) From 0cc4095f7eed37a6a45dea9fd18c6d783a4f191d Mon Sep 17 00:00:00 2001 From: georg Date: Wed, 4 Dec 2024 11:48:50 +0000 Subject: [PATCH 06/10] refactoring and adding io function for new notebook --- src/iblphotometry/io.py | 32 +++++++++++++++++++++++++------- src/iblphotometry/qc.py | 34 ++++++++++++++-------------------- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py index 85ba4c1..6e499b7 100644 --- a/src/iblphotometry/io.py +++ b/src/iblphotometry/io.py @@ -45,7 +45,9 @@ def from_dataframe( # infer if not explicitly provided: defaults to everything that starts with 'Region' if data_columns is None: - data_columns = [col for col in raw_df.columns if col.startswith('Region')] + # this hacky parser currently deals with the inconsistency between carolinas and alejandros extraction + # https://github.com/int-brain-lab/ibl-photometry/issues/35 + data_columns = [col for col in raw_df.columns if col.startswith('Region') or col.startswith('G')] # infer name of time column if not provided if time_column is None: @@ -73,6 +75,19 @@ def from_dataframe( return raw_dfs +def from_dataframes(raw_df: pd.DataFrame, locations_df: pd.DataFrame): + data_columns = (list(locations_df.index),) + rename = locations_df['brain_region'].to_dict() + + read_config = dict( + data_columns=data_columns, + time_column='times', + channel_column='name', + rename=rename, + ) + + return from_dataframe(raw_df, **read_config) + def from_pqt( signal_pqt_path: str | Path, @@ -110,7 +125,7 @@ def from_pqt( return from_dataframe(raw_df, **read_config) -def _read_raw_neurophotometrics_df(raw_df: pd.DataFrame, rois=None) -> pd.DataFrame: +def from_raw_neurophotometrics_df(raw_df: pd.DataFrame, rois=None, drop_first=True) -> pd.DataFrame: """reads in parses the output of the neurophotometrics FP3002 Args: @@ -159,10 +174,14 @@ def _read_raw_neurophotometrics_df(raw_df: pd.DataFrame, rois=None) -> pd.DataFr for cn in ['name', 'color', 'wavelength']: out_df.loc[states == state, cn] = channel_meta_map.iloc[ic[0]][cn] + # drop first frame + if drop_first: + out_df = out_df.iloc[1:].reset_index() + return out_df -def from_raw_neurophotometrics( +def from_raw_neurophotometrics_file( path: str | Path, drop_first=True, validate=True, @@ -181,8 +200,7 @@ def from_raw_neurophotometrics( nap.TsdFrame: _description_ # FIXME """ warnings.warn( - 'loading a photometry from raw neurophotometrics output. The data will _not_ be synced and\ - is being split into channels by LedState (converted to LED wavelength in nm)' + 'loading photometry from raw neurophotometrics output. 
The data will _not_ be synced and is being split into channels by LedState (converted to LED wavelength in nm)' ) if isinstance(path, str): path = Path(path) @@ -199,11 +217,11 @@ def from_raw_neurophotometrics( if validate: raw_df = _validate_dataframe(raw_df) - df = _read_raw_neurophotometrics_df(raw_df) + df = from_raw_neurophotometrics_df(raw_df) # drop first frame if drop_first: - df = df.iloc[1:] + df = df.iloc[1:].reset_index() data_columns = [col for col in df.columns if col.startswith('G')] read_config = dict( diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py index 088be9f..b424044 100644 --- a/src/iblphotometry/qc.py +++ b/src/iblphotometry/qc.py @@ -1,25 +1,19 @@ # %% -import numpy as np -from scipy.stats import linregress -import pandas as pd - +import gc +from collections.abc import Callable from tqdm import tqdm import logging - -import gc - -from iblphotometry.processing import make_sliding_window -from pipelines import run_pipeline import warnings -from brainbox.io.one import SessionLoader +import numpy as np +import pandas as pd +from scipy.stats import linregress -warnings.filterwarnings('once', category=DeprecationWarning, module='pynapple') +from iblphotometry.processing import make_sliding_window +from iblphotometry.pipelines import run_pipeline logger = logging.getLogger() -from collections.abc import Callable - # %% # those could be in metrics def sliding_metric( F: pd.Series, @@ -119,9 +113,9 @@ def run_qc( for eid in tqdm(eids): print(eid) # get photometry data - raw_tfs = data_loader.load_photometry_data(eid=eid) - signal_bands = list(raw_tfs.keys()) - brain_regions = raw_tfs[signal_bands[0]] + raw_dfs = data_loader.load_photometry_data(eid=eid) + signal_bands = list(raw_dfs.keys()) + brain_regions = raw_dfs[signal_bands[0]] # get behavioral data # TODO this should be provided @@ -136,7 +130,7 @@ def run_qc( # trials = data_loader.one.load_dataset(eid, '*trials.table.pqt') for band in signal_bands: - raw_tf = raw_tfs[band] + raw_tf = raw_dfs[band] for region in brain_regions: qc_result = qc_series( raw_tf[region], qc_metrics['raw'], sliding_kwargs=None, eid=eid @@ -151,14 +145,14 @@ def run_qc( if 'reference' in sigref_mapping: # this is for isosbestic pipelines proc_tf = run_pipeline( pipeline, - raw_tfs[sigref_mapping['signal']], - raw_tfs[sigref_mapping['reference']], + raw_dfs[sigref_mapping['signal']], + raw_dfs[sigref_mapping['reference']], ) else: # FIXME this fails for true-multiband # this hack works for single-band # possible fix could be that signal could be a list - proc_tf = run_pipeline(pipeline, raw_tfs[sigref_mapping['signal']]) + proc_tf = run_pipeline(pipeline, raw_dfs[sigref_mapping['signal']]) for region in brain_regions: # sliding qc of the processed data From b7296b8c30363157f69f02a7c5394643d37fb351 Mon Sep 17 00:00:00 2001 From: georg Date: Wed, 4 Dec 2024 12:39:48 +0000 Subject: [PATCH 07/10] refactor --- src/tests/test_loaders.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/test_loaders.py b/src/tests/test_loaders.py index 38599e1..124fe83 100644 --- a/src/tests/test_loaders.py +++ b/src/tests/test_loaders.py @@ -17,8 +17,8 @@ def test_from_array(self): fio.from_array(times, data, names) # for neurophotometrics hardware - def test_from_raw_neurophotometrics_output(self): - fio.from_raw_neurophotometrics(self.paths['raw_neurophotometrics_csv']) + def test_from_raw_neurophotometrics_file(self): + fio.from_raw_neurophotometrics_file(self.paths['raw_neurophotometrics_csv']) # from pqt files as 
they are returned from ONE by .load_dataset() def test_from_pqt(self): From 264a5ac9bd4441dc6b306d447f4d662c7ba3ddba Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 4 Dec 2024 15:00:02 +0000 Subject: [PATCH 08/10] removing old tests folder --- src/tests/test_pipelines.py | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 src/tests/test_pipelines.py diff --git a/src/tests/test_pipelines.py b/src/tests/test_pipelines.py deleted file mode 100644 index 277df91..0000000 --- a/src/tests/test_pipelines.py +++ /dev/null @@ -1,35 +0,0 @@ -import unittest -from pathlib import Path -import iblphotometry.io as fio -from iblphotometry.pipelines import ( - run_pipeline, - sliding_mad_pipeline, - isosbestic_correction_pipeline, -) -from iblphotometry.synthetic import generate_dataframe -import tests.base_tests - - -class TestPipelines(tests.base_tests.PhotometryDataTestCase): - def test_single_band_pipeline(self): - # on synthetic data - raw_dfs = generate_dataframe() - run_pipeline(sliding_mad_pipeline, raw_dfs['raw_calcium']) - - Path(__file__).parent.joinpath() - # on real data - raw_dfs = fio.from_pqt( - self.paths['photometry_signal_pqt'], - self.paths['photometryROI_locations_pqt'], - ) - signal_bands = list(raw_dfs.keys()) - run_pipeline(sliding_mad_pipeline, raw_dfs[signal_bands[0]]) - - def test_isosbestic_pipeline(self): - # on synthetic data - raw_dfs = generate_dataframe() - - # run pipeline - run_pipeline( - isosbestic_correction_pipeline, raw_dfs['raw_calcium'], raw_dfs['raw_isosbestic'] - ) From d32a5e5bd78adf75746e12572b2d04a9078ad1bc Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 4 Dec 2024 15:08:55 +0000 Subject: [PATCH 09/10] ruff --- src/iblphotometry/io.py | 15 ++++-- src/iblphotometry/loaders.py | 1 - src/iblphotometry/metrics.py | 11 ++++- src/iblphotometry/neurophotometrics.py | 63 ++++++++++++++++++++++++-- src/iblphotometry/pipelines.py | 9 +++- src/iblphotometry/processing.py | 44 ++++++++++++------ src/iblphotometry/qc.py | 1 + src/iblphotometry/synthetic.py | 9 ++-- 8 files changed, 124 insertions(+), 29 deletions(-) diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py index 6e499b7..3fa36a1 100644 --- a/src/iblphotometry/io.py +++ b/src/iblphotometry/io.py @@ -47,7 +47,11 @@ def from_dataframe( if data_columns is None: # this hacky parser currently deals with the inconsistency between carolinas and alejandros extraction # https://github.com/int-brain-lab/ibl-photometry/issues/35 - data_columns = [col for col in raw_df.columns if col.startswith('Region') or col.startswith('G')] + data_columns = [ + col + for col in raw_df.columns + if col.startswith('Region') or col.startswith('G') + ] # infer name of time column if not provided if time_column is None: @@ -75,10 +79,11 @@ def from_dataframe( return raw_dfs + def from_dataframes(raw_df: pd.DataFrame, locations_df: pd.DataFrame): data_columns = (list(locations_df.index),) rename = locations_df['brain_region'].to_dict() - + read_config = dict( data_columns=data_columns, time_column='times', @@ -125,7 +130,9 @@ def from_pqt( return from_dataframe(raw_df, **read_config) -def from_raw_neurophotometrics_df(raw_df: pd.DataFrame, rois=None, drop_first=True) -> pd.DataFrame: +def from_raw_neurophotometrics_df( + raw_df: pd.DataFrame, rois=None, drop_first=True +) -> pd.DataFrame: """reads in parses the output of the neurophotometrics FP3002 Args: @@ -251,6 +258,7 @@ def _validate_dataframe( return schema_raw_data.validate(df) + def 
_validate_neurophotometrics_digital_inputs(df: pd.DataFrame) -> pd.DataFrame:
    schema_digital_inputs = pandera.DataFrameSchema(
        columns=dict(
            ChannelName=pandera.Column(str, coerce=True),
            Channel=pandera.Column(pandera.Int64, coerce=True),
            AlwaysTrue=pandera.Column(bool, coerce=True),
            SystemTimestamp=pandera.Column(pandera.Float64),
            ComputerTimestamp=pandera.Column(pandera.Float64),
        )
    )
    return schema_digital_inputs.validate(df)
-
diff --git a/src/iblphotometry/loaders.py b/src/iblphotometry/loaders.py
index 75b4f46..15b6158 100644
--- a/src/iblphotometry/loaders.py
+++ b/src/iblphotometry/loaders.py
@@ -66,7 +66,6 @@ def _load_data_from_eid(self, eid: str, rename=True):

        return raw_dfs

-
    def _eid2pnames(self, eid: str):
        session_path = self.one.eid2path(eid)
        pnames = [reg.name for reg in session_path.joinpath('alf').glob('Region*')]
diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py
index ac34fde..d412766 100644
--- a/src/iblphotometry/metrics.py
+++ b/src/iblphotometry/metrics.py
@@ -3,9 +3,16 @@
 import pandas as pd
 from scipy import stats

-from iblphotometry.processing import z, Regression, ExponDecay, detect_spikes, detect_outliers
+from iblphotometry.processing import (
+    z,
+    Regression,
+    ExponDecay,
+    detect_spikes,
+    detect_outliers,
+)
 from iblphotometry.behavior import psth

+
 def percentile_dist(A: pd.Series | np.ndarray, pc: tuple = (50, 95), axis=-1) -> float:
     """the distance between two percentiles in units of z.
     Captures the magnitude of transients.
@@ -65,7 +72,7 @@ def n_spikes(A: pd.Series | np.ndarray, sd: int = 5):
     """count the number of spike artifacts in the recording."""
     a = A.values if isinstance(A, pd.Series) else A
     return detect_spikes(a, sd=sd).shape[0]
-
+

 def n_outliers(
     A: pd.Series | np.ndarray, w_size: int = 1000, alpha: float = 0.0005
diff --git a/src/iblphotometry/neurophotometrics.py b/src/iblphotometry/neurophotometrics.py
index 4fb0050..869caea 100644
--- a/src/iblphotometry/neurophotometrics.py
+++ b/src/iblphotometry/neurophotometrics.py
@@ -3,6 +3,7 @@
 The light source map refers to the available LEDs on the system.
 The flags refer to the byte encoding of LED states in the system.
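 For example, with the tables below a LedState value of 2 decodes as a plain
 470 nm frame, and 18 as the 470 nm LED with digital Output 0 HIGH
 (LED_STATES['L470'][2] == 18); the loaders in io.py rely on this mapping to
 split frames into per-wavelength channels.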
""" + LIGHT_SOURCE_MAP = { 'color': ['None', 'Violet', 'Blue', 'Green'], 'wavelength': [0, 415, 470, 560], @@ -24,8 +25,60 @@ 10: 'Input 0 signal HIGH + Stimulation', 11: 'Output 0 HIGH + Input 0 HIGH + Stimulation', }, - 'No LED ON': {0: 0, 1: 8, 2: 16, 3: 32, 4: 64, 5: 128, 6: 256, 7: 512, 8: 48, 9: 528, 10: 544, 11: 560}, - 'L415': {0: 1, 1: 9, 2: 17, 3: 33, 4: 65, 5: 129, 6: 257, 7: 513, 8: 49, 9: 529, 10: 545, 11: 561}, - 'L470': {0: 2, 1: 10, 2: 18, 3: 34, 4: 66, 5: 130, 6: 258, 7: 514, 8: 50, 9: 530, 10: 546, 11: 562}, - 'L560': {0: 4, 1: 12, 2: 20, 3: 36, 4: 68, 5: 132, 6: 260, 7: 516, 8: 52, 9: 532, 10: 548, 11: 564} -} \ No newline at end of file + 'No LED ON': { + 0: 0, + 1: 8, + 2: 16, + 3: 32, + 4: 64, + 5: 128, + 6: 256, + 7: 512, + 8: 48, + 9: 528, + 10: 544, + 11: 560, + }, + 'L415': { + 0: 1, + 1: 9, + 2: 17, + 3: 33, + 4: 65, + 5: 129, + 6: 257, + 7: 513, + 8: 49, + 9: 529, + 10: 545, + 11: 561, + }, + 'L470': { + 0: 2, + 1: 10, + 2: 18, + 3: 34, + 4: 66, + 5: 130, + 6: 258, + 7: 514, + 8: 50, + 9: 530, + 10: 546, + 11: 562, + }, + 'L560': { + 0: 4, + 1: 12, + 2: 20, + 3: 36, + 4: 68, + 5: 132, + 6: 260, + 7: 516, + 8: 52, + 9: 532, + 10: 548, + 11: 564, + }, +} diff --git a/src/iblphotometry/pipelines.py b/src/iblphotometry/pipelines.py index c848473..f679583 100644 --- a/src/iblphotometry/pipelines.py +++ b/src/iblphotometry/pipelines.py @@ -1,11 +1,18 @@ import numpy as np import pandas as pd -from iblphotometry.processing import remove_spikes, lowpass_bleachcorrect, isosbestic_correct, sliding_mad, zscore +from iblphotometry.processing import ( + remove_spikes, + lowpass_bleachcorrect, + isosbestic_correct, + sliding_mad, + zscore, +) import logging logger = logging.getLogger() + def run_pipeline( pipeline, F_signal: pd.DataFrame, diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py index abd1e88..415a3ae 100644 --- a/src/iblphotometry/processing.py +++ b/src/iblphotometry/processing.py @@ -19,7 +19,6 @@ from copy import copy - # machine resolution eps = np.finfo(np.float64).eps @@ -34,6 +33,7 @@ ## ####### ## ## ###### ## #### ####### ## ## ###### """ + def z(A: np.ndarray, mode='classic'): """classic z-score. Deviation from sample mean in units of sd @@ -49,10 +49,11 @@ def z(A: np.ndarray, mode='classic'): if mode == 'median': return (A - np.median(A)) / np.std(A) + def mad(A: np.ndarray): - """ the MAD is defined as the median of the absolute deviations from the data's median + """the MAD is defined as the median of the absolute deviations from the data's median see https://en.wikipedia.org/wiki/Median_absolute_deviation - + :param A: _description_ :type A: np.ndarray :return: _description_ @@ -60,11 +61,13 @@ def mad(A: np.ndarray): """ return np.median(np.absolute(A - np.median(A)), axis=-1) + def madscore(F: pd.Series): # TODO overloading of mad? 
y, t = F.values, F.index.values return pd.Series(mad(y), index=t) + def zscore(F: pd.Series, mode='classic'): y, t = F.values, F.index.values # mu, sig = np.average(y), np.std(y) @@ -72,8 +75,7 @@ def zscore(F: pd.Series, mode='classic'): def filt(F: pd.Series, N: int, Wn: float, fs: float | None = None, btype='low'): - """ a wrapper for scipy.signal.butter and sosfiltfilt - """ + """a wrapper for scipy.signal.butter and sosfiltfilt""" y, t = F.values, F.index.values if fs is None: fs = 1 / np.median(np.diff(t)) @@ -81,6 +83,7 @@ def filt(F: pd.Series, N: int, Wn: float, fs: float | None = None, btype='low'): y_filt = signal.sosfiltfilt(sos, y) return pd.Series(y_filt, index=t) + def sliding_rcoeff(signal_a, signal_b, nswin, overlap=0): """ Computes the local correlation coefficient between two signals in sliding windows @@ -110,6 +113,7 @@ def sliding_rcoeff(signal_a, signal_b, nswin, overlap=0): ######## ####### ###### ###### ## ####### ## ## ###### ## #### ####### ## ## ###### """ + def mse_loss(p, x, y, fun): # mean squared error y_hat = fun(x, *p) @@ -138,6 +142,7 @@ def irls_loss(p, x, y, fun, d=1e-7): w = 1 / f return np.sum(w * np.abs(a) ** 2) / y.shape[0] + """ ######## ######## ###### ######## ######## ###### ###### #### ####### ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## @@ -151,6 +156,7 @@ def irls_loss(p, x, y, fun, d=1e-7): # wrapper class for regressions with different loss functions # following a sklearn style of .fit() and .predict() + class Regression: def __init__(self, model=None, method: str = 'mse', method_params=None): self.model = model @@ -220,6 +226,7 @@ def predict(self, x: np.ndarray, return_type='numpy'): if return_type == 'pandas': return pd.Series(y_hat, index=x) + """ ######## ## ######## ### ###### ## ## ###### ####### ######## ######## ######## ###### ######## #### ####### ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## @@ -230,10 +237,11 @@ def predict(self, x: np.ndarray, return_type='numpy'): ######## ######## ######## ## ## ###### ## ## ###### ####### ## ## ## ## ######## ###### ## #### ####### ## ## """ + class BleachCorrection: def __init__( self, - model = None, # TODO bring back type checking + model=None, # TODO bring back type checking regression_method: str = 'mse', regression_params: dict = None, correction_method: str = 'subtract', @@ -262,7 +270,7 @@ def __init__( def correct(self, F: pd.Series): F_filt = filt(F, **self.filter_params) return correct(F, F_filt, mode=self.correction_method) - + class IsosbesticCorrection: def __init__( @@ -293,7 +301,10 @@ def correct( return correct(F_ca, F_iso_fit, mode=self.correction_method) -def correct(signal: pd.Series, reference: pd.Series, mode: str = 'subtract') -> pd.Series: + +def correct( + signal: pd.Series, reference: pd.Series, mode: str = 'subtract' +) -> pd.Series: """the main function that applies the correction of a signal with a reference. 
@@ -329,7 +340,6 @@
 class AbstractModel(ABC):
-
     @abstractmethod
     def eq():
         # the model equation
@@ -374,8 +384,10 @@ def calc_model_stats(self, y, y_hat, n_samples: int = -1, use_kde: bool = False)
         aic = self._calc_aic(ll, k)
         return dict(r_sq=r_sq, ll=ll, aic=aic)
 
+
 # the actual models
 
+
 class LinearModel(AbstractModel):
     def eq(self, x, m, b):
         return x * m + b
@@ -449,6 +461,7 @@ def est_p0(self, t: np.ndarray, y: np.ndarray):
             b_est,
         )
 
+
 """
 ###### ####### ######## ######## ######## ###### ######## #### ####### ## ## ######## ## ## ## ## ###### ######## #### ####### ## ## ######
@@ -461,15 +474,18 @@
 ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## ## ## ## ### ## ## ## ## ## ## ## ### ## ## ##
 """
 
 # these are the convenience functions that are called in pipelines
 
+
 def lowpass_bleachcorrect(F: pd.Series, **kwargs):
     bc = LowpassBleachCorrection(**kwargs)
     return bc.correct(F)
 
+
 def exponential_bleachcorrect(F: pd.Series, **kwargs):
     model = DoubleExponDecay()
     ec = BleachCorrection(model, **kwargs)
     return ec.correct(F)
 
+
 def isosbestic_correct(F_sig: pd.DataFrame, F_ref: pd.DataFrame, **kwargs):
     ic = IsosbesticCorrection(**kwargs)
     return ic.correct(F_sig, F_ref)
@@ -485,7 +501,8 @@ def isosbestic_correct(F_sig: pd.DataFrame, F_ref: pd.DataFrame, **kwargs):
 ####### ####### ## ######## #### ######## ## ## ######## ######## ## ######## ###### ## #### ####### ## ##
 """
 
-def _grubbs_single(y: np.ndarray, alpha: float =0.005, mode: str='median') -> bool:
+
+def _grubbs_single(y: np.ndarray, alpha: float = 0.005, mode: str = 'median') -> bool:
     # to apply a single pass of grubbs outlier detection
     # see https://en.wikipedia.org/wiki/Grubbs%27s_test
 
@@ -505,7 +522,7 @@
         return False
 
-def grubbs_test(y: np.ndarray, alpha: float =0.005, mode:str='median'):
+def grubbs_test(y: np.ndarray, alpha: float = 0.005, mode: str = 'median'):
     # apply grubbs test iteratively until no more outliers are found
     outliers = []
     while _grubbs_single(y, alpha=alpha):
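Since both functions lean on Grubbs's test, a self-contained sketch of the statistic may help. This is the classic two-sided mean/sd form only; the in-repo functions default to a 'median' mode, which this sketch does not reproduce:

    import numpy as np
    from scipy import stats

    def grubbs_statistic(y: np.ndarray) -> float:
        # largest absolute deviation from the mean, in units of sample sd
        return np.max(np.abs(y - np.mean(y))) / np.std(y, ddof=1)

    def grubbs_critical(n: int, alpha: float = 0.005) -> float:
        # two-sided critical value, see https://en.wikipedia.org/wiki/Grubbs%27s_test
        t = stats.t.ppf(1 - alpha / (2 * n), n - 2)
        return (n - 1) / np.sqrt(n) * np.sqrt(t**2 / (n - 2 + t**2))

    def has_outlier(y: np.ndarray, alpha: float = 0.005) -> bool:
        # one pass flags at most one outlier, hence the iteration in grubbs_test
        return grubbs_statistic(y) > grubbs_critical(y.size, alpha)
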
@@ -565,7 +582,9 @@ def fillnan_kde(y: np.ndarray, w: int = 25):
     return y
 
 
-def remove_outliers(F: pd.Series, w_size: int = 1000, alpha: float = 0.005, w: int = 25):
+def remove_outliers(
+    F: pd.Series, w_size: int = 1000, alpha: float = 0.005, w: int = 25
+):
     y, t = F.values, F.index.values
     y = copy(y)
     outliers = detect_outliers(y, w_size=w_size, alpha=alpha)
@@ -620,7 +639,6 @@ def make_sliding_window(
     method='stride_tricks',
     warning=None,
 ):
-
     """use np.stride_tricks to make a sliding window view of a 1-d np.ndarray A,
     with full overlap and a step size of 1.
     assumes 8-byte numbers (to be exposed, but this needs to be tested first)
diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py
index b424044..464465d 100644
--- a/src/iblphotometry/qc.py
+++ b/src/iblphotometry/qc.py
@@ -14,6 +14,7 @@
 logger = logging.getLogger()
 
+
 # %%
 # those could be in metrics
 def sliding_metric(
     F: pd.Series,
diff --git a/src/iblphotometry/synthetic.py b/src/iblphotometry/synthetic.py
index 2c44060..9a7f1bc 100644
--- a/src/iblphotometry/synthetic.py
+++ b/src/iblphotometry/synthetic.py
@@ -7,7 +7,6 @@
 import scipy.signal
 
 
-
 def synthetic101(fs=30, rl=1000, event_rate=0.2):
     """
     Generates synthetic photometry data with
@@ -47,8 +46,12 @@ def generate_dataframe(sigma: float = 0.01):
     df[['raw_calcium', 'raw_isosbestic']] += np.random.randn(*df.shape) * sigma
 
     raw_dfs = dict(
-        raw_calcium=pd.DataFrame(df['raw_calcium'].values, index=df.index, columns=['Region01']),
-        raw_isosbestic=pd.DataFrame(df['raw_isosbestic'].values, index=df.index, columns=['Region01']),
+        raw_calcium=pd.DataFrame(
+            df['raw_calcium'].values, index=df.index, columns=['Region01']
+        ),
+        raw_isosbestic=pd.DataFrame(
+            df['raw_isosbestic'].values, index=df.index, columns=['Region01']
+        ),
     )
     return raw_dfs

From 08bc9c35d94cddacee240357618acb5379027756 Mon Sep 17 00:00:00 2001
From: Georg Raiser
Date: Wed, 4 Dec 2024 15:14:53 +0000
Subject: [PATCH 10/10] ruff again

---
 .gitignore                      | 1 +
 src/iblphotometry/processing.py | 1 -
 src/iblphotometry/qc.py         | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index d5401f8..04237af 100644
--- a/.gitignore
+++ b/.gitignore
@@ -164,3 +164,4 @@ cython_debug/
 # local scripts
 src/local
 src/analysis
+src/examples
diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py
index 415a3ae..8ab3f48 100644
--- a/src/iblphotometry/processing.py
+++ b/src/iblphotometry/processing.py
@@ -2,7 +2,6 @@
 import warnings
 
 import numpy as np
-import scipy as sp
 from scipy import signal
 import pandas as pd
 
diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py
index 464465d..5276026 100644
--- a/src/iblphotometry/qc.py
+++ b/src/iblphotometry/qc.py
@@ -3,7 +3,6 @@
 from collections.abc import Callable
 from tqdm import tqdm
 import logging
-import warnings
 
 import numpy as np
 import pandas as pd
@@ -11,6 +10,7 @@
 from iblphotometry.processing import make_sliding_window
 from iblphotometry.pipelines import run_pipeline
+from brainbox.io.one import SessionLoader
 
 logger = logging.getLogger()
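On the stride-tricks approach named in make_sliding_window's docstring (and relied on by qc.py's sliding metrics via the import above): the core idea is available through numpy's public API since 1.20. A sketch, with the caveat that the in-repo helper layers window-size handling and memory checks on top of this:

    import numpy as np

    y = np.arange(10.0)
    w_size = 4
    # full-overlap, step-size-1 windows as a zero-copy view, shape (7, 4)
    windows = np.lib.stride_tricks.sliding_window_view(y, w_size)
    # sliding statistics are then reductions over the last axis, e.g. a sliding MAD
    med = np.median(windows, axis=-1, keepdims=True)
    mad_trace = np.median(np.abs(windows - med), axis=-1)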