From 61a89adbd26ba97627574ffc18dd37204457eb6b Mon Sep 17 00:00:00 2001 From: georg Date: Mon, 2 Dec 2024 18:41:28 +0000 Subject: [PATCH 01/10] completely removed pynapple --- src/iblphotometry/bleach_corrections.py | 30 +-- src/iblphotometry/helpers.py | 20 +- src/iblphotometry/io.py | 17 +- src/iblphotometry/loaders.py | 54 +---- src/iblphotometry/metrics.py | 73 ++++--- src/iblphotometry/outlier_detection.py | 17 +- src/iblphotometry/pipelines.py | 250 ++++++++++++------------ src/iblphotometry/qc.py | 24 +-- src/iblphotometry/sliding_operations.py | 20 +- src/iblphotometry/synthetic.py | 21 +- src/tests/test_metrics.py | 6 +- src/tests/test_pipelines.py | 16 +- 12 files changed, 245 insertions(+), 303 deletions(-) diff --git a/src/iblphotometry/bleach_corrections.py b/src/iblphotometry/bleach_corrections.py index ceb84e0..a64cad9 100644 --- a/src/iblphotometry/bleach_corrections.py +++ b/src/iblphotometry/bleach_corrections.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod import warnings import numpy as np -import pynapple as nap +import pandas as pd from scipy.optimize import minimize from scipy.stats.distributions import norm from scipy.stats import gaussian_kde @@ -11,14 +11,14 @@ from inspect import signature -def correct(signal: nap.Tsd, reference: nap.Tsd, mode: str = 'subtract') -> nap.Tsd: +def correct(signal: pd.Series, reference: pd.Series, mode: str = 'subtract') -> pd.Series: if mode == 'subtract': signal_corrected = signal.values - reference.values if mode == 'divide': signal_corrected = signal.values / reference.values if mode == 'subtract-divide': signal_corrected = (signal.values - reference.values) / reference.values - return nap.Tsd(t=signal.times(), d=signal_corrected) + return pd.Series(signal_corrected, index=signal.index.values) def mse_loss(p, x, y, fun): @@ -240,8 +240,8 @@ def predict(self, x: np.ndarray, return_type='numpy'): y_hat = self.model.eq(x, *self.popt) if return_type == 'numpy': return y_hat - if return_type == 'pynapple': - return nap.Tsd(t=x, d=y_hat) + if return_type == 'pandas': + return pd.Series(y_hat, index=x) class BleachCorrection: @@ -258,9 +258,9 @@ def __init__( ) self.correction_method = correction_method - def correct(self, F: nap.Tsd): - self.regression.fit(F.times(), F.values) - ref = self.regression.predict(F.times(), return_type='pynapple') + def correct(self, F: pd.Series): + self.regression.fit(F.index.values, F.values) + ref = self.regression.predict(F.index.values, return_type='pandas') return correct(F, ref, mode=self.correction_method) @@ -282,14 +282,14 @@ def __init__( def correct( self, - F_ca: nap.Tsd, - F_iso: nap.Tsd, + F_ca: pd.Series, + F_iso: pd.Series, ): if self.lowpass_isosbestic is not None: F_iso = filt(F_iso, **self.lowpass_isosbestic) self.reg.fit(F_iso.values, F_ca.values) - F_iso_fit = self.reg.predict(F_iso.values, return_type='pynapple') + F_iso_fit = self.reg.predict(F_iso.values, return_type='pandas') return correct(F_ca, F_iso_fit, mode=self.correction_method) @@ -303,23 +303,23 @@ def __init__( self.filter_params = filter_params self.correction_method = correction_method - def correct(self, F: nap.Tsd): + def correct(self, F: pd.Series): F_filt = filt(F, **self.filter_params) return correct(F, F_filt, mode=self.correction_method) # convenience functions for pipelines -def lowpass_bleachcorrect(F: nap.Tsd, **kwargs): +def lowpass_bleachcorrect(F: pd.Series, **kwargs): bc = LowpassBleachCorrection(**kwargs) return bc.correct(F) -def exponential_bleachcorrect(F: nap.Tsd, **kwargs): +def 
exponential_bleachcorrect(F: pd.Series, **kwargs): model = DoubleExponDecay() ec = BleachCorrection(model, **kwargs) return ec.correct(F) -def isosbestic_correct(F_sig: nap.TsdFrame, F_ref: nap.TsdFrame, **kwargs): +def isosbestic_correct(F_sig: pd.DataFrame, F_ref: pd.DataFrame, **kwargs): ic = IsosbesticCorrection(**kwargs) return ic.correct(F_sig, F_ref) diff --git a/src/iblphotometry/helpers.py b/src/iblphotometry/helpers.py index 54fda65..16f9e1d 100644 --- a/src/iblphotometry/helpers.py +++ b/src/iblphotometry/helpers.py @@ -1,6 +1,6 @@ import numpy as np from scipy import signal -import pynapple as nap +import pandas as pd from ibldsp.utils import WindowGenerator from iblutil.numerical import rcoeff @@ -37,13 +37,13 @@ def mad(A: np.array): # TODO move to processing -def madscore(F: nap.Tsd): - y, t = F.values, F.times() - return nap.Tsd(t=t, d=mad(y)) +def madscore(F: pd.Series): + y, t = F.values, F.index.values + return pd.Series(mad(y), index=t) # TODO move to processing -def zscore(F: nap.Tsd, mode='classic'): +def zscore(F: pd.Series, mode='classic'): """pynapple friendly zscore Args: @@ -52,13 +52,13 @@ def zscore(F: nap.Tsd, mode='classic'): Returns: _type_: z-scored nap.Tsd """ - y, t = F.values, F.times() + y, t = F.values, F.index.values # mu, sig = np.average(y), np.std(y) - return nap.Tsd(t=t, d=z(y, mode=mode)) + return pd.Series(z(y, mode=mode), index=t) # TODO move to processing -def filt(F: nap.Tsd, N: int, Wn: float, fs: float = None, btype='low'): +def filt(F: pd.Series, N: int, Wn: float, fs: float = None, btype='low'): """a pynapple friendly wrapper for scipy.signal.butter and sosfiltfilt Args: @@ -71,12 +71,12 @@ def filt(F: nap.Tsd, N: int, Wn: float, fs: float = None, btype='low'): Returns: _type_: _description_ """ - y, t = F.values, F.times() + y, t = F.values, F.index.values if fs is None: fs = 1 / np.median(np.diff(t)) sos = signal.butter(N, Wn, btype, fs=fs, output='sos') y_filt = signal.sosfiltfilt(sos, y) - return nap.Tsd(t=t, d=y_filt) + return pd.Series(y_filt, index=t) def sliding_rcoeff(signal_a, signal_b, nswin, overlap=0): diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py index 21f3564..eefc875 100644 --- a/src/iblphotometry/io.py +++ b/src/iblphotometry/io.py @@ -1,7 +1,6 @@ # %% import numpy as np import pandas as pd -import pynapple as nap from pathlib import Path import warnings import pandera @@ -14,8 +13,8 @@ def from_array( times: np.array, data: np.array, channel_names: list[str] = None -) -> nap.TsdFrame: - return nap.TsdFrame(t=times, d=data, columns=channel_names) +) -> pd.DataFrame: + return pd.DataFrame(data, index=times, columns=channel_names) def from_dataframe( @@ -62,23 +61,17 @@ def from_dataframe( to_drop = ['None', ''] channel_names = [ch for ch in channel_names if ch not in to_drop] - raw_tfs = {} + raw_dfs = {} for channel in channel_names: - # TODO include the use of raw_df['include'] to set the time_support of the pynapple object - # requires conversion of boolen to nap.IntervalSet (have done this somewhere already. 
find code) - - # TODO check pynappe PR#343 https://github.com/pynapple-org/pynapple/pull/343 for future - # inclusion of locations_df as metadata - # get the data for the band df = raw_df.groupby(channel_column).get_group(channel) # if rename dict is passed, rename Region0X to the corresponding brain region if rename is not None: df = df.rename(columns=rename) data_columns = rename.values() - raw_tfs[channel] = nap.TsdFrame(df.set_index(time_column)[data_columns]) + raw_dfs[channel] = df.set_index(time_column)[data_columns] - return raw_tfs + return raw_dfs def from_pqt( diff --git a/src/iblphotometry/loaders.py b/src/iblphotometry/loaders.py index 30bb0bb..6786598 100644 --- a/src/iblphotometry/loaders.py +++ b/src/iblphotometry/loaders.py @@ -1,4 +1,3 @@ -import pynapple as nap import pandas as pd from pathlib import Path from iblphotometry import io @@ -28,31 +27,15 @@ def _load_data_from_eid(self, eid, rename=True) -> nap.TsdFrame: data_columns=list(locations_df.index), rename=locations_df['brain_region'].to_dict() if rename else None, ) - raw_tfs = io.from_dataframe(raw_photometry_df, **read_config) + raw_dfs = io.from_dataframe(raw_photometry_df, **read_config) - signal_band_names = list(raw_tfs.keys()) - col_names = list(raw_tfs[signal_band_names[0]].columns) + signal_band_names = list(raw_dfs.keys()) + col_names = list(raw_dfs[signal_band_names[0]].columns) if self.verbose: print(f'available signal bands: {signal_band_names}') print(f'available brain regions: {col_names}') - return raw_tfs - # if return_regions: - # return raw_tfs, cols - # else: - # return raw_tfs - - # def _load_data_from_pid(self, pid=None, signal=None) -> nap.Tsd: - # eid, pname = self.one.pid2eid(pid) - # locations = self._load_locations(eid) - # roi_name = dict(zip(locations['fiber'], locations.index))[pname] - # return self._load_data_from_eid(eid, signal=signal)[roi_name] - - # def pid2eid(self, pid: str) -> tuple[str, str]: - # return self.one.pid2eid(pid) - - # def eid2pid(self, eid: str): - # return self.one.eid2pid(eid) + return raw_dfs class KceniaLoader(PhotometryLoader): @@ -69,39 +52,20 @@ def _load_data_from_eid(self, eid: str, rename=True): signal_bands = ['raw_calcium', 'raw_isosbestic'] # HARDCODED but fine # flipping the data representation - raw_tfs = {} + raw_dfs = {} for band in signal_bands: df = pd.DataFrame([raw_dfs[pname][band].values for pname in pnames]).T df.columns = pnames df.index = raw_dfs[pname][band].index - raw_tfs[band] = nap.TsdFrame(df) + raw_dfs[band] = df if self.verbose: - print(f'available signal bands: {list(raw_tfs.keys())}') - cols = list(raw_tfs[list(raw_tfs.keys())[0]].columns) + print(f'available signal bands: {list(raw_dfs.keys())}') + cols = list(raw_dfs[list(raw_dfs.keys())[0]].columns) print(f'available brain regions: {cols}') - # if return_regions: - # return raw_tfs, pnames - # else: - return raw_tfs - - # def _load_data_from_eid(self, eid, signal=None): - # raise NotImplementedError - - # def get_mappable(self, eid): - # raise NotImplementedError - - # def get_mapping(self, eid, key=None, value=None): - # raise NotImplementedError - - # def pid2eid(self, pid: str) -> tuple[str, str]: - # return pid.split('_') + return raw_dfs - # def eid2pid(self, eid): - # pnames = self._eid2pnames(eid) - # pids = [f'{eid}_{pname}' for pname in pnames] - # return (pids, pnames) def _eid2pnames(self, eid: str): session_path = self.one.eid2path(eid) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index ce9950e..d227592 100644 --- 
a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -1,6 +1,5 @@ import numpy as np import pandas as pd -import pynapple as nap from scipy import stats from scipy.stats import ttest_ind @@ -27,99 +26,99 @@ # return P[1] - P[0] -def percentile_dist(A: nap.Tsd | np.ndarray, pc: tuple = (50, 95), axis=-1) -> float: +def percentile_dist(A: pd.Series | np.ndarray, pc: tuple = (50, 95), axis=-1) -> float: """the distance between two percentiles in units of z. Captures the magnitude of transients. Args: - A (nap.Tsd | np.ndarray): the input data, np.ndarray for stride tricks sliding windows + A (pd.Series | np.ndarray): the input data, np.ndarray for stride tricks sliding windows pc (tuple, optional): percentiles to be computed. Defaults to (50, 95). axis (int, optional): only for arrays, the axis to be computed. Defaults to -1. Returns: float: the value of the metric """ - if isinstance(A, nap.Tsd): # "overloading" + if isinstance(A, pd.Series): # "overloading" P = np.percentile(z(A.values), pc) elif isinstance(A, np.ndarray): P = np.percentile(z(A), pc, axis=axis) else: - raise TypeError('A must be nap.Tsd or np.ndarray.') + raise TypeError('A must be pd.Series or np.ndarray.') return P[1] - P[0] -def signal_asymmetry(A: nap.Tsd | np.ndarray, pc_comp: int = 95, axis=-1) -> float: +def signal_asymmetry(A: pd.Series | np.ndarray, pc_comp: int = 95, axis=-1) -> float: """the ratio between the distance of two percentiles to the median. Proportional to the the signal to noise. Args: - A (nap.Tsd | np.ndarray): _description_ + A (pd.Series | np.ndarray): _description_ pc_comp (int, optional): _description_. Defaults to 95. axis (int, optional): _description_. Defaults to -1. Returns: float: _description_ """ - if not (isinstance(A, nap.Tsd) or isinstance(A, np.ndarray)): - raise TypeError('A must be nap.Tsd or np.ndarray.') + if not (isinstance(A, pd.Series) or isinstance(A, np.ndarray)): + raise TypeError('A must be pd.Series or np.ndarray.') a = np.absolute(percentile_dist(A, (50, pc_comp), axis=axis)) b = np.absolute(percentile_dist(A, (100 - pc_comp, 50), axis=axis)) return a / b -def signal_skew(A: nap.Tsd | np.ndarray, axis=-1) -> float: - if isinstance(A, nap.Tsd): +def signal_skew(A: pd.Series | np.ndarray, axis=-1) -> float: + if isinstance(A, pd.Series): P = stats.skew(A.values) elif isinstance(A, np.ndarray): P = stats.skew(A, axis=axis) else: - raise TypeError('A must be nap.Tsd or np.ndarray.') + raise TypeError('A must be pd.Series or np.ndarray.') return P -def n_unique_samples(A: nap.Tsd | np.ndarray) -> int: +def n_unique_samples(A: pd.Series | np.ndarray) -> int: """number of unique samples in the signal. 
Low values indicate that the signal during acquisition was not within the range of the digitizer.""" - if isinstance(A, nap.Tsd): + if isinstance(A, pd.Series): return np.unique(A.values).shape[0] elif isinstance(A, np.ndarray): return A.shape[0] else: - raise TypeError('A must be nap.Tsd or np.ndarray.') + raise TypeError('A must be pd.Series or np.ndarray.') -def n_spikes(A: nap.Tsd | np.ndarray, sd: int = 5): +def n_spikes(A: pd.Series | np.ndarray, sd: int = 5): """count the number of spike artifacts in the recording.""" - if isinstance(A, nap.Tsd): + if isinstance(A, pd.Series): return detect_spikes(A.values, sd=sd).shape[0] elif isinstance(A, np.ndarray): return detect_spikes(A, sd=sd).shape[0] else: - raise TypeError('A must be nap.Tsd or np.ndarray.') + raise TypeError('A must be pd.Series or np.ndarray.') def n_outliers( - A: nap.Tsd | np.ndarray, w_size: int = 1000, alpha: float = 0.0005 + A: pd.Series | np.ndarray, w_size: int = 1000, alpha: float = 0.0005 ) -> int: """counts the number of outliers as detected by grubbs test for outliers. int: _description_ """ - if isinstance(A, nap.Tsd): + if isinstance(A, pd.Series): return detect_outliers(A.values, w_size=w_size, alpha=alpha).shape[0] elif isinstance(A, np.ndarray): return detect_outliers(A, w_size=w_size, alpha=alpha).shape[0] else: - raise TypeError('A must be nap.Tsd or np.ndarray.') + raise TypeError('A must be pd.Series or np.ndarray.') -def bleaching_tau(A: nap.Tsd) -> float: +def bleaching_tau(A: pd.Series) -> float: """overall tau of bleaching.""" - y, t = A.values, A.t + y, t = A.values, A.index.values reg = Regression(model=ExponDecay()) reg.fit(y, t) return reg.popt[1] def ttest_pre_post( - A: nap.Tsd, + A: pd.Series, trials: pd.DataFrame, event_name: str, fs=None, @@ -137,7 +136,7 @@ def ttest_pre_post( :param confid: float, confidence level (alpha) :return: boolean, True if metric passes """ - y, t = A.values, A.t + y, t = A.values, A.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs t_events = trials[event_name].values @@ -155,8 +154,8 @@ def ttest_pre_post( def has_response_to_event( - A: nap.Tsd, - event_times: nap.Ts, + A: pd.Series, + event_times: np.array, fs: float = None, window: tuple = (-1, 1), alpha: float = 0.005, @@ -165,14 +164,9 @@ def has_response_to_event( # checks if there is a significant response to an event # ibldsb way - y, t = A.values, A.t + y, t = A.values, A.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs - P = psth(y, t, event_times.t, fs=fs, event_window=window)[0] - - # or: pynapple style - # in the long run, this will be the preferred way as this will - # respect the .time_support of the pynapple object. # TODO verify this - # P = nap.compute_perievent_continuous(A, event_times, window).values + P, psth_ix = psth(y, t, event_times, fs=fs, event_window=window) # temporally averages the samples in the window. Sensitive to window size! 
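+    # (for mode == 'max', the per-event peak is taken instead, penalized by
+    # the global standard deviation of the signal)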
if mode == 'mean': @@ -182,28 +176,27 @@ def has_response_to_event( sig_samples = np.max(P, axis=0) - np.std(y) # baseline is all samples that are not part of the response - ts = event_times.t - gaps = nap.IntervalSet(start=ts + window[0], end=ts + window[1]) - base_samples = A.restrict(A.time_support.set_diff(gaps)).values + base_ix = np.setdiff1d(np.arange(y.shape[0]), psth_ix.flatten()) + base_samples = y[base_ix] res = ttest_ind(sig_samples, base_samples) return res.pvalue < alpha def has_responses( - A: nap.Tsd, + A: pd.Series, trials: pd.DataFrame, event_names: list = None, fs: float = None, window: tuple = (-1, 1), alpha: float = 0.005, ) -> bool: - t = A.t + t = A.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs res = [] for event_name in event_names: - event_times = nap.Ts(t=trials[event_name].values) + event_times = trials[event_name] res.append( has_response_to_event(A, event_times, fs=fs, window=window, alpha=alpha) ) diff --git a/src/iblphotometry/outlier_detection.py b/src/iblphotometry/outlier_detection.py index 3978187..e31cd82 100644 --- a/src/iblphotometry/outlier_detection.py +++ b/src/iblphotometry/outlier_detection.py @@ -1,6 +1,5 @@ import numpy as np from scipy.stats import t -import pynapple as nap from ibldsp.utils import WindowGenerator from copy import copy import pandas as pd @@ -8,7 +7,7 @@ import warnings -def _grubbs_single(y, alpha=0.005, mode='median'): +def _grubbs_single(y: np.array, alpha: float =0.005, mode: str='median') -> bool: # to apply a single pass of grubbs outlier detection # see https://en.wikipedia.org/wiki/Grubbs%27s_test @@ -28,7 +27,7 @@ def _grubbs_single(y, alpha=0.005, mode='median'): return False -def grubbs_test(y, alpha=0.005, mode='median'): +def grubbs_test(y: np.array, alpha: float =0.005, mode:str='median'): # apply grubbs test iteratively until no more outliers are found outliers = [] while _grubbs_single(y, alpha=alpha): @@ -88,15 +87,15 @@ def fillnan_kde(y: np.array, w: int = 25): return y -def remove_outliers(F: nap.Tsd, w_size: int = 1000, alpha: float = 0.005, w: int = 25): - y, t = F.values, F.t +def remove_outliers(F: pd.Series, w_size: int = 1000, alpha: float = 0.005, w: int = 25): + y, t = F.values, F.index.values y = copy(y) outliers = detect_outliers(y, w_size=w_size, alpha=alpha) while len(outliers) > 0: y[outliers] = np.nan y = fillnan_kde(y, w=w) outliers = detect_outliers(y, w_size=w_size, alpha=alpha) - return nap.Tsd(t=t, d=y) + return pd.Series(y, index=t) def detect_spikes(t: np.array, sd: int = 5): @@ -105,8 +104,8 @@ def detect_spikes(t: np.array, sd: int = 5): return np.where(bad_inds)[0] -def remove_spikes(F: nap.Tsd, sd: int = 5, w: int = 25): - y, t = F.values, F.t +def remove_spikes(F: pd.Series, sd: int = 5, w: int = 25): + y, t = F.values, F.index.values y = copy(y) outliers = detect_spikes(y, sd=sd) y[outliers] = np.nan @@ -120,4 +119,4 @@ def remove_spikes(F: nap.Tsd, sd: int = 5, w: int = 25): y[outliers] = np.nanmedian(y) warnings.warn('KDE fillnan failed, using global median') # TODO logger - return nap.Tsd(t=t, d=y) + return pd.Series(y, index=t) diff --git a/src/iblphotometry/pipelines.py b/src/iblphotometry/pipelines.py index 6a95040..72bcfa4 100644 --- a/src/iblphotometry/pipelines.py +++ b/src/iblphotometry/pipelines.py @@ -1,7 +1,7 @@ """this module holds a collection of processing pipelines for fiber photometry data""" import numpy as np -import pynapple as nap +import pandas as pd from iblphotometry.helpers import z, filt # from ibldsp.utils import WindowGenerator @@ 
-22,17 +22,17 @@ def run_pipeline( pipeline, - F_signal: nap.TsdFrame, - F_reference: nap.TsdFrame = None, -) -> nap.TsdFrame: + F_signal: pd.DataFrame, + F_reference: pd.DataFrame = None, +) -> pd.DataFrame: # copy - Fc = copy(F_signal) + Fc = F_signal.copy() if F_reference is not None: - Fc_ref = copy(F_reference) + Fc_ref = F_reference.copy() - if isinstance(F_signal, nap.Tsd): + if isinstance(F_signal, pd.Series): raise TypeError( - 'F_signal can not be nap.Tsd, is now required to be nap.TsdFrame' + 'F_signal can not be pd.Series, is now required to be pd.DataFrame' ) # iterate over the individual processing steps of the pipeline @@ -41,17 +41,17 @@ def run_pipeline( if 'needs_reference' in pipe_args: _pipe_args = {k: v for k, v in pipe_args.items() if k != 'needs_reference'} # check if F_ref is not None - _d = np.zeros_like(Fc.d) + _d = np.zeros_like(Fc.values) # _Fcd_ref = np.zeros_like(Fc_ref.d) for i, col in enumerate(Fc.columns): _d[:, i] = pipe_func(Fc[col], Fc_ref[col], **_pipe_args) # this step consumes the reference! - Fc = nap.TsdFrame(t=Fc.t, d=_d, columns=Fc.columns) + Fc = pd.DataFrame(_d, index=Fc.index.values, columns=Fc.columns) else: - _d = np.zeros_like(Fc.d) + _d = np.zeros_like(Fc.values) for i, col in enumerate(Fc.columns): _d[:, i] = pipe_func(Fc[col], **pipe_args) - Fc = nap.TsdFrame(t=Fc.t, d=_d, columns=Fc.columns) + Fc = pd.DataFrame(_d, index=Fc.index.values, columns=Fc.columns) return Fc @@ -86,116 +86,116 @@ def run_pipeline( # TODO this is not following the definition of a pipeline anymore # to be reconstructed -def bc_lp_sliding_mad( - F: nap.Tsd | nap.TsdFrame, - w_len: float = 120, - overlap: int = 90, - butterworth_lowpass=dict(N=3, Wn=0.01, btype='lowpass'), - signal_name: str = 'raw_calcium', -): - """_summary_ - - Args: - F (nap.Tsd): _description_ - w_len (float, optional): _description_. Defaults to 120. - overlap (int, optional): _description_. Defaults to 90. - butterworth_lowpass (_type_, optional): _description_. Defaults to dict(N=3, Wn=0.01, btype="lowpass"). - - Returns: - _type_: _description_ - """ - - if isinstance( - F, nap.TsdFrame - ): # if F is as TsdFrame, then use signal name to get the correct column - this is needed for the pipeline functionality in run_qc - if signal_name is None: - logger.critical('no signal name is provided for the pipeline') - else: - F = F[signal_name] - - bleach_correction = bleach_corrections.LowpassBleachCorrection( - correction_method='subtract-divide', - filter_params=butterworth_lowpass, - ) - F_bc = bleach_correction.correct(F) - F_res = sliding_operations.sliding_mad(F_bc, w_len=w_len, overlap=overlap) - return F_res - - -# TODO this is not following the definition of a pipeline anymore -def jove2019( - F: nap.TsdFrame, - ca_signal_name: str = 'raw_calcium', - isosbestic_signal_name: str = 'raw_isosbestic', - **params, -): - """ - Martianova, Ekaterina, Sage Aronson, and Christophe D. Proulx. "Multi-fiber photometry to record neural activity in freely-moving animals." JoVE (Journal of Visualized Experiments) 152 (2019): e60278. 
- :param raw_calcium: - :param raw_isosbestic: - :param params: - :return: - """ - raw_calcium = F[ca_signal_name] - raw_isosbestic = F[isosbestic_signal_name] - - # replace this with a low pass corrector - # remove photobleaching - bleach_correction = bleach_corrections.LowpassBleachCorrection( - correction_method='subtract-divide', - filter_params=dict(N=3, Wn=0.01, btype='lowpass'), - ) - calcium = bleach_correction.correct( - raw_calcium, - mode='subtract', - ).values - isosbestic = bleach_correction.correct( - raw_isosbestic, - mode='subtract', - ).values - - # zscoring using median instead of mean - calcium = z(calcium, mode='median') - isosbestic = z(isosbestic, mode='median') - - # regular regression - m = np.polyfit(isosbestic, calcium, 1) - ref = isosbestic * m[0] + m[1] - ph = (calcium - ref) / 100 - return nap.Tsd(t=raw_calcium.times(), d=ph) - - -# TODO this is not following the definition of a pipeline anymore -def isosbestic_regression( - F: nap.TsdFrame, - ca_signal_name: str = 'raw_calcium', - isosbestic_signal_name: str = 'raw_isosbestic', - fs: float = None, - regression_method: str = 'irls', - correction_method: str = 'subtract-divide', - **params, -): - raw_calcium = F[ca_signal_name] - raw_isosbestic = F[isosbestic_signal_name] - - t = F.times() - fs = 1 / np.median(np.diff(t)) if fs is None else fs - - isosbestic_correction = bleach_corrections.IsosbesticCorrection( - regression_method=regression_method, - correction_method=correction_method, - lowpass_isosbestic=dict(N=3, Wn=0.01, btype='lowpass'), - ) - - F_corr = isosbestic_correction.correct( - raw_calcium, - raw_isosbestic, - ) - - butterworth_signal = params.get( - 'butterworth_signal', - dict(N=3, Wn=7, btype='lowpass', fs=fs), # changed from 10 to 7 - ) - - F_corr = filt(F_corr, **butterworth_signal) - return F_corr +# def bc_lp_sliding_mad( +# F: nap.Tsd | nap.TsdFrame, +# w_len: float = 120, +# overlap: int = 90, +# butterworth_lowpass=dict(N=3, Wn=0.01, btype='lowpass'), +# signal_name: str = 'raw_calcium', +# ): +# """_summary_ + +# Args: +# F (nap.Tsd): _description_ +# w_len (float, optional): _description_. Defaults to 120. +# overlap (int, optional): _description_. Defaults to 90. +# butterworth_lowpass (_type_, optional): _description_. Defaults to dict(N=3, Wn=0.01, btype="lowpass"). + +# Returns: +# _type_: _description_ +# """ + +# if isinstance( +# F, nap.TsdFrame +# ): # if F is as TsdFrame, then use signal name to get the correct column - this is needed for the pipeline functionality in run_qc +# if signal_name is None: +# logger.critical('no signal name is provided for the pipeline') +# else: +# F = F[signal_name] + +# bleach_correction = bleach_corrections.LowpassBleachCorrection( +# correction_method='subtract-divide', +# filter_params=butterworth_lowpass, +# ) +# F_bc = bleach_correction.correct(F) +# F_res = sliding_operations.sliding_mad(F_bc, w_len=w_len, overlap=overlap) +# return F_res + + +# # TODO this is not following the definition of a pipeline anymore +# def jove2019( +# F: nap.TsdFrame, +# ca_signal_name: str = 'raw_calcium', +# isosbestic_signal_name: str = 'raw_isosbestic', +# **params, +# ): +# """ +# Martianova, Ekaterina, Sage Aronson, and Christophe D. Proulx. "Multi-fiber photometry to record neural activity in freely-moving animals." JoVE (Journal of Visualized Experiments) 152 (2019): e60278. 
+# :param raw_calcium: +# :param raw_isosbestic: +# :param params: +# :return: +# """ +# raw_calcium = F[ca_signal_name] +# raw_isosbestic = F[isosbestic_signal_name] + +# # replace this with a low pass corrector +# # remove photobleaching +# bleach_correction = bleach_corrections.LowpassBleachCorrection( +# correction_method='subtract-divide', +# filter_params=dict(N=3, Wn=0.01, btype='lowpass'), +# ) +# calcium = bleach_correction.correct( +# raw_calcium, +# mode='subtract', +# ).values +# isosbestic = bleach_correction.correct( +# raw_isosbestic, +# mode='subtract', +# ).values + +# # zscoring using median instead of mean +# calcium = z(calcium, mode='median') +# isosbestic = z(isosbestic, mode='median') + +# # regular regression +# m = np.polyfit(isosbestic, calcium, 1) +# ref = isosbestic * m[0] + m[1] +# ph = (calcium - ref) / 100 +# return nap.Tsd(t=raw_calcium.times(), d=ph) + + +# # TODO this is not following the definition of a pipeline anymore +# def isosbestic_regression( +# F: nap.TsdFrame, +# ca_signal_name: str = 'raw_calcium', +# isosbestic_signal_name: str = 'raw_isosbestic', +# fs: float = None, +# regression_method: str = 'irls', +# correction_method: str = 'subtract-divide', +# **params, +# ): +# raw_calcium = F[ca_signal_name] +# raw_isosbestic = F[isosbestic_signal_name] + +# t = F.times() +# fs = 1 / np.median(np.diff(t)) if fs is None else fs + +# isosbestic_correction = bleach_corrections.IsosbesticCorrection( +# regression_method=regression_method, +# correction_method=correction_method, +# lowpass_isosbestic=dict(N=3, Wn=0.01, btype='lowpass'), +# ) + +# F_corr = isosbestic_correction.correct( +# raw_calcium, +# raw_isosbestic, +# ) + +# butterworth_signal = params.get( +# 'butterworth_signal', +# dict(N=3, Wn=7, btype='lowpass', fs=fs), # changed from 10 to 7 +# ) + +# F_corr = filt(F_corr, **butterworth_signal) +# return F_corr diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py index 794fecd..908b3c7 100644 --- a/src/iblphotometry/qc.py +++ b/src/iblphotometry/qc.py @@ -1,7 +1,7 @@ # %% import numpy as np from scipy.stats import linregress -import pynapple as nap +import pandas as pd from tqdm import tqdm import logging @@ -21,7 +21,7 @@ # %% # those could be in metrics def sliding_metric( - F: nap.Tsd, + F: pd.Series, w_len: float, fs: float = None, metric: callable = None, @@ -39,7 +39,7 @@ def sliding_metric( Returns: _type_: _description_ """ - y, t = F.values, F.times() + y, t = F.values, F.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs w_size = int(w_len * fs) @@ -53,12 +53,12 @@ def sliding_metric( m = metric(yw, **metric_kwargs) if metric_kwargs is not None else metric(yw) - return nap.Tsd(t=t[inds + int(w_size / 2)], d=m) + return pd.Series(m, index=t[inds + int(w_size / 2)]) # eval pipleline will be here def eval_metric( - F: nap.Tsd, + F: pd.Series, metric: callable = None, metric_kwargs: dict = None, sliding_kwargs: dict = None, @@ -77,16 +77,16 @@ def eval_metric( return dict(value=m, rval=r, pval=p) -def qc_Tsd( - F: nap.Tsd, +def qc_series( + F: pd.Series, qc_metrics: dict, sliding_kwargs=None, # if present, calculate everything in a sliding manner trials=None, # if present, put trials into params eid: str = None, # FIXME but left as is for now just to keep the logger happy brain_region: str = None, # FIXME but left as is for now just to keep the logger happy ) -> dict: - if isinstance(F, nap.TsdFrame): - raise TypeError('F can not be nap.TsdFrame') + if isinstance(F, pd.DataFrame): + raise TypeError('F can not be a 
dataframe') # should cover all cases qc_results = {} @@ -137,7 +137,7 @@ def run_qc( for band in signal_bands: raw_tf = raw_tfs[band] for region in brain_regions: - qc_result = qc_Tsd( + qc_result = qc_series( raw_tf[region], qc_metrics['raw'], sliding_kwargs=None, eid=eid ) qc_results.append( @@ -161,7 +161,7 @@ def run_qc( for region in brain_regions: # sliding qc of the processed data - qc_proc = qc_Tsd( + qc_proc = qc_series( proc_tf[region], qc_metrics=qc_metrics['processed'], sliding_kwargs=qc_metrics['sliding_kwargs'], @@ -170,7 +170,7 @@ def run_qc( ) # qc with metrics that use behavior - qc_resp = qc_Tsd( + qc_resp = qc_series( proc_tf[region], qc_metrics['response'], trials=trials, diff --git a/src/iblphotometry/sliding_operations.py b/src/iblphotometry/sliding_operations.py index f22ade5..f0baaf4 100644 --- a/src/iblphotometry/sliding_operations.py +++ b/src/iblphotometry/sliding_operations.py @@ -1,7 +1,7 @@ """this module holds a collection of processing pipelines for fiber photometry data""" import numpy as np -import pynapple as nap +import pandas as pd from iblphotometry.helpers import z from iblphotometry.behavior import psth from ibldsp.utils import WindowGenerator @@ -49,8 +49,8 @@ def make_sliding_window( return B -def sliding_dFF(F: nap.Tsd, w_len: float, fs=None, weights=None): - y, t = F.values, F.times() +def sliding_dFF(F: pd.Series, w_len: float, fs=None, weights=None): + y, t = F.values, F.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs w_size = int(w_len * fs) @@ -71,10 +71,10 @@ def _dFF(A: np.array): B = make_sliding_window(y, w_size) mus = np.average(B, axis=1) d = (y - mus) / mus - return nap.Tsd(t=t, d=d) + return pd.Series(d, index=t) -def sliding_z(F: nap.Tsd, w_len: float, fs=None, weights=None): +def sliding_z(F: pd.Series, w_len: float, fs=None, weights=None): """sliding window z-score of a pynapple time series with data with optional weighting Args: @@ -85,7 +85,7 @@ def sliding_z(F: nap.Tsd, w_len: float, fs=None, weights=None): Returns: _type_: _description_ """ - y, t = F.values, F.times() + y, t = F.values, F.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs w_size = int(w_len * fs) @@ -102,11 +102,11 @@ def sliding_z(F: nap.Tsd, w_len: float, fs=None, weights=None): B = make_sliding_window(y, w_size) mus, sds = np.average(B, axis=1), np.std(B, axis=1) d = (y - mus) / sds - return nap.Tsd(t=t, d=d) + return pd.Series(d, index=t) -def sliding_mad(F: nap.Tsd, w_len: float = None, fs=None, overlap=90): - y, t = F.values, F.times() +def sliding_mad(F: pd.Series, w_len: float = None, fs=None, overlap=90): + y, t = F.values, F.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs w_size = int(w_len * fs) @@ -117,7 +117,7 @@ def sliding_mad(F: nap.Tsd, w_len: float = None, fs=None, overlap=90): rmswin, _ = psth(y, t, t_events=trms, fs=fs, event_window=[0, w_len]) gain = np.nanmedian(np.abs(y)) / np.nanmedian(np.abs(rmswin), axis=0) gain = np.interp(t, trms, gain) - return nap.Tsd(t=t, d=y * gain) + return pd.Series(y * gain, index=t) # def sliding_mad_new(F: nap.Tsd, w_len: float = None, fs=None, overlap=90): diff --git a/src/iblphotometry/synthetic.py b/src/iblphotometry/synthetic.py index b41c2d1..2c44060 100644 --- a/src/iblphotometry/synthetic.py +++ b/src/iblphotometry/synthetic.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd import scipy.signal -import pynapple as nap + def synthetic101(fs=30, rl=1000, event_rate=0.2): @@ -37,7 +37,7 @@ def synthetic101(fs=30, rl=1000, event_rate=0.2): ), 
event_times -def generate_TsdFrame(sigma: float = 0.01): +def generate_dataframe(sigma: float = 0.01): # to generate synthetic data in the internal data representation in dict of nap.TsdFrames df, _ = synthetic101(30, 1000, 0.2) df = df.set_index('times') @@ -46,16 +46,9 @@ def generate_TsdFrame(sigma: float = 0.01): if sigma is not None: df[['raw_calcium', 'raw_isosbestic']] += np.random.randn(*df.shape) * sigma - raw_tfs = dict( - signal=nap.TsdFrame( - t=df.index, - d=df['raw_calcium'], - columns=['Region01'], - ), - reference=nap.TsdFrame( - t=df.index, - d=df['raw_isosbestic'], - columns=['Region01'], - ), + raw_dfs = dict( + raw_calcium=pd.DataFrame(df['raw_calcium'].values, index=df.index, columns=['Region01']), + raw_isosbestic=pd.DataFrame(df['raw_isosbestic'].values, index=df.index, columns=['Region01']), ) - return raw_tfs + + return raw_dfs diff --git a/src/tests/test_metrics.py b/src/tests/test_metrics.py index 75e63cc..f94b164 100644 --- a/src/tests/test_metrics.py +++ b/src/tests/test_metrics.py @@ -11,14 +11,14 @@ class TestMetrics(tests.base_tests.PhotometryDataTestCase): def test_metrics(self): # get data - raw_tfs = fio.from_pqt( + raw_dfs = fio.from_pqt( self.paths['photometry_signal_pqt'], self.paths['photometryROI_locations_pqt'], ) trials = pd.read_parquet(self.paths['trials_table_pqt']) # testing metrics with nap.Tsd - raw_tsd = raw_tfs['GCaMP']['DMS'] + raw_tsd = raw_dfs['GCaMP']['DMS'] metrics.bleaching_tau(raw_tsd) metrics.n_spikes(raw_tsd) @@ -43,7 +43,7 @@ def test_metrics(self): metrics.has_responses(raw_tsd, trials, BEHAV_EVENTS) # testing metrics with np.array - raw_array = raw_tfs['GCaMP']['DMS'].d + raw_array = raw_dfs['GCaMP']['DMS'].values # metrics.bleaching_tau(raw_tsd) metrics.n_spikes(raw_array) metrics.detect_spikes(raw_array) diff --git a/src/tests/test_pipelines.py b/src/tests/test_pipelines.py index eda5800..277df91 100644 --- a/src/tests/test_pipelines.py +++ b/src/tests/test_pipelines.py @@ -6,30 +6,30 @@ sliding_mad_pipeline, isosbestic_correction_pipeline, ) -from iblphotometry.synthetic import generate_TsdFrame +from iblphotometry.synthetic import generate_dataframe import tests.base_tests class TestPipelines(tests.base_tests.PhotometryDataTestCase): def test_single_band_pipeline(self): # on synthetic data - raw_tfs = generate_TsdFrame() - run_pipeline(sliding_mad_pipeline, raw_tfs['signal']) + raw_dfs = generate_dataframe() + run_pipeline(sliding_mad_pipeline, raw_dfs['raw_calcium']) Path(__file__).parent.joinpath() # on real data - raw_tfs = fio.from_pqt( + raw_dfs = fio.from_pqt( self.paths['photometry_signal_pqt'], self.paths['photometryROI_locations_pqt'], ) - signal_bands = list(raw_tfs.keys()) - run_pipeline(sliding_mad_pipeline, raw_tfs[signal_bands[0]]) + signal_bands = list(raw_dfs.keys()) + run_pipeline(sliding_mad_pipeline, raw_dfs[signal_bands[0]]) def test_isosbestic_pipeline(self): # on synthetic data - raw_tfs = generate_TsdFrame() + raw_dfs = generate_dataframe() # run pipeline run_pipeline( - isosbestic_correction_pipeline, raw_tfs['signal'], raw_tfs['reference'] + isosbestic_correction_pipeline, raw_dfs['raw_calcium'], raw_dfs['raw_isosbestic'] ) From bac70fbb9a5508471f0e104af8a274f9fc92a408 Mon Sep 17 00:00:00 2001 From: georg Date: Mon, 2 Dec 2024 18:47:38 +0000 Subject: [PATCH 02/10] and removed pynapple from the requirements as well --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 085e653..1313c5b 100644 --- a/requirements.txt +++ 
b/requirements.txt
@@ -3,5 +3,4 @@ numpy
 matplotlib
 pytest
 scipy
-pandera
-pynapple
\ No newline at end of file
+pandera
\ No newline at end of file

From b83ba91bba812f788b524dd7e0838ec478c53fa5 Mon Sep 17 00:00:00 2001
From: georg
Date: Tue, 3 Dec 2024 11:47:25 +0000
Subject: [PATCH 03/10] neurophotometrics.py contains LED-state constants and io.py imports them

---
 src/iblphotometry/io.py                | 16 +++++++++++--
 src/iblphotometry/neurophotometrics.py | 31 ++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)
 create mode 100644 src/iblphotometry/neurophotometrics.py

diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py
index eefc875..18ba56c 100644
--- a/src/iblphotometry/io.py
+++ b/src/iblphotometry/io.py
@@ -5,7 +5,7 @@
 import warnings
 import pandera
 
-from ibllib.pipes.neurophotometrics import (
+from iblphotometry.neurophotometrics import (
     LIGHT_SOURCE_MAP,
     LED_STATES,
 )
@@ -214,7 +214,6 @@ def from_raw_neurophotometrics(
     return from_dataframe(df, **read_config)
 
 
-# TODO externalize the validator in iblrig as a seperate function and reuse here
 def _validate_dataframe(
     df: pd.DataFrame,
     data_columns=None,
@@ -233,3 +232,16 @@
     )
 
     return schema_raw_data.validate(df)
+
+def _validate_neurophotometrics_digital_inputs(df: pd.DataFrame) -> pd.DataFrame:
+    schema_digital_inputs = pandera.DataFrameSchema(
+        columns=dict(
+            ChannelName=pandera.Column(str, coerce=True),
+            Channel=pandera.Column(pandera.Int8, coerce=True),
+            AlwaysTrue=pandera.Column(bool, coerce=True),
+            SystemTimestamp=pandera.Column(pandera.Float64),
+            ComputerTimestamp=pandera.Column(pandera.Float64),
+        )
+    )
+    return schema_digital_inputs.validate(df)
+
diff --git a/src/iblphotometry/neurophotometrics.py b/src/iblphotometry/neurophotometrics.py
new file mode 100644
index 0000000..4fb0050
--- /dev/null
+++ b/src/iblphotometry/neurophotometrics.py
@@ -0,0 +1,31 @@
+"""
+Neurophotometrics FP3002 specific information.
+The light source map refers to the available LEDs on the system.
+The flags refer to the byte encoding of LED states in the system.
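+
+A minimal decoding sketch (illustrative only; `flag_to_channel` is a
+hypothetical helper, not part of this module):
+
+    >>> from iblphotometry.neurophotometrics import LED_STATES
+    >>> flag_to_channel = {
+    ...     flag: channel
+    ...     for channel, flags in LED_STATES.items()
+    ...     if channel != 'Condition'
+    ...     for flag in flags.values()
+    ... }
+    >>> flag_to_channel[18]  # flag 18 decodes to L470 under 'Output 0 signal HIGH'
+    'L470'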
+""" +LIGHT_SOURCE_MAP = { + 'color': ['None', 'Violet', 'Blue', 'Green'], + 'wavelength': [0, 415, 470, 560], + 'name': ['None', 'Isosbestic', 'GCaMP', 'RCaMP'], +} + +LED_STATES = { + 'Condition': { + 0: 'No additional signal', + 1: 'Output 1 signal HIGH', + 2: 'Output 0 signal HIGH', + 3: 'Stimulation ON', + 4: 'GPIO Line 2 HIGH', + 5: 'GPIO Line 3 HIGH', + 6: 'Input 1 HIGH', + 7: 'Input 0 HIGH', + 8: 'Output 0 signal HIGH + Stimulation', + 9: 'Output 0 signal HIGH + Input 0 signal HIGH', + 10: 'Input 0 signal HIGH + Stimulation', + 11: 'Output 0 HIGH + Input 0 HIGH + Stimulation', + }, + 'No LED ON': {0: 0, 1: 8, 2: 16, 3: 32, 4: 64, 5: 128, 6: 256, 7: 512, 8: 48, 9: 528, 10: 544, 11: 560}, + 'L415': {0: 1, 1: 9, 2: 17, 3: 33, 4: 65, 5: 129, 6: 257, 7: 513, 8: 49, 9: 529, 10: 545, 11: 561}, + 'L470': {0: 2, 1: 10, 2: 18, 3: 34, 4: 66, 5: 130, 6: 258, 7: 514, 8: 50, 9: 530, 10: 546, 11: 562}, + 'L560': {0: 4, 1: 12, 2: 20, 3: 36, 4: 68, 5: 132, 6: 260, 7: 516, 8: 52, 9: 532, 10: 548, 11: 564} +} \ No newline at end of file From d77ac23139f89cbfe91193e08b0a5465a3279f4d Mon Sep 17 00:00:00 2001 From: georg Date: Tue, 3 Dec 2024 12:35:28 +0000 Subject: [PATCH 04/10] processing.py created and functions moved there --- src/iblphotometry/bleach_corrections.py | 325 ----------- src/iblphotometry/helpers.py | 98 ---- src/iblphotometry/metrics.py | 8 +- src/iblphotometry/outlier_detection.py | 122 ---- src/iblphotometry/pipelines.py | 15 +- src/iblphotometry/processing.py | 730 ++++++++++++++++++++++++ src/iblphotometry/qc.py | 2 +- src/iblphotometry/sliding_operations.py | 138 ----- 8 files changed, 734 insertions(+), 704 deletions(-) delete mode 100644 src/iblphotometry/bleach_corrections.py delete mode 100644 src/iblphotometry/helpers.py delete mode 100644 src/iblphotometry/outlier_detection.py create mode 100644 src/iblphotometry/processing.py delete mode 100644 src/iblphotometry/sliding_operations.py diff --git a/src/iblphotometry/bleach_corrections.py b/src/iblphotometry/bleach_corrections.py deleted file mode 100644 index a64cad9..0000000 --- a/src/iblphotometry/bleach_corrections.py +++ /dev/null @@ -1,325 +0,0 @@ -from abc import ABC, abstractmethod -import warnings -import numpy as np -import pandas as pd -from scipy.optimize import minimize -from scipy.stats.distributions import norm -from scipy.stats import gaussian_kde -from scipy.special import pseudo_huber - -from iblphotometry.helpers import filt -from inspect import signature - - -def correct(signal: pd.Series, reference: pd.Series, mode: str = 'subtract') -> pd.Series: - if mode == 'subtract': - signal_corrected = signal.values - reference.values - if mode == 'divide': - signal_corrected = signal.values / reference.values - if mode == 'subtract-divide': - signal_corrected = (signal.values - reference.values) / reference.values - return pd.Series(signal_corrected, index=signal.index.values) - - -def mse_loss(p, x, y, fun): - # mean squared error - y_hat = fun(x, *p) - return np.sum((y - y_hat) ** 2) / y.shape[0] - - -def mae_loss(p, x, y, fun): - # mean absolute error - y_hat = fun(x, *p) - return np.sum(np.absolute(y - y_hat)) / y.shape[0] - - -def huber_loss(p, x, y, fun, d): - # huber loss function - y_hat = fun(x, *p) - r = y - y_hat - return np.sum(pseudo_huber(d, r)) / y.shape[0] - - -def irls_loss(p, x, y, fun, d=1e-7): - # iteratively reweighted least squares - loss function - y_hat = fun(x, *p) - a = y - y_hat - # irls weight - f = np.max(np.stack([np.abs(a), np.ones(a.shape[0]) * d]), axis=0) - w = 1 / f - return 
np.sum(w * np.abs(a) ** 2) / y.shape[0] - - -class AbstractModel(ABC): - @abstractmethod - def eq(): - # the model equation - ... - - @abstractmethod - def est_p0(): - # given data, estimate model parameters - ... - - def _calc_r_squared(self, y, y_hat): - r = 1 - np.sum((y - y_hat) ** 2) / np.sum((y - np.average(y)) ** 2) - return r - - def _calc_likelihood(self, y, y_hat, n_samples=-1, use_kde=False): - rs = y - y_hat - if n_samples > 0: - inds = np.random.randint(0, y.shape[0], size=n_samples) - rs = rs[inds] - if use_kde is True: - # explicit estimation of the distribution of residuals - if n_samples == -1: - warnings.warn( - f'calculating KDE on {y.values.shape[0]} samples. This might be slow' - ) - dist = gaussian_kde(rs) - else: - # using RSME - sig = np.sqrt(np.average((y - y_hat) ** 2)) - dist = norm(0, sig) - ll = np.sum(np.log(dist.pdf(rs))) - return ll - - def _calc_aic(self, k, ll): - aic = 2 * k - 2 * ll # np.log(ll) - return aic - - def calc_model_stats(self, y, y_hat, n_samples: int = -1, use_kde: bool = False): - r_sq = self._calc_r_squared(y, y_hat) - ll = self._calc_likelihood(y, y_hat, n_samples, use_kde) - k = len(signature(self.eq).parameters) - 1 - aic = self._calc_aic(ll, k) - return dict(r_sq=r_sq, ll=ll, aic=aic) - - -### MODELS -eps = np.finfo(np.float64).eps - - -class LinearModel(AbstractModel): - def eq(self, x, m, b): - return x * m + b - - def est_p0(self, x: np.array, y: np.array): - return tuple(np.polyfit(x, y, 1)) - # x, y = np.sort(x), np.sort(y) - # dx = x[-1] - x[0] - # dy = y[-1] - y[0] - # m = dy / dx - # b = np.average(y - (m * x)) - # return (m, b) - - -class ExponDecay(AbstractModel): - bounds = ((0, np.inf), (eps, np.inf), (-np.inf, np.inf)) - - def eq(self, t, A, tau, b): - return A * np.exp(-t / tau) + b - - def est_p0(self, t: np.array, y: np.array): - return (y[0], t[int(t.shape[0] / 3)], y[-1]) - - -class DoubleExponDecay(AbstractModel): - bounds = ( - (0, np.inf), - (eps, np.inf), - (0, np.inf), - (eps, np.inf), - (-np.inf, np.inf), - ) - - def eq(self, t, A1, tau1, A2, tau2, b): - return A1 * np.exp(-t / tau1) + A2 * np.exp(-t / tau2) + b - - def est_p0(self, t: np.array, y: np.array): - A_est = y[0] - tau_est = t[int(t.shape[0] / 3)] - b_est = y[-1] - return (A_est, tau_est, A_est / 2, tau_est / 2, b_est) - - -class TripleExponDecay(AbstractModel): - bounds = ( - (0, np.inf), - (eps, np.inf), - (0, np.inf), - (eps, np.inf), - (0, np.inf), - (eps, np.inf), - (-np.inf, np.inf), - ) - - def eq(self, t, A1, tau1, A2, tau2, A3, tau3, b): - return ( - A1 * np.exp(-t / tau1) + A2 * np.exp(-t / tau2) + A3 * np.exp(-t / tau3) + b - ) - - def est_p0(self, t: np.array, y: np.array): - A_est = y[0] - tau_est = t[int(t.shape[0] / 3)] - b_est = y[-1] - return ( - A_est, - tau_est, - A_est * 0.66, - tau_est * 0.66, - A_est * 0.33, - tau_est * 0.33, - b_est, - ) - - -class Regression: - def __init__(self, model=None, method: str = 'mse', method_params=None): - self.model = model - self.method = method - self.params = method_params if method_params is not None else {} - - def fit(self, x: np.ndarray, y: np.ndarray, algorithm='L-BFGS-B') -> tuple: - p0 = self.model.est_p0(x, y) - bounds = self.model.bounds if hasattr(self.model, 'bounds') else None - if self.method == 'mse': - minimize_result = minimize( - mse_loss, - p0, - args=(x, y, self.model.eq), - bounds=bounds, - method=algorithm, - ) - if self.method == 'mae': - minimize_result = minimize( - mae_loss, - p0, - args=(x, y, self.model.eq), - bounds=bounds, - method=algorithm, - ) - if self.method == 
'huber': - d = self.params.get('d', 3) - minimize_result = minimize( - huber_loss, - p0, - args=(x, y, self.model.eq, d), - bounds=bounds, - method=algorithm, - ) - if self.method == 'irls': - d = self.params.get('d', 1e-7) - minimize_result = minimize( - irls_loss, - p0, - args=(x, y, self.model.eq, d), - bounds=bounds, - method=algorithm, - ) - if not minimize_result.success: - raise Exception(f'Fitting failed. {minimize_result.message}') - else: - self.popt = minimize_result.x - - # if self.method == 'sklearn-RANSAC': - # self.reg = RANSACRegressor(random_state=42) - # xr = x.reshape(-1, 1) - # self.reg.fit(xr, y) - # self.popt = (self.reg.estimator_.coef_, self.reg.estimator_.intercept_) - - # if self.method == 'sklearn-linear': - # self.reg = LinearRegression() - # xr = x.reshape(-1, 1) - # self.reg.fit(xr, y) - # self.popt = (self.reg.coef_, self.reg.intercept_) - # print(self.popt) - - def predict(self, x: np.ndarray, return_type='numpy'): - x = np.sort(x) # just in case - y_hat = self.model.eq(x, *self.popt) - if return_type == 'numpy': - return y_hat - if return_type == 'pandas': - return pd.Series(y_hat, index=x) - - -class BleachCorrection: - def __init__( - self, - model: AbstractModel = None, - regression_method: str = 'mse', - regression_params: dict = None, - correction_method: str = 'subtract', - ): - self.model = model - self.regression = Regression( - model=model, method=regression_method, method_params=regression_params - ) - self.correction_method = correction_method - - def correct(self, F: pd.Series): - self.regression.fit(F.index.values, F.values) - ref = self.regression.predict(F.index.values, return_type='pandas') - return correct(F, ref, mode=self.correction_method) - - -class IsosbesticCorrection: - def __init__( - self, - regression_method: str = 'mse', - regression_params: dict = None, - correction_method: str = 'subtract-divide', - lowpass_isosbestic: dict = None, - ): - self.reg = Regression( - model=LinearModel(), - method=regression_method, - method_params=regression_params, - ) - self.lowpass_isosbestic = lowpass_isosbestic - self.correction_method = correction_method - - def correct( - self, - F_ca: pd.Series, - F_iso: pd.Series, - ): - if self.lowpass_isosbestic is not None: - F_iso = filt(F_iso, **self.lowpass_isosbestic) - - self.reg.fit(F_iso.values, F_ca.values) - F_iso_fit = self.reg.predict(F_iso.values, return_type='pandas') - - return correct(F_ca, F_iso_fit, mode=self.correction_method) - - -class LowpassBleachCorrection: - def __init__( - self, - filter_params=dict(N=3, Wn=0.01, btype='lowpass'), - correction_method='subtract-divide', - ): - self.filter_params = filter_params - self.correction_method = correction_method - - def correct(self, F: pd.Series): - F_filt = filt(F, **self.filter_params) - return correct(F, F_filt, mode=self.correction_method) - - -# convenience functions for pipelines -def lowpass_bleachcorrect(F: pd.Series, **kwargs): - bc = LowpassBleachCorrection(**kwargs) - return bc.correct(F) - - -def exponential_bleachcorrect(F: pd.Series, **kwargs): - model = DoubleExponDecay() - ec = BleachCorrection(model, **kwargs) - return ec.correct(F) - - -def isosbestic_correct(F_sig: pd.DataFrame, F_ref: pd.DataFrame, **kwargs): - ic = IsosbesticCorrection(**kwargs) - return ic.correct(F_sig, F_ref) diff --git a/src/iblphotometry/helpers.py b/src/iblphotometry/helpers.py deleted file mode 100644 index 16f9e1d..0000000 --- a/src/iblphotometry/helpers.py +++ /dev/null @@ -1,98 +0,0 @@ -import numpy as np -from scipy import signal -import 
pandas as pd -from ibldsp.utils import WindowGenerator -from iblutil.numerical import rcoeff - - -# TODO move to processing -def z(A: np.array, mode='classic'): - """classic z-score. Deviation from sample mean in units of sd - - Args: - A (np.array): _description_ - - Returns: - _type_: _description_ - """ - if mode == 'classic': - return (A - np.average(A)) / np.std(A) - if mode == 'median': - return (A - np.median(A)) / np.std(A) - - -# TODO move to processing -def mad(A: np.array): - """ - https://en.wikipedia.org/wiki/Median_absolute_deviation - :the MAD is defined as the median of the absolute deviations from the data's median - - Args: - A (np.array): _description_ - - Returns: - _type_: _description_ - """ - return np.median(np.absolute(A - np.median(A)), axis=-1) - - -# TODO move to processing -def madscore(F: pd.Series): - y, t = F.values, F.index.values - return pd.Series(mad(y), index=t) - - -# TODO move to processing -def zscore(F: pd.Series, mode='classic'): - """pynapple friendly zscore - - Args: - F (nap.Tsd): signal to be z-scored - - Returns: - _type_: z-scored nap.Tsd - """ - y, t = F.values, F.index.values - # mu, sig = np.average(y), np.std(y) - return pd.Series(z(y, mode=mode), index=t) - - -# TODO move to processing -def filt(F: pd.Series, N: int, Wn: float, fs: float = None, btype='low'): - """a pynapple friendly wrapper for scipy.signal.butter and sosfiltfilt - - Args: - F (nap.Tsd): _description_ - N (int): _description_ - Wn (float): _description_ - fs (float, optional): _description_. Defaults to None. - btype (str, optional): _description_. Defaults to "low". - - Returns: - _type_: _description_ - """ - y, t = F.values, F.index.values - if fs is None: - fs = 1 / np.median(np.diff(t)) - sos = signal.butter(N, Wn, btype, fs=fs, output='sos') - y_filt = signal.sosfiltfilt(sos, y) - return pd.Series(y_filt, index=t) - - -def sliding_rcoeff(signal_a, signal_b, nswin, overlap=0): - """ - Computes the local correlation coefficient between two signals in sliding windows - :param signal_a: - :param signal_b: - :param nswin: window size in samples - :param overlap: overlap of successiv windows in samples - :return: ix: indices of the center of the windows, r: correlation coefficients - """ - wg = WindowGenerator(ns=signal_a.size, nswin=nswin, overlap=overlap) - first_samples = np.array([fl[0] for fl in wg.firstlast]) - iwin = np.zeros([wg.nwin, wg.nswin], dtype=np.int32) + np.arange(wg.nswin) - iwin += first_samples[:, np.newaxis] - iwin[iwin >= signal_a.size] = signal_a.size - 1 - r = rcoeff(signal_a[iwin], signal_b[iwin]) - ix = first_samples + nswin // 2 - return ix, r diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index d227592..4c8d7d7 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -1,13 +1,9 @@ import numpy as np import pandas as pd from scipy import stats -from scipy.stats import ttest_ind -from iblphotometry.helpers import z +from iblphotometry.processing import z, Regression, ExponDecay, detect_spikes, detect_outliers from iblphotometry.behavior import psth -from iblphotometry.bleach_corrections import Regression, ExponDecay -from iblphotometry.outlier_detection import detect_spikes, detect_outliers - ## this approach works as well # from functools import singledispatch @@ -179,7 +175,7 @@ def has_response_to_event( base_ix = np.setdiff1d(np.arange(y.shape[0]), psth_ix.flatten()) base_samples = y[base_ix] - res = ttest_ind(sig_samples, base_samples) + res = stats.ttest_ind(sig_samples, base_samples) 
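+    # note: stats.ttest_ind is two-sided with independent samples by default;
+    # sig_samples holds one value per event, base_samples every sample outside
+    # the event windows, so the two group sizes can differ substantially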
return res.pvalue < alpha diff --git a/src/iblphotometry/outlier_detection.py b/src/iblphotometry/outlier_detection.py deleted file mode 100644 index e31cd82..0000000 --- a/src/iblphotometry/outlier_detection.py +++ /dev/null @@ -1,122 +0,0 @@ -import numpy as np -from scipy.stats import t -from ibldsp.utils import WindowGenerator -from copy import copy -import pandas as pd -from scipy.stats import gaussian_kde as kde -import warnings - - -def _grubbs_single(y: np.array, alpha: float =0.005, mode: str='median') -> bool: - # to apply a single pass of grubbs outlier detection - # see https://en.wikipedia.org/wiki/Grubbs%27s_test - - N = y.shape[0] - if mode == 'classic': - # this is the formulation as from wikipedia - G = np.max(np.absolute(y - np.average(y))) / np.std(y) - if mode == 'median': - # this is more robust against outliers - G = np.max(np.absolute(y - np.median(y))) / np.std(y) - tsq = t.ppf(1 - alpha / (2 * N), df=N - 2) ** 2 - g = (N - 1) / np.sqrt(N) * np.sqrt(tsq / (N - 2 + tsq)) - - if G > g: # if G > g, reject null hypothesis (of no outliers) - return True - else: - return False - - -def grubbs_test(y: np.array, alpha: float =0.005, mode:str='median'): - # apply grubbs test iteratively until no more outliers are found - outliers = [] - while _grubbs_single(y, alpha=alpha): - # get the outlier index - if mode == 'classic': - ix = np.argmax(np.absolute(y - np.average(y))) - if mode == 'median': - ix = np.argmax(np.absolute(y - np.median(y))) - outliers.append(ix) - y[ix] = np.median(y) - return np.sort(outliers) - - -def detect_outliers(y: np.array, w_size: int = 1000, alpha: float = 0.005): - # sliding grubbs test for a np.array - n_samples = y.shape[0] - wg = WindowGenerator(n_samples - (n_samples % w_size), w_size, 0) - dtype = np.dtype((np.float64, w_size)) - B = np.fromiter(wg.slice_array(y), dtype=dtype) - - # sliding window outlier detection - outlier_ix = [] - for i in range(B.shape[0]): - outlier_ix.append(grubbs_test(B[i, :], alpha=alpha) + i * w_size) - - # explicitly taking care of the remaining samples if they exist - if y.shape[0] > np.prod(B.shape): - outlier_ix.append( - grubbs_test(y[np.prod(B.shape) :], alpha=alpha) + B.shape[0] * w_size - ) - - outlier_ix = np.concatenate(outlier_ix).astype('int64') - return np.unique(outlier_ix) - - -def remove_nans(y: np.array): - y = y[~pd.isna(y)] # nanfilter - if y.shape[0] == 0: - warnings.warn('y was all NaN and is now empty') - return y - - -def fillnan_kde(y: np.array, w: int = 25): - # fill nans with KDE from edges - inds = np.where(pd.isna(y))[0] - if inds.shape[0] > 0: - for ix in inds: - ix_start = np.clip(ix - w, 0, y.shape[0] - 1) - ix_stop = np.clip(ix + w, 0, y.shape[0] - 1) - y_ = y[ix_start:ix_stop] - y_ = remove_nans(y_) - y[ix] = kde(y_).resample(1)[0][0] - - return y - else: - # no NaNs present, doing nothing - return y - - -def remove_outliers(F: pd.Series, w_size: int = 1000, alpha: float = 0.005, w: int = 25): - y, t = F.values, F.index.values - y = copy(y) - outliers = detect_outliers(y, w_size=w_size, alpha=alpha) - while len(outliers) > 0: - y[outliers] = np.nan - y = fillnan_kde(y, w=w) - outliers = detect_outliers(y, w_size=w_size, alpha=alpha) - return pd.Series(y, index=t) - - -def detect_spikes(t: np.array, sd: int = 5): - dt = np.diff(t) - bad_inds = dt < np.average(dt) - sd * np.std(dt) - return np.where(bad_inds)[0] - - -def remove_spikes(F: pd.Series, sd: int = 5, w: int = 25): - y, t = F.values, F.index.values - y = copy(y) - outliers = detect_spikes(y, sd=sd) - y[outliers] = np.nan - 
try:
-        y = fillnan_kde(y, w=w)
-    except np.linalg.LinAlgError:
-        if np.all(pd.isna(y[outliers])):  # all are NaN!
-            y[:] = 0
-            warnings.warn('all values NaN, setting to zeros')  # TODO logger
-        else:
-            y[outliers] = np.nanmedian(y)
-            warnings.warn('KDE fillnan failed, using global median')  # TODO logger
-
-    return pd.Series(y, index=t)
diff --git a/src/iblphotometry/pipelines.py b/src/iblphotometry/pipelines.py
index 72bcfa4..159adc7 100644
--- a/src/iblphotometry/pipelines.py
+++ b/src/iblphotometry/pipelines.py
@@ -1,25 +1,12 @@
-"""this module holds a collection of processing pipelines for fiber photometry data"""
-
 import numpy as np
 import pandas as pd
-from iblphotometry.helpers import z, filt
-
-# from ibldsp.utils import WindowGenerator
-from iblphotometry import sliding_operations
-from iblphotometry import bleach_corrections
-from iblphotometry.outlier_detection import remove_spikes
-from iblphotometry.bleach_corrections import lowpass_bleachcorrect, isosbestic_correct
-from iblphotometry.sliding_operations import sliding_mad
-
-# TODO this will probably be refactored to to processing
-from iblphotometry.helpers import zscore
+from iblphotometry.processing import remove_spikes, lowpass_bleachcorrect, isosbestic_correct, sliding_mad, zscore
 
 import logging
 from copy import copy
 
 logger = logging.getLogger()
 
-
 def run_pipeline(
diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py
new file mode 100644
index 0000000..c13dce9
--- /dev/null
+++ b/src/iblphotometry/processing.py
@@ -0,0 +1,730 @@
+from abc import ABC, abstractmethod
+import warnings
+
+import numpy as np
+import scipy as sp
+from scipy import signal, stats
+import pandas as pd
+
+from iblutil.numerical import rcoeff
+from ibldsp.utils import WindowGenerator
+
+from scipy.optimize import minimize
+from scipy.stats.distributions import norm
+from scipy.stats import gaussian_kde
+from scipy.stats import gaussian_kde as kde
+from scipy.stats import t
+from scipy.special import pseudo_huber
+
+from iblphotometry.behavior import psth
+from inspect import signature
+from copy import copy
+
+
+# machine resolution
+eps = np.finfo(np.float64).eps
+
+
+"""
+######## ## ## ## ## ###### ######## #### ####### ## ## ######
+## ## ## ### ## ## ## ## ## ## ## ### ## ## ##
+## ## ## #### ## ## ## ## ## ## #### ## ##
+###### ## ## ## ## ## ## ## ## ## ## ## ## ## ######
+## ## ## ## #### ## ## ## ## ## ## #### ##
+## ## ## ## ### ## ## ## ## ## ## ## ### ## ##
+## ####### ## ## ###### ## #### ####### ## ## ######
+"""
+
+def z(A: np.array, mode='classic'):
+    """classic z-score. Deviation from sample mean in units of sd
+
+    :param A: input array
+    :type A: np.array
+    :param mode: 'classic' centers on the mean, 'median' on the median, defaults to 'classic'
+    :type mode: str, optional
+    :return: the z-scored array
+    :rtype: np.array
+    """
+    if mode == 'classic':
+        return (A - np.average(A)) / np.std(A)
+    if mode == 'median':
+        return (A - np.median(A)) / np.std(A)
+
+def mad(A: np.array):
+    """the MAD is defined as the median of the absolute deviations from the data's median,
+    see https://en.wikipedia.org/wiki/Median_absolute_deviation
+
+    :param A: input array
+    :type A: np.array
+    :return: the median absolute deviation
+    :rtype: np.float64
+    """
+    return np.median(np.absolute(A - np.median(A)), axis=-1)
+
+def madscore(F: pd.Series):
+    # TODO overloading of mad?
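+    # note: mad() reduces a 1d input to a scalar, so the Series built below
+    # broadcasts a single value over the whole time index rather than
+    # returning a per-sample score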
+ y, t = F.values, F.index.values + return pd.Series(mad(y), index=t) + +def zscore(F: pd.Series, mode='classic'): + y, t = F.values, F.index.values + # mu, sig = np.average(y), np.std(y) + return pd.Series(z(y, mode=mode), index=t) + + +def filt(F: pd.Series, N: int, Wn: float, fs: float = None, btype='low'): + """ a wrapper for scipy.signal.butter and sosfiltfilt + """ + y, t = F.values, F.index.values + if fs is None: + fs = 1 / np.median(np.diff(t)) + sos = signal.butter(N, Wn, btype, fs=fs, output='sos') + y_filt = signal.sosfiltfilt(sos, y) + return pd.Series(y_filt, index=t) + +def sliding_rcoeff(signal_a, signal_b, nswin, overlap=0): + """ + Computes the local correlation coefficient between two signals in sliding windows + :param signal_a: + :param signal_b: + :param nswin: window size in samples + :param overlap: overlap of successiv windows in samples + :return: ix: indices of the center of the windows, r: correlation coefficients + """ + wg = WindowGenerator(ns=signal_a.size, nswin=nswin, overlap=overlap) + first_samples = np.array([fl[0] for fl in wg.firstlast]) + iwin = np.zeros([wg.nwin, wg.nswin], dtype=np.int32) + np.arange(wg.nswin) + iwin += first_samples[:, np.newaxis] + iwin[iwin >= signal_a.size] = signal_a.size - 1 + r = rcoeff(signal_a[iwin], signal_b[iwin]) + ix = first_samples + nswin // 2 + return ix, r + + +""" +## ####### ###### ###### ######## ## ## ## ## ###### ######## #### ####### ## ## ###### +## ## ## ## ## ## ## ## ## ## ### ## ## ## ## ## ## ## ### ## ## ## +## ## ## ## ## ## ## ## #### ## ## ## ## ## ## #### ## ## +## ## ## ###### ###### ###### ## ## ## ## ## ## ## ## ## ## ## ## ## ###### +## ## ## ## ## ## ## ## ## #### ## ## ## ## ## ## #### ## +## ## ## ## ## ## ## ## ## ## ## ### ## ## ## ## ## ## ## ### ## ## +######## ####### ###### ###### ## ####### ## ## ###### ## #### ####### ## ## ###### +""" + +def mse_loss(p, x, y, fun): + # mean squared error + y_hat = fun(x, *p) + return np.sum((y - y_hat) ** 2) / y.shape[0] + + +def mae_loss(p, x, y, fun): + # mean absolute error + y_hat = fun(x, *p) + return np.sum(np.absolute(y - y_hat)) / y.shape[0] + + +def huber_loss(p, x, y, fun, d): + # huber loss function + y_hat = fun(x, *p) + r = y - y_hat + return np.sum(pseudo_huber(d, r)) / y.shape[0] + + +def irls_loss(p, x, y, fun, d=1e-7): + # iteratively reweighted least squares - loss function + y_hat = fun(x, *p) + a = y - y_hat + # irls weight + f = np.max(np.stack([np.abs(a), np.ones(a.shape[0]) * d]), axis=0) + w = 1 / f + return np.sum(w * np.abs(a) ** 2) / y.shape[0] + +""" +######## ######## ###### ######## ######## ###### ###### #### ####### ## ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## +## ## ## ## ## ## ## ## ## ## ## ## #### ## +######## ###### ## #### ######## ###### ###### ###### ## ## ## ## ## ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## #### +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### +## ## ######## ###### ## ## ######## ###### ###### #### ####### ## ## +""" + +# wrapper class for regressions with different loss functions +# following a sklearn style of .fit() and .predict() + +class Regression: + def __init__(self, model=None, method: str = 'mse', method_params=None): + self.model = model + self.method = method + self.params = method_params if method_params is not None else {} + + def fit(self, x: np.ndarray, y: np.ndarray, algorithm='L-BFGS-B') -> tuple: + p0 = self.model.est_p0(x, y) + bounds = self.model.bounds if hasattr(self.model, 'bounds') else None + if self.method == 'mse': + minimize_result = minimize( 
+ mse_loss, + p0, + args=(x, y, self.model.eq), + bounds=bounds, + method=algorithm, + ) + if self.method == 'mae': + minimize_result = minimize( + mae_loss, + p0, + args=(x, y, self.model.eq), + bounds=bounds, + method=algorithm, + ) + if self.method == 'huber': + d = self.params.get('d', 3) + minimize_result = minimize( + huber_loss, + p0, + args=(x, y, self.model.eq, d), + bounds=bounds, + method=algorithm, + ) + if self.method == 'irls': + d = self.params.get('d', 1e-7) + minimize_result = minimize( + irls_loss, + p0, + args=(x, y, self.model.eq, d), + bounds=bounds, + method=algorithm, + ) + if not minimize_result.success: + raise Exception(f'Fitting failed. {minimize_result.message}') + else: + self.popt = minimize_result.x + + # if self.method == 'sklearn-RANSAC': + # self.reg = RANSACRegressor(random_state=42) + # xr = x.reshape(-1, 1) + # self.reg.fit(xr, y) + # self.popt = (self.reg.estimator_.coef_, self.reg.estimator_.intercept_) + + # if self.method == 'sklearn-linear': + # self.reg = LinearRegression() + # xr = x.reshape(-1, 1) + # self.reg.fit(xr, y) + # self.popt = (self.reg.coef_, self.reg.intercept_) + # print(self.popt) + + def predict(self, x: np.ndarray, return_type='numpy'): + x = np.sort(x) # just in case + y_hat = self.model.eq(x, *self.popt) + if return_type == 'numpy': + return y_hat + if return_type == 'pandas': + return pd.Series(y_hat, index=x) + +""" +######## ## ######## ### ###### ## ## ###### ####### ######## ######## ######## ###### ######## #### ####### ## ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## #### ## +######## ## ###### ## ## ## ######### ## ## ## ######## ######## ###### ## ## ## ## ## ## ## ## +## ## ## ## ######### ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## #### +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### +######## ######## ######## ## ## ###### ## ## ###### ####### ## ## ## ## ######## ###### ## #### ####### ## ## +""" + +class BleachCorrection: + def __init__( + self, + model: AbstractModel = None, + regression_method: str = 'mse', + regression_params: dict = None, + correction_method: str = 'subtract', + ): + self.model = model + self.regression = Regression( + model=model, method=regression_method, method_params=regression_params + ) + self.correction_method = correction_method + + def correct(self, F: pd.Series): + self.regression.fit(F.index.values, F.values) + ref = self.regression.predict(F.index.values, return_type='pandas') + return correct(F, ref, mode=self.correction_method) + + +class LowpassBleachCorrection: + def __init__( + self, + filter_params=dict(N=3, Wn=0.01, btype='lowpass'), + correction_method='subtract-divide', + ): + self.filter_params = filter_params + self.correction_method = correction_method + + def correct(self, F: pd.Series): + F_filt = filt(F, **self.filter_params) + return correct(F, F_filt, mode=self.correction_method) + + +class IsosbesticCorrection: + def __init__( + self, + regression_method: str = 'mse', + regression_params: dict = None, + correction_method: str = 'subtract-divide', + lowpass_isosbestic: dict = None, + ): + self.reg = Regression( + model=LinearModel(), + method=regression_method, + method_params=regression_params, + ) + self.lowpass_isosbestic = lowpass_isosbestic + self.correction_method = correction_method + + def correct( + self, + F_ca: pd.Series, + F_iso: pd.Series, + ): + if self.lowpass_isosbestic is not None: + F_iso = 
filt(F_iso, **self.lowpass_isosbestic) + + self.reg.fit(F_iso.values, F_ca.values) + F_iso_fit = self.reg.predict(F_iso.values, return_type='pandas') + + return correct(F_ca, F_iso_fit, mode=self.correction_method) + +def correct(signal: pd.Series, reference: pd.Series, mode: str = 'subtract') -> pd.Series: + """the main function that applies the correction of a signal with a reference. Correcions can be applied in 3 principle ways: + - The reference can be subtracted from the signal + - the signal can be divided by the reference + - both of the above (first subtraction, then division) - this is akin to df/f + + :param signal: _description_ + :type signal: pd.Series + :param reference: _description_ + :type reference: pd.Series + :param mode: _description_, defaults to 'subtract' + :type mode: str, optional + :return: _description_ + :rtype: pd.Series + """ + if mode == 'subtract': + signal_corrected = signal.values - reference.values + if mode == 'divide': + signal_corrected = signal.values / reference.values + if mode == 'subtract-divide': + signal_corrected = (signal.values - reference.values) / reference.values + return pd.Series(signal_corrected, index=signal.index.values) + + +""" +## ## ####### ######## ######## ## ###### +### ### ## ## ## ## ## ## ## ## +#### #### ## ## ## ## ## ## ## +## ### ## ## ## ## ## ###### ## ###### +## ## ## ## ## ## ## ## ## +## ## ## ## ## ## ## ## ## ## +## ## ####### ######## ######## ######## ###### +""" + + +class AbstractModel(ABC): + + @abstractmethod + def eq(): + # the model equation + ... + + @abstractmethod + def est_p0(): + # given data, estimate model parameters + ... + + def _calc_r_squared(self, y, y_hat): + r = 1 - np.sum((y - y_hat) ** 2) / np.sum((y - np.average(y)) ** 2) + return r + + def _calc_likelihood(self, y, y_hat, n_samples=-1, use_kde=False): + rs = y - y_hat + if n_samples > 0: + inds = np.random.randint(0, y.shape[0], size=n_samples) + rs = rs[inds] + if use_kde is True: + # explicit estimation of the distribution of residuals + if n_samples == -1: + warnings.warn( + f'calculating KDE on {y.values.shape[0]} samples. 
This might be slow' + ) + dist = gaussian_kde(rs) + else: + # using RSME + sig = np.sqrt(np.average((y - y_hat) ** 2)) + dist = norm(0, sig) + ll = np.sum(np.log(dist.pdf(rs))) + return ll + + def _calc_aic(self, k, ll): + aic = 2 * k - 2 * ll # np.log(ll) + return aic + + def calc_model_stats(self, y, y_hat, n_samples: int = -1, use_kde: bool = False): + r_sq = self._calc_r_squared(y, y_hat) + ll = self._calc_likelihood(y, y_hat, n_samples, use_kde) + k = len(signature(self.eq).parameters) - 1 + aic = self._calc_aic(ll, k) + return dict(r_sq=r_sq, ll=ll, aic=aic) + +# the actual models + +class LinearModel(AbstractModel): + def eq(self, x, m, b): + return x * m + b + + def est_p0(self, x: np.array, y: np.array): + return tuple(np.polyfit(x, y, 1)) + # x, y = np.sort(x), np.sort(y) + # dx = x[-1] - x[0] + # dy = y[-1] - y[0] + # m = dy / dx + # b = np.average(y - (m * x)) + # return (m, b) + + +class ExponDecay(AbstractModel): + bounds = ((0, np.inf), (eps, np.inf), (-np.inf, np.inf)) + + def eq(self, t, A, tau, b): + return A * np.exp(-t / tau) + b + + def est_p0(self, t: np.array, y: np.array): + return (y[0], t[int(t.shape[0] / 3)], y[-1]) + + +class DoubleExponDecay(AbstractModel): + bounds = ( + (0, np.inf), + (eps, np.inf), + (0, np.inf), + (eps, np.inf), + (-np.inf, np.inf), + ) + + def eq(self, t, A1, tau1, A2, tau2, b): + return A1 * np.exp(-t / tau1) + A2 * np.exp(-t / tau2) + b + + def est_p0(self, t: np.array, y: np.array): + A_est = y[0] + tau_est = t[int(t.shape[0] / 3)] + b_est = y[-1] + return (A_est, tau_est, A_est / 2, tau_est / 2, b_est) + + +class TripleExponDecay(AbstractModel): + bounds = ( + (0, np.inf), + (eps, np.inf), + (0, np.inf), + (eps, np.inf), + (0, np.inf), + (eps, np.inf), + (-np.inf, np.inf), + ) + + def eq(self, t, A1, tau1, A2, tau2, A3, tau3, b): + return ( + A1 * np.exp(-t / tau1) + A2 * np.exp(-t / tau2) + A3 * np.exp(-t / tau3) + b + ) + + def est_p0(self, t: np.array, y: np.array): + A_est = y[0] + tau_est = t[int(t.shape[0] / 3)] + b_est = y[-1] + return ( + A_est, + tau_est, + A_est * 0.66, + tau_est * 0.66, + A_est * 0.33, + tau_est * 0.33, + b_est, + ) + +""" + ###### ####### ######## ######## ######## ###### ######## #### ####### ## ## ######## ## ## ## ## ###### ######## #### ####### ## ## ###### +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## ## ## ## ### ## ## ## ## ## ## ## ### ## ## ## +## ## ## ## ## ## ## ## ## ## ## ## ## #### ## ## ## ## #### ## ## ## ## ## ## #### ## ## +## ## ## ######## ######## ###### ## ## ## ## ## ## ## ## ###### ## ## ## ## ## ## ## ## ## ## ## ## ## ###### +## ## ## ## ## ## ## ## ## ## ## ## ## ## #### ## ## ## ## #### ## ## ## ## ## ## #### ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## ## ## ## ### ## ## ## ## ## ## ## ### ## ## + ###### ####### ## ## ## ## ######## ###### ## #### ####### ## ## ## ####### ## ## ###### ## #### ####### ## ## ###### +""" + +# these are the convenience functions that are called in pipelines + +def lowpass_bleachcorrect(F: pd.Series, **kwargs): + bc = LowpassBleachCorrection(**kwargs) + return bc.correct(F) + +def exponential_bleachcorrect(F: pd.Series, **kwargs): + model = DoubleExponDecay() + ec = BleachCorrection(model, **kwargs) + return ec.correct(F) + +def isosbestic_correct(F_sig: pd.DataFrame, F_ref: pd.DataFrame, **kwargs): + ic = IsosbesticCorrection(**kwargs) + return ic.correct(F_sig, F_ref) + + +""" + ####### ## ## ######## ## #### ######## ######## ######## ######## ######## ######## ###### ######## #### ####### ## ## +## ## ## ## ## ## ## ## ## ## 
## ## ## ## ## ## ## ## ## ## ## ### ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## #### ## +## ## ## ## ## ## ## ###### ######## ## ## ###### ## ###### ## ## ## ## ## ## ## ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## #### +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### + ####### ####### ## ######## #### ######## ## ## ######## ######## ## ######## ###### ## #### ####### ## ## +""" + +def _grubbs_single(y: np.array, alpha: float =0.005, mode: str='median') -> bool: + # to apply a single pass of grubbs outlier detection + # see https://en.wikipedia.org/wiki/Grubbs%27s_test + + N = y.shape[0] + if mode == 'classic': + # this is the formulation as from wikipedia + G = np.max(np.absolute(y - np.average(y))) / np.std(y) + if mode == 'median': + # this is more robust against outliers + G = np.max(np.absolute(y - np.median(y))) / np.std(y) + tsq = t.ppf(1 - alpha / (2 * N), df=N - 2) ** 2 + g = (N - 1) / np.sqrt(N) * np.sqrt(tsq / (N - 2 + tsq)) + + if G > g: # if G > g, reject null hypothesis (of no outliers) + return True + else: + return False + + +def grubbs_test(y: np.array, alpha: float =0.005, mode:str='median'): + # apply grubbs test iteratively until no more outliers are found + outliers = [] + while _grubbs_single(y, alpha=alpha): + # get the outlier index + if mode == 'classic': + ix = np.argmax(np.absolute(y - np.average(y))) + if mode == 'median': + ix = np.argmax(np.absolute(y - np.median(y))) + outliers.append(ix) + y[ix] = np.median(y) + return np.sort(outliers) + + +def detect_outliers(y: np.array, w_size: int = 1000, alpha: float = 0.005): + # sliding grubbs test for a np.array + n_samples = y.shape[0] + wg = WindowGenerator(n_samples - (n_samples % w_size), w_size, 0) + dtype = np.dtype((np.float64, w_size)) + B = np.fromiter(wg.slice_array(y), dtype=dtype) + + # sliding window outlier detection + outlier_ix = [] + for i in range(B.shape[0]): + outlier_ix.append(grubbs_test(B[i, :], alpha=alpha) + i * w_size) + + # explicitly taking care of the remaining samples if they exist + if y.shape[0] > np.prod(B.shape): + outlier_ix.append( + grubbs_test(y[np.prod(B.shape) :], alpha=alpha) + B.shape[0] * w_size + ) + + outlier_ix = np.concatenate(outlier_ix).astype('int64') + return np.unique(outlier_ix) + + +def remove_nans(y: np.array): + y = y[~pd.isna(y)] # nanfilter + if y.shape[0] == 0: + warnings.warn('y was all NaN and is now empty') + return y + + +def fillnan_kde(y: np.array, w: int = 25): + # fill nans with KDE from edges + inds = np.where(pd.isna(y))[0] + if inds.shape[0] > 0: + for ix in inds: + ix_start = np.clip(ix - w, 0, y.shape[0] - 1) + ix_stop = np.clip(ix + w, 0, y.shape[0] - 1) + y_ = y[ix_start:ix_stop] + y_ = remove_nans(y_) + y[ix] = kde(y_).resample(1)[0][0] + + return y + else: + # no NaNs present, doing nothing + return y + + +def remove_outliers(F: pd.Series, w_size: int = 1000, alpha: float = 0.005, w: int = 25): + y, t = F.values, F.index.values + y = copy(y) + outliers = detect_outliers(y, w_size=w_size, alpha=alpha) + while len(outliers) > 0: + y[outliers] = np.nan + y = fillnan_kde(y, w=w) + outliers = detect_outliers(y, w_size=w_size, alpha=alpha) + return pd.Series(y, index=t) + + +def detect_spikes(t: np.array, sd: int = 5): + dt = np.diff(t) + bad_inds = dt < np.average(dt) - sd * np.std(dt) + return np.where(bad_inds)[0] + + +def remove_spikes(F: pd.Series, sd: int = 5, w: int = 25): + y, t = F.values, F.index.values + y = copy(y) + outliers = detect_spikes(y, sd=sd) + y[outliers] = 
np.nan + try: + y = fillnan_kde(y, w=w) + except np.linalg.LinAlgError: + if np.all(pd.isna(y[outliers])): # all are NaN! + y[:] = 0 + warnings.warn('all values NaN, setting to zeros') # TODO logger + else: + y[outliers] = np.nanmedian(y) + warnings.warn('KDE fillnan failed, using global median') # TODO logger + + return pd.Series(y, index=t) + + +""" + ###### ## #### ######## #### ## ## ###### ####### ######## ######## ######## ### ######## #### ####### ## ## ###### +## ## ## ## ## ## ## ### ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## ## ## +## ## ## ## ## ## #### ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## #### ## ## + ###### ## ## ## ## ## ## ## ## ## #### ## ## ######## ###### ######## ## ## ## ## ## ## ## ## ## ###### + ## ## ## ## ## ## ## #### ## ## ## ## ## ## ## ## ######### ## ## ## ## ## #### ## +## ## ## ## ## ## ## ## ### ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## ## + ###### ######## #### ######## #### ## ## ###### ####### ## ######## ## ## ## ## ## #### ####### ## ## ###### +""" + +"""this module holds a collection of processing pipelines for fiber photometry data""" + + +def make_sliding_window( + A: np.ndarray, + w_size: int, + pad_mode='edge', + method='stride_tricks', + warning=None, +): + + """use np.stride_tricks to make a sliding window view of a 1-d np.array A + full overlap, step size 1 + assumes 8 byte numbers (to be exposed? but needs to be tested first) + pads beginning and end of array with edge values + + Args: + A (np.ndarray): Array to be windowed + w_size (int): window size in samples + + Raises: + ValueError: is raised if w_size is not an even number + + Returns: + _type_: The view on the array, with shape (A.shape[0], w_size) + """ + n_samples = A.shape[0] + + if method == 'stride_tricks': + if w_size % 2 != 0: + if warning == 'raise_exception': + raise ValueError('w_size needs to be an even number') + else: + w_size += 1 # dangerous + B = np.lib.stride_tricks.as_strided(A, ((n_samples - w_size), w_size), (8, 8)) + + if method == 'window_generator': + wg = WindowGenerator(n_samples - 1, w_size, w_size - 1) + dtype = np.dtype((np.float64, w_size)) + B = np.fromiter(wg.slice_array(A), dtype=dtype) + + if pad_mode is not None: + B = np.pad(B, ((int(w_size / 2), int(w_size / 2)), (0, 0)), mode=pad_mode) + return B + + +def sliding_dFF(F: pd.Series, w_len: float, fs=None, weights=None): + y, t = F.values, F.index.values + fs = 1 / np.median(np.diff(t)) if fs is None else fs + w_size = int(w_len * fs) + + def _dFF(A: np.array): + return (A - np.average(A)) / np.average(A) + + if weights is not None: + # note: passing weights makes the stride trick not possible, or only with allocating a matrix of shape (n_samples * w_size) + # true sliding operation implemented, much slower + weights /= weights.sum() + n_samples = y.shape[0] + wg = WindowGenerator(n_samples - 1, w_size, w_size - 1) + d = [ + np.sum(_dFF(F[first:last].values) * weights) + for (first, last) in wg.firstlast + ] + else: + B = make_sliding_window(y, w_size) + mus = np.average(B, axis=1) + d = (y - mus) / mus + return pd.Series(d, index=t) + + +def sliding_z(F: pd.Series, w_len: float, fs=None, weights=None): + """sliding window z-score of a pynapple time series with data with optional weighting + + Args: + F (nap.Tsd): Signal to be zscored + w_size (int): window size + w_weights (_type_, optional): weights of the window . Defaults to None. 
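        Example (a sketch, assuming a ~30 Hz pd.Series F; not a fixture of this repo):
            F_z = sliding_z(F, w_len=60.0)  # z-score within 60 s windows, step size 1 sample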
+ + Returns: + _type_: _description_ + """ + y, t = F.values, F.index.values + fs = 1 / np.median(np.diff(t)) if fs is None else fs + w_size = int(w_len * fs) + + if weights is not None: + # note: passing weights makes the stride trick not possible, or only with allocating a matrix of shape (n_samples * w_size) + # true sliding operation implemented, much slower + weights /= weights.sum() + n_samples = y.shape[0] + wg = WindowGenerator(n_samples - 1, w_size, w_size - 1) + d = [ + np.sum(z(F[first:last].values) * weights) for (first, last) in wg.firstlast + ] + else: + B = make_sliding_window(y, w_size) + mus, sds = np.average(B, axis=1), np.std(B, axis=1) + d = (y - mus) / sds + return pd.Series(d, index=t) + + +def sliding_mad(F: pd.Series, w_len: float = None, fs=None, overlap=90): + y, t = F.values, F.index.values + fs = 1 / np.median(np.diff(t)) if fs is None else fs + w_size = int(w_len * fs) + + n_samples = y.shape[0] + wg = WindowGenerator(ns=n_samples, nswin=w_size, overlap=overlap) + trms = np.array([first for first, last in wg.firstlast]) / fs + t[0] + + rmswin, _ = psth(y, t, t_events=trms, fs=fs, event_window=[0, w_len]) + gain = np.nanmedian(np.abs(y)) / np.nanmedian(np.abs(rmswin), axis=0) + gain = np.interp(t, trms, gain) + return pd.Series(y * gain, index=t) diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py index 908b3c7..242a2f5 100644 --- a/src/iblphotometry/qc.py +++ b/src/iblphotometry/qc.py @@ -8,7 +8,7 @@ import gc -from iblphotometry.sliding_operations import make_sliding_window +from iblphotometry.processing import make_sliding_window from pipelines import run_pipeline import warnings diff --git a/src/iblphotometry/sliding_operations.py b/src/iblphotometry/sliding_operations.py deleted file mode 100644 index f0baaf4..0000000 --- a/src/iblphotometry/sliding_operations.py +++ /dev/null @@ -1,138 +0,0 @@ -"""this module holds a collection of processing pipelines for fiber photometry data""" - -import numpy as np -import pandas as pd -from iblphotometry.helpers import z -from iblphotometry.behavior import psth -from ibldsp.utils import WindowGenerator - - -def make_sliding_window( - A: np.ndarray, - w_size: int, - pad_mode='edge', - method='stride_tricks', - warning=None, -): - """use np.stride_tricks to make a sliding window view of a 1-d np.array A - full overlap, step size 1 - assumes 8 byte numbers (to be exposed? 
but needs to be tested first) - pads beginning and end of array with edge values - - Args: - A (np.ndarray): Array to be windowed - w_size (int): window size in samples - - Raises: - ValueError: is raised if w_size is not an even number - - Returns: - _type_: The view on the array, with shape (A.shape[0], w_size) - """ - n_samples = A.shape[0] - - if method == 'stride_tricks': - if w_size % 2 != 0: - if warning == 'raise_exception': - raise ValueError('w_size needs to be an even number') - else: - w_size += 1 # dangerous - B = np.lib.stride_tricks.as_strided(A, ((n_samples - w_size), w_size), (8, 8)) - - if method == 'window_generator': - wg = WindowGenerator(n_samples - 1, w_size, w_size - 1) - dtype = np.dtype((np.float64, w_size)) - B = np.fromiter(wg.slice_array(A), dtype=dtype) - - if pad_mode is not None: - B = np.pad(B, ((int(w_size / 2), int(w_size / 2)), (0, 0)), mode=pad_mode) - return B - - -def sliding_dFF(F: pd.Series, w_len: float, fs=None, weights=None): - y, t = F.values, F.index.values - fs = 1 / np.median(np.diff(t)) if fs is None else fs - w_size = int(w_len * fs) - - def _dFF(A: np.array): - return (A - np.average(A)) / np.average(A) - - if weights is not None: - # note: passing weights makes the stride trick not possible, or only with allocating a matrix of shape (n_samples * w_size) - # true sliding operation implemented, much slower - weights /= weights.sum() - n_samples = y.shape[0] - wg = WindowGenerator(n_samples - 1, w_size, w_size - 1) - d = [ - np.sum(_dFF(F[first:last].values) * weights) - for (first, last) in wg.firstlast - ] - else: - B = make_sliding_window(y, w_size) - mus = np.average(B, axis=1) - d = (y - mus) / mus - return pd.Series(d, index=t) - - -def sliding_z(F: pd.Series, w_len: float, fs=None, weights=None): - """sliding window z-score of a pynapple time series with data with optional weighting - - Args: - F (nap.Tsd): Signal to be zscored - w_size (int): window size - w_weights (_type_, optional): weights of the window . Defaults to None. 
- - Returns: - _type_: _description_ - """ - y, t = F.values, F.index.values - fs = 1 / np.median(np.diff(t)) if fs is None else fs - w_size = int(w_len * fs) - - if weights is not None: - # note: passing weights makes the stride trick not possible, or only with allocating a matrix of shape (n_samples * w_size) - # true sliding operation implemented, much slower - weights /= weights.sum() - n_samples = y.shape[0] - wg = WindowGenerator(n_samples - 1, w_size, w_size - 1) - d = [ - np.sum(z(F[first:last].values) * weights) for (first, last) in wg.firstlast - ] - else: - B = make_sliding_window(y, w_size) - mus, sds = np.average(B, axis=1), np.std(B, axis=1) - d = (y - mus) / sds - return pd.Series(d, index=t) - - -def sliding_mad(F: pd.Series, w_len: float = None, fs=None, overlap=90): - y, t = F.values, F.index.values - fs = 1 / np.median(np.diff(t)) if fs is None else fs - w_size = int(w_len * fs) - - n_samples = y.shape[0] - wg = WindowGenerator(ns=n_samples, nswin=w_size, overlap=overlap) - trms = np.array([first for first, last in wg.firstlast]) / fs + t[0] - - rmswin, _ = psth(y, t, t_events=trms, fs=fs, event_window=[0, w_len]) - gain = np.nanmedian(np.abs(y)) / np.nanmedian(np.abs(rmswin), axis=0) - gain = np.interp(t, trms, gain) - return pd.Series(y * gain, index=t) - - -# def sliding_mad_new(F: nap.Tsd, w_len: float = None, fs=None, overlap=90): -# y, t = F.values, F.times() -# fs = 1 / np.median(np.diff(t)) if fs is None else fs -# w_size = int(w_len * fs) - -# B = make_sliding_window(y, w_size) -# gain = np.nanmedian(y) / np.nanmedian(B, axis=1) -# return nap.Tsd(t=t, d=y * gain) - -# wg = WindowGenerator(ns=n_samples, nswin=w_size, overlap=overlap) -# trms = np.array([first for first, last in wg.firstlast]) / fs + t[0] - -# rmswin, _ = psth(y, t, t_events=trms, fs=fs, event_window=[0, w_len]) -# gain = np.nanmedian(np.abs(y)) / np.nanmedian(np.abs(rmswin), axis=0) -# gain = np.interp(t, trms, gain) -# return nap.Tsd(t=t, d=y * gain) From e7aedf758822d2bcb155632407809621a3212868 Mon Sep 17 00:00:00 2001 From: georg Date: Tue, 3 Dec 2024 14:33:12 +0000 Subject: [PATCH 05/10] type hinting --- src/iblphotometry/io.py | 20 ++++++------- src/iblphotometry/metrics.py | 52 ++++++++------------------------- src/iblphotometry/pipelines.py | 2 +- src/iblphotometry/processing.py | 51 +++++++++++++++----------------- src/iblphotometry/qc.py | 13 +++++---- 5 files changed, 54 insertions(+), 84 deletions(-) diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py index 18ba56c..85ba4c1 100644 --- a/src/iblphotometry/io.py +++ b/src/iblphotometry/io.py @@ -1,9 +1,9 @@ -# %% import numpy as np import pandas as pd from pathlib import Path import warnings import pandera +from typing import Optional from iblphotometry.neurophotometrics import ( LIGHT_SOURCE_MAP, @@ -12,18 +12,18 @@ def from_array( - times: np.array, data: np.array, channel_names: list[str] = None + times: np.ndarray, data: np.ndarray, channel_names: list[str] | None = None ) -> pd.DataFrame: return pd.DataFrame(data, index=times, columns=channel_names) def from_dataframe( raw_df: pd.DataFrame, - data_columns: list[str] = None, - time_column: str = None, + data_columns: list[str] | None = None, + time_column: str | None = None, channel_column: str = 'name', - channel_names: list[str] = None, - rename: dict = None, + channel_names: list[str] | None = None, + rename: dict | None = None, ) -> dict: """reads in a pandas.DataFrame and converts it into nap.TsdDataframes. Performs the time demultiplexing operation. 
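For orientation, a minimal sketch of the demultiplexing call; the column and
channel names below are illustrative assumptions, not fixtures of this repo:

    import numpy as np
    import pandas as pd
    from iblphotometry.io import from_dataframe

    # two multiplexed channels acquired at 60 Hz, interleaved frame by frame
    raw_df = pd.DataFrame(
        dict(
            times=np.arange(6) / 60,
            name=['GCaMP', 'Isosbestic'] * 3,
            Region01=np.random.randn(6),
        )
    )
    dfs = from_dataframe(raw_df, data_columns=['Region01'], time_column='times')
    # expected: dfs['GCaMP'] and dfs['Isosbestic'], one 30 Hz DataFrame per channel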
@@ -49,9 +49,9 @@ def from_dataframe( # infer name of time column if not provided if time_column is None: - time_column = [col for col in raw_df.columns if 'time' in col.lower()] - assert len(time_column) == 1 - time_column = time_column[0] + time_columns = [col for col in raw_df.columns if 'time' in col.lower()] + assert len(time_columns) == 1 + time_column = time_columns[0] # infer channel names if they are not explicitly provided if channel_names is None: @@ -76,7 +76,7 @@ def from_dataframe( def from_pqt( signal_pqt_path: str | Path, - locations_pqt_path: str | Path = None, + locations_pqt_path: Optional[str | Path] = None, ): """reads in a photometry.signal.pqt files as they are registered in alyx. diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 4c8d7d7..ac34fde 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -1,3 +1,4 @@ +from typing import Optional import numpy as np import pandas as pd from scipy import stats @@ -5,23 +6,6 @@ from iblphotometry.processing import z, Regression, ExponDecay, detect_spikes, detect_outliers from iblphotometry.behavior import psth -## this approach works as well -# from functools import singledispatch -# @singledispatch -# def percentile_dist(A: nap.Tsd, pc: tuple = (50, 95), axis: None = None) -> float: -# """the distance between two percentiles in units of z -# should be proportional to SNR, assuming the signal is -# in the positive 5th percentile -# """ -# P = np.percentile(z(A.values), pc) -# return P[1] - P[0] - -# @percentile_dist.register -# def _(A: np.ndarray, pc: tuple = (50, 95), axis: int = -1) -> float: -# P = np.percentile(z(A), pc, axis=axis) -# return P[1] - P[0] - - def percentile_dist(A: pd.Series | np.ndarray, pc: tuple = (50, 95), axis=-1) -> float: """the distance between two percentiles in units of z. Captures the magnitude of transients. @@ -73,23 +57,15 @@ def signal_skew(A: pd.Series | np.ndarray, axis=-1) -> float: def n_unique_samples(A: pd.Series | np.ndarray) -> int: """number of unique samples in the signal. Low values indicate that the signal during acquisition was not within the range of the digitizer.""" - if isinstance(A, pd.Series): - return np.unique(A.values).shape[0] - elif isinstance(A, np.ndarray): - return A.shape[0] - else: - raise TypeError('A must be pd.Series or np.ndarray.') + a = A.values if isinstance(A, pd.Series) else A + return np.unique(a).shape[0] def n_spikes(A: pd.Series | np.ndarray, sd: int = 5): """count the number of spike artifacts in the recording.""" - if isinstance(A, pd.Series): - return detect_spikes(A.values, sd=sd).shape[0] - elif isinstance(A, np.ndarray): - return detect_spikes(A, sd=sd).shape[0] - else: - raise TypeError('A must be pd.Series or np.ndarray.') - + a = A.values if isinstance(A, pd.Series) else A + return detect_spikes(a, sd=sd).shape[0] + def n_outliers( A: pd.Series | np.ndarray, w_size: int = 1000, alpha: float = 0.0005 @@ -97,12 +73,8 @@ def n_outliers( """counts the number of outliers as detected by grubbs test for outliers. 
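    A sliding Grubbs test (see detect_outliers) is run in windows of w_size samples.
    Hypothetical call: n_bad = n_outliers(F, w_size=1000, alpha=0.0005)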
int: _description_ """ - if isinstance(A, pd.Series): - return detect_outliers(A.values, w_size=w_size, alpha=alpha).shape[0] - elif isinstance(A, np.ndarray): - return detect_outliers(A, w_size=w_size, alpha=alpha).shape[0] - else: - raise TypeError('A must be pd.Series or np.ndarray.') + a = A.values if isinstance(A, pd.Series) else A + return detect_outliers(a, w_size=w_size, alpha=alpha).shape[0] def bleaching_tau(A: pd.Series) -> float: @@ -151,8 +123,8 @@ def ttest_pre_post( def has_response_to_event( A: pd.Series, - event_times: np.array, - fs: float = None, + event_times: np.ndarray, + fs: Optional[float] = None, window: tuple = (-1, 1), alpha: float = 0.005, mode='peak', @@ -182,8 +154,8 @@ def has_response_to_event( def has_responses( A: pd.Series, trials: pd.DataFrame, - event_names: list = None, - fs: float = None, + event_names: list, + fs: Optional[float] = None, window: tuple = (-1, 1), alpha: float = 0.005, ) -> bool: diff --git a/src/iblphotometry/pipelines.py b/src/iblphotometry/pipelines.py index 159adc7..f51487c 100644 --- a/src/iblphotometry/pipelines.py +++ b/src/iblphotometry/pipelines.py @@ -10,7 +10,7 @@ def run_pipeline( pipeline, F_signal: pd.DataFrame, - F_reference: pd.DataFrame = None, + F_reference: pd.DataFrame | None = None, ) -> pd.DataFrame: # copy Fc = F_signal.copy() diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py index c13dce9..abd1e88 100644 --- a/src/iblphotometry/processing.py +++ b/src/iblphotometry/processing.py @@ -3,18 +3,15 @@ import numpy as np import scipy as sp -from scipy import signal, stats +from scipy import signal import pandas as pd from iblutil.numerical import rcoeff from ibldsp.utils import WindowGenerator -from ibldsp.utils import WindowGenerator from scipy.optimize import minimize from scipy.stats.distributions import norm -from scipy.stats import gaussian_kde -from scipy.stats import gaussian_kde as kde -from scipy.stats import t +from scipy.stats import gaussian_kde, t from scipy.special import pseudo_huber from iblphotometry.behavior import psth @@ -37,11 +34,11 @@ ## ####### ## ## ###### ## #### ####### ## ## ###### """ -def z(A: np.array, mode='classic'): +def z(A: np.ndarray, mode='classic'): """classic z-score. 
Deviation from sample mean in units of sd :param A: _description_ - :type A: np.array + :type A: np.ndarray :param mode: _description_, defaults to 'classic' :type mode: str, optional :return: _description_ @@ -52,12 +49,12 @@ def z(A: np.array, mode='classic'): if mode == 'median': return (A - np.median(A)) / np.std(A) -def mad(A: np.array): +def mad(A: np.ndarray): """ the MAD is defined as the median of the absolute deviations from the data's median see https://en.wikipedia.org/wiki/Median_absolute_deviation :param A: _description_ - :type A: np.array + :type A: np.ndarray :return: _description_ :rtype: _type_ """ @@ -74,7 +71,7 @@ def zscore(F: pd.Series, mode='classic'): return pd.Series(z(y, mode=mode), index=t) -def filt(F: pd.Series, N: int, Wn: float, fs: float = None, btype='low'): +def filt(F: pd.Series, N: int, Wn: float, fs: float | None = None, btype='low'): """ a wrapper for scipy.signal.butter and sosfiltfilt """ y, t = F.values, F.index.values @@ -236,7 +233,7 @@ def predict(self, x: np.ndarray, return_type='numpy'): class BleachCorrection: def __init__( self, - model: AbstractModel = None, + model = None, # TODO bring back type checking regression_method: str = 'mse', regression_params: dict = None, correction_method: str = 'subtract', @@ -271,9 +268,9 @@ class IsosbesticCorrection: def __init__( self, regression_method: str = 'mse', - regression_params: dict = None, + regression_params: dict | None = None, correction_method: str = 'subtract-divide', - lowpass_isosbestic: dict = None, + lowpass_isosbestic: dict | None = None, ): self.reg = Regression( model=LinearModel(), @@ -383,7 +380,7 @@ class LinearModel(AbstractModel): def eq(self, x, m, b): return x * m + b - def est_p0(self, x: np.array, y: np.array): + def est_p0(self, x: np.ndarray, y: np.ndarray): return tuple(np.polyfit(x, y, 1)) # x, y = np.sort(x), np.sort(y) # dx = x[-1] - x[0] @@ -399,7 +396,7 @@ class ExponDecay(AbstractModel): def eq(self, t, A, tau, b): return A * np.exp(-t / tau) + b - def est_p0(self, t: np.array, y: np.array): + def est_p0(self, t: np.ndarray, y: np.ndarray): return (y[0], t[int(t.shape[0] / 3)], y[-1]) @@ -415,7 +412,7 @@ class DoubleExponDecay(AbstractModel): def eq(self, t, A1, tau1, A2, tau2, b): return A1 * np.exp(-t / tau1) + A2 * np.exp(-t / tau2) + b - def est_p0(self, t: np.array, y: np.array): + def est_p0(self, t: np.ndarray, y: np.ndarray): A_est = y[0] tau_est = t[int(t.shape[0] / 3)] b_est = y[-1] @@ -438,7 +435,7 @@ def eq(self, t, A1, tau1, A2, tau2, A3, tau3, b): A1 * np.exp(-t / tau1) + A2 * np.exp(-t / tau2) + A3 * np.exp(-t / tau3) + b ) - def est_p0(self, t: np.array, y: np.array): + def est_p0(self, t: np.ndarray, y: np.ndarray): A_est = y[0] tau_est = t[int(t.shape[0] / 3)] b_est = y[-1] @@ -488,7 +485,7 @@ def isosbestic_correct(F_sig: pd.DataFrame, F_ref: pd.DataFrame, **kwargs): ####### ####### ## ######## #### ######## ## ## ######## ######## ## ######## ###### ## #### ####### ## ## """ -def _grubbs_single(y: np.array, alpha: float =0.005, mode: str='median') -> bool: +def _grubbs_single(y: np.ndarray, alpha: float =0.005, mode: str='median') -> bool: # to apply a single pass of grubbs outlier detection # see https://en.wikipedia.org/wiki/Grubbs%27s_test @@ -508,7 +505,7 @@ def _grubbs_single(y: np.array, alpha: float =0.005, mode: str='median') -> bool return False -def grubbs_test(y: np.array, alpha: float =0.005, mode:str='median'): +def grubbs_test(y: np.ndarray, alpha: float =0.005, mode:str='median'): # apply grubbs test iteratively until no more 
outliers are found outliers = [] while _grubbs_single(y, alpha=alpha): @@ -522,8 +519,8 @@ def grubbs_test(y: np.array, alpha: float =0.005, mode:str='median'): return np.sort(outliers) -def detect_outliers(y: np.array, w_size: int = 1000, alpha: float = 0.005): - # sliding grubbs test for a np.array +def detect_outliers(y: np.ndarray, w_size: int = 1000, alpha: float = 0.005): + # sliding grubbs test for a np.ndarray n_samples = y.shape[0] wg = WindowGenerator(n_samples - (n_samples % w_size), w_size, 0) dtype = np.dtype((np.float64, w_size)) @@ -544,14 +541,14 @@ def detect_outliers(y: np.array, w_size: int = 1000, alpha: float = 0.005): return np.unique(outlier_ix) -def remove_nans(y: np.array): +def remove_nans(y: np.ndarray): y = y[~pd.isna(y)] # nanfilter if y.shape[0] == 0: warnings.warn('y was all NaN and is now empty') return y -def fillnan_kde(y: np.array, w: int = 25): +def fillnan_kde(y: np.ndarray, w: int = 25): # fill nans with KDE from edges inds = np.where(pd.isna(y))[0] if inds.shape[0] > 0: @@ -560,7 +557,7 @@ def fillnan_kde(y: np.array, w: int = 25): ix_stop = np.clip(ix + w, 0, y.shape[0] - 1) y_ = y[ix_start:ix_stop] y_ = remove_nans(y_) - y[ix] = kde(y_).resample(1)[0][0] + y[ix] = gaussian_kde(y_).resample(1)[0][0] return y else: @@ -579,7 +576,7 @@ def remove_outliers(F: pd.Series, w_size: int = 1000, alpha: float = 0.005, w: i return pd.Series(y, index=t) -def detect_spikes(t: np.array, sd: int = 5): +def detect_spikes(t: np.ndarray, sd: int = 5): dt = np.diff(t) bad_inds = dt < np.average(dt) - sd * np.std(dt) return np.where(bad_inds)[0] @@ -624,7 +621,7 @@ def make_sliding_window( warning=None, ): - """use np.stride_tricks to make a sliding window view of a 1-d np.array A + """use np.stride_tricks to make a sliding window view of a 1-d np.ndarray A full overlap, step size 1 assumes 8 byte numbers (to be exposed? but needs to be tested first) pads beginning and end of array with edge values @@ -664,7 +661,7 @@ def sliding_dFF(F: pd.Series, w_len: float, fs=None, weights=None): fs = 1 / np.median(np.diff(t)) if fs is None else fs w_size = int(w_len * fs) - def _dFF(A: np.array): + def _dFF(A: np.ndarray): return (A - np.average(A)) / np.average(A) if weights is not None: diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py index 242a2f5..088be9f 100644 --- a/src/iblphotometry/qc.py +++ b/src/iblphotometry/qc.py @@ -18,15 +18,16 @@ logger = logging.getLogger() +from collections.abc import Callable # %% # those could be in metrics def sliding_metric( F: pd.Series, w_len: float, - fs: float = None, - metric: callable = None, + metric: Callable, + fs: float | None = None, n_wins: int = -1, - metric_kwargs: dict = None, + metric_kwargs: dict | None = None, ): """applies a metric along time. 
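For orientation, a usage sketch of the windowed metric evaluation; F is assumed
to be a pd.Series holding a single fiber's trace, and percentile_dist comes from
the metrics module refactored above:

    from iblphotometry.metrics import percentile_dist
    from iblphotometry.qc import sliding_metric

    # transient magnitude across the session, evaluated in 60 s windows,
    # subsampled to at most 100 windows
    m = sliding_metric(F, w_len=60.0, metric=percentile_dist, n_wins=100)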
@@ -59,9 +60,9 @@ def sliding_metric( # eval pipleline will be here def eval_metric( F: pd.Series, - metric: callable = None, - metric_kwargs: dict = None, - sliding_kwargs: dict = None, + metric: Callable, + metric_kwargs: dict | None = None, + sliding_kwargs: dict | None = None, ): m = metric(F, **metric_kwargs) if metric_kwargs is not None else metric(F) From 0cc4095f7eed37a6a45dea9fd18c6d783a4f191d Mon Sep 17 00:00:00 2001 From: georg Date: Wed, 4 Dec 2024 11:48:50 +0000 Subject: [PATCH 06/10] refactoring and adding io function for new notebook --- src/iblphotometry/io.py | 32 +++++++++++++++++++++++++------- src/iblphotometry/qc.py | 34 ++++++++++++++-------------------- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py index 85ba4c1..6e499b7 100644 --- a/src/iblphotometry/io.py +++ b/src/iblphotometry/io.py @@ -45,7 +45,9 @@ def from_dataframe( # infer if not explicitly provided: defaults to everything that starts with 'Region' if data_columns is None: - data_columns = [col for col in raw_df.columns if col.startswith('Region')] + # this hacky parser currently deals with the inconsistency between carolinas and alejandros extraction + # https://github.com/int-brain-lab/ibl-photometry/issues/35 + data_columns = [col for col in raw_df.columns if col.startswith('Region') or col.startswith('G')] # infer name of time column if not provided if time_column is None: @@ -73,6 +75,19 @@ def from_dataframe( return raw_dfs +def from_dataframes(raw_df: pd.DataFrame, locations_df: pd.DataFrame): + data_columns = (list(locations_df.index),) + rename = locations_df['brain_region'].to_dict() + + read_config = dict( + data_columns=data_columns, + time_column='times', + channel_column='name', + rename=rename, + ) + + return from_dataframe(raw_df, **read_config) + def from_pqt( signal_pqt_path: str | Path, @@ -110,7 +125,7 @@ def from_pqt( return from_dataframe(raw_df, **read_config) -def _read_raw_neurophotometrics_df(raw_df: pd.DataFrame, rois=None) -> pd.DataFrame: +def from_raw_neurophotometrics_df(raw_df: pd.DataFrame, rois=None, drop_first=True) -> pd.DataFrame: """reads in parses the output of the neurophotometrics FP3002 Args: @@ -159,10 +174,14 @@ def _read_raw_neurophotometrics_df(raw_df: pd.DataFrame, rois=None) -> pd.DataFr for cn in ['name', 'color', 'wavelength']: out_df.loc[states == state, cn] = channel_meta_map.iloc[ic[0]][cn] + # drop first frame + if drop_first: + out_df = out_df.iloc[1:].reset_index() + return out_df -def from_raw_neurophotometrics( +def from_raw_neurophotometrics_file( path: str | Path, drop_first=True, validate=True, @@ -181,8 +200,7 @@ def from_raw_neurophotometrics( nap.TsdFrame: _description_ # FIXME """ warnings.warn( - 'loading a photometry from raw neurophotometrics output. The data will _not_ be synced and\ - is being split into channels by LedState (converted to LED wavelength in nm)' + 'loading photometry from raw neurophotometrics output. 
The data will _not_ be synced and is being split into channels by LedState (converted to LED wavelength in nm)' ) if isinstance(path, str): path = Path(path) @@ -199,11 +217,11 @@ def from_raw_neurophotometrics( if validate: raw_df = _validate_dataframe(raw_df) - df = _read_raw_neurophotometrics_df(raw_df) + df = from_raw_neurophotometrics_df(raw_df) # drop first frame if drop_first: - df = df.iloc[1:] + df = df.iloc[1:].reset_index() data_columns = [col for col in df.columns if col.startswith('G')] read_config = dict( diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py index 088be9f..b424044 100644 --- a/src/iblphotometry/qc.py +++ b/src/iblphotometry/qc.py @@ -1,25 +1,19 @@ # %% -import numpy as np -from scipy.stats import linregress -import pandas as pd - +import gc +from collections.abc import Callable from tqdm import tqdm import logging - -import gc - -from iblphotometry.processing import make_sliding_window -from pipelines import run_pipeline import warnings -from brainbox.io.one import SessionLoader +import numpy as np +import pandas as pd +from scipy.stats import linregress -warnings.filterwarnings('once', category=DeprecationWarning, module='pynapple') +from iblphotometry.processing import make_sliding_window +from iblphotometry.pipelines import run_pipeline logger = logging.getLogger() -from collections.abc import Callable - # %% # those could be in metrics def sliding_metric( F: pd.Series, @@ -119,9 +113,9 @@ def run_qc( for eid in tqdm(eids): print(eid) # get photometry data - raw_tfs = data_loader.load_photometry_data(eid=eid) - signal_bands = list(raw_tfs.keys()) - brain_regions = raw_tfs[signal_bands[0]] + raw_dfs = data_loader.load_photometry_data(eid=eid) + signal_bands = list(raw_dfs.keys()) + brain_regions = raw_dfs[signal_bands[0]] # get behavioral data # TODO this should be provided @@ -136,7 +130,7 @@ def run_qc( # trials = data_loader.one.load_dataset(eid, '*trials.table.pqt') for band in signal_bands: - raw_tf = raw_tfs[band] + raw_tf = raw_dfs[band] for region in brain_regions: qc_result = qc_series( raw_tf[region], qc_metrics['raw'], sliding_kwargs=None, eid=eid @@ -151,14 +145,14 @@ def run_qc( if 'reference' in sigref_mapping: # this is for isosbestic pipelines proc_tf = run_pipeline( pipeline, - raw_tfs[sigref_mapping['signal']], - raw_tfs[sigref_mapping['reference']], + raw_dfs[sigref_mapping['signal']], + raw_dfs[sigref_mapping['reference']], ) else: # FIXME this fails for true-multiband # this hack works for single-band # possible fix could be that signal could be a list - proc_tf = run_pipeline(pipeline, raw_tfs[sigref_mapping['signal']]) + proc_tf = run_pipeline(pipeline, raw_dfs[sigref_mapping['signal']]) for region in brain_regions: # sliding qc of the processed data From b7296b8c30363157f69f02a7c5394643d37fb351 Mon Sep 17 00:00:00 2001 From: georg Date: Wed, 4 Dec 2024 12:39:48 +0000 Subject: [PATCH 07/10] refactor --- src/tests/test_loaders.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/test_loaders.py b/src/tests/test_loaders.py index 38599e1..124fe83 100644 --- a/src/tests/test_loaders.py +++ b/src/tests/test_loaders.py @@ -17,8 +17,8 @@ def test_from_array(self): fio.from_array(times, data, names) # for neurophotometrics hardware - def test_from_raw_neurophotometrics_output(self): - fio.from_raw_neurophotometrics(self.paths['raw_neurophotometrics_csv']) + def test_from_raw_neurophotometrics_file(self): + fio.from_raw_neurophotometrics_file(self.paths['raw_neurophotometrics_csv']) # from pqt files as 
they are returned from ONE by .load_dataset() def test_from_pqt(self): From 264a5ac9bd4441dc6b306d447f4d662c7ba3ddba Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 4 Dec 2024 15:00:02 +0000 Subject: [PATCH 08/10] removing old tests folder --- src/tests/test_pipelines.py | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 src/tests/test_pipelines.py diff --git a/src/tests/test_pipelines.py b/src/tests/test_pipelines.py deleted file mode 100644 index 277df91..0000000 --- a/src/tests/test_pipelines.py +++ /dev/null @@ -1,35 +0,0 @@ -import unittest -from pathlib import Path -import iblphotometry.io as fio -from iblphotometry.pipelines import ( - run_pipeline, - sliding_mad_pipeline, - isosbestic_correction_pipeline, -) -from iblphotometry.synthetic import generate_dataframe -import tests.base_tests - - -class TestPipelines(tests.base_tests.PhotometryDataTestCase): - def test_single_band_pipeline(self): - # on synthetic data - raw_dfs = generate_dataframe() - run_pipeline(sliding_mad_pipeline, raw_dfs['raw_calcium']) - - Path(__file__).parent.joinpath() - # on real data - raw_dfs = fio.from_pqt( - self.paths['photometry_signal_pqt'], - self.paths['photometryROI_locations_pqt'], - ) - signal_bands = list(raw_dfs.keys()) - run_pipeline(sliding_mad_pipeline, raw_dfs[signal_bands[0]]) - - def test_isosbestic_pipeline(self): - # on synthetic data - raw_dfs = generate_dataframe() - - # run pipeline - run_pipeline( - isosbestic_correction_pipeline, raw_dfs['raw_calcium'], raw_dfs['raw_isosbestic'] - ) From d32a5e5bd78adf75746e12572b2d04a9078ad1bc Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 4 Dec 2024 15:08:55 +0000 Subject: [PATCH 09/10] ruff --- src/iblphotometry/io.py | 15 ++++-- src/iblphotometry/loaders.py | 1 - src/iblphotometry/metrics.py | 11 ++++- src/iblphotometry/neurophotometrics.py | 63 ++++++++++++++++++++++++-- src/iblphotometry/pipelines.py | 9 +++- src/iblphotometry/processing.py | 44 ++++++++++++------ src/iblphotometry/qc.py | 1 + src/iblphotometry/synthetic.py | 9 ++-- 8 files changed, 124 insertions(+), 29 deletions(-) diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py index 6e499b7..3fa36a1 100644 --- a/src/iblphotometry/io.py +++ b/src/iblphotometry/io.py @@ -47,7 +47,11 @@ def from_dataframe( if data_columns is None: # this hacky parser currently deals with the inconsistency between carolinas and alejandros extraction # https://github.com/int-brain-lab/ibl-photometry/issues/35 - data_columns = [col for col in raw_df.columns if col.startswith('Region') or col.startswith('G')] + data_columns = [ + col + for col in raw_df.columns + if col.startswith('Region') or col.startswith('G') + ] # infer name of time column if not provided if time_column is None: @@ -75,10 +79,11 @@ def from_dataframe( return raw_dfs + def from_dataframes(raw_df: pd.DataFrame, locations_df: pd.DataFrame): data_columns = (list(locations_df.index),) rename = locations_df['brain_region'].to_dict() - + read_config = dict( data_columns=data_columns, time_column='times', @@ -125,7 +130,9 @@ def from_pqt( return from_dataframe(raw_df, **read_config) -def from_raw_neurophotometrics_df(raw_df: pd.DataFrame, rois=None, drop_first=True) -> pd.DataFrame: +def from_raw_neurophotometrics_df( + raw_df: pd.DataFrame, rois=None, drop_first=True +) -> pd.DataFrame: """reads in parses the output of the neurophotometrics FP3002 Args: @@ -251,6 +258,7 @@ def _validate_dataframe( return schema_raw_data.validate(df) + def 
_validate_neurophotometrics_digital_inputs(df: pd.DataFrame) -> pd.DataFrame:
    schema_digital_inputs = pandera.DataFrameSchema(
        columns=dict(
            ChannelName=pandera.Column(str, coerce=True),
            Channel=pandera.Column(pandera.Int64, coerce=True),
            AlwaysTrue=pandera.Column(bool, coerce=True),
            SystemTimestamp=pandera.Column(pandera.Float64),
            ComputerTimestamp=pandera.Column(pandera.Float64),
        )
    )
    return schema_digital_inputs.validate(df)
-
diff --git a/src/iblphotometry/loaders.py b/src/iblphotometry/loaders.py
index 75b4f46..15b6158 100644
--- a/src/iblphotometry/loaders.py
+++ b/src/iblphotometry/loaders.py
@@ -66,7 +66,6 @@ def _load_data_from_eid(self, eid: str, rename=True):

        return raw_dfs

-
    def _eid2pnames(self, eid: str):
        session_path = self.one.eid2path(eid)
        pnames = [reg.name for reg in session_path.joinpath('alf').glob('Region*')]
diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py
index ac34fde..d412766 100644
--- a/src/iblphotometry/metrics.py
+++ b/src/iblphotometry/metrics.py
@@ -3,9 +3,16 @@
 import pandas as pd
 from scipy import stats

-from iblphotometry.processing import z, Regression, ExponDecay, detect_spikes, detect_outliers
+from iblphotometry.processing import (
+    z,
+    Regression,
+    ExponDecay,
+    detect_spikes,
+    detect_outliers,
+)
 from iblphotometry.behavior import psth

+
 def percentile_dist(A: pd.Series | np.ndarray, pc: tuple = (50, 95), axis=-1) -> float:
     """the distance between two percentiles in units of z.
     Captures the magnitude of transients.
@@ -65,7 +72,7 @@ def n_spikes(A: pd.Series | np.ndarray, sd: int = 5):
     """count the number of spike artifacts in the recording."""
     a = A.values if isinstance(A, pd.Series) else A
     return detect_spikes(a, sd=sd).shape[0]
-
+

 def n_outliers(
     A: pd.Series | np.ndarray, w_size: int = 1000, alpha: float = 0.0005
diff --git a/src/iblphotometry/neurophotometrics.py b/src/iblphotometry/neurophotometrics.py
index 4fb0050..869caea 100644
--- a/src/iblphotometry/neurophotometrics.py
+++ b/src/iblphotometry/neurophotometrics.py
@@ -3,6 +3,7 @@
 The light source map refers to the available LEDs on the system.
 The flags refer to the byte encoding of LED states in the system.
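 For example, with the tables below a LedState value of 2 decodes as a plain
 470 nm frame, and 18 as the 470 nm LED with digital Output 0 HIGH
 (LED_STATES['L470'][2] == 18); the loaders in io.py rely on this mapping to
 split frames into per-wavelength channels.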
""" + LIGHT_SOURCE_MAP = { 'color': ['None', 'Violet', 'Blue', 'Green'], 'wavelength': [0, 415, 470, 560], @@ -24,8 +25,60 @@ 10: 'Input 0 signal HIGH + Stimulation', 11: 'Output 0 HIGH + Input 0 HIGH + Stimulation', }, - 'No LED ON': {0: 0, 1: 8, 2: 16, 3: 32, 4: 64, 5: 128, 6: 256, 7: 512, 8: 48, 9: 528, 10: 544, 11: 560}, - 'L415': {0: 1, 1: 9, 2: 17, 3: 33, 4: 65, 5: 129, 6: 257, 7: 513, 8: 49, 9: 529, 10: 545, 11: 561}, - 'L470': {0: 2, 1: 10, 2: 18, 3: 34, 4: 66, 5: 130, 6: 258, 7: 514, 8: 50, 9: 530, 10: 546, 11: 562}, - 'L560': {0: 4, 1: 12, 2: 20, 3: 36, 4: 68, 5: 132, 6: 260, 7: 516, 8: 52, 9: 532, 10: 548, 11: 564} -} \ No newline at end of file + 'No LED ON': { + 0: 0, + 1: 8, + 2: 16, + 3: 32, + 4: 64, + 5: 128, + 6: 256, + 7: 512, + 8: 48, + 9: 528, + 10: 544, + 11: 560, + }, + 'L415': { + 0: 1, + 1: 9, + 2: 17, + 3: 33, + 4: 65, + 5: 129, + 6: 257, + 7: 513, + 8: 49, + 9: 529, + 10: 545, + 11: 561, + }, + 'L470': { + 0: 2, + 1: 10, + 2: 18, + 3: 34, + 4: 66, + 5: 130, + 6: 258, + 7: 514, + 8: 50, + 9: 530, + 10: 546, + 11: 562, + }, + 'L560': { + 0: 4, + 1: 12, + 2: 20, + 3: 36, + 4: 68, + 5: 132, + 6: 260, + 7: 516, + 8: 52, + 9: 532, + 10: 548, + 11: 564, + }, +} diff --git a/src/iblphotometry/pipelines.py b/src/iblphotometry/pipelines.py index c848473..f679583 100644 --- a/src/iblphotometry/pipelines.py +++ b/src/iblphotometry/pipelines.py @@ -1,11 +1,18 @@ import numpy as np import pandas as pd -from iblphotometry.processing import remove_spikes, lowpass_bleachcorrect, isosbestic_correct, sliding_mad, zscore +from iblphotometry.processing import ( + remove_spikes, + lowpass_bleachcorrect, + isosbestic_correct, + sliding_mad, + zscore, +) import logging logger = logging.getLogger() + def run_pipeline( pipeline, F_signal: pd.DataFrame, diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py index abd1e88..415a3ae 100644 --- a/src/iblphotometry/processing.py +++ b/src/iblphotometry/processing.py @@ -19,7 +19,6 @@ from copy import copy - # machine resolution eps = np.finfo(np.float64).eps @@ -34,6 +33,7 @@ ## ####### ## ## ###### ## #### ####### ## ## ###### """ + def z(A: np.ndarray, mode='classic'): """classic z-score. Deviation from sample mean in units of sd @@ -49,10 +49,11 @@ def z(A: np.ndarray, mode='classic'): if mode == 'median': return (A - np.median(A)) / np.std(A) + def mad(A: np.ndarray): - """ the MAD is defined as the median of the absolute deviations from the data's median + """the MAD is defined as the median of the absolute deviations from the data's median see https://en.wikipedia.org/wiki/Median_absolute_deviation - + :param A: _description_ :type A: np.ndarray :return: _description_ @@ -60,11 +61,13 @@ def mad(A: np.ndarray): """ return np.median(np.absolute(A - np.median(A)), axis=-1) + def madscore(F: pd.Series): # TODO overloading of mad? 
y, t = F.values, F.index.values return pd.Series(mad(y), index=t) + def zscore(F: pd.Series, mode='classic'): y, t = F.values, F.index.values # mu, sig = np.average(y), np.std(y) @@ -72,8 +75,7 @@ def zscore(F: pd.Series, mode='classic'): def filt(F: pd.Series, N: int, Wn: float, fs: float | None = None, btype='low'): - """ a wrapper for scipy.signal.butter and sosfiltfilt - """ + """a wrapper for scipy.signal.butter and sosfiltfilt""" y, t = F.values, F.index.values if fs is None: fs = 1 / np.median(np.diff(t)) @@ -81,6 +83,7 @@ def filt(F: pd.Series, N: int, Wn: float, fs: float | None = None, btype='low'): y_filt = signal.sosfiltfilt(sos, y) return pd.Series(y_filt, index=t) + def sliding_rcoeff(signal_a, signal_b, nswin, overlap=0): """ Computes the local correlation coefficient between two signals in sliding windows @@ -110,6 +113,7 @@ def sliding_rcoeff(signal_a, signal_b, nswin, overlap=0): ######## ####### ###### ###### ## ####### ## ## ###### ## #### ####### ## ## ###### """ + def mse_loss(p, x, y, fun): # mean squared error y_hat = fun(x, *p) @@ -138,6 +142,7 @@ def irls_loss(p, x, y, fun, d=1e-7): w = 1 / f return np.sum(w * np.abs(a) ** 2) / y.shape[0] + """ ######## ######## ###### ######## ######## ###### ###### #### ####### ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## @@ -151,6 +156,7 @@ def irls_loss(p, x, y, fun, d=1e-7): # wrapper class for regressions with different loss functions # following a sklearn style of .fit() and .predict() + class Regression: def __init__(self, model=None, method: str = 'mse', method_params=None): self.model = model @@ -220,6 +226,7 @@ def predict(self, x: np.ndarray, return_type='numpy'): if return_type == 'pandas': return pd.Series(y_hat, index=x) + """ ######## ## ######## ### ###### ## ## ###### ####### ######## ######## ######## ###### ######## #### ####### ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## @@ -230,10 +237,11 @@ def predict(self, x: np.ndarray, return_type='numpy'): ######## ######## ######## ## ## ###### ## ## ###### ####### ## ## ## ## ######## ###### ## #### ####### ## ## """ + class BleachCorrection: def __init__( self, - model = None, # TODO bring back type checking + model=None, # TODO bring back type checking regression_method: str = 'mse', regression_params: dict = None, correction_method: str = 'subtract', @@ -262,7 +270,7 @@ def __init__( def correct(self, F: pd.Series): F_filt = filt(F, **self.filter_params) return correct(F, F_filt, mode=self.correction_method) - + class IsosbesticCorrection: def __init__( @@ -293,7 +301,10 @@ def correct( return correct(F_ca, F_iso_fit, mode=self.correction_method) -def correct(signal: pd.Series, reference: pd.Series, mode: str = 'subtract') -> pd.Series: + +def correct( + signal: pd.Series, reference: pd.Series, mode: str = 'subtract' +) -> pd.Series: """the main function that applies the correction of a signal with a reference. 
@@ -329,7 +340,6 @@
 class AbstractModel(ABC):
-
     @abstractmethod
     def eq():
         # the model equation
@@ -374,8 +384,10 @@ def calc_model_stats(self, y, y_hat, n_samples: int = -1, use_kde: bool = False)
         aic = self._calc_aic(ll, k)
         return dict(r_sq=r_sq, ll=ll, aic=aic)
 
+
 # the actual models
 
+
 class LinearModel(AbstractModel):
     def eq(self, x, m, b):
         return x * m + b
@@ -449,6 +461,7 @@ def est_p0(self, t: np.ndarray, y: np.ndarray):
             b_est,
         )
 
+
 """
 ###### ####### ######## ######## ######## ###### ######## #### ####### ## ## ######## ## ## ## ## ###### ######## #### ####### ## ## ######
@@ -461,15 +474,18 @@
 ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## ## ## ## ### ## ## ## ## ## ## ## ### ## ## ##
 """
 
 # these are the convenience functions that are called in pipelines
 
+
 def lowpass_bleachcorrect(F: pd.Series, **kwargs):
     bc = LowpassBleachCorrection(**kwargs)
     return bc.correct(F)
 
+
 def exponential_bleachcorrect(F: pd.Series, **kwargs):
     model = DoubleExponDecay()
     ec = BleachCorrection(model, **kwargs)
     return ec.correct(F)
 
+
 def isosbestic_correct(F_sig: pd.DataFrame, F_ref: pd.DataFrame, **kwargs):
     ic = IsosbesticCorrection(**kwargs)
     return ic.correct(F_sig, F_ref)
@@ -485,7 +501,8 @@ def isosbestic_correct(F_sig: pd.DataFrame, F_ref: pd.DataFrame, **kwargs):
 ####### ####### ## ######## #### ######## ## ## ######## ######## ## ######## ###### ## #### ####### ## ##
 """
 
-def _grubbs_single(y: np.ndarray, alpha: float =0.005, mode: str='median') -> bool:
+
+def _grubbs_single(y: np.ndarray, alpha: float = 0.005, mode: str = 'median') -> bool:
     # to apply a single pass of grubbs outlier detection
     # see https://en.wikipedia.org/wiki/Grubbs%27s_test
 
@@ -505,7 +522,7 @@
         return False
 
-def grubbs_test(y: np.ndarray, alpha: float =0.005, mode:str='median'):
+def grubbs_test(y: np.ndarray, alpha: float = 0.005, mode: str = 'median'):
     # apply grubbs test iteratively until no more outliers are found
     outliers = []
     while _grubbs_single(y, alpha=alpha):
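Since both functions lean on Grubbs's test, a self-contained sketch of the statistic may help. This is the classic two-sided mean/sd form only; the in-repo functions default to a 'median' mode, which this sketch does not reproduce:

    import numpy as np
    from scipy import stats

    def grubbs_statistic(y: np.ndarray) -> float:
        # largest absolute deviation from the mean, in units of sample sd
        return np.max(np.abs(y - np.mean(y))) / np.std(y, ddof=1)

    def grubbs_critical(n: int, alpha: float = 0.005) -> float:
        # two-sided critical value, see https://en.wikipedia.org/wiki/Grubbs%27s_test
        t = stats.t.ppf(1 - alpha / (2 * n), n - 2)
        return (n - 1) / np.sqrt(n) * np.sqrt(t**2 / (n - 2 + t**2))

    def has_outlier(y: np.ndarray, alpha: float = 0.005) -> bool:
        # one pass flags at most one outlier, hence the iteration in grubbs_test
        return grubbs_statistic(y) > grubbs_critical(y.size, alpha)
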
@@ -565,7 +582,9 @@ def fillnan_kde(y: np.ndarray, w: int = 25):
     return y
 
 
-def remove_outliers(F: pd.Series, w_size: int = 1000, alpha: float = 0.005, w: int = 25):
+def remove_outliers(
+    F: pd.Series, w_size: int = 1000, alpha: float = 0.005, w: int = 25
+):
     y, t = F.values, F.index.values
     y = copy(y)
     outliers = detect_outliers(y, w_size=w_size, alpha=alpha)
@@ -620,7 +639,6 @@ def make_sliding_window(
     method='stride_tricks',
     warning=None,
 ):
-
     """use np.stride_tricks to make a sliding window view of a 1-d np.ndarray A,
     with full overlap and a step size of 1.
     assumes 8-byte numbers (to be exposed, but this needs to be tested first)
diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py
index b424044..464465d 100644
--- a/src/iblphotometry/qc.py
+++ b/src/iblphotometry/qc.py
@@ -14,6 +14,7 @@
 logger = logging.getLogger()
 
+
 # %%
 # those could be in metrics
 def sliding_metric(
     F: pd.Series,
diff --git a/src/iblphotometry/synthetic.py b/src/iblphotometry/synthetic.py
index 2c44060..9a7f1bc 100644
--- a/src/iblphotometry/synthetic.py
+++ b/src/iblphotometry/synthetic.py
@@ -7,7 +7,6 @@
 import scipy.signal
 
 
-
 def synthetic101(fs=30, rl=1000, event_rate=0.2):
     """
     Generates synthetic photometry data with
@@ -47,8 +46,12 @@ def generate_dataframe(sigma: float = 0.01):
     df[['raw_calcium', 'raw_isosbestic']] += np.random.randn(*df.shape) * sigma
 
     raw_dfs = dict(
-        raw_calcium=pd.DataFrame(df['raw_calcium'].values, index=df.index, columns=['Region01']),
-        raw_isosbestic=pd.DataFrame(df['raw_isosbestic'].values, index=df.index, columns=['Region01']),
+        raw_calcium=pd.DataFrame(
+            df['raw_calcium'].values, index=df.index, columns=['Region01']
+        ),
+        raw_isosbestic=pd.DataFrame(
+            df['raw_isosbestic'].values, index=df.index, columns=['Region01']
+        ),
     )
     return raw_dfs

From 08bc9c35d94cddacee240357618acb5379027756 Mon Sep 17 00:00:00 2001
From: Georg Raiser
Date: Wed, 4 Dec 2024 15:14:53 +0000
Subject: [PATCH 10/10] ruff again

---
 .gitignore                      | 1 +
 src/iblphotometry/processing.py | 1 -
 src/iblphotometry/qc.py         | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index d5401f8..04237af 100644
--- a/.gitignore
+++ b/.gitignore
@@ -164,3 +164,4 @@ cython_debug/
 # local scripts
 src/local
 src/analysis
+src/examples
diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py
index 415a3ae..8ab3f48 100644
--- a/src/iblphotometry/processing.py
+++ b/src/iblphotometry/processing.py
@@ -2,7 +2,6 @@
 import warnings
 
 import numpy as np
-import scipy as sp
 from scipy import signal
 import pandas as pd
 
diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py
index 464465d..5276026 100644
--- a/src/iblphotometry/qc.py
+++ b/src/iblphotometry/qc.py
@@ -3,7 +3,6 @@
 from collections.abc import Callable
 from tqdm import tqdm
 import logging
-import warnings
 
 import numpy as np
 import pandas as pd
@@ -11,6 +10,7 @@
 from iblphotometry.processing import make_sliding_window
 from iblphotometry.pipelines import run_pipeline
+from brainbox.io.one import SessionLoader
 
 logger = logging.getLogger()
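On the stride-tricks approach named in make_sliding_window's docstring (and relied on by qc.py's sliding metrics via the import above): the core idea is available through numpy's public API since 1.20. A sketch, with the caveat that the in-repo helper layers window-size handling and memory checks on top of this:

    import numpy as np

    y = np.arange(10.0)
    w_size = 4
    # full-overlap, step-size-1 windows as a zero-copy view, shape (7, 4)
    windows = np.lib.stride_tricks.sliding_window_view(y, w_size)
    # sliding statistics are then reductions over the last axis, e.g. a sliding MAD
    med = np.median(windows, axis=-1, keepdims=True)
    mad_trace = np.median(np.abs(windows - med), axis=-1)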