Merge pull request #37 from int-brain-lab/io_dev

changes in io functions, testing updated and extended, some bugfixing…
int-brain-lab · Dec 4, 2024 · d0a2178
2 parents 84d3034 + 376d091
commit d0a2178
Show file tree

Hide file tree

Showing 6 changed files with 66 additions and 38 deletions.
diff --git a/.gitignore b/.gitignore
@@ -161,6 +161,9 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
+# vscode
+.vscode
+
 # local scripts
 src/local
 src/analysis

diff --git a/src/gui/rawdata_visualizer.py b/src/gui/rawdata_visualizer.py
@@ -108,28 +108,28 @@ def load_file(self):
                     or file_path.endswith('.pqt')
                     or file_path.endswith('.parquet')
                 ):
-                    self.td = from_raw_neurophotometrics_file(file_path)
+                    self.dfs = from_raw_neurophotometrics_file(file_path)
                 else:
                     raise ValueError('Unsupported file format')
 
-                if 'GCaMP' in self.td.keys():
-                    self.df = self.td['GCaMP'].as_dataframe()
-                    self.times = self.td['GCaMP'].t
+                if 'GCaMP' in self.dfs.keys():
+                    self.df = self.dfs['GCaMP']
+                    self.times = self.dfs['GCaMP'].index.values
                     self.plot_time_index = np.arange(0, len(self.times))
                     self.filtered_df = None
                 else:
                     raise ValueError('No GCaMP found')
 
-                if 'Isosbestic' in self.td.keys():
-                    self.dfiso = self.td['Isosbestic'].as_dataframe()
+                if 'Isosbestic' in self.dfs.keys():
+                    self.dfiso = self.dfs['Isosbestic']
 
                 # Display the dataframe in the table
                 # self.display_dataframe()
                 # Update the column selector
                 self.update_column_selector()
 
                 # Load into Pynapple dataframe
-                self.td = from_raw_neurophotometrics_file(file_path)
+                self.dfs = from_raw_neurophotometrics_file(file_path)
 
                 # Set filter combo box
                 self.filter_selector.setCurrentIndex(0)  # Reset to "Select Filter"

diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py
@@ -17,7 +17,7 @@ def from_array(
     return pd.DataFrame(data, index=times, columns=channel_names)
 
 
-def from_dataframe(
+def from_ibl_dataframe(
     raw_df: pd.DataFrame,
     data_columns: list[str] | None = None,
     time_column: str | None = None,
@@ -80,7 +80,7 @@ def from_dataframe(
     return raw_dfs
 
 
-def from_dataframes(raw_df: pd.DataFrame, locations_df: pd.DataFrame):
+def from_ibl_dataframes(raw_df: pd.DataFrame, locations_df: pd.DataFrame):
     data_columns = (list(locations_df.index),)
     rename = locations_df['brain_region'].to_dict()
 
@@ -91,10 +91,10 @@ def from_dataframes(raw_df: pd.DataFrame, locations_df: pd.DataFrame):
         rename=rename,
     )
 
-    return from_dataframe(raw_df, **read_config)
+    return from_ibl_dataframe(raw_df, **read_config)
 
 
-def from_pqt(
+def from_ibl_pqt(
     signal_pqt_path: str | Path,
     locations_pqt_path: Optional[str | Path] = None,
 ):
@@ -111,8 +111,7 @@ def from_pqt(
     raw_df = pd.read_parquet(signal_pqt_path)
     if locations_pqt_path is not None:
         locations_df = pd.read_parquet(locations_pqt_path)
-        data_columns = (list(locations_df.index),)
-        rename = locations_df['brain_region'].to_dict()
+        return from_ibl_dataframes(raw_df, locations_df)
     else:
         warnings.warn(
             'loading a photometry.signal.pqt file without its corresponding photometryROI.locations.pqt'
@@ -127,10 +126,10 @@ def from_pqt(
         rename=rename,
     )
 
-    return from_dataframe(raw_df, **read_config)
+    return from_ibl_dataframe(raw_df, **read_config)
 
 
-def from_raw_neurophotometrics_df(
+def from_raw_neurophotometrics_ibl_df(
     raw_df: pd.DataFrame, rois=None, drop_first=True
 ) -> pd.DataFrame:
     """reads in parses the output of the neurophotometrics FP3002
@@ -145,14 +144,14 @@ def from_raw_neurophotometrics_df(
     if rois is None:
         rois = [col for col in raw_df.columns if col.startswith('G')]
 
-    out_df = raw_df.filter(items=rois, axis=1).sort_index(axis=1)
+    df = raw_df.filter(items=rois, axis=1).sort_index(axis=1)
     timestamp_name = (
         'SystemTimestamp' if 'SystemTimestamp' in raw_df.columns else 'Timestamp'
     )
-    out_df['times'] = raw_df[timestamp_name]
-    out_df['wavelength'] = np.nan
-    out_df['name'] = ''
-    out_df['color'] = ''
+    df['times'] = raw_df[timestamp_name]
+    df['wavelength'] = np.nan
+    df['name'] = ''
+    df['color'] = ''
 
     # TODO the names column in channel_meta_map should actually be user defined (experiment description file?)
     channel_meta_map = pd.DataFrame(LIGHT_SOURCE_MAP)
@@ -172,27 +171,27 @@ def from_raw_neurophotometrics_df(
                     name = '+'.join([channel_meta_map['name'][c] for c in combo])
                     color = '+'.join([channel_meta_map['color'][c] for c in combo])
                     wavelength = np.nan
-                    out_df.loc[states == state, ['name', 'color', 'wavelength']] = (
+                    df.loc[states == state, ['name', 'color', 'wavelength']] = (
                         name,
                         color,
                         wavelength,
                     )
         else:
             for cn in ['name', 'color', 'wavelength']:
-                out_df.loc[states == state, cn] = channel_meta_map.iloc[ic[0]][cn]
+                df.loc[states == state, cn] = channel_meta_map.iloc[ic[0]][cn]
 
     # drop first frame
     if drop_first:
-        out_df = out_df.iloc[1:].reset_index()
+        df = df.iloc[1:].reset_index()
 
-    return out_df
+    return df
 
 
-def from_raw_neurophotometrics_file(
+def from_raw_neurophotometrics_file_to_ibl_df(
     path: str | Path,
     drop_first=True,
     validate=True,
-) -> dict:
+) -> pd.DataFrame:
     """reads a raw neurophotometrics file (in .csv or .pqt format) as they are written by the neurophotometrics software
 
     Args:
@@ -222,24 +221,35 @@ def from_raw_neurophotometrics_file(
         raise NotImplementedError
 
     if validate:
-        raw_df = _validate_dataframe(raw_df)
+        raw_df = _validate_ibl_dataframe(raw_df)
 
-    df = from_raw_neurophotometrics_df(raw_df)
+    df = from_raw_neurophotometrics_ibl_df(raw_df)
 
     # drop first frame
     if drop_first:
         df = df.iloc[1:].reset_index()
 
+    return df
+
+
+def from_raw_neurophotometrics_file(
+    path: str | Path,
+    drop_first=True,
+    validate=True,
+) -> dict:
+    df = from_raw_neurophotometrics_file_to_ibl_df(
+        path, drop_first=drop_first, validate=validate
+    )
     data_columns = [col for col in df.columns if col.startswith('G')]
     read_config = dict(
         data_columns=data_columns,
         time_column='times',
         channel_column='name',
     )
-    return from_dataframe(df, **read_config)
+    return from_ibl_dataframe(df, **read_config)
 
 
-def _validate_dataframe(
+def _validate_ibl_dataframe(
     df: pd.DataFrame,
     data_columns=None,
 ) -> pd.DataFrame:

diff --git a/src/iblphotometry_tests/test_loaders.py b/src/iblphotometry_tests/test_loaders.py
@@ -1,6 +1,7 @@
-import iblphotometry.io as fio
+import iblphotometry.io as fpio
 import numpy as np
 from iblphotometry_tests.base_tests import PhotometryDataTestCase
+import pandas as pd
 
 
 class TestLoaders(PhotometryDataTestCase):
@@ -13,16 +14,30 @@ def test_from_array(self):
         times = np.linspace(0, 100, n_samples)
         data = np.random.randn(n_samples, n_channels)
         names = ['a', 'b', 'c']
-        fio.from_array(times, data, names)
+        fpio.from_array(times, data, names)
 
     # for neurophotometrics hardware
     def test_from_raw_neurophotometrics_file(self):
-        fio.from_raw_neurophotometrics_file(self.paths['raw_neurophotometrics_csv'])
+        # the single direct version
+        raw_dfs_a = fpio.from_raw_neurophotometrics_file(
+            self.paths['raw_neurophotometrics_csv']
+        )
+
+        # the chained version
+        df = fpio.from_raw_neurophotometrics_file_to_ibl_df(
+            self.paths['raw_neurophotometrics_csv']
+        )
+        raw_dfs_b = fpio.from_ibl_dataframe(df)
+
+        # check if they are the same
+        assert raw_dfs_a.keys() == raw_dfs_b.keys()
+        for key in raw_dfs_a.keys():
+            pd.testing.assert_frame_equal(raw_dfs_a[key], raw_dfs_b[key])
 
     # from pqt files as they are returned from ONE by .load_dataset()
-    def test_from_pqt(self):
-        fio.from_pqt(self.paths['photometry_signal_pqt'])
-        fio.from_pqt(
+    def test_from_ibl_pqt(self):
+        fpio.from_ibl_pqt(self.paths['photometry_signal_pqt'])
+        fpio.from_ibl_pqt(
             self.paths['photometry_signal_pqt'],
             self.paths['photometryROI_locations_pqt'],
         )
diff --git a/src/iblphotometry_tests/test_metrics.py b/src/iblphotometry_tests/test_metrics.py
@@ -10,7 +10,7 @@ class TestMetrics(PhotometryDataTestCase):
 
     def test_metrics(self):
         # get data
-        raw_dfs = fio.from_pqt(
+        raw_dfs = fio.from_ibl_pqt(
             self.paths['photometry_signal_pqt'],
             self.paths['photometryROI_locations_pqt'],
         )

diff --git a/src/iblphotometry_tests/test_pipelines.py b/src/iblphotometry_tests/test_pipelines.py
@@ -17,7 +17,7 @@ def test_single_band_pipeline(self):
 
         Path(__file__).parent.joinpath()
         # on real data
-        raw_dfs = fio.from_pqt(
+        raw_dfs = fio.from_ibl_pqt(
             self.paths['photometry_signal_pqt'],
             self.paths['photometryROI_locations_pqt'],
         )