old neurophotometrics validator added plus tests for old and new

int-brain-lab · Dec 6, 2024 · b379783 · b379783
1 parent 970de68
commit b379783
Show file tree

Hide file tree

Showing 2 changed files with 60 additions and 46 deletions.
diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py
@@ -12,8 +12,7 @@
 
 
 def from_raw_neurophotometrics_file_to_raw_df(
-    path: str | Path,
-    validate=True,
+    path: str | Path, validate=True, version='new'
 ) -> pd.DataFrame:
     path = Path(path) if isinstance(path, str) else path
     match path.suffix:
@@ -23,7 +22,7 @@ def from_raw_neurophotometrics_file_to_raw_df(
             raw_df = pd.read_parquet(path)
 
     if validate:
-        raw_df = validate_neurophotometrics_df(raw_df)
+        raw_df = validate_neurophotometrics_df(raw_df, version=version)
 
     return raw_df
 
@@ -78,11 +77,11 @@ def from_raw_neurophotometrics_df_to_ibl_df(
 
 
 def from_raw_neurophotometrics_file_to_ibl_df(
-    path: str | Path,
-    drop_first=True,
-    validate=True,
+    path: str | Path, drop_first=True, validate=True, version='new'
 ) -> pd.DataFrame:
-    raw_df = from_raw_neurophotometrics_file_to_raw_df(path, validate=validate)
+    raw_df = from_raw_neurophotometrics_file_to_raw_df(
+        path, validate=validate, version=version
+    )
     ibl_df = from_raw_neurophotometrics_df_to_ibl_df(raw_df, drop_first=drop_first)
 
     return ibl_df
@@ -194,13 +193,11 @@ def from_ibl_dataframes(ibl_df: pd.DataFrame, locations_df: pd.DataFrame):
 
 
 def from_raw_neurophotometrics_file(
-    path: str | Path,
-    drop_first=True,
-    validate=True,
+    path: str | Path, drop_first=True, validate=True, version='new'
 ) -> dict:
     # this one bypasses everything
     ibl_df = from_raw_neurophotometrics_file_to_ibl_df(
-        path, drop_first=drop_first, validate=validate
+        path, drop_first=drop_first, validate=validate, version=version
     )
     # data_columns = infer_data_columns(ibl_df) if data_columns is None else data_columns
     read_config = dict(
@@ -242,18 +239,30 @@ def validate_ibl_dataframe(df: pd.DataFrame) -> pd.DataFrame: ...
 def validate_neurophotometrics_df(
     df: pd.DataFrame,
     data_columns=None,
+    version='new',  # or 'old' - TODO to be replaced
 ) -> pd.DataFrame:
     data_columns = infer_data_columns(df) if data_columns is None else data_columns
 
-    schema_raw_data = pandera.DataFrameSchema(
-        columns=dict(
-            FrameCounter=pandera.Column(pandera.Int64),
-            SystemTimestamp=pandera.Column(pandera.Float64),
-            LedState=pandera.Column(pandera.Int16, coerce=True),
-            ComputerTimestamp=pandera.Column(pandera.Float64),
-            **{k: pandera.Column(pandera.Float64) for k in data_columns},
-        )
-    )
+    match version:
+        case 'new':  # kcenia, carolina
+            schema_raw_data = pandera.DataFrameSchema(
+                columns=dict(
+                    FrameCounter=pandera.Column(pandera.Int64),
+                    SystemTimestamp=pandera.Column(pandera.Float64),
+                    LedState=pandera.Column(pandera.Int16, coerce=True),
+                    ComputerTimestamp=pandera.Column(pandera.Float64),
+                    **{k: pandera.Column(pandera.Float64) for k in data_columns},
+                )
+            )
+        case 'old':  # alejandro
+            schema_raw_data = pandera.DataFrameSchema(
+                columns=dict(
+                    FrameCounter=pandera.Column(pandera.Int64),
+                    Timestamp=pandera.Column(pandera.Float64),
+                    LedState=pandera.Column(pandera.Int16, coerce=True),
+                    **{k: pandera.Column(pandera.Float64) for k in data_columns},
+                )
+            )
 
     return schema_raw_data.validate(df)
 

diff --git a/src/iblphotometry_tests/test_loaders.py b/src/iblphotometry_tests/test_loaders.py
@@ -17,32 +17,37 @@ class TestLoaders(PhotometryDataTestCase):
 
     # for neurophotometrics hardware
     def test_from_raw_neurophotometrics_file(self):
-        self.set_paths('carolina')
-        # 1) validation reading a raw photometrics file
-        # unfortunately I don't have the corresponding pqt files. TODO change this
-        # fpio.from_raw_neurophotometrics_file_to_raw_df(self.paths['raw_neurophotometrics_csv'])
-
-        # 2) read a pqt file, compare
-        raw_df = fpio.from_raw_neurophotometrics_file_to_raw_df(
-            self.paths['raw_neurophotometrics_pqt']
-        )
-        ibl_df_a = fpio.from_raw_neurophotometrics_file_to_ibl_df(
-            self.paths['raw_neurophotometrics_pqt']
-        )
-
-        ibl_df_b = fpio.from_raw_neurophotometrics_df_to_ibl_df(raw_df)
-        pd.testing.assert_frame_equal(ibl_df_a, ibl_df_b)
-
-        # 2) converting from ibl format to final
-        dfs_a = fpio.from_ibl_dataframe(ibl_df_a)
-        dfs_b = fpio.from_raw_neurophotometrics_file(
-            self.paths['raw_neurophotometrics_pqt']
-        )
-
-        # check if they are the same
-        assert dfs_a.keys() == dfs_b.keys()
-        for key in dfs_a.keys():
-            pd.testing.assert_frame_equal(dfs_a[key], dfs_b[key])
+        datasets = ['carolina', 'alejandro']
+
+        for dataset in datasets:
+            self.set_paths(dataset)
+            version = 'old' if dataset == 'alejandro' else 'new'
+
+            # 1) validation reading a raw photometrics file
+            # unfortunately I don't have the corresponding pqt files. TODO change this
+            # fpio.from_raw_neurophotometrics_file_to_raw_df(self.paths['raw_neurophotometrics_csv'])
+
+            # 2) read a pqt file, compare
+            raw_df = fpio.from_raw_neurophotometrics_file_to_raw_df(
+                self.paths['raw_neurophotometrics_pqt'], version=version
+            )
+            ibl_df_a = fpio.from_raw_neurophotometrics_file_to_ibl_df(
+                self.paths['raw_neurophotometrics_pqt'], version=version
+            )
+
+            ibl_df_b = fpio.from_raw_neurophotometrics_df_to_ibl_df(raw_df)
+            pd.testing.assert_frame_equal(ibl_df_a, ibl_df_b)
+
+            # 3) converting from ibl format to final
+            dfs_a = fpio.from_ibl_dataframe(ibl_df_a)
+            dfs_b = fpio.from_raw_neurophotometrics_file(
+                self.paths['raw_neurophotometrics_pqt'], version=version
+            )
+
+            # check if they are the same
+            assert dfs_a.keys() == dfs_b.keys()
+            for key in dfs_a.keys():
+                pd.testing.assert_frame_equal(dfs_a[key], dfs_b[key])
 
     # from pqt files as they are returned from ONE by .load_dataset()
     # def test_from_ibl_pqt(self):