Merge pull request #42 from int-brain-lab/neurophotometrics_dev

Neurophotometrics dev
int-brain-lab · Dec 10, 2024 · 73b5f37 · 73b5f37
2 parents 4990459 + eeb15ea
commit 73b5f37
Show file tree

Hide file tree

Showing 2 changed files with 85 additions and 15 deletions.
diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py
@@ -14,6 +14,16 @@
 def from_raw_neurophotometrics_file_to_raw_df(
     path: str | Path, validate=True, version='new'
 ) -> pd.DataFrame:
+    """reads in a file as generated by the neurophotometrics FP3002 (both new and old versions) with validation
+
+    Args:
+        path (str | Path): path to the file, can be in either .csv or .pqt format
+        validate (bool, optional): If True, validates the file. Defaults to True.
+        version (str, optional): 'new' or 'old' version of the neurophotometrics file format. Defaults to 'new'.
+
+    Returns:
+        pd.DataFrame: the data as a raw dataframe format
+    """
     path = Path(path) if isinstance(path, str) else path
     match path.suffix:
         case '.csv':
@@ -30,6 +40,17 @@ def from_raw_neurophotometrics_file_to_raw_df(
 def from_raw_neurophotometrics_df_to_ibl_df(
     raw_df: pd.DataFrame, rois=None, drop_first=True
 ) -> pd.DataFrame:
+    """reads in a dataframe with the raw photometry data as generated by the neurophotometrics FP3002 into the ibl photometry dataformat.
+
+
+    Args:
+        raw_df (pd.DataFrame): as returned by `from_raw_neurophotometrics_file_to_raw_df`
+        rois (list[str], optional): names of the rois as selected by the user in the acquisition UI. If None, the names are inferred from the data. Defaults to None.
+        drop_first (bool, optional): Drop the first frame, which has all LEDs on by default. Defaults to True.
+
+    Returns:
+        pd.DataFrame: the data in the ibl photometry data format
+    """
     if rois is None:
         rois = infer_data_columns(raw_df)
 
@@ -79,6 +100,17 @@ def from_raw_neurophotometrics_df_to_ibl_df(
 def from_raw_neurophotometrics_file_to_ibl_df(
     path: str | Path, drop_first=True, validate=True, version='new'
 ) -> pd.DataFrame:
+    """convenience function that chains `from_raw_neurophotometrics_file_to_raw_df` and `from_raw_neurophotometrics_df_to_ibl_df`. See docstrings
+
+    Args:
+        path (str | Path): path to the file, can be in either .csv or .pqt format
+        drop_first (bool, optional): Drop the first frame, which has all LEDs on by default. Defaults to True.
+        validate (bool, optional): If True, validates the file. Defaults to True.
+        version (str, optional): 'new' or 'old' version of the neurophotometrics file format. Defaults to 'new'.
+
+    Returns:
+        pd.DataFrame: the data in the ibl photometry data format
+    """
     raw_df = from_raw_neurophotometrics_file_to_raw_df(
         path, validate=validate, version=version
     )
@@ -88,10 +120,10 @@ def from_raw_neurophotometrics_file_to_ibl_df(
 
 
 def from_ibl_pqt_to_ibl_df(path: str | Path, validate=False):
+    ibl_df = pd.read_parquet(path)
     if validate is True:
-        # TODO
-        raise NotImplementedError
-    return pd.read_parquet(path)
+        ibl_df = validate_ibl_dataframe(ibl_df)
+    return ibl_df
 
 
 def from_ibl_dataframe(
@@ -101,13 +133,14 @@ def from_ibl_dataframe(
     channel_column: str = 'name',
     channel_names: list[str] | None = None,
     rename: dict | None = None,
+    validate: bool = True,
 ) -> dict:
     """main function to convert to analysis ready format
 
 
     Args:
         ibl_df (pd.DataFrame): the dataframe, as stored in the photometry.signal.pqt
-        data_columns (list[str], optional): The names of the columns in the dataframe that contain the signals of different fibers. By default, they are named RegionXX. If None is provided, All columns that start with `Region` are treated as data columns. Defaults to None.
+        data_columns (list[str], optional): The names of the columns in the dataframe that contain the signals of different fibers. By default, they are named RegionXX. If None is provided, all columns that start with `Region` or `G` are treated as data columns. Defaults to None.
         time_column (str, optional): The name of the column that contains the timestamps. If None is provided, it is assumed that `time` is in the name. Defaults to None.
         channel_column (str, optional): The name of the column that specifies the temporally multiplexed acquisition channels. Defaults to 'name'.
         channel_names (list[str], optional): The names of the acquisition channel / frequency bands that are acquired. Defaults to None.
@@ -120,6 +153,9 @@ def from_ibl_dataframe(
     # data_columns is a list of str that specifies the names of the column that hold the actual data, like 'RegionXX'
     # channel_column is the column that specifies the temporally multiplexed acquisition channels
 
+    if validate:
+        ibl_df = validate_ibl_dataframe(ibl_df)
+
     data_columns = infer_data_columns(ibl_df) if data_columns is None else data_columns
 
     # infer name of time column if not provided
@@ -152,11 +188,19 @@ def from_ibl_dataframe(
 def from_ibl_pqt(
     signal_pqt_path: str | Path,
     locations_pqt_path: Optional[str | Path] = None,
+    validate=True,
 ):
-    # read from a single pqt
-    # if both are provided, do both
+    """reads in photometry data stored in the ibl format as a .pqt file. If provided, uses the metadata stored in the locations.pqt file as well.
+
+    Args:
+        signal_pqt_path (str | Path): path to the .pqt file containing the photometry data stored in the ibl format
+        locations_pqt_path (Optional[str | Path], optional): path to the locations.pqt file containing the metadata. Defaults to None.
 
-    ibl_df = pd.read_parquet(signal_pqt_path)
+    Returns:
+        _type_: _description_
+    """
+
+    ibl_df = from_ibl_pqt_to_ibl_df(signal_pqt_path, validate=validate)
     if locations_pqt_path is not None:
         locations_df = pd.read_parquet(locations_pqt_path)
         return from_ibl_dataframes(ibl_df, locations_df)
@@ -195,7 +239,17 @@ def from_ibl_dataframes(ibl_df: pd.DataFrame, locations_df: pd.DataFrame):
 def from_raw_neurophotometrics_file(
     path: str | Path, drop_first=True, validate=True, version='new'
 ) -> dict:
-    # this one bypasses everything
+    """reads in a file generated by the neurophotometrics FP3002 into the analysis ready format
+
+    Args:
+        path (str | Path): path to the file, can be in either .csv or .pqt format
+        drop_first (bool, optional): Drop the first frame, which has all LEDs on by default. Defaults to True.
+        validate (bool, optional): If True, validates the file. Defaults to True.
+        version (str, optional): 'new' or 'old' version of the neurophotometrics file format. Defaults to 'new'.
+
+    Returns:
+        dict: the data in the analysis ready format
+    """
     ibl_df = from_raw_neurophotometrics_file_to_ibl_df(
         path, drop_first=drop_first, validate=validate, version=version
     )
@@ -233,7 +287,19 @@ def read_digital_inputs_csv(path: str | Path, validate=True) -> pd.DataFrame:
 """
 
 
-def validate_ibl_dataframe(df: pd.DataFrame) -> pd.DataFrame: ...
+def validate_ibl_dataframe(ibl_df: pd.DataFrame, data_columns=None) -> pd.DataFrame:
+    data_columns = infer_data_columns(ibl_df) if data_columns is None else data_columns
+    schema_ibl_data = pandera.DataFrameSchema(
+        columns=dict(
+            times=pandera.Column(pandera.Float64),
+            # valid=pandera.Column(pandera.Bool), # optionally present
+            wavelength=pandera.Column(pandera.Float64, nullable=True),
+            name=pandera.Column(pandera.String),
+            color=pandera.Column(pandera.String),
+            **{k: pandera.Column(pandera.Float64) for k in data_columns},
+        )
+    )
+    return schema_ibl_data.validate(ibl_df)
 
 
 def validate_neurophotometrics_df(

diff --git a/src/iblphotometry_tests/test_loaders.py b/src/iblphotometry_tests/test_loaders.py
@@ -50,9 +50,13 @@ def test_from_raw_neurophotometrics_file(self):
                 pd.testing.assert_frame_equal(dfs_a[key], dfs_b[key])
 
     # from pqt files as they are returned from ONE by .load_dataset()
-    # def test_from_ibl_pqt(self):
-    #     fpio.from_ibl_pqt(self.paths['photometry_signal_pqt'])
-    #     fpio.from_ibl_pqt(
-    #         self.paths['photometry_signal_pqt'],
-    #         self.paths['photometryROI_locations_pqt'],
-    #     )
+    def test_from_ibl_pqt(self):
+        datasets = ['carolina', 'alejandro']
+
+        for dataset in datasets:
+            self.set_paths(dataset)
+            fpio.from_ibl_pqt(self.paths['photometry_signal_pqt'])
+            fpio.from_ibl_pqt(
+                self.paths['photometry_signal_pqt'],
+                self.paths['photometryROI_locations_pqt'],
+            )