Skip to content

Commit

Permalink
Merge pull request #42 from int-brain-lab/neurophotometrics_dev
Browse files Browse the repository at this point in the history
Neurophotometrics dev
  • Loading branch information
grg2rsr authored Dec 10, 2024
2 parents 4990459 + eeb15ea commit 73b5f37
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 15 deletions.
84 changes: 75 additions & 9 deletions src/iblphotometry/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@
def from_raw_neurophotometrics_file_to_raw_df(
path: str | Path, validate=True, version='new'
) -> pd.DataFrame:
"""reads in a file as generated by the neurophotometrics FP3002 (both new and old versions) with validation
Args:
path (str | Path): path to the file, can be in either .csv or .pqt format
validate (bool, optional): If True, validates the file. Defaults to True.
version (str, optional): 'new' or 'old' version of the neurophotometrics file format. Defaults to 'new'.
Returns:
pd.DataFrame: the data as a raw dataframe format
"""
path = Path(path) if isinstance(path, str) else path
match path.suffix:
case '.csv':
Expand All @@ -30,6 +40,17 @@ def from_raw_neurophotometrics_file_to_raw_df(
def from_raw_neurophotometrics_df_to_ibl_df(
raw_df: pd.DataFrame, rois=None, drop_first=True
) -> pd.DataFrame:
"""reads in a dataframe with the raw photometry data as generated by the neurophotometrics FP3002 into the ibl photometry dataformat.
Args:
raw_df (pd.DataFrame): as returned by `from_raw_neurophotometrics_file_to_raw_df`
rois (_type_, optional): names of the rois as selected by the user in the acquisition UI. If None, the names are inferred from the data. Defaults to None.
drop_first (bool, optional): Drop the first frame, which has all LEDs on by default. Defaults to True.
Returns:
pd.DataFrame: the data in the ibl photometry data format
"""
if rois is None:
rois = infer_data_columns(raw_df)

Expand Down Expand Up @@ -79,6 +100,17 @@ def from_raw_neurophotometrics_df_to_ibl_df(
def from_raw_neurophotometrics_file_to_ibl_df(
path: str | Path, drop_first=True, validate=True, version='new'
) -> pd.DataFrame:
"""convenience function that chains `from_raw_neurophotometrics_file_to_raw_df` and `from_raw_neurophotometrics_df_to_ibl_df`. See docstrings
Args:
path (str | Path): path to the file, can be in either .csv or .pqt format
drop_first (bool, optional): Drop the first frame, which has all LEDs on by default. Defaults to True.
validate (bool, optional): If True, validates the file. Defaults to True.
version (str, optional): 'new' or 'old' version of the neurophotometrics file format. Defaults to 'new'.
Returns:
pd.DataFrame: the data in the ibl photometry data format
"""
raw_df = from_raw_neurophotometrics_file_to_raw_df(
path, validate=validate, version=version
)
Expand All @@ -88,10 +120,10 @@ def from_raw_neurophotometrics_file_to_ibl_df(


def from_ibl_pqt_to_ibl_df(path: str | Path, validate=False) -> pd.DataFrame:
    """reads in a .pqt file holding photometry data in the ibl format and returns it as a dataframe

    Args:
        path (str | Path): path to the .pqt file
        validate (bool, optional): If True, the dataframe is passed through `validate_ibl_dataframe` before it is returned. Defaults to False.

    Returns:
        pd.DataFrame: the dataframe read from `path`, or the return value of `validate_ibl_dataframe` if `validate` is set
    """
    # read the file exactly once; validation (if requested) operates on this frame
    ibl_df = pd.read_parquet(path)
    if validate:
        ibl_df = validate_ibl_dataframe(ibl_df)
    return ibl_df


def from_ibl_dataframe(
Expand All @@ -101,13 +133,14 @@ def from_ibl_dataframe(
channel_column: str = 'name',
channel_names: list[str] | None = None,
rename: dict | None = None,
validate: bool = True,
) -> dict:
"""main function to convert to analysis ready format
Args:
ibl_df (pd.DataFrame): the dataframe, as stored in the photometry.signal.pqt
data_columns (list[str], optional): The names of the columns in the dataframe that contain the signals of different fibers. By default, they are named RegionXX. If None is provided, All columns that start with `Region` are treated as data columns. Defaults to None.
data_columns (list[str], optional): The names of the columns in the dataframe that contain the signals of different fibers. By default, they are named RegionXX. If None is provided, All columns that start with `Region` or `G` are treated as data columns. Defaults to None.
time_column (str, optional): The name of the column that contains the timestamps. If None is provided, it is assumed that `time` is in the name. Defaults to None.
channel_column (str, optional): The name of the column that specifies the temporally multiplexed acquisition channels. Defaults to 'name'.
channel_names (list[str], optional): The names of the acquisition channel / frequency bands that are acquired. Defaults to None.
Expand All @@ -120,6 +153,9 @@ def from_ibl_dataframe(
# data_columns is a list of str that specifies the names of the column that hold the actual data, like 'RegionXX'
# channel_column is the column that specifies the temporally multiplexed acquisition channels

if validate:
ibl_df = validate_ibl_dataframe(ibl_df)

data_columns = infer_data_columns(ibl_df) if data_columns is None else data_columns

# infer name of time column if not provided
Expand Down Expand Up @@ -152,11 +188,19 @@ def from_ibl_dataframe(
def from_ibl_pqt(
signal_pqt_path: str | Path,
locations_pqt_path: Optional[str | Path] = None,
validate=True,
):
# read from a single pqt
# if both are provided, do both
"""reads in photometry data stored in the ibl format as a .pqt file. If provided, uses the metadata stored in the locations.pqt file as well.
Args:
signal_pqt_path (str | Path): _description_
locations_pqt_path (Optional[str | Path], optional): _description_. Defaults to None.
ibl_df = pd.read_parquet(signal_pqt_path)
Returns:
_type_: _description_
"""

ibl_df = from_ibl_pqt_to_ibl_df(signal_pqt_path, validate=validate)
if locations_pqt_path is not None:
locations_df = pd.read_parquet(locations_pqt_path)
return from_ibl_dataframes(ibl_df, locations_df)
Expand Down Expand Up @@ -195,7 +239,17 @@ def from_ibl_dataframes(ibl_df: pd.DataFrame, locations_df: pd.DataFrame):
def from_raw_neurophotometrics_file(
path: str | Path, drop_first=True, validate=True, version='new'
) -> dict:
# this one bypasses everything
"""reads in a file generated by the neurophotometrics FP3002 into the analysis ready format
Args:
path (str | Path): path to the file, can be in either .csv or .pqt format
drop_first (bool, optional): Drop the first frame, which has all LEDs on by default. Defaults to True.
validate (bool, optional): If True, validates the file. Defaults to True.
version (str, optional): 'new' or 'old' version of the neurophotometrics file format. Defaults to 'new'.
Returns:
dict: the data in the analysis ready format
"""
ibl_df = from_raw_neurophotometrics_file_to_ibl_df(
path, drop_first=drop_first, validate=validate, version=version
)
Expand Down Expand Up @@ -233,7 +287,19 @@ def read_digital_inputs_csv(path: str | Path, validate=True) -> pd.DataFrame:
"""


def validate_ibl_dataframe(df: pd.DataFrame) -> pd.DataFrame: ...
def validate_ibl_dataframe(ibl_df: pd.DataFrame, data_columns=None) -> pd.DataFrame:
    """validates a dataframe against the schema of the ibl photometry data format

    Args:
        ibl_df (pd.DataFrame): the dataframe to validate
        data_columns (optional): names of the columns that hold the signals. If None, they are inferred from the dataframe via `infer_data_columns`. Defaults to None.

    Returns:
        pd.DataFrame: the return value of the pandera schema validation of `ibl_df`
    """
    if data_columns is None:
        data_columns = infer_data_columns(ibl_df)

    # columns that are expected regardless of which fibers were recorded
    metadata_columns = dict(
        times=pandera.Column(pandera.Float64),
        # a boolean `valid` column is optionally present and therefore not part of the schema
        wavelength=pandera.Column(pandera.Float64, nullable=True),
        name=pandera.Column(pandera.String),
        color=pandera.Column(pandera.String),
    )
    # one float column per signal column
    signal_columns = {
        column: pandera.Column(pandera.Float64) for column in data_columns
    }

    schema = pandera.DataFrameSchema(
        columns=dict(**metadata_columns, **signal_columns)
    )
    return schema.validate(ibl_df)


def validate_neurophotometrics_df(
Expand Down
16 changes: 10 additions & 6 deletions src/iblphotometry_tests/test_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,13 @@ def test_from_raw_neurophotometrics_file(self):
pd.testing.assert_frame_equal(dfs_a[key], dfs_b[key])

# from pqt files as they are returned from ONE by .load_dataset()
# def test_from_ibl_pqt(self):
# fpio.from_ibl_pqt(self.paths['photometry_signal_pqt'])
# fpio.from_ibl_pqt(
# self.paths['photometry_signal_pqt'],
# self.paths['photometryROI_locations_pqt'],
# )
def test_from_ibl_pqt(self):
    """smoke test: load the pqt files of every dataset, first the signal file alone and then together with the locations file"""
    for dataset_name in ['carolina', 'alejandro']:
        self.set_paths(dataset_name)

        # signal file on its own
        fpio.from_ibl_pqt(self.paths['photometry_signal_pqt'])

        # signal file together with the ROI locations file
        signal_path = self.paths['photometry_signal_pqt']
        fpio.from_ibl_pqt(signal_path, self.paths['photometryROI_locations_pqt'])

0 comments on commit 73b5f37

Please sign in to comment.