From 142ce3e3782ce213bfe9823c9f921842bda04722 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Mon, 4 Nov 2024 12:12:12 -0600 Subject: [PATCH 1/2] update kim conversion script --- pyproject.toml | 6 +- .../__init__.py | 0 .../utils.py | 0 .../__init__.py | 0 src/dickerson_lab_to_nwb/paye_conversion.py | 2 +- src/kim_lab/kim_conversion.py | 131 ------------ src/kim_lab_to_nwb/__init__.py | 0 src/kim_lab_to_nwb/conversion_notes.md | 13 ++ src/kim_lab_to_nwb/kim_conversion.py | 199 ++++++++++++++++++ src/{kim_lab => kim_lab_to_nwb}/ophys.py | 2 +- src/suver_lab_to_nwb/__init__.py | 0 .../suver_conversion.py | 0 12 files changed, 218 insertions(+), 135 deletions(-) rename src/{ => cohen_u01_nwb_conversion_utils}/__init__.py (100%) rename src/{ => cohen_u01_nwb_conversion_utils}/utils.py (100%) rename src/{suver_lab => dickerson_lab_to_nwb}/__init__.py (100%) delete mode 100644 src/kim_lab/kim_conversion.py create mode 100644 src/kim_lab_to_nwb/__init__.py create mode 100644 src/kim_lab_to_nwb/conversion_notes.md create mode 100644 src/kim_lab_to_nwb/kim_conversion.py rename src/{kim_lab => kim_lab_to_nwb}/ophys.py (98%) create mode 100644 src/suver_lab_to_nwb/__init__.py rename src/{suver_lab => suver_lab_to_nwb}/suver_conversion.py (100%) diff --git a/pyproject.toml b/pyproject.toml index a16a613..937ff3b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,14 +17,16 @@ classifiers = [ dependencies = [ "neuroconv[video]", "nwbinspector", - #"roiextractors", + "roiextractors", + "tifffile", + "pymatreader", ] [project.urls] Repository="https://github.com/catalystneuro/cohen-lab-to-nwb" [build-system] -requires = ["setuptools>=58.0.0", "wheel"] +requires = ["setuptools>=64.0.0"] build-backend = "setuptools.build_meta" [tool.setuptools] diff --git a/src/__init__.py b/src/cohen_u01_nwb_conversion_utils/__init__.py similarity index 100% rename from src/__init__.py rename to src/cohen_u01_nwb_conversion_utils/__init__.py diff --git a/src/utils.py b/src/cohen_u01_nwb_conversion_utils/utils.py similarity index 100% rename from src/utils.py rename to src/cohen_u01_nwb_conversion_utils/utils.py diff --git a/src/suver_lab/__init__.py b/src/dickerson_lab_to_nwb/__init__.py similarity index 100% rename from src/suver_lab/__init__.py rename to src/dickerson_lab_to_nwb/__init__.py diff --git a/src/dickerson_lab_to_nwb/paye_conversion.py b/src/dickerson_lab_to_nwb/paye_conversion.py index 4ac1da4..208ffd1 100644 --- a/src/dickerson_lab_to_nwb/paye_conversion.py +++ b/src/dickerson_lab_to_nwb/paye_conversion.py @@ -10,7 +10,7 @@ from roiextractors.extraction_tools import PathType from roiextractors.imagingextractor import ImagingExtractor -from ..utils import match_paths +from ..cohen_u01_utils.utils import match_paths def extract_experiment_details(xml_file_path: str): diff --git a/src/kim_lab/kim_conversion.py b/src/kim_lab/kim_conversion.py deleted file mode 100644 index 8ac1364..0000000 --- a/src/kim_lab/kim_conversion.py +++ /dev/null @@ -1,131 +0,0 @@ -import os -from datetime import datetime - -import h5py -import numpy as np -from neuroconv.tools.nwb_helpers import get_default_backend_configuration, configure_backend -from neuroconv.datainterfaces import VideoInterface -from pynwb import NWBFile, TimeSeries, NWBHDF5IO -from pynwb.file import Subject -from scipy.io import loadmat -from tqdm import tqdm -import uuid - -from src.kim_lab.ophys import MultiTiffMultiPageTiffImagingInterface -from src.utils import detect_threshold_crossings - -data_dir = "/Users/bendichter/Downloads/Kim Lab/input" -output_dir = "/Users/bendichter/Downloads/Kim Lab/nwb" - -# /Users/bendichter/Downloads/Kim Lab/20240108b_00003/raw data/data_20240108b_00003.mat - -# data(1,:) is time -# data(2,:) is left wingbeat -# data(3,:) is left-right wingbeat -# data(4,:) is x-position of the visual pattern -# data(5,:) is y-position of the visual pattern -# data(6,:) is 2-photon frame synchronization signal (1 pulse corresponds to 1 frame) -# data(7,:) is behavior camera signal (1 pulse corresponds to 1 frame) -# data(8,:) indicates the start of a stimulus (it is empty in this example) - - -for session_path in tqdm(os.listdir(data_dir)): - if os.path.isdir(os.path.join(data_dir, session_path)): - session_id = session_path - session_dir = os.path.join(data_dir, session_path) - session_data_dir = os.path.join(session_dir, "raw data") - session_data_fpath = os.path.join(session_data_dir, f"data_{session_id}.mat") - if not os.path.exists(session_data_fpath): - continue - - # Load data - mat_data = loadmat(session_data_fpath) - data = mat_data["data"] - protocol = mat_data["protocol"] - - with h5py.File(os.path.join(session_data_dir, "exp_info.mat"), "r") as f: - age = f["age"][0,0] - genotype = ''.join([chr(x) for x in f["cross"][:].ravel()]) - - print(f"Processing {session_id=} ({age=}, {genotype=})") - - session_start_time = datetime.strptime(session_id[:8], '%Y%m%d').date() - print(session_start_time) - - time = data[0] - left_wingbeat = data[1] - left_right_wingbeat = data[2] - x_position = data[3] - y_position = data[4] - two_photon_frame_sync = data[5] - behavior_camera_sync = data[6] - stimulus_start = data[7] - - # Create NWB file - nwbfile = NWBFile( - session_description=f"protocol: {protocol}", - identifier=str(uuid.uuid4()), - session_start_time=session_start_time, - session_id=session_id, - ) - - ophys_interface = MultiTiffMultiPageTiffImagingInterface( - session_data_dir, - pattern=session_id + "_{frame:05d}.tif", - sampling_frequency=30.0, - verbose=True - ) - - aligned_timestamps = time[detect_threshold_crossings(two_photon_frame_sync, 0.5)] - aligned_timestamps = aligned_timestamps[:ophys_interface.imaging_extractor.get_num_frames()] - ophys_interface.set_aligned_timestamps(aligned_timestamps=aligned_timestamps) - - ophys_interface.add_to_nwbfile(nwbfile, metadata=dict()) - - video_interface = VideoInterface( - file_paths=["/Users/bendichter/Downloads/Kim Lab/input/20240108b_00003/raw data/20240108b_00003.avi"], - ) - - video_timestamps = time[detect_threshold_crossings(behavior_camera_sync, 0.5)] - video_timestamps = video_timestamps[:video_interface.get_num_frames()[0]] - video_interface.set_aligned_timestamps([video_timestamps]) - video_interface.add_to_nwbfile(nwbfile, metadata=dict()) - - nwbfile.subject = Subject( - subject_id=session_id, - genotype=genotype, - age=f"P{age}D", - ) - - # Add data - timeseries_wingbeat = TimeSeries( - name="wingbeat", - data=left_wingbeat, - unit="n.a.", - timestamps=time, - description="wingbeat", - ) - nwbfile.add_acquisition(timeseries_wingbeat) - - timeseries_left_right_wingbeat = TimeSeries( - name="left_right_wingbeat", - data=left_right_wingbeat, - unit="n.a.", - timestamps=timeseries_wingbeat, - description="left-right wingbeat", - ) - nwbfile.add_acquisition(timeseries_left_right_wingbeat) - - timeseries_x_position = TimeSeries( - name="stimulus_position", - data=np.c_[x_position, y_position], - unit="n.a.", - timestamps=timeseries_wingbeat, - description="position of the visual pattern", - ) - - backend_configuration = get_default_backend_configuration(nwbfile, backend="hdf5") - configure_backend(nwbfile=nwbfile, backend_configuration=backend_configuration) - - with NWBHDF5IO(os.path.join(output_dir, session_id + ".nwb"), mode="w") as io: - io.write(nwbfile) diff --git a/src/kim_lab_to_nwb/__init__.py b/src/kim_lab_to_nwb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/kim_lab_to_nwb/conversion_notes.md b/src/kim_lab_to_nwb/conversion_notes.md new file mode 100644 index 0000000..f80b523 --- /dev/null +++ b/src/kim_lab_to_nwb/conversion_notes.md @@ -0,0 +1,13 @@ + + +## Structure of the matlab file + + +# data(1,:) is time +# data(2,:) is left wingbeat +# data(3,:) is left-right wingbeat +# data(4,:) is x-position of the visual pattern +# data(5,:) is y-position of the visual pattern +# data(6,:) is 2-photon frame synchronization signal (1 pulse corresponds to 1 frame) +# data(7,:) is behavior camera signal (1 pulse corresponds to 1 frame) +# data(8,:) indicates the start of a stimulus (it is empty in this example) \ No newline at end of file diff --git a/src/kim_lab_to_nwb/kim_conversion.py b/src/kim_lab_to_nwb/kim_conversion.py new file mode 100644 index 0000000..35c2c03 --- /dev/null +++ b/src/kim_lab_to_nwb/kim_conversion.py @@ -0,0 +1,199 @@ +import os +from datetime import datetime +from pathlib import Path +import uuid + +import h5py +import numpy as np +from neuroconv.tools.nwb_helpers import get_default_backend_configuration, configure_backend +from neuroconv.datainterfaces import VideoInterface +from pynwb import NWBFile, TimeSeries, NWBHDF5IO +from pynwb.file import Subject +from tqdm import tqdm +from pymatreader import read_mat + +from kim_lab_to_nwb.ophys import MultiTiffMultiPageTiffImagingInterface +from cohen_u01_nwb_conversion_utils.utils import detect_threshold_crossings + + +def convert_session_to_nwb( + matlab_data_file_path: str | Path, + video_file_path: str | Path, + experiment_info_file_path: str | Path, + tiff_folder_path: str | Path, + output_dir: str | Path, + verbose: bool = False, +) -> str: + """ + Convert a single experimental session from Kim Lab data to NWB format. + + Parameters + ---------- + matlab_data_file_path : str or Path + Path to the MATLAB data file + video_file_path : str or Path + Path to the video file + experiment_info_file_path : str or Path + Path to the experiment info file + tiff_folder_path : str or Path + Path to the folder containing TIFF files + output_dir : str or Path + Directory where the NWB file will be saved + verbose : bool, optional + Whether to print progress information, by default False + + Returns + ------- + str + Path to the created NWB file + + Raises + ------ + FileNotFoundError + If any of the required input files are not found + """ + # Convert all paths to Path objects + matlab_data_file_path = Path(matlab_data_file_path) + video_file_path = Path(video_file_path) + experiment_info_file_path = Path(experiment_info_file_path) + tiff_folder_path = Path(tiff_folder_path) + output_dir = Path(output_dir) + + # Validate input files exist + if not matlab_data_file_path.is_file(): + raise FileNotFoundError(f"Matlab data file not found at {matlab_data_file_path}") + if not video_file_path.is_file(): + raise FileNotFoundError(f"Video file not found at {video_file_path}") + if not experiment_info_file_path.is_file(): + raise FileNotFoundError(f"Experiment info file not found at {experiment_info_file_path}") + if not tiff_folder_path.exists(): + raise FileNotFoundError(f"Tiff folder not found at {tiff_folder_path}") + + # Load data + mat_data = read_mat(matlab_data_file_path) + data = mat_data["data"] + protocol = mat_data["protocol"] + + experiment_info = read_mat(experiment_info_file_path) + age = experiment_info["age"] + genotype = experiment_info["cross"] + + # Extract session ID from matlab file path + session_id = matlab_data_file_path.stem.split('data_')[1] + if verbose: + print(f"Processing {session_id=} ({age=}, {genotype=})") + + session_start_time = datetime.strptime(session_id[:8], '%Y%m%d') + if verbose: + print(f"Session start time: {session_start_time}") + + # Unpack data + time = data[0] + left_wingbeat = data[1] + left_right_wingbeat = data[2] + x_position = data[3] + y_position = data[4] + two_photon_frame_sync = data[5] + behavior_camera_sync = data[6] + stimulus_start = data[7] + + # Create NWB file + nwbfile = NWBFile( + session_description=f"protocol: {protocol}", + identifier=str(uuid.uuid4()), + session_start_time=session_start_time, + session_id=session_id, + ) + + # Set up imaging interface + ophys_interface = MultiTiffMultiPageTiffImagingInterface( + tiff_folder_path, + pattern=session_id + "_{frame:05d}.tif", + sampling_frequency=30.0, + verbose=verbose + ) + + aligned_timestamps = time[detect_threshold_crossings(two_photon_frame_sync, 0.5)] + aligned_timestamps = aligned_timestamps[:ophys_interface.imaging_extractor.get_num_frames()] + ophys_interface.set_aligned_timestamps(aligned_timestamps=aligned_timestamps) + ophys_interface.add_to_nwbfile(nwbfile, metadata=dict()) + + # Set up video interface + video_interface = VideoInterface( + file_paths=[video_file_path], + ) + + video_timestamps = time[detect_threshold_crossings(behavior_camera_sync, 0.5)] + video_interface.set_aligned_timestamps([video_timestamps]) + video_interface.add_to_nwbfile(nwbfile, metadata=dict()) + + # Add subject information + nwbfile.subject = Subject( + subject_id=session_id, + genotype=genotype, + age=f"P{age}D", + ) + + # Add timeseries data + timeseries_wingbeat = TimeSeries( + name="wingbeat", + data=left_wingbeat, + unit="n.a.", + timestamps=time, + description="wingbeat", + ) + nwbfile.add_acquisition(timeseries_wingbeat) + + timeseries_left_right_wingbeat = TimeSeries( + name="left_right_wingbeat", + data=left_right_wingbeat, + unit="n.a.", + timestamps=timeseries_wingbeat, + description="left-right wingbeat", + ) + nwbfile.add_acquisition(timeseries_left_right_wingbeat) + + timeseries_x_position = TimeSeries( + name="stimulus_position", + data=np.c_[x_position, y_position], + unit="n.a.", + timestamps=timeseries_wingbeat, + description="position of the visual pattern", + ) + nwbfile.add_acquisition(timeseries_x_position) + + # Configure and save the NWB file + backend_configuration = get_default_backend_configuration(nwbfile, backend="hdf5") + configure_backend(nwbfile=nwbfile, backend_configuration=backend_configuration) + + nwbfile_path = output_dir / f"{session_id}.nwb" + with NWBHDF5IO(nwbfile_path, mode="w") as io: + io.write(nwbfile) + + if verbose: + print(f"Created NWB file: {nwbfile_path}") + + return nwbfile_path + + +if __name__ == "__main__": + # Example usage with the original paths + data_folder_path = Path("/Users/heberto/project_data/Sample data-selected/Kim Lab") + + # Define input paths + matlab_data_file_path = data_folder_path / "raw data" / "data_20240108b_00003.mat" + video_file_path = data_folder_path / "raw data" / "20240108b_00003.avi" + experiment_info_file_path = data_folder_path / "raw data" / "exp_info.mat" + tiff_folder_path = data_folder_path / "raw data" + output_dir = data_folder_path / "nwb" + + output_dir.mkdir(exist_ok=True, parents=True) + + output_file = convert_session_to_nwb( + matlab_data_file_path=matlab_data_file_path, + video_file_path=video_file_path, + experiment_info_file_path=experiment_info_file_path, + tiff_folder_path=tiff_folder_path, + output_dir=output_dir, + verbose=True # Enable verbose output for demonstration + ) diff --git a/src/kim_lab/ophys.py b/src/kim_lab_to_nwb/ophys.py similarity index 98% rename from src/kim_lab/ophys.py rename to src/kim_lab_to_nwb/ophys.py index 128786d..b98a840 100644 --- a/src/kim_lab/ophys.py +++ b/src/kim_lab_to_nwb/ophys.py @@ -8,7 +8,7 @@ from tifffile import TiffFile from tqdm import tqdm -from ..utils import match_paths +from cohen_u01_nwb_conversion_utils.utils import match_paths class MultiTiffMultiPageTiffImagingExtractor(ImagingExtractor): diff --git a/src/suver_lab_to_nwb/__init__.py b/src/suver_lab_to_nwb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/suver_lab/suver_conversion.py b/src/suver_lab_to_nwb/suver_conversion.py similarity index 100% rename from src/suver_lab/suver_conversion.py rename to src/suver_lab_to_nwb/suver_conversion.py From c4bcdde270ecf212b879dcb0614114acb0ca6f9e Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Mon, 4 Nov 2024 12:13:49 -0600 Subject: [PATCH 2/2] import and typing --- src/kim_lab_to_nwb/kim_conversion.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/kim_lab_to_nwb/kim_conversion.py b/src/kim_lab_to_nwb/kim_conversion.py index 35c2c03..3dc7545 100644 --- a/src/kim_lab_to_nwb/kim_conversion.py +++ b/src/kim_lab_to_nwb/kim_conversion.py @@ -1,15 +1,12 @@ -import os from datetime import datetime from pathlib import Path import uuid -import h5py import numpy as np from neuroconv.tools.nwb_helpers import get_default_backend_configuration, configure_backend from neuroconv.datainterfaces import VideoInterface from pynwb import NWBFile, TimeSeries, NWBHDF5IO from pynwb.file import Subject -from tqdm import tqdm from pymatreader import read_mat from kim_lab_to_nwb.ophys import MultiTiffMultiPageTiffImagingInterface @@ -23,7 +20,7 @@ def convert_session_to_nwb( tiff_folder_path: str | Path, output_dir: str | Path, verbose: bool = False, -) -> str: +) -> Path: """ Convert a single experimental session from Kim Lab data to NWB format. @@ -44,7 +41,7 @@ def convert_session_to_nwb( Returns ------- - str + Path Path to the created NWB file Raises @@ -189,7 +186,7 @@ def convert_session_to_nwb( output_dir.mkdir(exist_ok=True, parents=True) - output_file = convert_session_to_nwb( + nwbfile_path = convert_session_to_nwb( matlab_data_file_path=matlab_data_file_path, video_file_path=video_file_path, experiment_info_file_path=experiment_info_file_path,