diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py index 53d4f1d..e9eaa91 100644 --- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py +++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py @@ -1,75 +1,13 @@ from pathlib import Path from typing import Union, Optional -from warnings import warn -import pandas as pd from dateutil import tz from neuroconv.utils import load_dict_from_file, dict_deep_update -from pymatreader import read_mat +from constantinople_lab_to_nwb.utils import get_subject_metadata_from_rat_info_folder from constantinople_lab_to_nwb.mah_2024 import Mah2024NWBConverter -def get_subject_metadata_from_rat_info_folder( - folder_path: Union[str, Path], - subject_id: str, - date: str, -) -> dict: - """ - Load subject metadata from the rat info files. - The "registry.mat" file contains information about the subject such as date of birth, sex, and vendor. - The "Mass_registry.mat" file contains information about the weight of the subject. - - Parameters - ---------- - folder_path: Union[str, Path] - The folder path containing the rat info files. - subject_id: str - The subject ID. - date: str - The date of the session in the format "yyyy-mm-dd". 
- """ - - folder_path = Path(folder_path) - rat_registry_file_path = folder_path / "registry.mat" - - subject_metadata = dict() - if rat_registry_file_path.exists(): - rat_registry = read_mat(str(rat_registry_file_path)) - rat_registry = pd.DataFrame(rat_registry["Registry"]) - - filtered_rat_registry = rat_registry[rat_registry["RatName"] == subject_id] - if not filtered_rat_registry.empty: - date_of_birth = filtered_rat_registry["DOB"].values[0] - if date_of_birth: - # convert date of birth to datetime with format "yyyy-mm-dd" - date_of_birth = pd.to_datetime(date_of_birth, format="%Y-%m-%d") - subject_metadata.update(date_of_birth=date_of_birth) - else: - # TODO: what to do if date of birth is missing? - warn("Date of birth is missing. We recommend adding this information to the rat info files.") - # Using age range specified in the manuscript - subject_metadata.update(age="P6M/P24M") - subject_metadata.update(sex=filtered_rat_registry["sex"].values[0]) - vendor = filtered_rat_registry["vendor"].values[0] - if vendor: - subject_metadata.update(description=f"Vendor: {vendor}") - - mass_registry_file_path = folder_path / "Mass_registry.mat" - if mass_registry_file_path.exists(): - mass_registry = read_mat(str(mass_registry_file_path)) - mass_registry = pd.DataFrame(mass_registry["Mass_registry"]) - - filtered_mass_registry = mass_registry[(mass_registry["rat"] == subject_id) & (mass_registry["date"] == date)] - if not filtered_mass_registry.empty: - weight_g = filtered_mass_registry["mass"].astype(int).values[0] # in grams - # convert mass to kg - weight_kg = weight_g / 1000 - subject_metadata.update(weight=str(weight_kg)) - - return subject_metadata - - def session_to_nwb( raw_behavior_file_path: Union[str, Path], processed_behavior_file_path: Union[str, Path], diff --git a/src/constantinople_lab_to_nwb/schierek_embargo_2024/metadata/schierek_embargo_2024_ecephys_metadata.yaml 
b/src/constantinople_lab_to_nwb/schierek_embargo_2024/metadata/schierek_embargo_2024_ecephys_metadata.yaml new file mode 100644 index 0000000..27695ab --- /dev/null +++ b/src/constantinople_lab_to_nwb/schierek_embargo_2024/metadata/schierek_embargo_2024_ecephys_metadata.yaml @@ -0,0 +1,18 @@ +Ecephys: + Device: + - name: DeviceEcephys + description: OpenEphys recording + ElectrodeGroup: + - name: ElectrodeGroup + description: The electrode group on the Neuropixels probe. + location: LO + device: DeviceEcephys + electrical_series: + name: electrical_series + description: The raw acquisition traces from Neuropixels probe (384 channels, 30 kHz sampling rate) using Neuropix-PXI hardware and OpenEphys. + lfp_electrical_series: + name: lfp_electrical_series + description: The processed traces from the Neuropixels probe (384 channels, 1 kHz sampling rate) using Neuropix-PXI hardware and OpenEphys. + UnitProperties: + - name: channel_depth_um + description: The distance of the channel from the tip of the neuropixels probe in micrometers. diff --git a/src/constantinople_lab_to_nwb/schierek_embargo_2024/metadata/schierek_embargo_2024_general_metadata.yaml b/src/constantinople_lab_to_nwb/schierek_embargo_2024/metadata/schierek_embargo_2024_general_metadata.yaml new file mode 100644 index 0000000..f3f0fb4 --- /dev/null +++ b/src/constantinople_lab_to_nwb/schierek_embargo_2024/metadata/schierek_embargo_2024_general_metadata.yaml @@ -0,0 +1,38 @@ +NWBFile: +# related_publications: +# https://doi.org/### or link to APA or MLA citation of the publication + experiment_description: | + This dataset contains in vivo extracellular electrophysiology recordings from rats performing a value-based + decision-making task. Neural data were acquired using Neuropixels probes (384 channels, 30 kHz sampling rate) with + Neuropix-PXI hardware and OpenEphys, and preprocessed using Kilosort 2.5 with manual curation in Phy. 
+ Trials were initiated by a nose-poke in a lit center port and required maintaining a center fixation for 0.8 to 1.2 + seconds, during which a tone indicated the possible reward size. A subsequent side LED indicated the potential + reward location, followed by a delay period drawn from an exponential distribution (mean = 2.5 s). Rats could opt + out at any time by poking the unlit port, restarting the trial. Catch trials, where the delay period only ended if + the rat opted out, constituted 15-25% of the trials. Rats received penalties for premature fixation breaks. + Additionally, the tasks introduced semi-observable hidden states by varying reward statistics across uncued blocks + (high, low, and mixed), structured hierarchically, with blocks transitioning after 40 successfully completed trials. + session_description: | + This session contains extracellular electrophysiology acquired from 384 channels at 30 kHz using Neuropix-PXI hardware and OpenEphys. + The neural data were preprocessed using Kilosort 2.5. After preprocessing, clusters that were identified from Kilosort as single-units + were manually inspected using Phy. The behavioral tasks were conducted in a high-throughput facility where rats + were trained in increasingly complex protocols. Trials were initiated by a nose-poke in a lit center port and + required maintaining a center fixation for 0.8 to 1.2 seconds, during which a tone indicated the possible reward + size. A subsequent side LED indicated the potential reward location, followed by a delay period drawn from an + exponential distribution (mean = 2.5 s). Rats could opt out at any time by poking the unlit port, restarting the + trial. Catch trials, where the delay period only ended if the rat opted out, constituted 15-25% of the trials. + Rats received penalties for premature fixation breaks. 
def _is_missing_registry_value(value) -> bool:
    """Return True when a registry cell holds no usable value (None, NaN, blank string, empty array)."""
    if value is None:
        return True
    if isinstance(value, str):
        return not value.strip()
    try:
        # Sized containers (e.g. arrays, lists) count as missing only when empty.
        return len(value) == 0
    except TypeError:
        # Scalars have no length; fall back to the pandas NaN/NaT check.
        return bool(pd.isna(value))


def update_ephys_device_metadata_for_subject(
    epys_registry_file_path: Union[str, Path],
    subject_id: str,
    metadata: dict,
):
    """
    Update the description of the first Ecephys device using the ephys registry.

    The registry (.mat) file is expected to contain a "Registry" table with a "ratname" column.
    For the first row matching the subject, the "AP", "ML", "DV", "recordinghemisphere", "probetype",
    "recordingsite" and (when present) "distance2LO" columns are combined into a free-text description
    that is written to ``metadata["Ecephys"]["Device"][0]["description"]``.

    Parameters
    ----------
    epys_registry_file_path: Union[str, Path]
        The path to the ephys registry (.mat) file.
    subject_id: str
        The subject ID used to select the row in the registry.
    metadata: dict
        The metadata to update. It is modified in place and must already contain
        at least one entry under ``metadata["Ecephys"]["Device"]``.

    Returns
    -------
    dict
        The same ``metadata`` object; it is returned unchanged when the subject is not in the registry.

    Raises
    ------
    FileNotFoundError
        If the registry file does not exist.
    ValueError
        If the "Registry" key or the "ratname" column is missing from the registry.
    """
    registry_file_path = Path(epys_registry_file_path)
    if not registry_file_path.exists():
        raise FileNotFoundError(f"File not found: {epys_registry_file_path}")

    # Pass a plain string to the reader, as the rat-info loader in this package does.
    ephys_registry = read_mat(str(registry_file_path))
    if "Registry" not in ephys_registry:
        raise ValueError(f"'Registry' key not found in {epys_registry_file_path}.")
    ephys_registry = pd.DataFrame(ephys_registry["Registry"])
    if "ratname" not in ephys_registry.columns:
        raise ValueError(f"'ratname' column not found in {epys_registry_file_path}.")

    subject_rows = ephys_registry[ephys_registry["ratname"] == subject_id]
    if subject_rows.empty:
        # Nothing known about this subject: keep the default device description.
        return metadata

    def first_value(column_name: str):
        # Only the first matching row of the registry is used.
        return subject_rows[column_name].values[0]

    coordinates_in_mm = f"AP: {first_value('AP')} mm, ML: {first_value('ML')} mm"
    dv_value = first_value("DV")
    if not _is_missing_registry_value(dv_value):
        # Same unit as AP/ML and no trailing period: this fragment is embedded mid-sentence below.
        coordinates_in_mm += f", DV: {dv_value} mm"

    recording_hemisphere = first_value("recordinghemisphere")
    # Expand the single-letter codes; any other value is kept as stored in the registry.
    recording_hemisphere = dict(L="left", R="right").get(recording_hemisphere, recording_hemisphere)
    probe_type = first_value("probetype")
    brain_region = first_value("recordingsite")

    description = (
        f"The {probe_type} probe implanted in {brain_region} brain region, "
        f"at {coordinates_in_mm}, {recording_hemisphere} hemisphere."
    )
    if "distance2LO" in subject_rows.columns:
        distance_to_LO_um = first_value("distance2LO")
        if not _is_missing_registry_value(distance_to_LO_um):
            # TODO: confirm unit
            description += f" Distance to LO: {distance_to_LO_um} μm."

    metadata["Ecephys"]["Device"][0].update(description=description)

    return metadata
+ ephys_registry_file_path: str or Path + The path to the ephys registry (.mat) file. + subject_metadata: dict, optional + Additional subject metadata. e.g. dict( stub_test : bool, default: False Whether to run a stub test conversion. overwrite : bool, default: False @@ -66,7 +120,14 @@ def session_to_nwb( # Add Sorting source_data.update(dict(PhySorting=dict(folder_path=spike_sorting_folder_path))) - conversion_options.update(dict(PhySorting=dict(stub_test=False))) + conversion_options.update( + dict( + PhySorting=dict( + stub_test=False, + units_description="Units table with spike times from Kilosort 2.5 and manually curated using Phy.", + ) + ) + ) # Add processed sorting output if processed_spike_sorting_file_path is not None: @@ -85,7 +146,13 @@ def session_to_nwb( ), ) ) - conversion_options.update(dict(ProcessedSorting=dict(write_as="processing", stub_test=False))) + conversion_options.update( + dict( + ProcessedSorting=dict( + write_as="processing", stub_test=False, units_description="The curated single-units from Phy." 
+ ), + ), + ) conversion_options.update( dict( ProcessedBehavior=dict(column_name_mapping=column_name_mapping, column_descriptions=column_descriptions) @@ -136,7 +203,7 @@ def session_to_nwb( ) # Update default metadata with the editable in the corresponding yaml file - editable_metadata_path = Path(__file__).parent / "schierek_embargo_2024_metadata.yaml" + editable_metadata_path = Path(__file__).parent / "metadata" / "schierek_embargo_2024_general_metadata.yaml" editable_metadata = load_dict_from_file(editable_metadata_path) metadata = dict_deep_update(metadata, editable_metadata) @@ -145,7 +212,20 @@ def session_to_nwb( behavior_metadata = load_dict_from_file(behavior_metadata_path) metadata = dict_deep_update(metadata, behavior_metadata) - metadata["Subject"].update(subject_id=subject_id) + # Update ecephys metadata + ephys_metadata_path = Path(__file__).parent / "metadata" / "schierek_embargo_2024_ecephys_metadata.yaml" + ephys_metadata = load_dict_from_file(ephys_metadata_path) + metadata = dict_deep_update(metadata, ephys_metadata) + + if ephys_registry_file_path is not None: + metadata = update_ephys_device_metadata_for_subject( + epys_registry_file_path=ephys_registry_file_path, + subject_id=subject_id, + metadata=metadata, + ) + + if subject_metadata is not None: + metadata["Subject"].update(subject_id=subject_id, **subject_metadata) # Run conversion converter.run_conversion( @@ -155,6 +235,17 @@ def session_to_nwb( overwrite=overwrite, ) + results = list(inspect_nwbfile(nwbfile_path=nwbfile_path)) + report_path = Path(nwbfile_path).parent / f"{subject_id}-{session_id}_nwbinspector_result.txt" + if not report_path.exists(): + save_report( + report_file_path=report_path, + formatted_messages=format_messages( + results, + levels=["importance", "file_path"], + ), + ) + if __name__ == "__main__": @@ -225,13 +316,24 @@ def session_to_nwb( wait_thresh="The threshold in seconds to remove wait-times (mean + 1*std of all cumulative wait-times).", ) - nwbfile_path = 
Path("/Volumes/T9/Constantinople/nwbfiles/J076_2023-12-12_14-52-04.nwb") + nwbfile_path = Path("/Users/weian/data/demo/J076_2023-12-12_14-52-04.nwb") if not nwbfile_path.parent.exists(): os.makedirs(nwbfile_path.parent, exist_ok=True) + # Ephys registry file path (constains metadata for the neuropixels probe) + ephys_registry_file_path = "/Volumes/T9/Constantinople/Ephys Data/Ephys_registry.mat" + stub_test = True overwrite = True + # Get subject metadata from rat registry + rat_registry_folder_path = "/Volumes/T9/Constantinople/Rat_info" + subject_metadata = get_subject_metadata_from_rat_info_folder( + folder_path=rat_registry_folder_path, + subject_id="J076", + date="2023-12-12", + ) + session_to_nwb( openephys_recording_folder_path=openephys_recording_folder_path, spike_sorting_folder_path=phy_sorting_folder_path, @@ -239,6 +341,8 @@ def session_to_nwb( raw_behavior_file_path=bpod_file_path, column_name_mapping=column_name_mapping, column_descriptions=column_descriptions, + ephys_registry_file_path=ephys_registry_file_path, + subject_metadata=subject_metadata, nwbfile_path=nwbfile_path, stub_test=stub_test, overwrite=overwrite, diff --git a/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_metadata.yaml b/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_metadata.yaml deleted file mode 100644 index 9c9a318..0000000 --- a/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_metadata.yaml +++ /dev/null @@ -1,13 +0,0 @@ -NWBFile: -# related_publications: -# https://doi.org/### or link to APA or MLA citation of the publication - session_description: - A rich text description of the experiment. Can also just be the abstract of the publication. 
def _is_missing_value(value) -> bool:
    """Return True when a registry cell holds no usable value (None, NaN/NaT, blank string, empty array)."""
    if value is None:
        return True
    if isinstance(value, str):
        return not value.strip()
    try:
        # Sized containers (e.g. arrays, lists) count as missing only when empty.
        return len(value) == 0
    except TypeError:
        # Scalars have no length; fall back to the pandas NaN/NaT check.
        return bool(pd.isna(value))


def get_subject_metadata_from_rat_info_folder(
    folder_path: Union[str, Path],
    subject_id: str,
    date: str,
) -> dict:
    """
    Load subject metadata from the rat info files.

    The "registry.mat" file contains information about the subject such as date of birth, sex, and vendor.
    The "Mass_registry.mat" file contains information about the weight of the subject.
    Either file may be absent, in which case the corresponding fields are simply not returned.

    Parameters
    ----------
    folder_path: Union[str, Path]
        The folder path containing the rat info files.
    subject_id: str
        The subject ID.
    date: str
        The date of the session in the format "yyyy-mm-dd".

    Returns
    -------
    dict
        The subject metadata. Depending on what the registries contain, it may include
        "date_of_birth" (or the fallback "age" range when the date of birth is missing),
        "sex", "description" (vendor) and "weight" (in kilograms, as a string).
    """
    folder_path = Path(folder_path)
    subject_metadata = dict()

    rat_registry_file_path = folder_path / "registry.mat"
    if rat_registry_file_path.exists():
        rat_registry = read_mat(str(rat_registry_file_path))
        rat_registry = pd.DataFrame(rat_registry["Registry"])

        filtered_rat_registry = rat_registry[rat_registry["RatName"] == subject_id]
        if not filtered_rat_registry.empty:
            date_of_birth = filtered_rat_registry["DOB"].values[0]
            if _is_missing_value(date_of_birth):
                # TODO: what to do if date of birth is missing?
                warn("Date of birth is missing. We recommend adding this information to the rat info files.")
                # Using age range specified in the manuscript
                subject_metadata.update(age="P6M/P24M")
            else:
                # convert date of birth to datetime with format "yyyy-mm-dd"
                date_of_birth = pd.to_datetime(date_of_birth, format="%Y-%m-%d")
                subject_metadata.update(date_of_birth=date_of_birth)

            sex = filtered_rat_registry["sex"].values[0]
            if not _is_missing_value(sex):
                # A NaN/blank entry is skipped rather than written out as the subject's sex.
                subject_metadata.update(sex=sex)

            vendor = filtered_rat_registry["vendor"].values[0]
            if not _is_missing_value(vendor):
                subject_metadata.update(description=f"Vendor: {vendor}")

    mass_registry_file_path = folder_path / "Mass_registry.mat"
    if mass_registry_file_path.exists():
        mass_registry = read_mat(str(mass_registry_file_path))
        mass_registry = pd.DataFrame(mass_registry["Mass_registry"])

        filtered_mass_registry = mass_registry[(mass_registry["rat"] == subject_id) & (mass_registry["date"] == date)]
        if not filtered_mass_registry.empty:
            # in grams; unparseable or missing entries become NaN instead of raising,
            # and fractional grams are kept rather than truncated
            weight_g = pd.to_numeric(filtered_mass_registry["mass"], errors="coerce").values[0]
            if pd.isna(weight_g):
                warn(f"The mass of subject '{subject_id}' on {date} is missing or not numeric, weight is not added.")
            else:
                # convert mass to kg
                weight_kg = weight_g / 1000
                subject_metadata.update(weight=str(weight_kg))

    return subject_metadata