Skip to content

Commit

Permalink
Merge pull request #24 from catalystneuro/Neuropixels/extend-metadata
Browse files Browse the repository at this point in the history
[Neuropixels] Extend metadata
  • Loading branch information
weiglszonja authored Dec 5, 2024
2 parents 1696453 + 6a7ccd4 commit a36600d
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 81 deletions.
Original file line number Diff line number Diff line change
@@ -1,75 +1,13 @@
from pathlib import Path
from typing import Union, Optional
from warnings import warn

import pandas as pd
from dateutil import tz
from neuroconv.utils import load_dict_from_file, dict_deep_update
from pymatreader import read_mat

from constantinople_lab_to_nwb.utils import get_subject_metadata_from_rat_info_folder
from constantinople_lab_to_nwb.mah_2024 import Mah2024NWBConverter


def get_subject_metadata_from_rat_info_folder(
    folder_path: Union[str, Path],
    subject_id: str,
    date: str,
) -> dict:
    """
    Collect the metadata of a subject from the rat info folder.

    Two files are consulted, and each one is silently skipped when it does not exist:
    "registry.mat" provides the date of birth, sex and vendor of the subject, and
    "Mass_registry.mat" provides the mass of the subject on the requested date.

    Parameters
    ----------
    folder_path: Union[str, Path]
        The folder path containing the rat info files.
    subject_id: str
        The subject ID.
    date: str
        The date of the session in the format "yyyy-mm-dd".

    Returns
    -------
    dict
        The metadata that could be found. Possible keys are "date_of_birth" (or "age" when the
        date of birth is missing), "sex", "description" and "weight" (in kilograms, as a string).
        The dictionary is empty when neither file exists or the subject is not listed.
    """
    info_folder = Path(folder_path)
    collected = {}

    # General registry: date of birth, sex and vendor.
    registry_path = info_folder / "registry.mat"
    if registry_path.exists():
        registry_table = pd.DataFrame(read_mat(str(registry_path))["Registry"])
        subject_rows = registry_table[registry_table["RatName"] == subject_id]
        if not subject_rows.empty:
            birth_date = subject_rows["DOB"].values[0]
            if not birth_date:
                # TODO: what to do if date of birth is missing?
                warn("Date of birth is missing. We recommend adding this information to the rat info files.")
                # Fall back to the age range specified in the manuscript.
                collected["age"] = "P6M/P24M"
            else:
                # The registry stores the date of birth as "yyyy-mm-dd".
                collected["date_of_birth"] = pd.to_datetime(birth_date, format="%Y-%m-%d")

            collected["sex"] = subject_rows["sex"].values[0]

            vendor = subject_rows["vendor"].values[0]
            if vendor:
                collected["description"] = f"Vendor: {vendor}"

    # Mass registry: the mass of the subject on the day of the session.
    mass_path = info_folder / "Mass_registry.mat"
    if mass_path.exists():
        mass_table = pd.DataFrame(read_mat(str(mass_path))["Mass_registry"])
        is_subject = mass_table["rat"] == subject_id
        is_session_date = mass_table["date"] == date
        mass_rows = mass_table[is_subject & is_session_date]
        if not mass_rows.empty:
            grams = mass_rows["mass"].astype(int).values[0]
            # The registry stores grams, the metadata is reported in kilograms.
            collected["weight"] = str(grams / 1000)

    return collected


def session_to_nwb(
raw_behavior_file_path: Union[str, Path],
processed_behavior_file_path: Union[str, Path],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Ecephys:
Device:
- name: DeviceEcephys
description: OpenEphys recording
ElectrodeGroup:
- name: ElectrodeGroup
description: The electrode group on the Neuropixels probe.
location: LO
device: DeviceEcephys
electrical_series:
name: electrical_series
description: The raw acquisition traces from the Neuropixels probe (384 channels, 30 kHz sampling rate) using Neuropix-PXI hardware and OpenEphys.
lfp_electrical_series:
name: lfp_electrical_series
description: The processed traces from the Neuropixels probe (384 channels, 1 kHz sampling rate) using Neuropix-PXI hardware and OpenEphys.
UnitProperties:
- name: channel_depth_um
description: The distance of the channel from the tip of the Neuropixels probe in micrometers.
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
NWBFile:
# related_publications:
# https://doi.org/### or link to APA or MLA citation of the publication
experiment_description: |
This dataset contains in vivo extracellular electrophysiology recordings from rats performing a value-based
decision-making task. Neural data were acquired using Neuropixels probes (384 channels, 30 kHz sampling rate) with
Neuropix-PXI hardware and OpenEphys, and preprocessed using Kilosort 2.5 with manual curation in Phy.
Trials were initiated by a nose-poke in a lit center port and required maintaining a center fixation for 0.8 to 1.2
seconds, during which a tone indicated the possible reward size. A subsequent side LED indicated the potential
reward location, followed by a delay period drawn from an exponential distribution (mean = 2.5 s). Rats could opt
out at any time by poking the unlit port, restarting the trial. Catch trials, where the delay period only ended if
the rat opted out, constituted 15-25% of the trials. Rats received penalties for premature fixation breaks.
Additionally, the tasks introduced semi-observable hidden states by varying reward statistics across uncued blocks
(high, low, and mixed), structured hierarchically, with blocks transitioning after 40 successfully completed trials.
session_description: |
This session contains extracellular electrophysiology acquired from 384 channels at 30 kHz using Neuropix-PXI hardware and OpenEphys.
The neural data were preprocessed using Kilosort 2.5. After preprocessing, clusters that were identified from Kilosort as single-units
were manually inspected using Phy. The behavioral tasks were conducted in a high-throughput facility where rats
were trained in increasingly complex protocols. Trials were initiated by a nose-poke in a lit center port and
required maintaining a center fixation for 0.8 to 1.2 seconds, during which a tone indicated the possible reward
size. A subsequent side LED indicated the potential reward location, followed by a delay period drawn from an
exponential distribution (mean = 2.5 s). Rats could opt out at any time by poking the unlit port, restarting the
trial. Catch trials, where the delay period only ended if the rat opted out, constituted 15-25% of the trials.
Rats received penalties for premature fixation breaks. Additionally, the tasks introduced semi-observable hidden
states by varying reward statistics across uncued blocks (high, low, and mixed), structured hierarchically, with
blocks transitioning after 40 successfully completed trials.
institution: NYU Center for Neural Science
lab: Constantinople
keywords:
- decision making
- reinforcement learning
- hidden state inference
- extracellular electrophysiology
- single-unit activity
experimenter:
- Schiereck, Shannon
Subject:
species: Rattus norvegicus
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,62 @@
from pathlib import Path
from typing import Union, Optional

import numpy as np
import pandas as pd
from dateutil import tz
from neuroconv.datainterfaces import OpenEphysRecordingInterface
from neuroconv.utils import load_dict_from_file, dict_deep_update
from nwbinspector import inspect_nwbfile, save_report, format_messages
from pymatreader import read_mat
from spikeinterface.extractors import OpenEphysBinaryRecordingExtractor

from constantinople_lab_to_nwb.utils import get_subject_metadata_from_rat_info_folder
from constantinople_lab_to_nwb.schierek_embargo_2024 import SchierekEmbargo2024NWBConverter


def update_ephys_device_metadata_for_subject(
    epys_registry_file_path: Union[str, Path],
    subject_id: str,
    metadata: dict,
):
    """
    Describe the ecephys device of a subject using the ephys registry file.

    The registry is searched for the row whose "ratname" equals `subject_id`. When a row is
    found, the description of the first device in ``metadata["Ecephys"]["Device"]`` is replaced
    with a sentence built from the probe type, recording site, implant coordinates and recording
    hemisphere (plus the distance to LO when the registry has a "distance2LO" column).
    When the subject is not listed in the registry, `metadata` is left untouched.

    Parameters
    ----------
    epys_registry_file_path: Union[str, Path]
        The path to the ephys registry (.mat) file.
    subject_id: str
        The subject ID to look up in the "ratname" column of the registry.
    metadata: dict
        The metadata to update. It is modified in place and must already contain
        at least one entry in ``metadata["Ecephys"]["Device"]``.

    Returns
    -------
    dict
        The same `metadata` dictionary that was passed in.

    Raises
    ------
    FileNotFoundError
        If the registry file does not exist.
    ValueError
        If the file has no "Registry" variable or the registry has no "ratname" column.
    """
    if not os.path.exists(epys_registry_file_path):
        raise FileNotFoundError(f"File not found: {epys_registry_file_path}")

    # The reader is always given a plain string, so str and Path inputs are handled the same way.
    ephys_registry = read_mat(str(epys_registry_file_path))
    if "Registry" not in ephys_registry:
        raise ValueError(f"'Registry' key not found in {epys_registry_file_path}.")
    ephys_registry = pd.DataFrame(ephys_registry["Registry"])
    if "ratname" not in ephys_registry.columns:
        raise ValueError(f"'ratname' column not found in {epys_registry_file_path}.")
    filtered_ephys_registry = ephys_registry[ephys_registry["ratname"] == subject_id]

    if not filtered_ephys_registry.empty:
        ap_value = filtered_ephys_registry["AP"].values[0]
        ml_value = filtered_ephys_registry["ML"].values[0]
        dv_value = filtered_ephys_registry["DV"].values[0]

        coordinates_in_mm = f"AP: {ap_value} mm, ML: {ml_value} mm"
        # pd.isna also accepts None and non-numeric cells, where np.isnan would raise a TypeError.
        if not pd.isna(dv_value):
            # No trailing period here: the coordinates are embedded in the middle of the sentence below.
            coordinates_in_mm += f", DV: {dv_value} mm"

        recording_hemisphere = filtered_ephys_registry["recordinghemisphere"].values[0]
        # Spell out the single-letter codes; any other value is kept as it is.
        recording_hemisphere = dict(L="left", R="right").get(recording_hemisphere, recording_hemisphere)
        probe_type = filtered_ephys_registry["probetype"].values[0]

        brain_region = filtered_ephys_registry["recordingsite"].values[0]
        description = f"The {probe_type} probe implanted in {brain_region} brain region, at {coordinates_in_mm}, {recording_hemisphere} hemisphere."
        if "distance2LO" in filtered_ephys_registry.columns:
            distance_to_LO_um = filtered_ephys_registry["distance2LO"].values[0]
            # TODO: confirm unit
            description += f" Distance to LO: {distance_to_LO_um} μm."

        metadata["Ecephys"]["Device"][0].update(
            description=description,
        )

    return metadata


def session_to_nwb(
openephys_recording_folder_path: Union[str, Path],
spike_sorting_folder_path: Union[str, Path],
Expand All @@ -20,6 +68,8 @@ def session_to_nwb(
nwbfile_path: Union[str, Path],
column_name_mapping: Optional[dict] = None,
column_descriptions: Optional[dict] = None,
ephys_registry_file_path: Optional[Union[str, Path]] = None,
subject_metadata: Optional[dict] = None,
stub_test: bool = False,
overwrite: bool = False,
):
Expand All @@ -36,6 +86,10 @@ def session_to_nwb(
The path to the processed spike sorting file (.mat).
nwbfile_path : str or Path
The path to the NWB file to write.
ephys_registry_file_path: str or Path, optional
The path to the ephys registry (.mat) file.
subject_metadata: dict, optional
Additional subject metadata, e.g. the dictionary returned by `get_subject_metadata_from_rat_info_folder`.
stub_test : bool, default: False
Whether to run a stub test conversion.
overwrite : bool, default: False
Expand Down Expand Up @@ -66,7 +120,14 @@ def session_to_nwb(

# Add Sorting
source_data.update(dict(PhySorting=dict(folder_path=spike_sorting_folder_path)))
conversion_options.update(dict(PhySorting=dict(stub_test=False)))
conversion_options.update(
dict(
PhySorting=dict(
stub_test=False,
units_description="Units table with spike times from Kilosort 2.5 and manually curated using Phy.",
)
)
)

# Add processed sorting output
if processed_spike_sorting_file_path is not None:
Expand All @@ -85,7 +146,13 @@ def session_to_nwb(
),
)
)
conversion_options.update(dict(ProcessedSorting=dict(write_as="processing", stub_test=False)))
conversion_options.update(
dict(
ProcessedSorting=dict(
write_as="processing", stub_test=False, units_description="The curated single-units from Phy."
),
),
)
conversion_options.update(
dict(
ProcessedBehavior=dict(column_name_mapping=column_name_mapping, column_descriptions=column_descriptions)
Expand Down Expand Up @@ -136,7 +203,7 @@ def session_to_nwb(
)

# Update default metadata with the editable in the corresponding yaml file
editable_metadata_path = Path(__file__).parent / "schierek_embargo_2024_metadata.yaml"
editable_metadata_path = Path(__file__).parent / "metadata" / "schierek_embargo_2024_general_metadata.yaml"
editable_metadata = load_dict_from_file(editable_metadata_path)
metadata = dict_deep_update(metadata, editable_metadata)

Expand All @@ -145,7 +212,20 @@ def session_to_nwb(
behavior_metadata = load_dict_from_file(behavior_metadata_path)
metadata = dict_deep_update(metadata, behavior_metadata)

metadata["Subject"].update(subject_id=subject_id)
# Update ecephys metadata
ephys_metadata_path = Path(__file__).parent / "metadata" / "schierek_embargo_2024_ecephys_metadata.yaml"
ephys_metadata = load_dict_from_file(ephys_metadata_path)
metadata = dict_deep_update(metadata, ephys_metadata)

if ephys_registry_file_path is not None:
metadata = update_ephys_device_metadata_for_subject(
epys_registry_file_path=ephys_registry_file_path,
subject_id=subject_id,
metadata=metadata,
)

if subject_metadata is not None:
metadata["Subject"].update(subject_id=subject_id, **subject_metadata)

# Run conversion
converter.run_conversion(
Expand All @@ -155,6 +235,17 @@ def session_to_nwb(
overwrite=overwrite,
)

results = list(inspect_nwbfile(nwbfile_path=nwbfile_path))
report_path = Path(nwbfile_path).parent / f"{subject_id}-{session_id}_nwbinspector_result.txt"
if not report_path.exists():
save_report(
report_file_path=report_path,
formatted_messages=format_messages(
results,
levels=["importance", "file_path"],
),
)


if __name__ == "__main__":

Expand Down Expand Up @@ -225,20 +316,33 @@ def session_to_nwb(
wait_thresh="The threshold in seconds to remove wait-times (mean + 1*std of all cumulative wait-times).",
)

nwbfile_path = Path("/Volumes/T9/Constantinople/nwbfiles/J076_2023-12-12_14-52-04.nwb")
nwbfile_path = Path("/Users/weian/data/demo/J076_2023-12-12_14-52-04.nwb")
if not nwbfile_path.parent.exists():
os.makedirs(nwbfile_path.parent, exist_ok=True)

# Ephys registry file path (contains metadata for the Neuropixels probe)
ephys_registry_file_path = "/Volumes/T9/Constantinople/Ephys Data/Ephys_registry.mat"

stub_test = True
overwrite = True

# Get subject metadata from rat registry
rat_registry_folder_path = "/Volumes/T9/Constantinople/Rat_info"
subject_metadata = get_subject_metadata_from_rat_info_folder(
folder_path=rat_registry_folder_path,
subject_id="J076",
date="2023-12-12",
)

session_to_nwb(
openephys_recording_folder_path=openephys_recording_folder_path,
spike_sorting_folder_path=phy_sorting_folder_path,
processed_spike_sorting_file_path=processed_sorting_file_path,
raw_behavior_file_path=bpod_file_path,
column_name_mapping=column_name_mapping,
column_descriptions=column_descriptions,
ephys_registry_file_path=ephys_registry_file_path,
subject_metadata=subject_metadata,
nwbfile_path=nwbfile_path,
stub_test=stub_test,
overwrite=overwrite,
Expand Down

This file was deleted.

1 change: 1 addition & 0 deletions src/constantinople_lab_to_nwb/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .get_subject_metadata import get_subject_metadata_from_rat_info_folder
Loading

0 comments on commit a36600d

Please sign in to comment.