Skip to content

Commit

Permalink
Support arbitrary multi probe and multi trigger structures in spikeglx (
Browse files Browse the repository at this point in the history
  • Loading branch information
h-mayorquin authored Dec 10, 2024
1 parent 4b3172c commit 737468c
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 72 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
## Bug Fixes
* datetime objects can now be validated as conversion options [#1126](https://github.com/catalystneuro/neuroconv/pull/1126)
* Fix a bug where data in `DeepLabCutInterface` failed to write when `ndx-pose` was not imported. [#1144](https://github.com/catalystneuro/neuroconv/pull/1144)
* `SpikeGLXConverterPipe` converter now accepts multi-probe, multi-trigger structures and does not assume a specific folder structure [#1150](https://github.com/catalystneuro/neuroconv/pull/1150)

## Features
* Propagate the `unit_electrode_indices` argument from the spikeinterface tools to `BaseSortingExtractorInterface`. This allows users to map units to the electrode table when adding sorting data [PR #1124](https://github.com/catalystneuro/neuroconv/pull/1124)
* Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicates regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125)
* Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140)
* `SpikeGLXRecordingInterface` now also accepts `folder_path` making its behavior equivalent to SpikeInterface [#1150](https://github.com/catalystneuro/neuroconv/pull/1150)
* Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #1085](https://github.com/catalystneuro/neuroconv/pull/1085)
* Added the `deploy_neuroconv_batch_job` helper function for deploying NeuroConv AWS Batch jobs. [PR #1086](https://github.com/catalystneuro/neuroconv/pull/1086)
* YAML specification files now accept an outer keyword `upload_to_dandiset="< six-digit ID >"` to automatically upload the produced NWB files to the DANDI archive [PR #1089](https://github.com/catalystneuro/neuroconv/pull/1089)
Expand Down
31 changes: 11 additions & 20 deletions src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@ def get_source_schema(cls):

@classmethod
def get_streams(cls, folder_path: DirectoryPath) -> list[str]:
    "Return the stream ids available in the folder."
    from spikeinterface.extractors import SpikeGLXRecordingExtractor

    # SpikeGLXRecordingExtractor.get_streams() yields (stream_ids, stream_names);
    # only the ids are exposed here
    stream_ids, _stream_names = SpikeGLXRecordingExtractor.get_streams(folder_path=folder_path)
    return stream_ids

@validate_call
Expand Down Expand Up @@ -61,28 +63,17 @@ def __init__(
"""
folder_path = Path(folder_path)

streams = streams or self.get_streams(folder_path=folder_path)
streams_ids = streams or self.get_streams(folder_path=folder_path)

data_interfaces = dict()
for stream in streams:
if "ap" in stream:
probe_name = stream[:5]
file_path = (
folder_path / f"{folder_path.stem}_{probe_name}" / f"{folder_path.stem}_t0.{probe_name}.ap.bin"
)
es_key = f"ElectricalSeriesAP{probe_name.capitalize()}"
interface = SpikeGLXRecordingInterface(file_path=file_path, es_key=es_key)
elif "lf" in stream:
probe_name = stream[:5]
file_path = (
folder_path / f"{folder_path.stem}_{probe_name}" / f"{folder_path.stem}_t0.{probe_name}.lf.bin"
)
es_key = f"ElectricalSeriesLF{probe_name.capitalize()}"
interface = SpikeGLXRecordingInterface(file_path=file_path, es_key=es_key)
elif "nidq" in stream:
file_path = folder_path / f"{folder_path.stem}_t0.nidq.bin"
interface = SpikeGLXNIDQInterface(file_path=file_path)
data_interfaces.update({str(stream): interface}) # Without str() casting, is a numpy string

nidq_streams = [stream_id for stream_id in streams_ids if stream_id == "nidq"]
electrical_streams = [stream_id for stream_id in streams_ids if stream_id not in nidq_streams]
for stream_id in electrical_streams:
data_interfaces[stream_id] = SpikeGLXRecordingInterface(folder_path=folder_path, stream_id=stream_id)

for stream_id in nidq_streams:
data_interfaces[stream_id] = SpikeGLXNIDQInterface(folder_path=folder_path)

super().__init__(data_interfaces=data_interfaces, verbose=verbose)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Optional

import numpy as np
from pydantic import FilePath, validate_call
from pydantic import DirectoryPath, FilePath, validate_call

from .spikeglx_utils import (
add_recording_extractor_properties,
Expand Down Expand Up @@ -45,7 +45,6 @@ def get_source_schema(cls) -> dict:
def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict:

extractor_kwargs = source_data.copy()
extractor_kwargs.pop("file_path")
extractor_kwargs["folder_path"] = self.folder_path
extractor_kwargs["all_annotations"] = True
extractor_kwargs["stream_id"] = self.stream_id
Expand All @@ -54,38 +53,59 @@ def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict:
@validate_call
def __init__(
self,
file_path: FilePath,
file_path: Optional[FilePath] = None,
verbose: bool = True,
es_key: Optional[str] = None,
folder_path: Optional[DirectoryPath] = None,
stream_id: Optional[str] = None,
):
"""
Parameters
----------
folder_path: DirectoryPath
Folder path containing the binary files of the SpikeGLX recording.
stream_id: str, optional
Stream ID of the SpikeGLX recording.
Examples are 'nidq', 'imec0.ap', 'imec0.lf', 'imec1.ap', 'imec1.lf', etc.
file_path : FilePathType
Path to .bin file. Point to .ap.bin for SpikeGLXRecordingInterface and .lf.bin for SpikeGLXLFPInterface.
verbose : bool, default: True
Whether to output verbose text.
es_key : str, default: "ElectricalSeries"
es_key : str, the key to access the metadata of the ElectricalSeries.
"""

self.stream_id = fetch_stream_id_for_spikelgx_file(file_path)
if es_key is None:
if "lf" in self.stream_id:
es_key = "ElectricalSeriesLF"
elif "ap" in self.stream_id:
es_key = "ElectricalSeriesAP"
else:
raise ValueError("Cannot automatically determine es_key from path")
file_path = Path(file_path)
self.folder_path = file_path.parent
if file_path is not None and stream_id is None:
self.stream_id = fetch_stream_id_for_spikelgx_file(file_path)
self.folder_path = Path(file_path).parent

else:
self.stream_id = stream_id
self.folder_path = Path(folder_path)

super().__init__(
file_path=file_path,
folder_path=folder_path,
verbose=verbose,
es_key=es_key,
)
self.source_data["file_path"] = str(file_path)
self.meta = self.recording_extractor.neo_reader.signals_info_dict[(0, self.stream_id)]["meta"]

signal_info_key = (0, self.stream_id) # Key format is (segment_index, stream_id)
self._signals_info_dict = self.recording_extractor.neo_reader.signals_info_dict[signal_info_key]
self.meta = self._signals_info_dict["meta"]

if es_key is None:
stream_kind = self._signals_info_dict["stream_kind"] # ap or lf
stream_kind_caps = stream_kind.upper()
device = self._signals_info_dict["device"].capitalize() # imec0, imec1, etc.

electrical_series_name = f"ElectricalSeries{stream_kind_caps}"

# Add imec{probe_index} to the electrical series name when there are multiple probes
# or undefined, `typeImEnabled` is present in the meta of all the production probes
self.probes_enabled_in_run = int(self.meta.get("typeImEnabled", 0))
if self.probes_enabled_in_run != 1:
electrical_series_name += f"{device}"

self.es_key = electrical_series_name

# Set electrodes properties
add_recording_extractor_properties(self.recording_extractor)
Expand All @@ -100,7 +120,7 @@ def get_metadata(self) -> dict:
device = get_device_metadata(self.meta)

# Should follow pattern 'Imec0', 'Imec1', etc.
probe_name = self.stream_id[:5].capitalize()
probe_name = self._signals_info_dict["device"].capitalize()
device["name"] = f"Neuropixel{probe_name}"

# Add groups metadata
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from pathlib import Path
from typing import Optional

import numpy as np
from pydantic import ConfigDict, FilePath, validate_call
from pydantic import ConfigDict, DirectoryPath, FilePath, validate_call

from .spikeglx_utils import get_session_start_time
from ..baserecordingextractorinterface import BaseRecordingExtractorInterface
Expand Down Expand Up @@ -29,18 +30,18 @@ def get_source_schema(cls) -> dict:
def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict:

extractor_kwargs = source_data.copy()
extractor_kwargs.pop("file_path")
extractor_kwargs["folder_path"] = self.folder_path
extractor_kwargs["stream_id"] = self.stream_id
return extractor_kwargs

@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def __init__(
self,
file_path: FilePath,
file_path: Optional[FilePath] = None,
verbose: bool = True,
load_sync_channel: bool = False,
es_key: str = "ElectricalSeriesNIDQ",
folder_path: Optional[DirectoryPath] = None,
):
"""
Read channel data from the NIDQ board for the SpikeGLX recording.
Expand All @@ -49,6 +50,8 @@ def __init__(
Parameters
----------
folder_path : DirectoryPath
Path to the folder containing the .nidq.bin file.
file_path : FilePathType
Path to .nidq.bin file.
verbose : bool, default: True
Expand All @@ -59,10 +62,17 @@ def __init__(
es_key : str, default: "ElectricalSeriesNIDQ"
"""

self.file_path = Path(file_path)
self.folder_path = self.file_path.parent
if file_path is None and folder_path is None:
raise ValueError("Either 'file_path' or 'folder_path' must be provided.")

if file_path is not None:
file_path = Path(file_path)
self.folder_path = file_path.parent

if folder_path is not None:
self.folder_path = Path(folder_path)

super().__init__(
file_path=self.file_path,
verbose=verbose,
load_sync_channel=load_sync_channel,
es_key=es_key,
Expand All @@ -72,7 +82,10 @@ def __init__(
self.recording_extractor.set_property(
key="group_name", values=["NIDQChannelGroup"] * self.recording_extractor.get_num_channels()
)
self.meta = self.recording_extractor.neo_reader.signals_info_dict[(0, "nidq")]["meta"]

signal_info_key = (0, self.stream_id) # Key format is (segment_index, stream_id)
self._signals_info_dict = self.recording_extractor.neo_reader.signals_info_dict[signal_info_key]
self.meta = self._signals_info_dict["meta"]

def get_metadata(self) -> dict:
metadata = super().get_metadata()
Expand Down
12 changes: 6 additions & 6 deletions tests/test_on_data/ecephys/spikeglx_single_probe_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
"device": "NIDQBoard"
}
],
"ElectricalSeriesAPImec0": {
"name": "ElectricalSeriesAPImec0",
"description": "Acquisition traces for the ElectricalSeriesAPImec0."
"ElectricalSeriesAP": {
"name": "ElectricalSeriesAP",
"description": "Acquisition traces for the ElectricalSeriesAP."
},
"Electrodes": [
{
Expand All @@ -51,9 +51,9 @@
"name": "ElectricalSeriesNIDQ",
"description": "Raw acquisition traces from the NIDQ (.nidq.bin) channels."
},
"ElectricalSeriesLFImec0": {
"name": "ElectricalSeriesLFImec0",
"description": "Acquisition traces for the ElectricalSeriesLFImec0."
"ElectricalSeriesLF": {
"name": "ElectricalSeriesLF",
"description": "Acquisition traces for the ElectricalSeriesLF."
}
}
}
4 changes: 1 addition & 3 deletions tests/test_on_data/ecephys/test_lfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@ class TestEcephysLFPNwbConversions(unittest.TestCase):
param(
data_interface=SpikeGLXRecordingInterface,
interface_kwargs=dict(
file_path=(
DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0" / "Noise4Sam_g0_t0.imec0.lf.bin"
)
folder_path=DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0", stream_id="imec0.lf"
),
expected_write_module="raw",
),
Expand Down
4 changes: 1 addition & 3 deletions tests/test_on_data/ecephys/test_recording_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,9 +641,7 @@ def test_extracted_metadata(self, setup_interface):
class TestSpikeGLXRecordingInterface(RecordingExtractorInterfaceTestMixin):
data_interface_cls = SpikeGLXRecordingInterface
interface_kwargs = dict(
file_path=str(
ECEPHY_DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0" / "Noise4Sam_g0_t0.imec0.ap.bin"
)
folder_path=ECEPHY_DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0", stream_id="imec0.ap"
)
save_directory = OUTPUT_PATH

Expand Down
Loading

0 comments on commit 737468c

Please sign in to comment.