Add condition_labels as an argument (#18)

* add condition labels * Update src/pynwb/ndx_binned_spikes/__init__.py * Update spec/ndx-binned-spikes.extensions.yaml * remove automatic creation of labels in the mock * typo on the spec generation --------- Co-authored-by: Ben Dichter <ben.dichter@gmail.com>
catalystneuro · Sep 3, 2024 · fe376e5 · fe376e5
1 parent 59bfd8e
commit fe376e5
Show file tree

Hide file tree

Showing 6 changed files with 82 additions and 22 deletions.
diff --git a/README.md b/README.md
@@ -192,13 +192,16 @@ binned_aligned_spikes = BinnedAlignedSpikes(
     data=data,  # Shape (number_of_units, number_of_events, number_of_bins)
     timestamps=timestamps,  # Shape (number_of_events,)
     condition_indices=condition_indices,  # Shape (number_of_events,)
+    condition_labels=condition_labels,  # Shape (number_of_conditions,), where number_of_conditions == np.unique(condition_indices).size
 )
 ```
 
 Note that `number_of_events` here represents the total number of repetitions for all the conditions being aggregated. For example, if data is being aggregated from two stimuli where the first stimulus appeared twice and the second appeared three times, the `number_of_events` would be 5.
 
 The `condition_indices` is an indicator vector that should be constructed so that `data[:, condition_indices == condition_index, :]` corresponds to the binned spike counts for the condition with the specified condition_index. You can retrieve the same data using the convenience method `binned_aligned_spikes.get_data_for_condition(condition_index)`.
 
+The `condition_labels` argument is optional and can be used to store a human-readable label for each condition. The label at position `i` describes the condition whose `condition_index` is `i`, which makes the nature of each condition easier to understand.
+
 It's important to note that the timestamps must be in ascending order and must correspond positionally to the condition indices and the second dimension of the data. If they are not, a ValueError will be raised. To help organize the data correctly, you can use the convenience method `BinnedAlignedSpikes.sort_data_by_event_timestamps(data=data, event_timestamps=event_timestamps, condition_indices=condition_indices)`, which ensures the data is properly sorted. Here’s how it can be used:
 
 ```python
@@ -209,7 +212,8 @@ binned_aligned_spikes = BinnedAlignedSpikes(
     milliseconds_from_event_to_first_bin=milliseconds_from_event_to_first_bin,
     data=sorted_data,   
     event_timestamps=sorted_event_timestamps,  
-    condition_indices=sorted_condition_indices,  
+    condition_indices=sorted_condition_indices,
+    condition_labels=condition_labels
 )
 ```
 
@@ -278,6 +282,7 @@ milliseconds_from_event_to_first_bin = -50.0
 data = np.concatenate([data_for_first_stimuli, data_for_second_stimuli], axis=1)
 event_timestamps = np.concatenate([timestamps_first_stimuli, timestamps_second_stimuli])
 condition_indices = np.concatenate([np.zeros(2), np.ones(3)])
+condition_labels = ["a", "b"]
 
 sorted_data, sorted_event_timestamps, sorted_condition_indices = BinnedAlignedSpikes.sort_data_by_event_timestamps(data=data, event_timestamps=event_timestamps, condition_indices=condition_indices)
 

diff --git a/spec/ndx-binned-spikes.extensions.yaml b/spec/ndx-binned-spikes.extensions.yaml
@@ -2,7 +2,7 @@ groups:
 - neurodata_type_def: BinnedAlignedSpikes
   neurodata_type_inc: NWBDataInterface
   default_name: BinnedAlignedSpikes
-  doc: A data interface for binned spike data aligned to an event (e.g. a stimuli
+  doc: A data interface for binned spike data aligned to an event (e.g. a stimulus
     or the beginning of a trial).
   attributes:
   - name: name
@@ -11,7 +11,8 @@ groups:
     doc: The name of this container
   - name: description
     dtype: text
-    value: Spikes data binned and aligned to the timestamps of one or multiple conditions.
+    value: Spikes data binned and aligned to the event timestamps of one or multiple
+      conditions.
     doc: A description of what the data represents
   - name: bin_width_in_milliseconds
     dtype: float64
@@ -25,7 +26,7 @@ groups:
     required: false
   datasets:
   - name: data
-    dtype: numeric
+    dtype: uint64
     dims:
     - num_units
     - number_of_events
@@ -54,6 +55,17 @@ groups:
       type, trial number, category, etc.). This is only used when the data is aligned
       to multiple conditions
     quantity: '?'
+  - name: condition_labels
+    dtype: text
+    dims:
+    - number_of_conditions
+    shape:
+    - null
+    doc: The labels of the conditions that the data is aligned to. The size of this
+      array should match the number of conditions. This is only used when the data
+      is aligned to multiple conditions. First condition is index 0, second is index
+      1, etc.
+    quantity: '?'
   - name: units_region
     neurodata_type_inc: DynamicTableRegion
     doc: A reference to the Units table region that contains the units of the data.

diff --git a/src/pynwb/ndx_binned_spikes/__init__.py b/src/pynwb/ndx_binned_spikes/__init__.py
@@ -38,7 +38,7 @@ class BinnedAlignedSpikes(NWBDataInterface):
     )
 
     DEFAULT_NAME = "BinnedAlignedSpikes"
-    DEFAULT_DESCRIPTION = "Spikes data binned and aligned to the timestamps of one or multiple conditions."
+    DEFAULT_DESCRIPTION = "Spikes data binned and aligned to the event timestamps of one or multiple conditions."
 
     @docval(
         {
@@ -97,6 +97,17 @@ class BinnedAlignedSpikes(NWBDataInterface):
             "shape": (None,),
             "default": None,
         },
+        {
+            "name":"condition_labels",
+            "type": "array_data",
+            "doc": (
+                "The labels of the conditions that the data is aligned to. The size of this array should match "
+                "the number of conditions. This is only used when the data is aligned to multiple conditions. "
+                "First condition is index 0, second is index 1, etc."
+            ),
+            "shape": (None,),
+            "default": None,
+        },
         {
             "name": "units_region",
             "type": DynamicTableRegion,

diff --git a/src/pynwb/ndx_binned_spikes/testing/mock.py b/src/pynwb/ndx_binned_spikes/testing/mock.py
@@ -6,6 +6,7 @@
 from pynwb.misc import Units
 from hdmf.common import DynamicTableRegion
 
+
 # TODO: Remove once pynwb 2.7.0 is released and use the mock class there
 def mock_Units(
     num_units: int = 10,
@@ -47,11 +48,12 @@ def mock_BinnedAlignedSpikes(
     event_timestamps: Optional[np.ndarray] = None,
     data: Optional[np.ndarray] = None,
     condition_indices: Optional[np.ndarray] = None,
+    condition_labels: Optional[np.ndarray] = None,
     units_region: Optional[DynamicTableRegion] = None,
     sort_data: bool = True,
 ) -> BinnedAlignedSpikes:
     """
-    Generate a mock BinnedAlignedSpikes object with specified parameters or from given data. 
+    Generate a mock BinnedAlignedSpikes object with specified parameters or from given data.
 
     Parameters
     ----------
@@ -77,11 +79,16 @@ def mock_BinnedAlignedSpikes(
         An array of event_timestamps for each event. If not provided, it will be automatically generated.
         It should have size `number_of_events`.
     condition_indices : np.ndarray, optional
-        An array of indices characterizing each condition. If not provided, it will be automatically generated.
+        An array of indices characterizing each condition. If not provided, it will be automatically generated
+        from the number of conditions and number of events. It should have size `number_of_events`.
+        If provided, the `number_of_conditions` parameter will be ignored and the number of conditions will be
+        inferred from the unique values in `condition_indices`.
+    condition_labels: np.ndarray, optional
+        An array of labels for each condition. It should have size `number_of_conditions`.
     units_region: DynamicTableRegion, optional
         A reference to the Units table region that contains the units of the data.
     sort_data: bool, optional
-        If True, the data will be sorted by timestamps. 
+        If True, the data will be sorted by timestamps.
     Returns
     -------
     BinnedAlignedSpikes
@@ -107,14 +114,13 @@ def mock_BinnedAlignedSpikes(
 
     if event_timestamps.shape[0] != number_of_events:
         raise ValueError("The shape of `event_timestamps` does not match `number_of_events`.")
-    
+
     if condition_indices is None and number_of_conditions > 0:
-
-
-        assert number_of_conditions < number_of_events, (
-            "The number of conditions should be less than the number of events."
-        )
-
+
+        assert (
+            number_of_conditions < number_of_events
+        ), "The number of conditions should be less than the number of events."
+
         condition_indices = np.zeros(number_of_events, dtype=int)
         all_indices = np.arange(number_of_conditions, dtype=int)
 
@@ -126,12 +132,16 @@ def mock_BinnedAlignedSpikes(
             size=number_of_events - number_of_conditions,
             replace=True,
         )
+
 
     if condition_indices is not None:
-        assert (
-            condition_indices.shape[0] == number_of_events
-        ), "The shape of `condition_indices` does not match `number_of_events`."
-        condition_indices = np.array(condition_indices, dtype=int)
+        number_of_conditions = np.unique(condition_indices).size
+
+        if condition_labels is not None:
+            condition_labels = np.asarray(condition_labels, dtype="U")
+
+            if condition_labels.size != number_of_conditions:
+                raise ValueError("The number of condition labels should match the number of conditions.")
 
     # Sort the data by timestamps
     if sort_data:
@@ -146,6 +156,7 @@ def mock_BinnedAlignedSpikes(
         data=data,
         event_timestamps=event_timestamps,
         condition_indices=condition_indices,
+        condition_labels=condition_labels,
         units_region=units_region,
     )
     return binned_aligned_spikes
diff --git a/src/pynwb/tests/test_binned_aligned_spikes.py b/src/pynwb/tests/test_binned_aligned_spikes.py
@@ -167,6 +167,8 @@ def setUp(self):
         self.event_timestamps = np.concatenate([self.timestamps_first_condition, self.timestamps_second_condition])
 
         self.sorted_indices = np.argsort(self.event_timestamps)
+
+        self.condition_labels = ["first", "second"]
 
     def test_constructor(self):
         """Test that the constructor for BinnedAlignedSpikes sets values as expected."""
@@ -193,6 +195,7 @@ def test_constructor(self):
             data=data,
             event_timestamps=event_timestamps,
             condition_indices=condition_indices,
+            condition_labels=self.condition_labels,
         )
 
         np.testing.assert_array_equal(aggregated_binnned_align_spikes.data, self.data[:, self.sorted_indices, :])
@@ -202,6 +205,11 @@ def test_constructor(self):
         np.testing.assert_array_equal(
             aggregated_binnned_align_spikes.event_timestamps, self.event_timestamps[self.sorted_indices]
         )
+
+        np.testing.assert_array_equal(
+            aggregated_binnned_align_spikes.condition_labels, self.condition_labels
+        )
+
         self.assertEqual(aggregated_binnned_align_spikes.bin_width_in_milliseconds, self.bin_width_in_milliseconds)
         self.assertEqual(
             aggregated_binnned_align_spikes.milliseconds_from_event_to_first_bin,
@@ -259,7 +267,7 @@ def test_roundtrip_acquisition(self):
         """
 
         # Testing here
-        self.binned_aligned_spikes = mock_BinnedAlignedSpikes(number_of_conditions=0)
+        self.binned_aligned_spikes = mock_BinnedAlignedSpikes(number_of_conditions=3, condition_labels=["a", "b", "c"])
 
         self.nwbfile.add_acquisition(self.binned_aligned_spikes)
 

diff --git a/src/spec/create_extension_spec.py b/src/spec/create_extension_spec.py
@@ -29,7 +29,7 @@ def main():
             "The binned data. It should be an array whose first dimension is the number of units, the second dimension "
             "is the number of events, and the third dimension is the number of bins."
             ),
-        dtype="numeric",  # TODO should this be a uint64?
+        dtype="uint64",  
         shape=[None, None, None],
         dims=["num_units", "number_of_events", "number_of_bins"],
     )
@@ -63,12 +63,25 @@ def main():
         quantity="?",
     )
 
+    condition_labels = NWBDatasetSpec(
+        name="condition_labels",
+        doc=(
+            "The labels of the conditions that the data is aligned to. The size of this array should match "
+            "the number of conditions. This is only used when the data is aligned to multiple conditions. "
+            "First condition is index 0, second is index 1, etc."
+        ),
+        dtype="text",
+        shape=[None],
+        dims=["number_of_conditions"],
+        quantity="?",
+    )
+
     binned_aligned_spikes = NWBGroupSpec(
         neurodata_type_def="BinnedAlignedSpikes",
         neurodata_type_inc="NWBDataInterface",
         default_name="BinnedAlignedSpikes",
         doc="A data interface for binned spike data aligned to an event (e.g. a stimulus or the beginning of a trial).",
-        datasets=[binned_aligned_spikes_data, event_timestamps, condition_indices, units_region],
+        datasets=[binned_aligned_spikes_data, event_timestamps, condition_indices, condition_labels, units_region],
         attributes=[
             NWBAttributeSpec(
                 name="name",