Skip to content

Commit

Permalink
Add condition_labels as an argument (#18)
Browse files Browse the repository at this point in the history
* add condition labels

* Update src/pynwb/ndx_binned_spikes/__init__.py

* Update spec/ndx-binned-spikes.extensions.yaml

* remove automatic creation of labels in the mock

* typo on the spec generation

---------

Co-authored-by: Ben Dichter <ben.dichter@gmail.com>
  • Loading branch information
h-mayorquin and bendichter authored Sep 3, 2024
1 parent 59bfd8e commit fe376e5
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 22 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,13 +192,16 @@ binned_aligned_spikes = BinnedAlignedSpikes(
data=data, # Shape (number_of_units, number_of_events, number_of_bins)
timestamps=timestamps, # Shape (number_of_events,)
condition_indices=condition_indices, # Shape (number_of_events,)
condition_labels=condition_labels, # Shape (number_of_conditions,), where number_of_conditions equals np.unique(condition_indices).size
)
```

Note that `number_of_events` here represents the total number of repetitions for all the conditions being aggregated. For example, if data is being aggregated from two stimuli where the first stimulus appeared twice and the second appeared three times, the `number_of_events` would be 5.

The `condition_indices` is an indicator vector that should be constructed so that `data[:, condition_indices == condition_index, :]` corresponds to the binned spike counts for the condition with the specified condition_index. You can retrieve the same data using the convenience method `binned_aligned_spikes.get_data_for_condition(condition_index)`.

The `condition_labels` argument is optional and can be used to store a label for each condition. It is meant to help users understand what each condition represents. The label at position `i` corresponds to the condition with index `i`.

It's important to note that the timestamps must be in ascending order and must correspond positionally to the condition indices and the second dimension of the data. If they are not, a ValueError will be raised. To help organize the data correctly, you can use the convenience method `BinnedAlignedSpikes.sort_data_by_event_timestamps(data=data, event_timestamps=event_timestamps, condition_indices=condition_indices)`, which ensures the data is properly sorted. Here’s how it can be used:

```python
Expand All @@ -209,7 +212,8 @@ binned_aligned_spikes = BinnedAlignedSpikes(
milliseconds_from_event_to_first_bin=milliseconds_from_event_to_first_bin,
data=sorted_data,
event_timestamps=sorted_event_timestamps,
condition_indices=sorted_condition_indices,
condition_indices=sorted_condition_indices,
condition_labels=condition_labels
)
```

Expand Down Expand Up @@ -278,6 +282,7 @@ milliseconds_from_event_to_first_bin = -50.0
data = np.concatenate([data_for_first_stimuli, data_for_second_stimuli], axis=1)
event_timestamps = np.concatenate([timestamps_first_stimuli, timestamps_second_stimuli])
condition_indices = np.concatenate([np.zeros(2), np.ones(3)])
condition_labels = ["a", "b"]

sorted_data, sorted_event_timestamps, sorted_condition_indices = BinnedAlignedSpikes.sort_data_by_event_timestamps(data=data, event_timestamps=event_timestamps, condition_indices=condition_indices)

Expand Down
18 changes: 15 additions & 3 deletions spec/ndx-binned-spikes.extensions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ groups:
- neurodata_type_def: BinnedAlignedSpikes
neurodata_type_inc: NWBDataInterface
default_name: BinnedAlignedSpikes
doc: A data interface for binned spike data aligned to an event (e.g. a stimuli
doc: A data interface for binned spike data aligned to an event (e.g. a stimulus
or the beginning of a trial).
attributes:
- name: name
Expand All @@ -11,7 +11,8 @@ groups:
doc: The name of this container
- name: description
dtype: text
value: Spikes data binned and aligned to the timestamps of one or multiple conditions.
value: Spikes data binned and aligned to the event timestamps of one or multiple
conditions.
doc: A description of what the data represents
- name: bin_width_in_milliseconds
dtype: float64
Expand All @@ -25,7 +26,7 @@ groups:
required: false
datasets:
- name: data
dtype: numeric
dtype: uint64
dims:
- num_units
- number_of_events
Expand Down Expand Up @@ -54,6 +55,17 @@ groups:
type, trial number, category, etc.). This is only used when the data is aligned
to multiple conditions
quantity: '?'
- name: condition_labels
dtype: text
dims:
- number_of_conditions
shape:
- null
doc: The labels of the conditions that the data is aligned to. The size of this
array should match the number of conditions. This is only used when the data
is aligned to multiple conditions. First condition is index 0, second is index
1, etc.
quantity: '?'
- name: units_region
neurodata_type_inc: DynamicTableRegion
doc: A reference to the Units table region that contains the units of the data.
Expand Down
13 changes: 12 additions & 1 deletion src/pynwb/ndx_binned_spikes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class BinnedAlignedSpikes(NWBDataInterface):
)

DEFAULT_NAME = "BinnedAlignedSpikes"
DEFAULT_DESCRIPTION = "Spikes data binned and aligned to the timestamps of one or multiple conditions."
DEFAULT_DESCRIPTION = "Spikes data binned and aligned to the event timestamps of one or multiple conditions."

@docval(
{
Expand Down Expand Up @@ -97,6 +97,17 @@ class BinnedAlignedSpikes(NWBDataInterface):
"shape": (None,),
"default": None,
},
{
"name":"condition_labels",
"type": "array_data",
"doc": (
"The labels of the conditions that the data is aligned to. The size of this array should match "
"the number of conditions. This is only used when the data is aligned to multiple conditions. "
"First condition is index 0, second is index 1, etc."
),
"shape": (None,),
"default": None,
},
{
"name": "units_region",
"type": DynamicTableRegion,
Expand Down
39 changes: 25 additions & 14 deletions src/pynwb/ndx_binned_spikes/testing/mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pynwb.misc import Units
from hdmf.common import DynamicTableRegion


# TODO: Remove once pynwb 2.7.0 is released and use the mock class there
def mock_Units(
num_units: int = 10,
Expand Down Expand Up @@ -47,11 +48,12 @@ def mock_BinnedAlignedSpikes(
event_timestamps: Optional[np.ndarray] = None,
data: Optional[np.ndarray] = None,
condition_indices: Optional[np.ndarray] = None,
condition_labels: Optional[np.ndarray] = None,
units_region: Optional[DynamicTableRegion] = None,
sort_data: bool = True,
) -> BinnedAlignedSpikes:
"""
Generate a mock BinnedAlignedSpikes object with specified parameters or from given data.
Generate a mock BinnedAlignedSpikes object with specified parameters or from given data.
Parameters
----------
Expand All @@ -77,11 +79,16 @@ def mock_BinnedAlignedSpikes(
An array of event_timestamps for each event. If not provided, it will be automatically generated.
It should have size `number_of_events`.
condition_indices : np.ndarray, optional
An array of indices characterizing each condition. If not provided, it will be automatically generated.
An array of indices characterizing each condition. If not provided, it will be automatically generated
from the number of conditions and number of events. It should have size `number_of_events`.
If provided, the `number_of_conditions` parameter will be ignored and the number of conditions will be
inferred from the unique values in `condition_indices`.
condition_labels: np.ndarray, optional
An array of labels for each condition. It should have size `number_of_conditions`.
units_region: DynamicTableRegion, optional
A reference to the Units table region that contains the units of the data.
sort_data: bool, optional
If True, the data will be sorted by timestamps.
If True, the data will be sorted by timestamps.
Returns
-------
BinnedAlignedSpikes
Expand All @@ -107,14 +114,13 @@ def mock_BinnedAlignedSpikes(

if event_timestamps.shape[0] != number_of_events:
raise ValueError("The shape of `event_timestamps` does not match `number_of_events`.")

if condition_indices is None and number_of_conditions > 0:


assert number_of_conditions < number_of_events, (
"The number of conditions should be less than the number of events."
)


assert (
number_of_conditions < number_of_events
), "The number of conditions should be less than the number of events."

condition_indices = np.zeros(number_of_events, dtype=int)
all_indices = np.arange(number_of_conditions, dtype=int)

Expand All @@ -126,12 +132,16 @@ def mock_BinnedAlignedSpikes(
size=number_of_events - number_of_conditions,
replace=True,
)


if condition_indices is not None:
assert (
condition_indices.shape[0] == number_of_events
), "The shape of `condition_indices` does not match `number_of_events`."
condition_indices = np.array(condition_indices, dtype=int)
number_of_conditions = np.unique(condition_indices).size

if condition_labels is not None:
condition_labels = np.asarray(condition_labels, dtype="U")

if condition_labels.size != number_of_conditions:
raise ValueError("The number of condition labels should match the number of conditions.")

# Sort the data by timestamps
if sort_data:
Expand All @@ -146,6 +156,7 @@ def mock_BinnedAlignedSpikes(
data=data,
event_timestamps=event_timestamps,
condition_indices=condition_indices,
condition_labels=condition_labels,
units_region=units_region,
)
return binned_aligned_spikes
10 changes: 9 additions & 1 deletion src/pynwb/tests/test_binned_aligned_spikes.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ def setUp(self):
self.event_timestamps = np.concatenate([self.timestamps_first_condition, self.timestamps_second_condition])

self.sorted_indices = np.argsort(self.event_timestamps)

self.condition_labels = ["first", "second"]

def test_constructor(self):
"""Test that the constructor for BinnedAlignedSpikes sets values as expected."""
Expand All @@ -193,6 +195,7 @@ def test_constructor(self):
data=data,
event_timestamps=event_timestamps,
condition_indices=condition_indices,
condition_labels=self.condition_labels,
)

np.testing.assert_array_equal(aggregated_binnned_align_spikes.data, self.data[:, self.sorted_indices, :])
Expand All @@ -202,6 +205,11 @@ def test_constructor(self):
np.testing.assert_array_equal(
aggregated_binnned_align_spikes.event_timestamps, self.event_timestamps[self.sorted_indices]
)

np.testing.assert_array_equal(
aggregated_binnned_align_spikes.condition_labels, self.condition_labels
)

self.assertEqual(aggregated_binnned_align_spikes.bin_width_in_milliseconds, self.bin_width_in_milliseconds)
self.assertEqual(
aggregated_binnned_align_spikes.milliseconds_from_event_to_first_bin,
Expand Down Expand Up @@ -259,7 +267,7 @@ def test_roundtrip_acquisition(self):
"""

# Testing here
self.binned_aligned_spikes = mock_BinnedAlignedSpikes(number_of_conditions=0)
self.binned_aligned_spikes = mock_BinnedAlignedSpikes(number_of_conditions=3, condition_labels=["a", "b", "c"])

self.nwbfile.add_acquisition(self.binned_aligned_spikes)

Expand Down
17 changes: 15 additions & 2 deletions src/spec/create_extension_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def main():
"The binned data. It should be an array whose first dimension is the number of units, the second dimension "
"is the number of events, and the third dimension is the number of bins."
),
dtype="numeric", # TODO should this be a uint64?
dtype="uint64",
shape=[None, None, None],
dims=["num_units", "number_of_events", "number_of_bins"],
)
Expand Down Expand Up @@ -63,12 +63,25 @@ def main():
quantity="?",
)

condition_labels = NWBDatasetSpec(
name="condition_labels",
doc=(
"The labels of the conditions that the data is aligned to. The size of this array should match "
"the number of conditions. This is only used when the data is aligned to multiple conditions. "
"First condition is index 0, second is index 1, etc."
),
dtype="text",
shape=[None],
dims=["number_of_conditions"],
quantity="?",
)

binned_aligned_spikes = NWBGroupSpec(
neurodata_type_def="BinnedAlignedSpikes",
neurodata_type_inc="NWBDataInterface",
default_name="BinnedAlignedSpikes",
doc="A data interface for binned spike data aligned to an event (e.g. a stimulus or the beginning of a trial).",
datasets=[binned_aligned_spikes_data, event_timestamps, condition_indices, units_region],
datasets=[binned_aligned_spikes_data, event_timestamps, condition_indices, condition_labels, units_region],
attributes=[
NWBAttributeSpec(
name="name",
Expand Down

0 comments on commit fe376e5

Please sign in to comment.