Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Xarray Support #207

Merged
merged 8 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* Added support for appending a dataset of references. @mavaylon1 [#203](https://github.com/hdmf-dev/hdmf-zarr/pull/203)
* NWBZarrIO load_namespaces=True by default. @mavaylon1 [#204](https://github.com/hdmf-dev/hdmf-zarr/pull/204)
* Added test for opening file with consolidated metadata from DANDI. @mavaylon1 [#206](https://github.com/hdmf-dev/hdmf-zarr/pull/206)
* Add dimension labels compatible with xarray. @mavaylon1 [#207](https://github.com/hdmf-dev/hdmf-zarr/pull/207)

## 0.8.0 (June 4, 2024)
### Bug Fixes
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ classifiers = [
"Topic :: Scientific/Engineering :: Medical Science Apps."
]
dependencies = [
'hdmf>=3.14.2',
'hdmf>=3.14.3',
'zarr>=2.11.0, <3.0', # pin below 3.0 until HDMF-zarr supports zarr 3.0
'numpy>=1.24, <2.0', # pin below 2.0 until HDMF supports numpy 2.0
'numpy>=1.24, <2.0', # pin below 2.0 until HDMF-zarr supports numpy 2.0
'numcodecs>=0.9.1',
'pynwb>=2.5.0',
'threadpoolctl>=3.1.0',
Expand Down
3 changes: 3 additions & 0 deletions src/hdmf_zarr/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,9 @@ def write_dataset(self, **kwargs): # noqa: C901
else:
options['io_settings'] = {}

if builder.dimension_labels is not None:
builder.attributes['_ARRAY_DIMENSIONS'] = builder.dimension_labels

attributes = builder.attributes
options['dtype'] = builder.dtype

Expand Down
40 changes: 40 additions & 0 deletions tests/unit/test_zarrio.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from tests.unit.utils import (Baz, BazData, BazBucket, get_baz_buildmanager)
import zarr
from hdmf_zarr.backend import ZarrIO
from .utils import BuildDatasetShapeMixin, BarData, BarDataHolder
from hdmf.spec import DatasetSpec
import os
import shutil
import warnings
Expand Down Expand Up @@ -186,6 +188,44 @@ def test_force_open_without_consolidated_fails(self):
self.fail("ZarrIO.__open_file_consolidated raised an unexpected ValueError: {}".format(e))


class TestDimensionLabels(BuildDatasetShapeMixin):
    """
    Test that dimension labels declared in a dataset spec are stored as the
    zarr attribute '_ARRAY_DIMENSIONS' on the written dataset.

    Workflow:
    i) Define `get_base_shape_dims` and `get_dataset_inc_spec` so that the spec built by
       BuildDatasetShapeMixin carries the dims ['a', 'b'].
    ii) Create and write a BarDataHolder with a BarData.
    iii) Read the file back and check that the _ARRAY_DIMENSIONS attribute is set.
    """

    def tearDown(self):
        """Remove the zarr store written by the test, if it was created."""
        # Guard the removal: if the write failed before the store directory was
        # created, an unconditional rmtree would raise FileNotFoundError here and
        # add a second, misleading error on top of the real test failure.
        if os.path.exists(self.store):
            shutil.rmtree(self.store)

    def get_base_shape_dims(self):
        """Return the (shape, dims) pair used for the BarData dataset spec."""
        return [None, None], ['a', 'b']

    def get_dataset_inc_spec(self):
        """Return the spec that includes BarData datasets in BarDataHolder."""
        dataset_inc_spec = DatasetSpec(
            doc='A BarData',
            data_type_inc='BarData',
            quantity='*',
        )
        return dataset_inc_spec

    def test_build(self):
        """Round-trip a BarDataHolder and check the stored dimension labels."""
        bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string')
        bar_data_holder_inst = BarDataHolder(
            name='my_bar_holder',
            bar_datas=[bar_data_inst],
        )

        with ZarrIO(self.store, manager=self.manager, mode='w') as io:
            io.write(bar_data_holder_inst)

        with ZarrIO(self.store, manager=self.manager, mode='r') as io:
            read_holder = io.read()
            # The labels must match the dims returned by get_base_shape_dims.
            self.assertEqual(read_holder.bar_datas[0].data.attrs['_ARRAY_DIMENSIONS'], ['a', 'b'])


class TestDatasetofReferences(ZarrStoreTestCase):
def setUp(self):
self.store_path = "test_io.zarr"
Expand Down
121 changes: 120 additions & 1 deletion tests/unit/utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import os
import tempfile
from copy import copy, deepcopy
from abc import ABCMeta, abstractmethod

from hdmf.build import (ObjectMapper, TypeMap, BuildManager)
from hdmf.container import (Container, Data)
from hdmf.spec import (GroupSpec, DatasetSpec, AttributeSpec, LinkSpec,
RefSpec, DtypeSpec, NamespaceCatalog, SpecCatalog,
SpecNamespace, NamespaceBuilder)
SpecNamespace, NamespaceBuilder, Spec)
from hdmf.spec.spec import (ZERO_OR_MANY, ONE_OR_MANY, ZERO_OR_ONE)
from hdmf.utils import (docval, getargs, get_docval)
from hdmf.testing import TestCase
from hdmf_zarr.backend import ROOT_NAME

CORE_NAMESPACE = 'test_core'
Expand Down Expand Up @@ -591,3 +593,120 @@ class CustomSpecNamespace(SpecNamespace):
@classmethod
def types_key(cls):
return cls.__types_key


class BarData(Data):
    """Test fixture: a ``Data`` container carrying ``attr1``, ``attr2`` and ``ext_attr``."""

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this BarData'},
            {'name': 'data', 'type': ('data', 'array_data'), 'doc': 'the data'},
            {'name': 'attr1', 'type': str, 'doc': 'a string attribute', 'default': None},
            {'name': 'attr2', 'type': 'int', 'doc': 'an int attribute', 'default': None},
            {'name': 'ext_attr', 'type': bool, 'doc': 'a boolean attribute', 'default': True})
    def __init__(self, **kwargs):
        """Store the name and data on the base class and keep the extra attributes privately."""
        obj_name, payload, first_attr, second_attr, ext_flag = getargs(
            'name', 'data', 'attr1', 'attr2', 'ext_attr', kwargs
        )
        super().__init__(name=obj_name, data=payload)
        self.__attr1 = first_attr
        self.__attr2 = second_attr
        self.__ext_attr = ext_flag

    @property
    def data_type(self):
        """The data type name of this container."""
        return 'BarData'

    @property
    def attr1(self):
        """The string attribute given at construction (may be None)."""
        return self.__attr1

    @property
    def attr2(self):
        """The int attribute given at construction (may be None)."""
        return self.__attr2

    @property
    def ext_attr(self):
        """The boolean attribute given at construction."""
        return self.__ext_attr


class BarDataHolder(Container):
    """Test fixture: a ``Container`` that holds a collection of BarData objects."""

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this BarDataHolder'},
            {'name': 'bar_datas', 'type': ('data', 'array_data'), 'doc': 'bar_datas', 'default': list()})
    def __init__(self, **kwargs):
        """Keep the given bar_datas and adopt every entry that has no parent yet."""
        holder_name, children = getargs('name', 'bar_datas', kwargs)
        super().__init__(name=holder_name)
        self.__bar_datas = children
        for child in children:
            # Skip empty slots and entries that already belong to another parent.
            if child is None or child.parent is not None:
                continue
            child.parent = self

    @property
    def data_type(self):
        """The data type name of this container."""
        return 'BarDataHolder'

    @property
    def bar_datas(self):
        """The collection of BarData objects passed at construction."""
        return self.__bar_datas


class ExtBarDataMapper(ObjectMapper):
    """ObjectMapper for BarData with custom handling of the ``ext_attr`` field."""

    @docval({"name": "spec", "type": Spec, "doc": "the spec to get the attribute value for"},
            {"name": "container", "type": BarData, "doc": "the container to get the attribute value from"},
            {"name": "manager", "type": BuildManager, "doc": "the BuildManager used for managing this build"},
            returns='the value of the attribute')
    def get_attr_value(self, **kwargs):
        ''' Get the value of the attribute corresponding to this spec from the given container '''
        spec, container, _manager = getargs('spec', 'container', 'manager', kwargs)
        # Custom mapping of field 'ext_attr' within the container:
        # BarDataHolder/BarData -> spec BarDataHolder/BarData.ext_attr
        held_by_holder = isinstance(container.parent, BarDataHolder)
        if held_by_holder and spec.name == 'ext_attr':
            return container.ext_attr
        # Everything else goes through the default lookup.
        return super().get_attr_value(**kwargs)


class BuildDatasetShapeMixin(TestCase, metaclass=ABCMeta):
    """Base test case that builds a BuildManager for BarData / BarDataHolder.

    Subclasses supply the dataset shape and dims via ``get_base_shape_dims`` and the
    spec that includes BarData in BarDataHolder via ``get_dataset_inc_spec``.
    """

    def setUp(self):
        """Create the specs, namespace and type map, and expose ``self.store`` and ``self.manager``."""
        self.store = "tests/unit/test_io.zarr"
        self.set_up_specs()

        # Register both specs under the same source file name.
        catalog = SpecCatalog()
        catalog.register_spec(self.bar_data_spec, 'test.yaml')
        catalog.register_spec(self.bar_data_holder_spec, 'test.yaml')

        test_namespace = SpecNamespace(
            doc='a test namespace',
            name=CORE_NAMESPACE,
            schema=[{'source': 'test.yaml'}],
            version='0.1.0',
            catalog=catalog
        )
        ns_catalog = NamespaceCatalog()
        ns_catalog.add_namespace(CORE_NAMESPACE, test_namespace)

        # Map data type names to container classes, then container classes to mappers.
        tm = TypeMap(ns_catalog)
        tm.register_container_type(CORE_NAMESPACE, 'BarData', BarData)
        tm.register_container_type(CORE_NAMESPACE, 'BarDataHolder', BarDataHolder)
        tm.register_map(BarData, ExtBarDataMapper)
        tm.register_map(BarDataHolder, ObjectMapper)
        self.manager = BuildManager(tm)

    def set_up_specs(self):
        """Build ``self.bar_data_spec`` and ``self.bar_data_holder_spec`` from the subclass hooks."""
        base_shape, base_dims = self.get_base_shape_dims()
        self.bar_data_spec = DatasetSpec(
            doc='A test dataset specification with a data type',
            data_type_def='BarData',
            dtype='int',
            shape=base_shape,
            dims=base_dims,
        )
        included_dataset_spec = self.get_dataset_inc_spec()
        self.bar_data_holder_spec = GroupSpec(
            doc='A container of multiple extended BarData objects',
            data_type_def='BarDataHolder',
            datasets=[included_dataset_spec],
        )

    @abstractmethod
    def get_base_shape_dims(self):
        """Return the (shape, dims) pair used for the BarData dataset spec."""

    @abstractmethod
    def get_dataset_inc_spec(self):
        """Return the dataset spec placed in the BarDataHolder group spec."""
Loading