Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve test coverage for storage classes #2693

Open
wants to merge 45 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
2ea442c
Run Store tests on logging
maxrjones Jan 11, 2025
7f76575
Run store tests on wrapper
maxrjones Jan 11, 2025
98b7392
Add read only open tests to WrapperStore
maxrjones Jan 11, 2025
18be47f
Ignore new coverage files
maxrjones Jan 11, 2025
69ce1d7
Simplify wrapper tests
maxrjones Jan 11, 2025
5877355
Fix __eq__ method in WrapperStore
maxrjones Jan 11, 2025
b4310fd
Implement __repr__ for WrapperStore
maxrjones Jan 11, 2025
d08458e
Allow separate open and init kwargs
maxrjones Jan 11, 2025
f663694
Add open class method to LoggingStore
maxrjones Jan 11, 2025
cf62f67
Add __str__ to WrapperStore
maxrjones Jan 11, 2025
31f9931
Add repr test for LoggingStore
maxrjones Jan 11, 2025
964aeaa
Fix __eq__ in LoggingStore
maxrjones Jan 11, 2025
332f564
Test getsize for stores
maxrjones Jan 11, 2025
4d4d728
Test for invalid ByteRequest
maxrjones Jan 11, 2025
30d1323
Use stdout rather than stderr as the default logging stream
maxrjones Jan 12, 2025
9764204
Test default logging stream
maxrjones Jan 12, 2025
fefd666
Add test for getsize_prefix
maxrjones Jan 12, 2025
6f240c2
Document buffer prototype parameter
maxrjones Jan 12, 2025
d2bbd9d
Add test for invalid modes in StorePath.open()
maxrjones Jan 12, 2025
85f44db
Add test for contains_group
maxrjones Jan 12, 2025
51c0c15
Add tests for contains_array
maxrjones Jan 12, 2025
ddd6bc9
Test for invalid root type for LocalStore
maxrjones Jan 12, 2025
62a528c
Test LocalStore.get with default prototype
maxrjones Jan 12, 2025
5f00efd
Test for invalid set buffer arguments
maxrjones Jan 13, 2025
6923337
Test get and set on closed stores
maxrjones Jan 13, 2025
0792fa8
Test using stores in a context manager
maxrjones Jan 13, 2025
dd0de05
Specify abstract methods for StoreTests
maxrjones Jan 13, 2025
4dba40c
Apply suggestions from code review
maxrjones Jan 13, 2025
48abe94
Lint
maxrjones Jan 14, 2025
bf4589d
Fix typing for LoggingStore
maxrjones Jan 14, 2025
5b37417
Match specific Errors in tests
maxrjones Jan 14, 2025
74647de
Add docstring
maxrjones Jan 14, 2025
c8ebcd0
Parametrize tests
maxrjones Jan 14, 2025
1e96600
Test for contains group/array at multiple heirarchies
maxrjones Jan 14, 2025
cc14e07
Update TypeError on GpuMemoryStore
maxrjones Jan 14, 2025
5148dd6
Merge branch 'main' into testing-storage
maxrjones Jan 15, 2025
bf58808
Don't implement _is_open setter on wrapped stores
maxrjones Jan 16, 2025
e1caef0
Update reprs for LoggingStore and WrapperStore
maxrjones Jan 16, 2025
1922d2d
Test check_writeable and close for WrapperStore
maxrjones Jan 16, 2025
45ea40d
Update pull request template (#2717)
brokkoli71 Jan 16, 2025
b281bc9
Add release notes
maxrjones Jan 16, 2025
9a75da2
Merge branch 'main' into testing-storage
maxrjones Jan 16, 2025
ca83bd6
Merge branch 'main' into testing-storage
maxrjones Jan 23, 2025
0d6eccf
Comprehensive changelog entry
maxrjones Jan 23, 2025
9059135
Match error message
maxrjones Jan 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ src/zarr/_version.py
data/*
src/fixture/
fixture/
junit.xml

.DS_Store
tests/.hypothesis
Expand Down
6 changes: 4 additions & 2 deletions src/zarr/abc/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,10 @@ async def get(
Parameters
----------
key : str
prototype : BufferPrototype
The prototype of the output buffer. Stores may support a default buffer prototype.
byte_range : ByteRequest, optional

ByteRequest may be one of the following. If not provided, all data associated with the key is retrieved.

- RangeByteRequest(int, int): Request a specific range of bytes in the form (start, end). The end is exclusive. If the given range is zero-length or starts after the end of the object, an error will be returned. Additionally, if the range ends after the end of the object, the entire remainder of the object will be returned. Otherwise, the exact requested range will be returned.
- OffsetByteRequest(int): Request all bytes starting from a given byte offset. This is equivalent to bytes={int}- as an HTTP header.
- SuffixByteRequest(int): Request the last int bytes. Note that here, int is the size of the request, not the byte offset. This is equivalent to bytes=-{int} as an HTTP header.
Expand All @@ -200,6 +200,8 @@ async def get_partial_values(

Parameters
----------
prototype : BufferPrototype
The prototype of the output buffer. Stores may support a default buffer prototype.
key_ranges : Iterable[tuple[str, tuple[int | None, int | None]]]
Ordered set of key, range pairs, a key may occur multiple times with different ranges

Expand Down
5 changes: 4 additions & 1 deletion src/zarr/storage/_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@
Store,
SuffixByteRequest,
)
from zarr.core.buffer import Buffer
from zarr.storage._common import _dereference_path

if TYPE_CHECKING:
from collections.abc import AsyncIterator, Iterable

from fsspec.asyn import AsyncFileSystem

from zarr.core.buffer import Buffer, BufferPrototype
from zarr.core.buffer import BufferPrototype
from zarr.core.common import BytesLike


Expand Down Expand Up @@ -253,6 +254,8 @@ async def set(
if not self._is_open:
await self._open()
self._check_writable()
if not isinstance(value, Buffer):
raise TypeError("FsspecStore.set(): `value` must a Buffer instance")
maxrjones marked this conversation as resolved.
Show resolved Hide resolved
path = _dereference_path(self.path, key)
# write data
if byte_range:
Expand Down
17 changes: 14 additions & 3 deletions src/zarr/storage/_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

import inspect
import logging
import sys
import time
from collections import defaultdict
from contextlib import contextmanager
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Self, TypeVar

from zarr.abc.store import Store
from zarr.storage._wrapper import WrapperStore
Expand All @@ -18,6 +19,8 @@

counter: defaultdict[str, int]

T_Store = TypeVar("T_Store", bound=Store)


class LoggingStore(WrapperStore[Store]):
maxrjones marked this conversation as resolved.
Show resolved Hide resolved
"""
Expand Down Expand Up @@ -67,7 +70,7 @@ def _configure_logger(

def _default_handler(self) -> logging.Handler:
"""Define a default log handler"""
handler = logging.StreamHandler()
handler = logging.StreamHandler(stream=sys.stdout)
handler.setLevel(self.log_level)
handler.setFormatter(
logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
Expand All @@ -94,6 +97,14 @@ def log(self, hint: Any = "") -> Generator[None, None, None]:
end_time = time.time()
self.logger.info("Finished %s [%.2f s]", op, end_time - start_time)

@classmethod
async def open(cls: type[Self], store_cls: type[T_Store], *args: Any, **kwargs: Any) -> Self:
log_level = kwargs.pop("log_level", "DEBUG")
log_handler = kwargs.pop("log_handler", None)
store = store_cls(*args, **kwargs)
await store._open()
return cls(store=store, log_level=log_level, log_handler=log_handler)

@property
def supports_writes(self) -> bool:
with self.log():
Expand Down Expand Up @@ -155,7 +166,7 @@ def __repr__(self) -> str:

def __eq__(self, other: object) -> bool:
with self.log(other):
return self._store == other
return type(self) is type(other) and self._store.__eq__(other._store) # type: ignore[attr-defined]

async def get(
self,
Expand Down
16 changes: 15 additions & 1 deletion src/zarr/storage/_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@
async def is_empty(self, prefix: str) -> bool:
return await self._store.is_empty(prefix)

@property
def _is_open(self) -> bool:
return self._store._is_open

@_is_open.setter
def _is_open(self, value: bool) -> None:
self._store._is_open = value

Check warning on line 65 in src/zarr/storage/_wrapper.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/storage/_wrapper.py#L65

Added line #L65 was not covered by tests

async def clear(self) -> None:
return await self._store.clear()

Expand All @@ -67,7 +75,13 @@
return self._store._check_writable()

def __eq__(self, value: object) -> bool:
return type(self) is type(value) and self._store.__eq__(value)
return type(self) is type(value) and self._store.__eq__(value._store) # type: ignore[attr-defined]

def __str__(self) -> str:
return f"wrapping-{self._store}"

def __repr__(self) -> str:
return f"WrapperStore({repr(self._store)!r})"

Check warning on line 84 in src/zarr/storage/_wrapper.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/storage/_wrapper.py#L84

Added line #L84 was not covered by tests

async def get(
self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None
Expand Down
6 changes: 6 additions & 0 deletions src/zarr/storage/_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@
prototype: BufferPrototype,
byte_range: ByteRequest | None = None,
) -> Buffer | None:
if not self._is_open:
self._sync_open()

Check warning on line 150 in src/zarr/storage/_zip.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/storage/_zip.py#L150

Added line #L150 was not covered by tests
# docstring inherited
try:
with self._zf.open(key) as f: # will raise KeyError
Expand Down Expand Up @@ -190,6 +192,8 @@
return out

def _set(self, key: str, value: Buffer) -> None:
if not self._is_open:
self._sync_open()
# generally, this should be called inside a lock
keyinfo = zipfile.ZipInfo(filename=key, date_time=time.localtime(time.time())[:6])
keyinfo.compress_type = self.compression
Expand All @@ -203,6 +207,8 @@
async def set(self, key: str, value: Buffer) -> None:
# docstring inherited
self._check_writable()
if not self._is_open:
self._sync_open()
assert isinstance(key, str)
if not isinstance(value, Buffer):
raise TypeError("ZipStore.set(): `value` must a Buffer instance")
Expand Down
138 changes: 113 additions & 25 deletions src/zarr/testing/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import asyncio
import pickle
from abc import abstractmethod
from typing import TYPE_CHECKING, Generic, TypeVar

from zarr.storage import WrapperStore
Expand Down Expand Up @@ -37,30 +38,53 @@ class StoreTests(Generic[S, B]):
store_cls: type[S]
buffer_cls: type[B]

@abstractmethod
async def set(self, store: S, key: str, value: Buffer) -> None:
"""
Insert a value into a storage backend, with a specific key.
This should not not use any store methods. Bypassing the store methods allows them to be
tested.
"""
raise NotImplementedError
...

@abstractmethod
async def get(self, store: S, key: str) -> Buffer:
"""
Retrieve a value from a storage backend, by key.
This should not not use any store methods. Bypassing the store methods allows them to be
tested.
"""
...

raise NotImplementedError

@abstractmethod
@pytest.fixture
def store_kwargs(self) -> dict[str, Any]:
return {"read_only": False}
"""Kwargs for instantiating a store"""
...

@abstractmethod
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could these new abstract methods get a changelog entry? I also presume this is a breaking change, because anyone implementing a child of StoreTests will now be forced to implement these methods?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I can mention it in the changelog entry. I do not consider this a breaking change because the previous 'NotImplementedError' caused these tests to fail unless they were implemented in the inheriting class. I think this is just a more Pythonic way to indicate that they need to be implemented.

def test_store_repr(self, store: S) -> None: ...

@abstractmethod
def test_store_supports_writes(self, store: S) -> None: ...

@abstractmethod
def test_store_supports_partial_writes(self, store: S) -> None: ...

@abstractmethod
def test_store_supports_listing(self, store: S) -> None: ...

@pytest.fixture
async def store(self, store_kwargs: dict[str, Any]) -> Store:
return await self.store_cls.open(**store_kwargs)
def open_kwargs(self, store_kwargs: dict[str, Any]) -> dict[str, Any]:
return store_kwargs

@pytest.fixture
async def store(self, open_kwargs: dict[str, Any]) -> Store:
return await self.store_cls.open(**open_kwargs)

@pytest.fixture
async def store_not_open(self, store_kwargs: dict[str, Any]) -> Store:
return self.store_cls(**store_kwargs)

def test_store_type(self, store: S) -> None:
assert isinstance(store, Store)
Expand All @@ -86,16 +110,23 @@ def test_store_read_only(self, store: S) -> None:
store.read_only = False # type: ignore[misc]

@pytest.mark.parametrize("read_only", [True, False])
async def test_store_open_read_only(
self, store_kwargs: dict[str, Any], read_only: bool
) -> None:
store_kwargs["read_only"] = read_only
store = await self.store_cls.open(**store_kwargs)
async def test_store_open_read_only(self, open_kwargs: dict[str, Any], read_only: bool) -> None:
open_kwargs["read_only"] = read_only
store = await self.store_cls.open(**open_kwargs)
assert store._is_open
assert store.read_only == read_only

async def test_read_only_store_raises(self, store_kwargs: dict[str, Any]) -> None:
kwargs = {**store_kwargs, "read_only": True}
async def test_store_context_manager(self, open_kwargs: dict[str, Any]) -> None:
maxrjones marked this conversation as resolved.
Show resolved Hide resolved
# Test that the context manager closes the store
with await self.store_cls.open(**open_kwargs) as store:
assert store._is_open
# Test trying to open an already open store
with pytest.raises(ValueError):
maxrjones marked this conversation as resolved.
Show resolved Hide resolved
await store._open()
assert not store._is_open

async def test_read_only_store_raises(self, open_kwargs: dict[str, Any]) -> None:
d-v-b marked this conversation as resolved.
Show resolved Hide resolved
kwargs = {**open_kwargs, "read_only": True}
store = await self.store_cls.open(**kwargs)
assert store.read_only

Expand All @@ -107,18 +138,6 @@ async def test_read_only_store_raises(self, store_kwargs: dict[str, Any]) -> Non
with pytest.raises(ValueError):
await store.delete("foo")

def test_store_repr(self, store: S) -> None:
raise NotImplementedError

def test_store_supports_writes(self, store: S) -> None:
raise NotImplementedError

def test_store_supports_partial_writes(self, store: S) -> None:
raise NotImplementedError

def test_store_supports_listing(self, store: S) -> None:
raise NotImplementedError

@pytest.mark.parametrize("key", ["c/0", "foo/c/0.0", "foo/0/0"])
@pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""])
@pytest.mark.parametrize(
Expand All @@ -135,6 +154,26 @@ async def test_get(self, store: S, key: str, data: bytes, byte_range: ByteReques
expected = data_buf[start:stop]
assert_bytes_equal(observed, expected)

async def test_get_not_open(self, store_not_open: S) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is rather surprising -- I would expect that a non-open store would not support IO of any kind. what exactly does open mean? cc @jhamman

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is also surprising to me!

"""
Ensure that data can be read from the store that isn't yet open using the store.get method.
"""
assert not store_not_open._is_open
data_buf = self.buffer_cls.from_bytes(b"\x01\x02\x03\x04")
key = "c/0"
await self.set(store_not_open, key, data_buf)
observed = await store_not_open.get(key, prototype=default_buffer_prototype())
assert_bytes_equal(observed, data_buf)

async def test_get_raises(self, store: S) -> None:
"""
Ensure that a ValueError is raise for invalid byte range syntax
"""
data_buf = self.buffer_cls.from_bytes(b"\x01\x02\x03\x04")
await self.set(store, "c/0", data_buf)
with pytest.raises((ValueError, TypeError)):
await store.get("c/0", prototype=default_buffer_prototype(), byte_range=(0, 2)) # type: ignore[arg-type]

async def test_get_many(self, store: S) -> None:
"""
Ensure that multiple keys can be retrieved at once with the _get_many method.
Expand All @@ -157,6 +196,37 @@ async def test_get_many(self, store: S) -> None:
expected_kvs = sorted(((k, b) for k, b in zip(keys, values, strict=False)))
assert observed_kvs == expected_kvs

@pytest.mark.parametrize("key", ["c/0", "foo/c/0.0", "foo/0/0"])
@pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""])
async def test_getsize(self, store: S, key: str, data: bytes) -> None:
"""
Test the result of store.getsize().
"""
data_buf = self.buffer_cls.from_bytes(data)
expected = len(data_buf)
await self.set(store, key, data_buf)
observed = await store.getsize(key)
assert observed == expected

async def test_getsize_prefix(self, store: S) -> None:
"""
Test the result of store.getsize_prefix().
"""
data_buf = self.buffer_cls.from_bytes(b"\x01\x02\x03\x04")
keys = ["c/0/0", "c/0/1", "c/1/0", "c/1/1"]
keys_values = [(k, data_buf) for k in keys]
await store._set_many(keys_values)
expected = len(data_buf) * len(keys)
observed = await store.getsize_prefix("c")
assert observed == expected

async def test_getsize_raises(self, store: S) -> None:
"""
Test the result of store.getsize().
"""
maxrjones marked this conversation as resolved.
Show resolved Hide resolved
with pytest.raises(FileNotFoundError):
await store.getsize("c/1000")

@pytest.mark.parametrize("key", ["zarr.json", "c/0", "foo/c/0.0", "foo/0/0"])
@pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""])
async def test_set(self, store: S, key: str, data: bytes) -> None:
Expand All @@ -169,6 +239,17 @@ async def test_set(self, store: S, key: str, data: bytes) -> None:
observed = await self.get(store, key)
assert_bytes_equal(observed, data_buf)

async def test_set_not_open(self, store_not_open: S) -> None:
"""
Ensure that data can be written to the store that's not yet open using the store.set method.
"""
Comment on lines +246 to +249
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assert not store_not_open._is_open
data_buf = self.buffer_cls.from_bytes(b"\x01\x02\x03\x04")
key = "c/0"
await store_not_open.set(key, data_buf)
observed = await self.get(store_not_open, key)
assert_bytes_equal(observed, data_buf)

async def test_set_many(self, store: S) -> None:
"""
Test that a dict of key : value pairs can be inserted into the store via the
Expand All @@ -181,6 +262,13 @@ async def test_set_many(self, store: S) -> None:
for k, v in store_dict.items():
assert (await self.get(store, k)).to_bytes() == v.to_bytes()

async def test_set_raises(self, store: S) -> None:
maxrjones marked this conversation as resolved.
Show resolved Hide resolved
"""
Ensure that set raises a Type or Value Error for invalid buffer arguments.
"""
with pytest.raises((ValueError, TypeError)):
await store.set("c/0", 0) # type: ignore[arg-type]

@pytest.mark.parametrize(
"key_ranges",
[
Expand Down
Loading
Loading