Add hypothesis property tests (zarr-developers#1746)
* Add hypothesis tests:
  1. Roundtrip a numpy array
  2. Basic Indexing
* Add compressors (this is important for zarr-developers#1931)
* Add more tests
* Add zarr_version
* Revert "Add zarr_version" (reverts commit 2ad1b35)
* Adapt for V3
* Add workflow
* Try again
* always run
* fix env
* Try typing
* Cleanup
* Add vindex
* Review feedback
* cleanup
* WIP
* Cleanup
* Move to v3/
* another type ignore
* Add `_`
* Update src/zarr/strategies.py
* Update src/zarr/strategies.py
* style: pre-commit fixes

--------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 064b2e0 · commit 334d6fe

Showing 6 changed files with 319 additions and 1 deletion.
@@ -0,0 +1,84 @@
name: Slow Hypothesis CI
on:
  push:
    branches:
      - "main"
      - "v3"
  pull_request:
    branches:
      - "main"
      - "v3"
    types: [opened, reopened, synchronize, labeled]
  schedule:
    - cron: "0 0 * * *" # Daily "At 00:00" UTC
  workflow_dispatch: # allows you to trigger manually

env:
  FORCE_COLOR: 3

jobs:

  hypothesis:
    name: Slow Hypothesis Tests
    runs-on: "ubuntu-latest"
    defaults:
      run:
        shell: bash -l {0}

    strategy:
      matrix:
        python-version: ['3.11']
        numpy-version: ['1.26']
        dependency-set: ["optional"]

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
      - name: Install Hatch
        run: |
          python -m pip install --upgrade pip
          pip install hatch
      - name: Set Up Hatch Env
        run: |
          hatch env create test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }}
          hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env
      # https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache
      - name: Restore cached hypothesis directory
        id: restore-hypothesis-cache
        uses: actions/cache/restore@v4
        with:
          path: .hypothesis/
          key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }}
          restore-keys: |
            cache-hypothesis-
      - name: Run slow Hypothesis tests
        if: success()
        id: status
        run: |
          hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-hypothesis
      # explicitly save the cache so it gets updated, also do this even if it fails.
      - name: Save cached hypothesis directory
        id: save-hypothesis-cache
        if: always() && steps.status.outcome != 'skipped'
        uses: actions/cache/save@v4
        with:
          path: .hypothesis/
          key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }}

      - name: Generate and publish the report
        if: |
          failure()
          && steps.status.outcome == 'failure'
          && github.event_name == 'schedule'
          && github.repository_owner == 'zarr-developers'
        uses: xarray-contrib/issue-from-pytest-log@v1
        with:
          log-path: output-${{ matrix.python-version }}-log.jsonl
          issue-title: "Nightly Hypothesis tests failed"
          issue-label: "topic-hypothesis"
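The cache restore/save steps above persist the .hypothesis/ directory, which is where Hypothesis keeps its example database by default, so failing examples found in one nightly run are replayed first on the next. As a hedged sketch that is not part of this diff, a conftest.py could make that location and a slower CI profile explicit; the profile name, example count, and environment variable below are illustrative assumptions, not anything the commit defines.

# Hypothetical conftest.py sketch (not part of this commit): pin the Hypothesis
# example database to the cached .hypothesis/ directory and register a slower
# profile for scheduled runs. Profile name, max_examples, and env var are
# illustrative choices.
import os

from hypothesis import HealthCheck, settings
from hypothesis.database import DirectoryBasedExampleDatabase

settings.register_profile(
    "nightly",
    database=DirectoryBasedExampleDatabase(".hypothesis/examples"),
    max_examples=1000,  # run many more examples than the default
    deadline=None,  # slow array operations should not trip the per-example deadline
    suppress_health_check=[HealthCheck.too_slow],
)

# Select the profile via an environment variable, e.g. HYPOTHESIS_PROFILE=nightly
settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "default"))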
@@ -78,5 +78,8 @@ src/zarr/_version.py
#test_sync*
data/*
src/fixture/
fixture/

.DS_Store
tests/.hypothesis
.hypothesis/
@@ -0,0 +1,145 @@
from typing import Any

import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
import numpy as np
from hypothesis import given, settings  # noqa

from .array import Array
from .group import Group
from .store import MemoryStore, StoreLike

# Copied from Xarray
_attr_keys = st.text(st.characters(), min_size=1)
_attr_values = st.recursive(
    st.none() | st.booleans() | st.text(st.characters(), max_size=5),
    lambda children: st.lists(children) | st.dictionaries(_attr_keys, children),
    max_leaves=3,
)

# From https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#node-names
# 1. must not be the empty string ("")
# 2. must not include the character "/"
# 3. must not be a string composed only of period characters, e.g. "." or ".."
# 4. must not start with the reserved prefix "__"
zarr_key_chars = st.sampled_from(
    ".-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
)
node_names = st.text(zarr_key_chars, min_size=1).filter(
    lambda t: t not in (".", "..") and not t.startswith("__")
)
array_names = node_names
attrs = st.none() | st.dictionaries(_attr_keys, _attr_values)
paths = st.lists(node_names, min_size=1).map(lambda x: "/".join(x)) | st.just("/")
np_arrays = npst.arrays(
    # TODO: re-enable timedeltas once they are supported
    dtype=npst.scalar_dtypes().filter(lambda x: x.kind != "m"),
    shape=npst.array_shapes(max_dims=4),
)
stores = st.builds(MemoryStore, st.just({}), mode=st.just("w"))
compressors = st.sampled_from([None, "default"])


@st.composite  # type: ignore[misc]
def np_array_and_chunks(
    draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = np_arrays
) -> tuple[np.ndarray, tuple[int]]:  # type: ignore[type-arg]
    """A hypothesis strategy to generate small-sized random arrays.

    Returns: a tuple of the array and a suitable random chunking for it.
    """
    array = draw(arrays)
    # We want this strategy to shrink towards arrays with a smaller number of chunks.
    # 1. st.integers() shrinks towards smaller values, so use it to generate the number of chunks
    numchunks = draw(st.tuples(*[st.integers(min_value=1, max_value=size) for size in array.shape]))
    # 2. and now generate the chunks tuple
    chunks = tuple(size // nchunks for size, nchunks in zip(array.shape, numchunks, strict=True))
    return (array, chunks)


@st.composite  # type: ignore[misc]
def arrays(
    draw: st.DrawFn,
    *,
    compressors: st.SearchStrategy = compressors,
    stores: st.SearchStrategy[StoreLike] = stores,
    arrays: st.SearchStrategy[np.ndarray] = np_arrays,
    paths: st.SearchStrategy[None | str] = paths,
    array_names: st.SearchStrategy = array_names,
    attrs: st.SearchStrategy = attrs,
) -> Array:
    store = draw(stores)
    nparray, chunks = draw(np_array_and_chunks(arrays=arrays))
    path = draw(paths)
    name = draw(array_names)
    attributes = draw(attrs)
    # compressor = draw(compressors)

    # TODO: clean this up
    # if path is None and name is None:
    #     array_path = None
    #     array_name = None
    # elif path is None and name is not None:
    #     array_path = f"{name}"
    #     array_name = f"/{name}"
    # elif path is not None and name is None:
    #     array_path = path
    #     array_name = None
    # elif path == "/":
    #     assert name is not None
    #     array_path = name
    #     array_name = "/" + name
    # else:
    #     assert name is not None
    #     array_path = f"{path}/{name}"
    #     array_name = "/" + array_path

    expected_attrs = {} if attributes is None else attributes

    array_path = path + ("/" if not path.endswith("/") else "") + name
    root = Group.create(store)
    fill_value_args: tuple[Any, ...] = tuple()
    if nparray.dtype.kind == "M":
        fill_value_args = ("ns",)

    a = root.create_array(
        array_path,
        shape=nparray.shape,
        chunks=chunks,
        dtype=nparray.dtype.str,
        attributes=attributes,
        # compressor=compressor,  # TODO: FIXME
        fill_value=nparray.dtype.type(0, *fill_value_args),
    )

    assert isinstance(a, Array)
    assert nparray.shape == a.shape
    assert chunks == a.chunks
    assert array_path == a.path, (path, name, array_path, a.name, a.path)
    # assert array_path == a.name, (path, name, array_path, a.name, a.path)
    # assert a.basename is None  # TODO
    # assert a.store == normalize_store_arg(store)
    assert dict(a.attrs) == expected_attrs

    a[:] = nparray

    return a


def is_negative_slice(idx: Any) -> bool:
    return isinstance(idx, slice) and idx.step is not None and idx.step < 0


@st.composite  # type: ignore[misc]
def basic_indices(draw: st.DrawFn, *, shape: tuple[int], **kwargs):  # type: ignore[no-untyped-def]
    """Basic indices without unsupported negative slices."""
    return draw(
        npst.basic_indices(shape=shape, **kwargs).filter(
            lambda idxr: (
                not (
                    is_negative_slice(idxr)
                    or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr))
                )
            )
        )
    )
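The node_names strategy above encodes the v3 node-name constraints in its alphabet and filter. As a hedged illustration that is not part of the commit, a small property test could assert that every generated name satisfies what the strategy is meant to guarantee; the test name is hypothetical, and the import path follows the zarr.strategies module used by the tests below.

# Hypothetical check (not in this commit) that generated node names satisfy
# the constraints the strategy enforces.
from hypothesis import given

from zarr.strategies import node_names


@given(node_names)
def test_node_names_are_valid(name: str) -> None:
    assert name != ""  # rule 1: not the empty string
    assert "/" not in name  # rule 2: "/" is not in the sampled alphabet
    assert name not in (".", "..")  # rule 3 (as filtered): not "." or ".."
    assert not name.startswith("__")  # rule 4: no reserved "__" prefix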
@@ -0,0 +1,69 @@
import numpy as np
import pytest
from numpy.testing import assert_array_equal

pytest.importorskip("hypothesis")

import hypothesis.extra.numpy as npst  # noqa
import hypothesis.strategies as st  # noqa
from hypothesis import given, settings  # noqa
from zarr.strategies import arrays, np_arrays, basic_indices  # noqa


@given(st.data())
def test_roundtrip(data):
    nparray = data.draw(np_arrays)
    zarray = data.draw(arrays(arrays=st.just(nparray)))
    assert_array_equal(nparray, zarray[:])


@given(data=st.data())
def test_basic_indexing(data):
    zarray = data.draw(arrays())
    nparray = zarray[:]
    indexer = data.draw(basic_indices(shape=nparray.shape))
    actual = zarray[indexer]
    assert_array_equal(nparray[indexer], actual)

    new_data = np.ones_like(actual)
    zarray[indexer] = new_data
    nparray[indexer] = new_data
    assert_array_equal(nparray, zarray[:])


@given(data=st.data())
def test_vindex(data):
    zarray = data.draw(arrays())
    nparray = zarray[:]

    indexer = data.draw(
        npst.integer_array_indices(
            shape=nparray.shape, result_shape=npst.array_shapes(max_dims=None)
        )
    )
    actual = zarray.vindex[indexer]
    assert_array_equal(nparray[indexer], actual)


# @st.composite
# def advanced_indices(draw, *, shape):
#     basic_idxr = draw(
#         basic_indices(
#             shape=shape, min_dims=len(shape), max_dims=len(shape), allow_ellipsis=False
#         ).filter(lambda x: isinstance(x, tuple))
#     )

#     int_idxr = draw(
#         npst.integer_array_indices(shape=shape, result_shape=npst.array_shapes(max_dims=1))
#     )
#     args = tuple(
#         st.sampled_from((l, r)) for l, r in zip_longest(basic_idxr, int_idxr, fillvalue=slice(None))
#     )
#     return draw(st.tuples(*args))


# @given(st.data())
# def test_roundtrip_object_array(data):
#     nparray = data.draw(np_arrays)
#     zarray = data.draw(arrays(arrays=st.just(nparray)))
#     assert_array_equal(nparray, zarray[:])
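test_vindex above only exercises the read path. As a hedged sketch outside this commit, a write-path counterpart could mirror the write-back pattern of test_basic_indexing; it assumes Array.vindex accepts assignment the same way plain indexing does, which the diff does not confirm.

# Hypothetical follow-up test (not in this commit): write through vindex and
# compare against an in-memory numpy copy. Assumes zarray.vindex supports
# __setitem__.
import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
import numpy as np
from hypothesis import given
from numpy.testing import assert_array_equal

from zarr.strategies import arrays


@given(data=st.data())
def test_vindex_setitem(data):
    zarray = data.draw(arrays())
    nparray = zarray[:]

    indexer = data.draw(npst.integer_array_indices(shape=nparray.shape))
    new_data = np.ones_like(nparray[indexer])

    # Write through both the zarr array and the numpy copy, then compare.
    zarray.vindex[indexer] = new_data
    nparray[indexer] = new_data
    assert_array_equal(nparray, zarray[:])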