From 45571d5106b8ed5d29f6f85e4f5ab06ad211c081 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?=
Date: Fri, 10 May 2024 10:50:16 +0200
Subject: [PATCH 1/2] Add keyword to asarray

---
 pyproject.toml       |  2 +-
 src/finch/tensor.py  | 87 +++++++++++++++++++++++++++++++-------------
 tests/test_sparse.py |  5 ++-
 3 files changed, 65 insertions(+), 29 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e1534e2..a098502 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "finch-tensor"
-version = "0.1.25"
+version = "0.1.26"
 description = ""
 authors = ["Willow Ahrens "]
 readme = "README.md"
diff --git a/src/finch/tensor.py b/src/finch/tensor.py
index 3fa7b8f..958e305 100644
--- a/src/finch/tensor.py
+++ b/src/finch/tensor.py
@@ -1,6 +1,5 @@
 import builtins
 from typing import Any, Callable, Optional, Iterable, Literal
-import warnings

 import numpy as np
 from numpy.core.numeric import normalize_axis_index, normalize_axis_tuple
@@ -53,6 +52,9 @@ class Tensor(_Display, SparseArray):
         order. Default: row-major.
     fill_value : np.number, optional
         Only used when `numpy.ndarray` or `scipy.sparse` is passed.
+    copy : bool, optional
+        If ``True``, then the object is copied. If ``None`` then the object is copied only if needed.
+        For ``False`` it raises a ``ValueError`` if a copy cannot be avoided. Default: ``None``.

     Returns
     -------
@@ -86,26 +88,33 @@ def __init__(
         /,
         *,
         fill_value: np.number | None = None,
+        copy: bool | None = None,
     ):
         if isinstance(obj, (int, float, complex, bool, list)):
-            obj = np.array(obj)
+            obj = np.array(obj, copy=copy)
             if fill_value is None:
                 fill_value = 0.0

         if _is_scipy_sparse_obj(obj):  # scipy constructor
-            jl_data = self._from_scipy_sparse(obj, fill_value=fill_value)
+            jl_data = self._from_scipy_sparse(obj, fill_value=fill_value, copy=copy)
             self._obj = jl_data
         elif isinstance(obj, np.ndarray):  # numpy constructor
-            jl_data = self._from_numpy(obj, fill_value=fill_value)
+            jl_data = self._from_numpy(obj, fill_value=fill_value, copy=copy)
             self._obj = jl_data
         elif isinstance(obj, Storage):  # from-storage constructor
+            if copy:
+                self._raise_julia_copy_not_supported()
             order = self.preprocess_order(
                 obj.order, self.get_lvl_ndim(obj.levels_descr._obj)
             )
             self._obj = jl.swizzle(jl.Tensor(obj.levels_descr._obj), *order)
         elif jl.isa(obj, jl.Finch.Tensor):  # raw-Julia-object constructors
+            if copy:
+                self._raise_julia_copy_not_supported()
             self._obj = jl.swizzle(obj, *tuple(range(1, jl.ndims(obj) + 1)))
         elif jl.isa(obj, jl.Finch.SwizzleArray) or jl.isa(obj, jl.Finch.LazyTensor):
+            if copy:
+                self._raise_julia_copy_not_supported()
             self._obj = obj
         elif isinstance(obj, Tensor):
             self._obj = obj._obj
@@ -283,6 +292,16 @@ def _is_dense(self) -> bool:

     def _order(self) -> tuple[int, ...]:
         return jl.typeof(self._obj).parameters[1]

+    @property
+    def device(self) -> str:
+        return "cpu"
+
+    def to_device(self, device: Device, /, *, stream: int | Any | None = None) -> "Tensor":
+        if device != "cpu":
+            raise ValueError("Only `device='cpu'` is supported.")
+
+        return self
+
     def is_computed(self) -> bool:
         return not jl.isa(self._obj, jl.Finch.LazyTensor)
@@ -368,7 +387,9 @@ def _from_other_tensor(cls, tensor: "Tensor", storage: Storage | None) -> JuliaO
         )

     @classmethod
-    def _from_numpy(cls, arr: np.ndarray, fill_value: np.number) -> JuliaObj:
+    def _from_numpy(cls, arr: np.ndarray, fill_value: np.number, copy: bool | None = None) -> JuliaObj:
+        if copy:
+            arr = arr.copy()
         order_char = "F" if np.isfortran(arr) else "C"
         order = cls.preprocess_order(order_char, arr.ndim)
         inv_order = tuple(i - 1 for i in jl.invperm(order))
@@ -385,21 +406,31 @@ def _from_numpy(cls, arr: np.ndarray, fill_value: np.number) -> JuliaObj:
         return jl.swizzle(jl.Tensor(lvl._obj), *order)

     @classmethod
-    def from_scipy_sparse(cls, x, fill_value=None) -> "Tensor":
+    def from_scipy_sparse(
+        cls,
+        x,
+        fill_value: np.number | None = None,
+        copy: bool | None = None,
+    ) -> "Tensor":
         if not _is_scipy_sparse_obj(x):
             raise ValueError("{x} is not a SciPy sparse object.")
-        return Tensor(x, fill_value=fill_value)
+        return Tensor(x, fill_value=fill_value, copy=copy)

     @classmethod
-    def _from_scipy_sparse(cls, x, fill_value=None) -> JuliaObj:
+    def _from_scipy_sparse(
+        cls,
+        x,
+        *,
+        fill_value: np.number | None = None,
+        copy: bool | None = None,
+    ) -> JuliaObj:
+        if copy is False and not (x.has_canonical_format and x.format in ("coo", "csr", "csc")):
+            raise ValueError("Unable to avoid copy while creating an array as requested.")
+        if copy or not x.has_canonical_format:
+            x = x.copy()
         if x.format not in ("coo", "csr", "csc"):
             x = x.asformat("coo")
         if not x.has_canonical_format:
-            warnings.warn(
-                "SciPy sparse input must be in a canonical format. "
-                "Calling `sum_duplicates`."
-            )
-            x = x.copy()
             x.sum_duplicates()
         assert x.has_canonical_format

@@ -581,15 +612,9 @@ def to_scipy_sparse(self, accept_fv=None):
         else:
             raise ValueError("Tensor can't be converted to scipy.sparse object.")

-    @property
-    def device(self) -> str:
-        return "cpu"
-
-    def to_device(self, device: Device, /, *, stream: int | Any | None = None) -> "Tensor":
-        if device != "cpu":
-            raise ValueError("Only `device='cpu'` is supported.")
-
-        return self
+    @staticmethod
+    def _raise_julia_copy_not_supported() -> None:
+        raise ValueError("copy=True isn't supported for Julia object inputs")

     def __array_namespace__(self, *, api_version: str | None = None) -> Any:
         if api_version is None:
@@ -615,13 +640,23 @@ def random(shape, density=0.01, random_state=None):
     return Tensor(jl.fsprand(*args))


-def asarray(obj, /, *, dtype=None, format=None, fill_value=None, device=None):
+def asarray(
+    obj,
+    /,
+    *,
+    dtype: DType | None = None,
+    format: str | None = None,
+    fill_value: np.number | None = None,
+    device: Device | None = None,
+    copy: bool | None = None,
+) -> Tensor:
     if format not in {"coo", "csr", "csc", "csf", "dense", None}:
         raise ValueError(f"{format} format not supported.")
     _validate_device(device)
-    tensor = obj if isinstance(obj, Tensor) else Tensor(obj, fill_value=fill_value)
-
+    tensor = obj if isinstance(obj, Tensor) else Tensor(obj, fill_value=fill_value, copy=copy)
     if format is not None:
+        if copy is False:
+            raise ValueError("Unable to avoid copy while creating an array as requested.")
         order = tensor.get_order()
         if format == "coo":
             storage = Storage(SparseCOO(tensor.ndim, Element(tensor.fill_value)), order)
@@ -643,7 +678,7 @@ def asarray(obj, /, *, dtype=None, format=None, fill_value=None, device=None):
         tensor = tensor.to_storage(storage)

     if dtype is not None:
-        return astype(tensor, dtype)
+        return astype(tensor, dtype, copy=copy)
     else:
         return tensor

diff --git a/tests/test_sparse.py b/tests/test_sparse.py
index 9749d4b..e712c1f 100644
--- a/tests/test_sparse.py
+++ b/tests/test_sparse.py
@@ -44,9 +44,10 @@ def test_wrappers(dtype, jl_dtype, order):

 @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex128])
 @pytest.mark.parametrize("order", ["C", "F", None])
-def test_no_copy_fully_dense(dtype, order, arr3d):
+@pytest.mark.parametrize("copy", [True, False, None])
+def test_no_copy_fully_dense(dtype, order, copy, arr3d):
     arr = np.array(arr3d, dtype=dtype, order=order)
-    arr_finch = finch.Tensor(arr)
+    arr_finch = finch.Tensor(arr, copy=copy)
     arr_todense = arr_finch.todense()

     assert_equal(arr_todense, arr)

From 282b8aec25af72d0dfafaef4fd75ee832b8ec62e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?=
Date: Thu, 6 Jun 2024 12:40:48 +0200
Subject: [PATCH 2/2] Tweak `copy` keyword logic

---
 src/finch/tensor.py              | 11 +++++++----
 tests/test_scipy_constructors.py |  7 ++++---
 tests/test_sparse.py             |  8 +++++---
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/finch/tensor.py b/src/finch/tensor.py
index 958e305..7b45b01 100644
--- a/src/finch/tensor.py
+++ b/src/finch/tensor.py
@@ -1,5 +1,6 @@
 import builtins
 from typing import Any, Callable, Optional, Iterable, Literal
+import warnings

 import numpy as np
 from numpy.core.numeric import normalize_axis_index, normalize_axis_tuple
@@ -91,7 +92,9 @@ def __init__(
         copy: bool | None = None,
     ):
         if isinstance(obj, (int, float, complex, bool, list)):
-            obj = np.array(obj, copy=copy)
+            if copy is False:
+                raise ValueError("copy=False isn't supported for scalar inputs and Python lists")
+            obj = np.asarray(obj)
             if fill_value is None:
                 fill_value = 0.0

@@ -424,12 +427,12 @@ def _from_scipy_sparse(
         fill_value: np.number | None = None,
         copy: bool | None = None,
     ) -> JuliaObj:
-        if copy is False and not (x.has_canonical_format and x.format in ("coo", "csr", "csc")):
+        if copy is False and not (x.format in ("coo", "csr", "csc") and x.has_canonical_format):
             raise ValueError("Unable to avoid copy while creating an array as requested.")
-        if copy or not x.has_canonical_format:
-            x = x.copy()
         if x.format not in ("coo", "csr", "csc"):
             x = x.asformat("coo")
+        if copy:
+            x = x.copy()
         if not x.has_canonical_format:
             x.sum_duplicates()
         assert x.has_canonical_format
diff --git a/tests/test_scipy_constructors.py b/tests/test_scipy_constructors.py
index a3a1686..972121b 100644
--- a/tests/test_scipy_constructors.py
+++ b/tests/test_scipy_constructors.py
@@ -112,9 +112,10 @@ def test_from_scipy_sparse(format_with_pattern, fill_value):
 def test_non_canonical_format(format):
     sp_arr = sp.random(3, 4, density=0.5, format=format)

-    with pytest.warns(
-        UserWarning, match="SciPy sparse input must be in a canonical format."
+    with pytest.raises(
+        ValueError, match="Unable to avoid copy while creating an array"
     ):
-        finch_arr = finch.asarray(sp_arr)
+        finch.asarray(sp_arr, copy=False)

+    finch_arr = finch.asarray(sp_arr)
     assert_equal(finch_arr.todense(), sp_arr.toarray())
diff --git a/tests/test_sparse.py b/tests/test_sparse.py
index e712c1f..dae91f2 100644
--- a/tests/test_sparse.py
+++ b/tests/test_sparse.py
@@ -45,14 +45,16 @@ def test_wrappers(dtype, jl_dtype, order):
 @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex128])
 @pytest.mark.parametrize("order", ["C", "F", None])
 @pytest.mark.parametrize("copy", [True, False, None])
-def test_no_copy_fully_dense(dtype, order, copy, arr3d):
+def test_copy_fully_dense(dtype, order, copy, arr3d):
     arr = np.array(arr3d, dtype=dtype, order=order)
     arr_finch = finch.Tensor(arr, copy=copy)
     arr_todense = arr_finch.todense()

     assert_equal(arr_todense, arr)
-    assert np.shares_memory(arr_todense, arr)
-
+    if copy:
+        assert not np.shares_memory(arr_todense, arr)
+    else:
+        assert np.shares_memory(arr_todense, arr)

 def test_coo(rng):
     coords = (
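Illustrative usage (not part of the patch series): a minimal sketch of the `copy` semantics these two commits introduce, assuming a finch-tensor build that includes both patches plus NumPy and SciPy. The error message is taken from the diffs above; everything else is only an example.

# Sketch only: demonstrates the `copy` keyword added above; not part of the patches.
import numpy as np
import scipy.sparse as sp

import finch

arr = np.zeros((3, 4))

# copy=None (default): copy only if needed; a dense NumPy input keeps sharing memory.
tns = finch.Tensor(arr, copy=None)
assert np.shares_memory(tns.todense(), arr)

# copy=True: always copies the input buffer.
tns_copied = finch.Tensor(arr, copy=True)
assert not np.shares_memory(tns_copied.todense(), arr)

# copy=False: raise instead of copying, e.g. for a SciPy sparse input that is
# not already in a canonical COO/CSR/CSC form.
sp_arr = sp.random(3, 4, density=0.5, format="lil")
try:
    finch.asarray(sp_arr, copy=False)
except ValueError as e:
    print(e)  # "Unable to avoid copy while creating an array as requested."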