diff --git a/pyproject.toml b/pyproject.toml
index b0ce945..b29b967 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "finch-tensor"
-version = "0.1.22"
+version = "0.1.23"
 description = ""
 authors = ["Willow Ahrens "]
 readme = "README.md"
diff --git a/src/finch/tensor.py b/src/finch/tensor.py
index ef781a8..40d5389 100644
--- a/src/finch/tensor.py
+++ b/src/finch/tensor.py
@@ -52,7 +52,7 @@ class Tensor(_Display, SparseArray):
         then `N .^ order == strides`. Available options are "C" (row-major),
         "F" (column-major), or a custom order. Default: row-major.
     fill_value : np.number, optional
-        Only used when `arr : np.ndarray` is passed.
+        Only used when `numpy.ndarray` or `scipy.sparse` is passed.
 
     Returns
     -------
@@ -85,13 +85,15 @@ def __init__(
         obj: np.ndarray | spmatrix | Storage | JuliaObj,
         /,
         *,
-        fill_value: np.number = 0.0,
+        fill_value: np.number | None = None,
     ):
         if isinstance(obj, (int, float, complex, bool, list)):
             obj = np.array(obj)
+        if fill_value is None:
+            fill_value = 0.0
 
         if _is_scipy_sparse_obj(obj):  # scipy constructor
-            jl_data = self._from_scipy_sparse(obj)
+            jl_data = self._from_scipy_sparse(obj, fill_value=fill_value)
             self._obj = jl_data
         elif isinstance(obj, np.ndarray):  # numpy constructor
             jl_data = self._from_numpy(obj, fill_value=fill_value)
@@ -381,13 +383,13 @@ def _from_numpy(cls, arr: np.ndarray, fill_value: np.number) -> JuliaObj:
         return jl.swizzle(jl.Tensor(lvl._obj), *order)
 
     @classmethod
-    def from_scipy_sparse(cls, x) -> "Tensor":
+    def from_scipy_sparse(cls, x, fill_value=None) -> "Tensor":
         if not _is_scipy_sparse_obj(x):
-            raise ValueError("{x} is not a SciPy sparse object.")
-        return Tensor(x)
+            raise ValueError(f"{x} is not a SciPy sparse object.")
+        return Tensor(x, fill_value=fill_value)
 
     @classmethod
-    def _from_scipy_sparse(cls, x) -> JuliaObj:
+    def _from_scipy_sparse(cls, x, fill_value=0.0) -> JuliaObj:
         if x.format not in ("coo", "csr", "csc"):
             x = x.asformat("coo")
         if not x.has_canonical_format:
@@ -405,16 +407,19 @@ def _from_scipy_sparse(cls, x) -> JuliaObj:
                 data=x.data,
                 shape=x.shape[::-1],
                 order=Tensor.row_major,
+                fill_value=fill_value,
             )
         elif x.format == "csc":
             return cls.construct_csc_jl_object(
                 arg=(x.data, x.indices, x.indptr),
                 shape=x.shape,
+                fill_value=fill_value,
             )
         elif x.format == "csr":
             return cls.construct_csr_jl_object(
                 arg=(x.data, x.indices, x.indptr),
                 shape=x.shape,
+                fill_value=fill_value,
             )
         else:
             raise ValueError(f"Unsupported SciPy format: {type(x)}")
@@ -470,25 +475,31 @@ def _construct_compressed2d_jl_object(
 
     @classmethod
     def construct_csc_jl_object(
-        cls, arg: TupleOf3Arrays, shape: tuple[int, ...]
+        cls, arg: TupleOf3Arrays, shape: tuple[int, ...], fill_value: np.number = 0.0
     ) -> JuliaObj:
-        return cls._construct_compressed2d_jl_object(arg=arg, shape=shape, order=(1, 2))
+        return cls._construct_compressed2d_jl_object(
+            arg=arg, shape=shape, order=(1, 2), fill_value=fill_value
+        )
 
     @classmethod
-    def construct_csc(cls, arg: TupleOf3Arrays, shape: tuple[int, ...]) -> "Tensor":
-        return Tensor(cls.construct_csc_jl_object(arg, shape))
+    def construct_csc(
+        cls, arg: TupleOf3Arrays, shape: tuple[int, ...], fill_value: np.number = 0.0
+    ) -> "Tensor":
+        return Tensor(cls.construct_csc_jl_object(arg, shape, fill_value))
 
     @classmethod
     def construct_csr_jl_object(
-        cls, arg: TupleOf3Arrays, shape: tuple[int, ...]
+        cls, arg: TupleOf3Arrays, shape: tuple[int, ...], fill_value: np.number = 0.0
     ) -> JuliaObj:
         return cls._construct_compressed2d_jl_object(
-            arg=arg, shape=shape[::-1], order=(2, 1)
+            arg=arg, shape=shape[::-1], order=(2, 1), fill_value=fill_value
         )
 
     @classmethod
-    def construct_csr(cls, arg: TupleOf3Arrays, shape: tuple[int, ...]) -> "Tensor":
-        return Tensor(cls.construct_csr_jl_object(arg, shape))
+    def construct_csr(
+        cls, arg: TupleOf3Arrays, shape: tuple[int, ...], fill_value: np.number = 0.0
+    ) -> "Tensor":
+        return Tensor(cls.construct_csr_jl_object(arg, shape, fill_value))
 
     @staticmethod
     def construct_csf_jl_object(
@@ -515,19 +526,30 @@ def construct_csf_jl_object(
         return jl_data
 
     @classmethod
-    def construct_csf(cls, arg: TupleOf3Arrays, shape: tuple[int, ...]) -> "Tensor":
-        return Tensor(cls.construct_csf_jl_object(arg, shape))
+    def construct_csf(
+        cls,
+        arg: TupleOf3Arrays,
+        shape: tuple[int, ...],
+        fill_value: np.number = 0.0
+    ) -> "Tensor":
+        return Tensor(cls.construct_csf_jl_object(arg, shape, fill_value))
 
-    def to_scipy_sparse(self):
+    def to_scipy_sparse(self, accept_fv=None):
         import scipy.sparse as sp
 
+        if accept_fv is None:
+            accept_fv = [0]
+        elif not isinstance(accept_fv, Iterable):
+            accept_fv = [accept_fv]
+
         if self.ndim != 2:
             raise ValueError(
                 "Can only convert a 2-dimensional array to a Scipy sparse matrix."
             )
-        if self.fill_value != 0:
+        if not builtins.any(_eq_scalars(self.fill_value, fv) for fv in accept_fv):
             raise ValueError(
-                "Can only convert arrays with 0 fill value to a Scipy sparse matrix."
+                f"Can only convert arrays with {accept_fv} fill-values "
+                "to a Scipy sparse matrix."
             )
         order = self.get_order()
         body = self._obj.body
@@ -581,10 +603,10 @@ def random(shape, density=0.01, random_state=None):
     return Tensor(jl.fsprand(*args))
 
 
-def asarray(obj, /, *, dtype=None, format=None):
+def asarray(obj, /, *, dtype=None, format=None, fill_value=None):
     if format not in {"coo", "csr", "csc", "csf", "dense", None}:
         raise ValueError(f"{format} format not supported.")
-    tensor = obj if isinstance(obj, Tensor) else Tensor(obj)
+    tensor = obj if isinstance(obj, Tensor) else Tensor(obj, fill_value=fill_value)
 
     if format is not None:
         order = tensor.get_order()
@@ -1076,3 +1098,13 @@ def _process_lazy_indexing(key: tuple) -> tuple:
         else:
             raise ValueError(f"Invalid lazy index member: {idx}")
     return new_key
+
+
+def _eq_scalars(x, y):
+    """Return whether x and y are equal, treating NaN as equal to NaN."""
+    if x is None or y is None:
+        return x == y
+    if jl.isnan(x) or jl.isnan(y):
+        return jl.isnan(x) and jl.isnan(y)
+    else:
+        return x == y
diff --git a/tests/test_scipy_constructors.py b/tests/test_scipy_constructors.py
index 0972a3f..a3a1686 100644
--- a/tests/test_scipy_constructors.py
+++ b/tests/test_scipy_constructors.py
@@ -4,6 +4,7 @@
 import scipy.sparse as sp
 
 import finch
+from finch.tensor import _eq_scalars
 
 
 def test_scipy_coo(arr2d):
@@ -44,14 +45,29 @@ def test_scipy_compressed2d(arr2d, cls):
         ("csr", sp.csr_matrix, "C"),
     ],
 )
-def test_to_scipy_sparse(format_with_cls_with_order):
+@pytest.mark.parametrize("fill_value_in", [0, finch.inf, finch.nan, 5, None])
+@pytest.mark.parametrize("fill_value_out", [0, finch.inf, finch.nan, 5, None])
+def test_to_scipy_sparse(format_with_cls_with_order, fill_value_in, fill_value_out):
     format, sp_class, order = format_with_cls_with_order
     np_arr = np.random.default_rng(0).random((4, 5))
     np_arr = np.array(np_arr, order=order)
 
-    finch_arr = finch.asarray(np_arr, format=format)
+    finch_arr = finch.asarray(np_arr, format=format, fill_value=fill_value_in)
 
-    actual = finch_arr.to_scipy_sparse()
+    if (
+        not (fill_value_in in {0, None} and fill_value_out in {0, None}) and
+        not _eq_scalars(fill_value_in, fill_value_out)
+    ):
+        match_fill_value_out = 0 if fill_value_out is None else fill_value_out
+        with pytest.raises(
+            ValueError,
+            match=fr"Can only convert arrays with \[{match_fill_value_out}\] fill-values "
+            "to a Scipy sparse matrix."
+        ):
+            finch_arr.to_scipy_sparse(accept_fv=fill_value_out)
+        return
+
+    actual = finch_arr.to_scipy_sparse(accept_fv=fill_value_out)
 
     assert isinstance(actual, sp_class)
     assert_equal(actual.todense(), np_arr)
@@ -81,12 +97,15 @@ def test_to_scipy_sparse_invalid_input():
         ("dok", "SparseCOO"),
     ],
 )
-def test_from_scipy_sparse(format_with_pattern):
+@pytest.mark.parametrize("fill_value", [0, finch.inf, finch.nan, 5, None])
+def test_from_scipy_sparse(format_with_pattern, fill_value):
     format, pattern = format_with_pattern
     sp_arr = sp.random(10, 5, density=0.1, format=format)
 
-    result = finch.Tensor.from_scipy_sparse(sp_arr)
+    result = finch.Tensor.from_scipy_sparse(sp_arr, fill_value=fill_value)
     assert pattern in str(result)
+    fill_value = 0 if fill_value is None else fill_value
+    assert _eq_scalars(result.fill_value, fill_value)
 
 
 @pytest.mark.parametrize("format", ["coo", "bsr"])