Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scipy.sparse fill-value fix. #59

Merged
merged 1 commit into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "finch-tensor"
version = "0.1.22"
version = "0.1.23"
description = ""
authors = ["Willow Ahrens <willow.marie.ahrens@gmail.com>"]
readme = "README.md"
Expand Down
73 changes: 52 additions & 21 deletions src/finch/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class Tensor(_Display, SparseArray):
then `N .^ order == strides`. Available options are "C" (row-major), "F" (column-major), or a custom
order. Default: row-major.
fill_value : np.number, optional
Only used when `arr : np.ndarray` is passed.
Only used when `numpy.ndarray` or `scipy.sparse` is passed.

Returns
-------
Expand Down Expand Up @@ -85,13 +85,15 @@ def __init__(
obj: np.ndarray | spmatrix | Storage | JuliaObj,
/,
*,
fill_value: np.number = 0.0,
fill_value: np.number | None = None,
):
if isinstance(obj, (int, float, complex, bool, list)):
obj = np.array(obj)
if fill_value is None:
fill_value = 0.0

if _is_scipy_sparse_obj(obj): # scipy constructor
jl_data = self._from_scipy_sparse(obj)
jl_data = self._from_scipy_sparse(obj, fill_value=fill_value)
self._obj = jl_data
elif isinstance(obj, np.ndarray): # numpy constructor
jl_data = self._from_numpy(obj, fill_value=fill_value)
Expand Down Expand Up @@ -381,13 +383,13 @@ def _from_numpy(cls, arr: np.ndarray, fill_value: np.number) -> JuliaObj:
return jl.swizzle(jl.Tensor(lvl._obj), *order)

@classmethod
def from_scipy_sparse(cls, x) -> "Tensor":
def from_scipy_sparse(cls, x, fill_value=None) -> "Tensor":
if not _is_scipy_sparse_obj(x):
raise ValueError("{x} is not a SciPy sparse object.")
return Tensor(x)
return Tensor(x, fill_value=fill_value)

@classmethod
def _from_scipy_sparse(cls, x) -> JuliaObj:
def _from_scipy_sparse(cls, x, fill_value=None) -> JuliaObj:
if x.format not in ("coo", "csr", "csc"):
x = x.asformat("coo")
if not x.has_canonical_format:
Expand All @@ -405,16 +407,19 @@ def _from_scipy_sparse(cls, x) -> JuliaObj:
data=x.data,
shape=x.shape[::-1],
order=Tensor.row_major,
fill_value=fill_value,
)
elif x.format == "csc":
return cls.construct_csc_jl_object(
arg=(x.data, x.indices, x.indptr),
shape=x.shape,
fill_value=fill_value,
)
elif x.format == "csr":
return cls.construct_csr_jl_object(
arg=(x.data, x.indices, x.indptr),
shape=x.shape,
fill_value=fill_value,
)
else:
raise ValueError(f"Unsupported SciPy format: {type(x)}")
Expand Down Expand Up @@ -470,25 +475,31 @@ def _construct_compressed2d_jl_object(

@classmethod
def construct_csc_jl_object(
cls, arg: TupleOf3Arrays, shape: tuple[int, ...]
cls, arg: TupleOf3Arrays, shape: tuple[int, ...], fill_value: np.number = 0.0
) -> JuliaObj:
return cls._construct_compressed2d_jl_object(arg=arg, shape=shape, order=(1, 2))
return cls._construct_compressed2d_jl_object(
arg=arg, shape=shape, order=(1, 2), fill_value=fill_value
)

@classmethod
def construct_csc(cls, arg: TupleOf3Arrays, shape: tuple[int, ...]) -> "Tensor":
return Tensor(cls.construct_csc_jl_object(arg, shape))
def construct_csc(
cls, arg: TupleOf3Arrays, shape: tuple[int, ...], fill_value: np.number = 0.0
) -> "Tensor":
return Tensor(cls.construct_csc_jl_object(arg, shape, fill_value))

@classmethod
def construct_csr_jl_object(
cls, arg: TupleOf3Arrays, shape: tuple[int, ...]
cls, arg: TupleOf3Arrays, shape: tuple[int, ...], fill_value: np.number = 0.0
) -> JuliaObj:
return cls._construct_compressed2d_jl_object(
arg=arg, shape=shape[::-1], order=(2, 1)
arg=arg, shape=shape[::-1], order=(2, 1), fill_value=fill_value
)

@classmethod
def construct_csr(cls, arg: TupleOf3Arrays, shape: tuple[int, ...]) -> "Tensor":
return Tensor(cls.construct_csr_jl_object(arg, shape))
def construct_csr(
cls, arg: TupleOf3Arrays, shape: tuple[int, ...], fill_value: np.number = 0.0
) -> "Tensor":
return Tensor(cls.construct_csr_jl_object(arg, shape, fill_value))

@staticmethod
def construct_csf_jl_object(
Expand All @@ -515,19 +526,30 @@ def construct_csf_jl_object(
return jl_data

@classmethod
def construct_csf(cls, arg: TupleOf3Arrays, shape: tuple[int, ...]) -> "Tensor":
return Tensor(cls.construct_csf_jl_object(arg, shape))
def construct_csf(
cls,
arg: TupleOf3Arrays,
shape: tuple[int, ...],
fill_value: np.number = 0.0
) -> "Tensor":
return Tensor(cls.construct_csf_jl_object(arg, shape, fill_value))

def to_scipy_sparse(self):
def to_scipy_sparse(self, accept_fv=None):
import scipy.sparse as sp

if accept_fv is None:
accept_fv = [0]
elif not isinstance(accept_fv, Iterable):
accept_fv = [accept_fv]

if self.ndim != 2:
raise ValueError(
"Can only convert a 2-dimensional array to a Scipy sparse matrix."
)
if self.fill_value != 0:
if not builtins.any(_eq_scalars(self.fill_value, fv) for fv in accept_fv):
raise ValueError(
"Can only convert arrays with 0 fill value to a Scipy sparse matrix."
f"Can only convert arrays with {accept_fv} fill-values "
"to a Scipy sparse matrix."
)
order = self.get_order()
body = self._obj.body
Expand Down Expand Up @@ -581,10 +603,10 @@ def random(shape, density=0.01, random_state=None):
return Tensor(jl.fsprand(*args))


def asarray(obj, /, *, dtype=None, format=None):
def asarray(obj, /, *, dtype=None, format=None, fill_value=None):
if format not in {"coo", "csr", "csc", "csf", "dense", None}:
raise ValueError(f"{format} format not supported.")
tensor = obj if isinstance(obj, Tensor) else Tensor(obj)
tensor = obj if isinstance(obj, Tensor) else Tensor(obj, fill_value=fill_value)

if format is not None:
order = tensor.get_order()
Expand Down Expand Up @@ -1076,3 +1098,12 @@ def _process_lazy_indexing(key: tuple) -> tuple:
else:
raise ValueError(f"Invalid lazy index member: {idx}")
return new_key


def _eq_scalars(x, y):
if x is None or y is None:
return x == y
if jl.isnan(x) or jl.isnan(y):
return jl.isnan(x) and jl.isnan(y)
else:
return x == y
29 changes: 24 additions & 5 deletions tests/test_scipy_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import scipy.sparse as sp

import finch
from finch.tensor import _eq_scalars


def test_scipy_coo(arr2d):
Expand Down Expand Up @@ -44,14 +45,29 @@ def test_scipy_compressed2d(arr2d, cls):
("csr", sp.csr_matrix, "C"),
],
)
def test_to_scipy_sparse(format_with_cls_with_order):
@pytest.mark.parametrize("fill_value_in", [0, finch.inf, finch.nan, 5, None])
@pytest.mark.parametrize("fill_value_out", [0, finch.inf, finch.nan, 5, None])
def test_to_scipy_sparse(format_with_cls_with_order, fill_value_in, fill_value_out):
format, sp_class, order = format_with_cls_with_order
np_arr = np.random.default_rng(0).random((4, 5))
np_arr = np.array(np_arr, order=order)

finch_arr = finch.asarray(np_arr, format=format)
finch_arr = finch.asarray(np_arr, format=format, fill_value=fill_value_in)

actual = finch_arr.to_scipy_sparse()
if (
not (fill_value_in in {0, None} and fill_value_out in {0, None}) and
not _eq_scalars(fill_value_in, fill_value_out)
):
match_fill_value_out = 0 if fill_value_out is None else fill_value_out
with pytest.raises(
ValueError,
match=fr"Can only convert arrays with \[{match_fill_value_out}\] fill-values "
"to a Scipy sparse matrix."
):
finch_arr.to_scipy_sparse(accept_fv=fill_value_out)
return

actual = finch_arr.to_scipy_sparse(accept_fv=fill_value_out)

assert isinstance(actual, sp_class)
assert_equal(actual.todense(), np_arr)
Expand Down Expand Up @@ -81,12 +97,15 @@ def test_to_scipy_sparse_invalid_input():
("dok", "SparseCOO"),
],
)
def test_from_scipy_sparse(format_with_pattern):
@pytest.mark.parametrize("fill_value", [0, finch.inf, finch.nan, 5, None])
def test_from_scipy_sparse(format_with_pattern, fill_value):
format, pattern = format_with_pattern
sp_arr = sp.random(10, 5, density=0.1, format=format)

result = finch.Tensor.from_scipy_sparse(sp_arr)
result = finch.Tensor.from_scipy_sparse(sp_arr, fill_value=fill_value)
assert pattern in str(result)
fill_value = 0 if fill_value is None else fill_value
assert _eq_scalars(result.fill_value, fill_value)


@pytest.mark.parametrize("format", ["coo", "bsr"])
Expand Down
Loading