Skip to content

Commit

Permalink
Closes Bears-R-Us#3395 align to numpy scalar types (Bears-R-Us#3396)
Browse files Browse the repository at this point in the history
Co-authored-by: Amanda Potts <ajpotts@users.noreply.github.com>
  • Loading branch information
ajpotts and ajpotts committed Jul 30, 2024
1 parent 607fe1c commit 5dbf619
Show file tree
Hide file tree
Showing 25 changed files with 194 additions and 186 deletions.
4 changes: 1 addition & 3 deletions PROTO_tests/tests/dataframe_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,6 @@ def test_dataframe_creation(self, size):
ak_to_pd = akdf.to_pandas()
assert_frame_equal(pddf, ak_to_pd)


@pytest.mark.parametrize("size", pytest.prob_size)
@pytest.mark.parametrize("dtype", ["float64", "int64"])
def test_from_pandas_with_index(self, size, dtype):
Expand Down Expand Up @@ -300,7 +299,6 @@ def test_to_pandas_categorical_column(self, size):

pd_assert_frame_equal(df.to_pandas(retain_index=True), expected_df)


def test_convenience_init(self):
dict1 = {"0": [1, 2], "1": [True, False], "2": ["foo", "bar"], "3": [2.3, -1.8]}
dict2 = {"0": (1, 2), "1": (True, False), "2": ("foo", "bar"), "3": (2.3, -1.8)}
Expand Down Expand Up @@ -1231,7 +1229,7 @@ def test_dropna(self):

def test_memory_usage(self):
dtypes = [ak.int64, ak.float64, ak.bool_]
data = dict([(str(t), ak.ones(5000, dtype=ak.int64).astype(t)) for t in dtypes])
data = dict([(str(ak.dtype(t)), ak.ones(5000, dtype=ak.int64).astype(t)) for t in dtypes])
df = ak.DataFrame(data)
ak_memory_usage = df.memory_usage()
pd_memory_usage = pd.Series(
Expand Down
27 changes: 13 additions & 14 deletions PROTO_tests/tests/dtypes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,10 @@ def test_resolve_scalar_dtype(self):
assert "bigint" == dtypes.resolve_scalar_dtype(2**64)

def test_is_dtype_in_union(self):
from arkouda.dtypes import _is_dtype_in_union
from typing import Union

from arkouda.dtypes import _is_dtype_in_union

float_scalars = Union[float, np.float64, np.float32]
assert _is_dtype_in_union(np.float64, float_scalars)
# Test with a type not present in the union
Expand All @@ -84,24 +85,22 @@ def test_is_dtype_in_union(self):
assert ~_is_dtype_in_union(np.float64, float)

@pytest.mark.parametrize("size", pytest.prob_size)
def test_nbytes(self, size):
from arkouda.dtypes import BigInt

a = ak.cast(ak.arange(size), dt="bigint")
assert a.nbytes == size * BigInt.itemsize

dtype_list = [
@pytest.mark.parametrize(
"dtype",
[
ak.dtypes.uint8,
ak.dtypes.uint64,
ak.dtypes.int64,
ak.dtypes.float64,
ak.dtypes.bool_,
]

for dt in dtype_list:
a = ak.array(ak.arange(size), dtype=dt)
assert a.nbytes == size * dt.itemsize

ak.dtypes.bigint,
],
)
def test_nbytes(self, size, dtype):
a = ak.array(ak.arange(size), dtype=dtype)
assert a.nbytes == size * ak.dtype(dtype).itemsize

def test_nbytes_str(self):
a = ak.array(["a", "b", "c"])
c = ak.Categorical(a)
assert c.nbytes == 82
Expand Down
13 changes: 7 additions & 6 deletions PROTO_tests/tests/groupby_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@
from arkouda import sum as aksum
from arkouda.groupbyclass import GroupByReductionType
from arkouda.scipy import chisquare as akchisquare
from arkouda.dtypes import npstr

# block of variables and functions used in test_unique

UNIQUE_TYPES = [ak.categorical, ak.int64, ak.float64, npstr]
UNIQUE_TYPES = [ak.categorical, ak.int64, ak.float64, ak.str_]
VOWELS_AND_SUCH = ["a", "e", "i", "o", "u", "AB", 47, 2, 3.14159]
PICKS = np.array([f"base {i}" for i in range(10)])

Expand Down Expand Up @@ -804,7 +803,7 @@ def test_unique(self, data_type, prob_size):
F = False
np.random.seed(Jenny)
arrays = {
npstr: np.random.choice(VOWELS_AND_SUCH, prob_size),
ak.str_: np.random.choice(VOWELS_AND_SUCH, prob_size),
ak.int64: np.random.randint(0, prob_size // 3, prob_size),
ak.float64: np.random.uniform(0, prob_size // 3, prob_size),
ak.categorical: np.random.choice(PICKS, prob_size),
Expand Down Expand Up @@ -832,7 +831,7 @@ def test_unique(self, data_type, prob_size):
assert np.all(np_unique == np.sort(ak_TTF[0].to_ndarray()))

# Check groups and indices. If data was sorted, the group ndarray
# should just be list(range(len(nda))).
# should just be list(range(len(nda))).
# For unsorted data, a reordered copy of the pdarray is created
# based on the returned permutation.
# In both cases, broadcasting the unique values using the returned
Expand All @@ -842,7 +841,8 @@ def test_unique(self, data_type, prob_size):

# sorted

if data_type == ak.int64 : assert isSorted(ak_TFF[0].to_ndarray())
if data_type == ak.int64:
assert isSorted(ak_TFF[0].to_ndarray())
srange = np.arange(len(nda))
assert np.all(srange == ak_TTF[1].to_ndarray())
indices = ak_TTF[2]
Expand All @@ -851,7 +851,8 @@ def test_unique(self, data_type, prob_size):
# unsorted

aku = ak.unique(us_pda).to_ndarray()
if data_type == ak.int64 : assert isSorted(aku)
if data_type == ak.int64:
assert isSorted(aku)
reordering = ak_TFF[1]
reordered = us_pda[reordering]
indices = ak_TFF[2]
Expand Down
21 changes: 11 additions & 10 deletions PROTO_tests/tests/index_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,24 +380,25 @@ def test_get_level_values(self):

@pytest.mark.parametrize("size", pytest.prob_size)
def test_memory_usage(self, size):
from arkouda.dtypes import BigInt
from arkouda.dtypes import bigint
from arkouda.index import Index, MultiIndex

idx = Index(ak.cast(ak.array([1, 2, 3]), dt="bigint"))
assert idx.memory_usage() == 3 * BigInt.itemsize
assert idx.memory_usage() == 3 * bigint.itemsize

int64_size = ak.dtype(ak.int64).itemsize
idx = Index(ak.cast(ak.arange(size), dt="int64"))
assert idx.memory_usage(unit="GB") == size * ak.dtypes.int64.itemsize / (1024 * 1024 * 1024)
assert idx.memory_usage(unit="MB") == size * ak.dtypes.int64.itemsize / (1024 * 1024)
assert idx.memory_usage(unit="KB") == size * ak.dtypes.int64.itemsize / 1024
assert idx.memory_usage(unit="B") == size * ak.dtypes.int64.itemsize
assert idx.memory_usage(unit="GB") == size * int64_size / (1024 * 1024 * 1024)
assert idx.memory_usage(unit="MB") == size * int64_size / (1024 * 1024)
assert idx.memory_usage(unit="KB") == size * int64_size / 1024
assert idx.memory_usage(unit="B") == size * int64_size

midx = MultiIndex([ak.cast(ak.arange(size), dt="int64"), ak.cast(ak.arange(size), dt="int64")])
assert midx.memory_usage(unit="GB") == 2 * size * ak.dtypes.int64.itemsize / (1024 * 1024 * 1024)
assert midx.memory_usage(unit="GB") == 2 * size * int64_size / (1024 * 1024 * 1024)

assert midx.memory_usage(unit="MB") == 2 * size * ak.dtypes.int64.itemsize / (1024 * 1024)
assert midx.memory_usage(unit="KB") == 2 * size * ak.dtypes.int64.itemsize / 1024
assert midx.memory_usage(unit="B") == 2 * size * ak.dtypes.int64.itemsize
assert midx.memory_usage(unit="MB") == 2 * size * int64_size / (1024 * 1024)
assert midx.memory_usage(unit="KB") == 2 * size * int64_size / 1024
assert midx.memory_usage(unit="B") == 2 * size * int64_size

def test_is_unique(self):
i = ak.Index(ak.array([0, 1, 2]))
Expand Down
14 changes: 8 additions & 6 deletions PROTO_tests/tests/numeric_test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import subprocess
from math import isclose

import numpy as np
import pytest

import arkouda as ak
from arkouda.dtypes import npstr
from math import isclose
import subprocess

NUMERIC_TYPES = [ak.int64, ak.float64, ak.bool_, ak.uint64]
NO_BOOL = [ak.int64, ak.float64, ak.uint64]
NO_FLOAT = [ak.int64, ak.bool_, ak.uint64]
Expand Down Expand Up @@ -86,12 +88,12 @@ def alternatingTF(n):
(ak.bool_, ak.bool_),
(ak.int64, ak.int64),
(ak.int64, ak.float64),
(ak.int64, npstr),
(ak.int64, ak.str_),
(ak.float64, ak.float64),
(ak.float64, npstr),
(ak.float64, ak.str_),
(ak.uint8, ak.int64),
(ak.uint8, ak.float64),
(ak.uint8, npstr),
(ak.uint8, ak.str_),
]

# Most of the trigonometric and hyperbolic tests are identical, so they are combined
Expand Down
2 changes: 1 addition & 1 deletion PROTO_tests/tests/pdarray_creation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_array_creation(self, dtype):
ak.array(deque(range(fixed_size)), dtype),
ak.array([f"{i}" for i in range(fixed_size)], dtype=dtype),
]:
assert isinstance(pda, ak.pdarray if dtype != str else ak.Strings)
assert isinstance(pda, ak.pdarray if ak.dtype(dtype) != "str_" else ak.Strings)
assert len(pda) == fixed_size
assert dtype == pda.dtype

Expand Down
24 changes: 10 additions & 14 deletions PROTO_tests/tests/series_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,21 +235,17 @@ def test_index_as_index_compat(self):
@pytest.mark.parametrize("size", pytest.prob_size)
def test_memory_usage(self, size):
s = ak.Series(ak.arange(size))
assert s.memory_usage(unit="GB", index=False) == size * ak.dtypes.int64.itemsize / (
1024 * 1024 * 1024
)
assert s.memory_usage(unit="MB", index=False) == size * ak.dtypes.int64.itemsize / (1024 * 1024)
assert s.memory_usage(unit="KB", index=False) == size * ak.dtypes.int64.itemsize / 1024
assert s.memory_usage(unit="B", index=False) == size * ak.dtypes.int64.itemsize
int64_size = ak.dtype(ak.int64).itemsize

assert s.memory_usage(unit="GB", index=True) == 2 * size * ak.dtypes.int64.itemsize / (
1024 * 1024 * 1024
)
assert s.memory_usage(unit="MB", index=True) == 2 * size * ak.dtypes.int64.itemsize / (
1024 * 1024
)
assert s.memory_usage(unit="KB", index=True) == 2 * size * ak.dtypes.int64.itemsize / 1024
assert s.memory_usage(unit="B", index=True) == 2 * size * ak.dtypes.int64.itemsize
assert s.memory_usage(unit="GB", index=False) == size * int64_size / (1024 * 1024 * 1024)
assert s.memory_usage(unit="MB", index=False) == size * int64_size / (1024 * 1024)
assert s.memory_usage(unit="KB", index=False) == size * int64_size / 1024
assert s.memory_usage(unit="B", index=False) == size * int64_size

assert s.memory_usage(unit="GB", index=True) == 2 * size * int64_size / (1024 * 1024 * 1024)
assert s.memory_usage(unit="MB", index=True) == 2 * size * int64_size / (1024 * 1024)
assert s.memory_usage(unit="KB", index=True) == 2 * size * int64_size / 1024
assert s.memory_usage(unit="B", index=True) == 2 * size * int64_size

def test_map(self):
a = ak.Series(ak.array(["1", "1", "4", "4", "4"]))
Expand Down
10 changes: 6 additions & 4 deletions PROTO_tests/tests/setops_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def make_np_arrays(size, dtype):
# only used for error handling tests
a = np.random.random(size)
b = np.random.random(size)
elif dtype == bool:
elif dtype == ak.bool_:
a = np.random.randint(0, 1, size=size, dtype=dtype)
b = np.random.randint(0, 1, size=size, dtype=dtype)
else:
Expand Down Expand Up @@ -697,7 +697,7 @@ def are_pdarrays_equal(pda1, pda2):
]
for select_from in select_from_list:
count += 1
arr1 = select_from[ak.randint(0, select_from.size, 20, seed=seeds[2]+count)]
arr1 = select_from[ak.randint(0, select_from.size, 20, seed=seeds[2] + count)]

# Test a unique search space; the result should be identical to ak.find.
# Be sure to test both when all items are present and when some are missing.
Expand All @@ -710,7 +710,9 @@ def are_pdarrays_equal(pda1, pda2):
all_unique = ak.unique(arr2).size == arr2.size
if all_unique:
# ensure we match find
if not are_pdarrays_equal(idx_of_first_in_second, ak.find(arr1, arr2, remove_missing=True)):
if not are_pdarrays_equal(
idx_of_first_in_second, ak.find(arr1, arr2, remove_missing=True)
):
print("failed to match find")
print("second array all unique: ", all_unique)
print(seeds)
Expand All @@ -725,7 +727,7 @@ def are_pdarrays_equal(pda1, pda2):

# Test duplicate items in the search space. The easiest way to do this
# is to compare against pandas Series.__getitem__.
arr2 = select_from[ak.randint(0, select_from.size, 20, seed=seeds[3]+count)]
arr2 = select_from[ak.randint(0, select_from.size, 20, seed=seeds[3] + count)]
pd_s = pd.Series(index=arr1.to_ndarray(), data=arr2.to_ndarray())
ak_s = ak.Series(index=arr1, data=arr2)

Expand Down
4 changes: 2 additions & 2 deletions arkouda/array_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
float64,
complex64,
complex128,
bool,
bool_,
)

from .elementwise_functions import (
Expand Down Expand Up @@ -189,7 +189,7 @@
"float64",
"complex64",
"complex128",
"bool",
"bool_",
]

__all__ += [
Expand Down
27 changes: 13 additions & 14 deletions arkouda/array_api/_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,19 @@

# Note: dtypes are exposed as NumPy scalar type classes (e.g. np.int8)
# rather than np.dtype instances. The spec does not require any behavior
# on dtypes other than equality.
int8 = np.dtype("int8")
int16 = np.dtype("int16")
int32 = np.dtype("int32")
int64 = np.dtype("int64")
uint8 = np.dtype("uint8")
uint16 = np.dtype("uint16")
uint32 = np.dtype("uint32")
uint64 = np.dtype("uint64")
float32 = np.dtype("float32")
float64 = np.dtype("float64")
complex64 = np.dtype("complex64")
complex128 = np.dtype("complex128")
# Note: This name is changed
bool = np.dtype("bool")
int8 = np.int8
int16 = np.int16
int32 = np.int32
int64 = np.int64
uint8 = np.uint8
uint16 = np.uint16
uint32 = np.uint32
uint64 = np.uint64
float32 = np.float32
float64 = np.float64
complex64 = np.complex64
complex128 = np.complex128
bool_ = np.bool_

_all_dtypes = (
int8,
Expand Down
30 changes: 15 additions & 15 deletions arkouda/array_api/statistical_functions.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
from __future__ import annotations

from ._dtypes import (
from typing import TYPE_CHECKING, Optional, Tuple, Union

from ._dtypes import ( # _complex_floating_dtypes,; complex128,
_numeric_dtypes,
_real_floating_dtypes,
_real_numeric_dtypes,
_numeric_dtypes,
# _complex_floating_dtypes,
_signed_integer_dtypes,
uint64,
int64,
float64,
# complex128,
int64,
uint64,
)
from .array_object import Array, implements_numpy
from .manipulation_functions import squeeze

from typing import TYPE_CHECKING, Optional, Tuple, Union

if TYPE_CHECKING:
from ._typing import Dtype

from arkouda.numeric import cast as akcast
import numpy as np

from arkouda.client import generic_msg
from arkouda.pdarrayclass import parse_single_value, create_pdarray
from arkouda.dtypes import dtype as akdtype
from arkouda.numeric import cast as akcast
from arkouda.pdarrayclass import create_pdarray, parse_single_value
from arkouda.pdarraycreation import scalar_array
import numpy as np


def max(
Expand Down Expand Up @@ -411,15 +411,15 @@ def var(

def _prod_sum_dtype(dtype: Dtype) -> Dtype:
if dtype == uint64:
return dtype
return akdtype(dtype)
elif dtype in _real_floating_dtypes:
return float64
return akdtype(float64)
# elif dtype in _complex_floating_dtypes:
# return complex128
elif dtype in _signed_integer_dtypes:
return int64
return akdtype(int64)
else:
return uint64
return akdtype(uint64)


def cumulative_sum(
Expand Down
Loading

0 comments on commit 5dbf619

Please sign in to comment.