From 5dbf619461f2c29817942b1fabad2e4b559fb5a5 Mon Sep 17 00:00:00 2001 From: ajpotts Date: Tue, 30 Jul 2024 15:47:19 -0400 Subject: [PATCH] Closes #3395 align to numpy scalar types (#3396) Co-authored-by: Amanda Potts --- PROTO_tests/tests/dataframe_test.py | 4 +- PROTO_tests/tests/dtypes_test.py | 27 ++++---- PROTO_tests/tests/groupby_test.py | 13 ++-- PROTO_tests/tests/index_test.py | 21 +++--- PROTO_tests/tests/numeric_test.py | 14 ++-- PROTO_tests/tests/pdarray_creation_test.py | 2 +- PROTO_tests/tests/series_test.py | 24 +++---- PROTO_tests/tests/setops_test.py | 10 +-- arkouda/array_api/__init__.py | 4 +- arkouda/array_api/_dtypes.py | 27 ++++---- arkouda/array_api/statistical_functions.py | 30 ++++----- arkouda/categorical.py | 13 ++-- arkouda/dataframe.py | 7 +- arkouda/dtypes.py | 77 +++++++++++++--------- arkouda/groupbyclass.py | 5 +- arkouda/numeric.py | 18 +++-- arkouda/pdarrayclass.py | 14 ++-- arkouda/pdarraycreation.py | 20 +++--- arkouda/random/_generator.py | 13 ++-- arkouda/series.py | 6 +- arkouda/sorting.py | 6 +- arkouda/strings.py | 11 ++-- benchmarks/stream.py | 6 +- tests/deprecated/bigint_agg_test.py | 4 +- tests/message_test.py | 4 +- 25 files changed, 194 insertions(+), 186 deletions(-) diff --git a/PROTO_tests/tests/dataframe_test.py b/PROTO_tests/tests/dataframe_test.py index 120195c934..693d91f012 100644 --- a/PROTO_tests/tests/dataframe_test.py +++ b/PROTO_tests/tests/dataframe_test.py @@ -242,7 +242,6 @@ def test_dataframe_creation(self, size): ak_to_pd = akdf.to_pandas() assert_frame_equal(pddf, ak_to_pd) - @pytest.mark.parametrize("size", pytest.prob_size) @pytest.mark.parametrize("dtype", ["float64", "int64"]) def test_from_pandas_with_index(self, size, dtype): @@ -300,7 +299,6 @@ def test_to_pandas_categorical_column(self, size): pd_assert_frame_equal(df.to_pandas(retain_index=True), expected_df) - def test_convenience_init(self): dict1 = {"0": [1, 2], "1": [True, False], "2": ["foo", "bar"], "3": [2.3, -1.8]} dict2 = {"0": (1, 2), "1": (True, False), "2": ("foo", "bar"), "3": (2.3, -1.8)} @@ -1231,7 +1229,7 @@ def test_dropna(self): def test_memory_usage(self): dtypes = [ak.int64, ak.float64, ak.bool_] - data = dict([(str(t), ak.ones(5000, dtype=ak.int64).astype(t)) for t in dtypes]) + data = dict([(str(ak.dtype(t)), ak.ones(5000, dtype=ak.int64).astype(t)) for t in dtypes]) df = ak.DataFrame(data) ak_memory_usage = df.memory_usage() pd_memory_usage = pd.Series( diff --git a/PROTO_tests/tests/dtypes_test.py b/PROTO_tests/tests/dtypes_test.py index e8a1912b16..8ed5e5e2f6 100644 --- a/PROTO_tests/tests/dtypes_test.py +++ b/PROTO_tests/tests/dtypes_test.py @@ -73,9 +73,10 @@ def test_resolve_scalar_dtype(self): assert "bigint" == dtypes.resolve_scalar_dtype(2**64) def test_is_dtype_in_union(self): - from arkouda.dtypes import _is_dtype_in_union from typing import Union + from arkouda.dtypes import _is_dtype_in_union + float_scalars = Union[float, np.float64, np.float32] assert _is_dtype_in_union(np.float64, float_scalars) # Test with a type not present in the union @@ -84,24 +85,22 @@ def test_is_dtype_in_union(self): assert ~_is_dtype_in_union(np.float64, float) @pytest.mark.parametrize("size", pytest.prob_size) - def test_nbytes(self, size): - from arkouda.dtypes import BigInt - - a = ak.cast(ak.arange(size), dt="bigint") - assert a.nbytes == size * BigInt.itemsize - - dtype_list = [ + @pytest.mark.parametrize( + "dtype", + [ ak.dtypes.uint8, ak.dtypes.uint64, ak.dtypes.int64, ak.dtypes.float64, ak.dtypes.bool_, - ] - - for dt in dtype_list: - a = ak.array(ak.arange(size), dtype=dt) - assert a.nbytes == size * dt.itemsize - + ak.dtypes.bigint, + ], + ) + def test_nbytes(self, size, dtype): + a = ak.array(ak.arange(size), dtype=dtype) + assert a.nbytes == size * ak.dtype(dtype).itemsize + + def test_nbytes_str(self): a = ak.array(["a", "b", "c"]) c = ak.Categorical(a) assert c.nbytes == 82 diff --git a/PROTO_tests/tests/groupby_test.py b/PROTO_tests/tests/groupby_test.py index d4fd5b1c4a..59c7e34287 100644 --- a/PROTO_tests/tests/groupby_test.py +++ b/PROTO_tests/tests/groupby_test.py @@ -8,11 +8,10 @@ from arkouda import sum as aksum from arkouda.groupbyclass import GroupByReductionType from arkouda.scipy import chisquare as akchisquare -from arkouda.dtypes import npstr # block of variables and functions used in test_unique -UNIQUE_TYPES = [ak.categorical, ak.int64, ak.float64, npstr] +UNIQUE_TYPES = [ak.categorical, ak.int64, ak.float64, ak.str_] VOWELS_AND_SUCH = ["a", "e", "i", "o", "u", "AB", 47, 2, 3.14159] PICKS = np.array([f"base {i}" for i in range(10)]) @@ -804,7 +803,7 @@ def test_unique(self, data_type, prob_size): F = False np.random.seed(Jenny) arrays = { - npstr: np.random.choice(VOWELS_AND_SUCH, prob_size), + ak.str_: np.random.choice(VOWELS_AND_SUCH, prob_size), ak.int64: np.random.randint(0, prob_size // 3, prob_size), ak.float64: np.random.uniform(0, prob_size // 3, prob_size), ak.categorical: np.random.choice(PICKS, prob_size), @@ -832,7 +831,7 @@ def test_unique(self, data_type, prob_size): assert np.all(np_unique == np.sort(ak_TTF[0].to_ndarray())) # Check groups and indices. If data was sorted, the group ndarray - # should just be list(range(len(nda))). + # should just be list(range(len(nda))). # For unsorted data, a reordered copy of the pdarray is created # based on the returned permutation. # In both cases, broadcasting the unique values using the returned @@ -842,7 +841,8 @@ def test_unique(self, data_type, prob_size): # sorted - if data_type == ak.int64 : assert isSorted(ak_TFF[0].to_ndarray()) + if data_type == ak.int64: + assert isSorted(ak_TFF[0].to_ndarray()) srange = np.arange(len(nda)) assert np.all(srange == ak_TTF[1].to_ndarray()) indices = ak_TTF[2] @@ -851,7 +851,8 @@ def test_unique(self, data_type, prob_size): # unsorted aku = ak.unique(us_pda).to_ndarray() - if data_type == ak.int64 : assert isSorted(aku) + if data_type == ak.int64: + assert isSorted(aku) reordering = ak_TFF[1] reordered = us_pda[reordering] indices = ak_TFF[2] diff --git a/PROTO_tests/tests/index_test.py b/PROTO_tests/tests/index_test.py index df0ad9b53e..b6e2b8a0a5 100644 --- a/PROTO_tests/tests/index_test.py +++ b/PROTO_tests/tests/index_test.py @@ -380,24 +380,25 @@ def test_get_level_values(self): @pytest.mark.parametrize("size", pytest.prob_size) def test_memory_usage(self, size): - from arkouda.dtypes import BigInt + from arkouda.dtypes import bigint from arkouda.index import Index, MultiIndex idx = Index(ak.cast(ak.array([1, 2, 3]), dt="bigint")) - assert idx.memory_usage() == 3 * BigInt.itemsize + assert idx.memory_usage() == 3 * bigint.itemsize + int64_size = ak.dtype(ak.int64).itemsize idx = Index(ak.cast(ak.arange(size), dt="int64")) - assert idx.memory_usage(unit="GB") == size * ak.dtypes.int64.itemsize / (1024 * 1024 * 1024) - assert idx.memory_usage(unit="MB") == size * ak.dtypes.int64.itemsize / (1024 * 1024) - assert idx.memory_usage(unit="KB") == size * ak.dtypes.int64.itemsize / 1024 - assert idx.memory_usage(unit="B") == size * ak.dtypes.int64.itemsize + assert idx.memory_usage(unit="GB") == size * int64_size / (1024 * 1024 * 1024) + assert idx.memory_usage(unit="MB") == size * int64_size / (1024 * 1024) + assert idx.memory_usage(unit="KB") == size * int64_size / 1024 + assert idx.memory_usage(unit="B") == size * int64_size midx = MultiIndex([ak.cast(ak.arange(size), dt="int64"), ak.cast(ak.arange(size), dt="int64")]) - assert midx.memory_usage(unit="GB") == 2 * size * ak.dtypes.int64.itemsize / (1024 * 1024 * 1024) + assert midx.memory_usage(unit="GB") == 2 * size * int64_size / (1024 * 1024 * 1024) - assert midx.memory_usage(unit="MB") == 2 * size * ak.dtypes.int64.itemsize / (1024 * 1024) - assert midx.memory_usage(unit="KB") == 2 * size * ak.dtypes.int64.itemsize / 1024 - assert midx.memory_usage(unit="B") == 2 * size * ak.dtypes.int64.itemsize + assert midx.memory_usage(unit="MB") == 2 * size * int64_size / (1024 * 1024) + assert midx.memory_usage(unit="KB") == 2 * size * int64_size / 1024 + assert midx.memory_usage(unit="B") == 2 * size * int64_size def test_is_unique(self): i = ak.Index(ak.array([0, 1, 2])) diff --git a/PROTO_tests/tests/numeric_test.py b/PROTO_tests/tests/numeric_test.py index 9ba67f2653..436e7c9058 100644 --- a/PROTO_tests/tests/numeric_test.py +++ b/PROTO_tests/tests/numeric_test.py @@ -1,9 +1,11 @@ +import subprocess +from math import isclose + import numpy as np import pytest + import arkouda as ak -from arkouda.dtypes import npstr -from math import isclose -import subprocess + NUMERIC_TYPES = [ak.int64, ak.float64, ak.bool_, ak.uint64] NO_BOOL = [ak.int64, ak.float64, ak.uint64] NO_FLOAT = [ak.int64, ak.bool_, ak.uint64] @@ -86,12 +88,12 @@ def alternatingTF(n): (ak.bool_, ak.bool_), (ak.int64, ak.int64), (ak.int64, ak.float64), - (ak.int64, npstr), + (ak.int64, ak.str_), (ak.float64, ak.float64), - (ak.float64, npstr), + (ak.float64, ak.str_), (ak.uint8, ak.int64), (ak.uint8, ak.float64), - (ak.uint8, npstr), + (ak.uint8, ak.str_), ] # Most of the trigonometric and hyperbolic tests are identical, so they are combined diff --git a/PROTO_tests/tests/pdarray_creation_test.py b/PROTO_tests/tests/pdarray_creation_test.py index 0545d98bb5..6d1d0304ae 100644 --- a/PROTO_tests/tests/pdarray_creation_test.py +++ b/PROTO_tests/tests/pdarray_creation_test.py @@ -38,7 +38,7 @@ def test_array_creation(self, dtype): ak.array(deque(range(fixed_size)), dtype), ak.array([f"{i}" for i in range(fixed_size)], dtype=dtype), ]: - assert isinstance(pda, ak.pdarray if dtype != str else ak.Strings) + assert isinstance(pda, ak.pdarray if ak.dtype(dtype) != "str_" else ak.Strings) assert len(pda) == fixed_size assert dtype == pda.dtype diff --git a/PROTO_tests/tests/series_test.py b/PROTO_tests/tests/series_test.py index c83e059d49..f20181ca8c 100644 --- a/PROTO_tests/tests/series_test.py +++ b/PROTO_tests/tests/series_test.py @@ -235,21 +235,17 @@ def test_index_as_index_compat(self): @pytest.mark.parametrize("size", pytest.prob_size) def test_memory_usage(self, size): s = ak.Series(ak.arange(size)) - assert s.memory_usage(unit="GB", index=False) == size * ak.dtypes.int64.itemsize / ( - 1024 * 1024 * 1024 - ) - assert s.memory_usage(unit="MB", index=False) == size * ak.dtypes.int64.itemsize / (1024 * 1024) - assert s.memory_usage(unit="KB", index=False) == size * ak.dtypes.int64.itemsize / 1024 - assert s.memory_usage(unit="B", index=False) == size * ak.dtypes.int64.itemsize + int64_size = ak.dtype(ak.int64).itemsize - assert s.memory_usage(unit="GB", index=True) == 2 * size * ak.dtypes.int64.itemsize / ( - 1024 * 1024 * 1024 - ) - assert s.memory_usage(unit="MB", index=True) == 2 * size * ak.dtypes.int64.itemsize / ( - 1024 * 1024 - ) - assert s.memory_usage(unit="KB", index=True) == 2 * size * ak.dtypes.int64.itemsize / 1024 - assert s.memory_usage(unit="B", index=True) == 2 * size * ak.dtypes.int64.itemsize + assert s.memory_usage(unit="GB", index=False) == size * int64_size / (1024 * 1024 * 1024) + assert s.memory_usage(unit="MB", index=False) == size * int64_size / (1024 * 1024) + assert s.memory_usage(unit="KB", index=False) == size * int64_size / 1024 + assert s.memory_usage(unit="B", index=False) == size * int64_size + + assert s.memory_usage(unit="GB", index=True) == 2 * size * int64_size / (1024 * 1024 * 1024) + assert s.memory_usage(unit="MB", index=True) == 2 * size * int64_size / (1024 * 1024) + assert s.memory_usage(unit="KB", index=True) == 2 * size * int64_size / 1024 + assert s.memory_usage(unit="B", index=True) == 2 * size * int64_size def test_map(self): a = ak.Series(ak.array(["1", "1", "4", "4", "4"])) diff --git a/PROTO_tests/tests/setops_test.py b/PROTO_tests/tests/setops_test.py index e78469e470..64502b45c9 100644 --- a/PROTO_tests/tests/setops_test.py +++ b/PROTO_tests/tests/setops_test.py @@ -22,7 +22,7 @@ def make_np_arrays(size, dtype): # only used for error handling tests a = np.random.random(size) b = np.random.random(size) - elif dtype == bool: + elif dtype == ak.bool_: a = np.random.randint(0, 1, size=size, dtype=dtype) b = np.random.randint(0, 1, size=size, dtype=dtype) else: @@ -697,7 +697,7 @@ def are_pdarrays_equal(pda1, pda2): ] for select_from in select_from_list: count += 1 - arr1 = select_from[ak.randint(0, select_from.size, 20, seed=seeds[2]+count)] + arr1 = select_from[ak.randint(0, select_from.size, 20, seed=seeds[2] + count)] # test unique search space, this should be identical to find # be sure to test when all items are present and when there are items missing @@ -710,7 +710,9 @@ def are_pdarrays_equal(pda1, pda2): all_unique = ak.unique(arr2).size == arr2.size if all_unique: # ensure we match find - if not are_pdarrays_equal(idx_of_first_in_second, ak.find(arr1, arr2, remove_missing=True)): + if not are_pdarrays_equal( + idx_of_first_in_second, ak.find(arr1, arr2, remove_missing=True) + ): print("failed to match find") print("second array all unique: ", all_unique) print(seeds) @@ -725,7 +727,7 @@ def are_pdarrays_equal(pda1, pda2): # test duplicate items in search space, the easiest way I can think # of to do this is to compare against pandas series getitem - arr2 = select_from[ak.randint(0, select_from.size, 20, seed=seeds[3]+count)] + arr2 = select_from[ak.randint(0, select_from.size, 20, seed=seeds[3] + count)] pd_s = pd.Series(index=arr1.to_ndarray(), data=arr2.to_ndarray()) ak_s = ak.Series(index=arr1, data=arr2) diff --git a/arkouda/array_api/__init__.py b/arkouda/array_api/__init__.py index 54f58bbb27..2fc1582503 100644 --- a/arkouda/array_api/__init__.py +++ b/arkouda/array_api/__init__.py @@ -44,7 +44,7 @@ float64, complex64, complex128, - bool, + bool_, ) from .elementwise_functions import ( @@ -189,7 +189,7 @@ "float64", "complex64", "complex128", - "bool", + "bool_", ] __all__ += [ diff --git a/arkouda/array_api/_dtypes.py b/arkouda/array_api/_dtypes.py index 0e8f666eee..084c58ec48 100644 --- a/arkouda/array_api/_dtypes.py +++ b/arkouda/array_api/_dtypes.py @@ -2,20 +2,19 @@ # Note: we use dtype objects instead of dtype classes. The spec does not # require any behavior on dtypes other than equality. -int8 = np.dtype("int8") -int16 = np.dtype("int16") -int32 = np.dtype("int32") -int64 = np.dtype("int64") -uint8 = np.dtype("uint8") -uint16 = np.dtype("uint16") -uint32 = np.dtype("uint32") -uint64 = np.dtype("uint64") -float32 = np.dtype("float32") -float64 = np.dtype("float64") -complex64 = np.dtype("complex64") -complex128 = np.dtype("complex128") -# Note: This name is changed -bool = np.dtype("bool") +int8 = np.int8 +int16 = np.int16 +int32 = np.int32 +int64 = np.int64 +uint8 = np.uint8 +uint16 = np.uint16 +uint32 = np.uint32 +uint64 = np.uint64 +float32 = np.float32 +float64 = np.float64 +complex64 = np.complex64 +complex128 = np.complex128 +bool_ = np.bool_ _all_dtypes = ( int8, diff --git a/arkouda/array_api/statistical_functions.py b/arkouda/array_api/statistical_functions.py index 2a25b7b30d..888186a5ec 100644 --- a/arkouda/array_api/statistical_functions.py +++ b/arkouda/array_api/statistical_functions.py @@ -1,29 +1,29 @@ from __future__ import annotations -from ._dtypes import ( +from typing import TYPE_CHECKING, Optional, Tuple, Union + +from ._dtypes import ( # _complex_floating_dtypes,; complex128, + _numeric_dtypes, _real_floating_dtypes, _real_numeric_dtypes, - _numeric_dtypes, - # _complex_floating_dtypes, _signed_integer_dtypes, - uint64, - int64, float64, - # complex128, + int64, + uint64, ) from .array_object import Array, implements_numpy from .manipulation_functions import squeeze -from typing import TYPE_CHECKING, Optional, Tuple, Union - if TYPE_CHECKING: from ._typing import Dtype -from arkouda.numeric import cast as akcast +import numpy as np + from arkouda.client import generic_msg -from arkouda.pdarrayclass import parse_single_value, create_pdarray +from arkouda.dtypes import dtype as akdtype +from arkouda.numeric import cast as akcast +from arkouda.pdarrayclass import create_pdarray, parse_single_value from arkouda.pdarraycreation import scalar_array -import numpy as np def max( @@ -411,15 +411,15 @@ def var( def _prod_sum_dtype(dtype: Dtype) -> Dtype: if dtype == uint64: - return dtype + return akdtype(dtype) elif dtype in _real_floating_dtypes: - return float64 + return akdtype(float64) # elif dtype in _complex_floating_dtypes: # return complex128 elif dtype in _signed_integer_dtypes: - return int64 + return akdtype(int64) else: - return uint64 + return akdtype(uint64) def cumulative_sum( diff --git a/arkouda/categorical.py b/arkouda/categorical.py index 4a08faac1d..e6d62fdbee 100644 --- a/arkouda/categorical.py +++ b/arkouda/categorical.py @@ -21,8 +21,9 @@ from arkouda.client import generic_msg from arkouda.dtypes import bool_ as akbool +from arkouda.dtypes import dtype as akdtype from arkouda.dtypes import int64 as akint64 -from arkouda.dtypes import int_scalars, npstr, resolve_scalar_dtype, str_, str_scalars +from arkouda.dtypes import int_scalars, resolve_scalar_dtype, str_, str_scalars from arkouda.groupbyclass import GroupBy, unique from arkouda.infoclass import information from arkouda.logger import getArkoudaLogger @@ -81,7 +82,7 @@ class Categorical: permutation = None segments = None objType = "Categorical" - dtype = npstr # this is being set for now because Categoricals only supported on Strings + dtype = akdtype(str_) # this is being set for now because Categoricals only supported on Strings def __init__(self, values, **kwargs) -> None: self.logger = getArkoudaLogger(name=__class__.__name__) # type: ignore @@ -153,7 +154,7 @@ def __init__(self, values, **kwargs) -> None: self.nlevels = self.categories.size self.ndim = self.codes.ndim self.shape = self.codes.shape - self.dtype = str_ + self.dtype = akdtype(str_) self.registered_name: Optional[str] = None @property @@ -173,17 +174,17 @@ def nbytes(self): if isinstance(self.codes, pdarray): nbytes += self.codes.nbytes - elif isinstance(self.codes, akint64): + elif isinstance(self.codes, akdtype("int64")): nbytes += 1 if isinstance(self.permutation, pdarray): nbytes += self.permutation.nbytes - elif isinstance(self.permutation, akint64): + elif isinstance(self.permutation, akdtype("int64")): nbytes += 1 if isinstance(self.segments, pdarray): nbytes += self.segments.nbytes - elif isinstance(self.segments, akint64): + elif isinstance(self.segments, akdtype("int64")): nbytes += 1 return nbytes diff --git a/arkouda/dataframe.py b/arkouda/dataframe.py index 94ed80b584..13256d46d4 100644 --- a/arkouda/dataframe.py +++ b/arkouda/dataframe.py @@ -17,7 +17,7 @@ from arkouda.categorical import Categorical from arkouda.client import generic_msg, maxTransferBytes from arkouda.client_dtypes import BitVector, Fields, IPv4 -from arkouda.dtypes import BigInt +from arkouda.dtypes import bigint from arkouda.dtypes import bool_ as akbool from arkouda.dtypes import float64 as akfloat64 from arkouda.dtypes import int64 as akint64 @@ -138,10 +138,7 @@ def aggop(self, colnames=None): colnames = [ c for c in colnames - if ( - (self.df.data[c].dtype.type in numerical_dtypes) - or isinstance(self.df.data[c].dtype, BigInt) - ) + if ((self.df.data[c].dtype in numerical_dtypes) or self.df.data[c].dtype == bigint) and ( (isinstance(self.gb_key_names, str) and (c != self.gb_key_names)) or (isinstance(self.gb_key_names, list) and c not in self.gb_key_names) diff --git a/arkouda/dtypes.py b/arkouda/dtypes.py index ad1f564eac..1d949dc682 100644 --- a/arkouda/dtypes.py +++ b/arkouda/dtypes.py @@ -60,8 +60,14 @@ def dtype(x): # we had to create our own bigint type since numpy # gives them dtype=object there's no np equivalent - if (isinstance(x, str) and x == "bigint") or isinstance(x, BigInt): - return bigint + if ( + (isinstance(x, str) and x == "bigint") + or isinstance(x, bigint) + or (hasattr(x, "name") and x.name == "bigint") + ): + return bigint() + if isinstance(x, str) and x in ["Strings"]: + return np.dtype(np.str_) else: return np.dtype(x) @@ -99,12 +105,14 @@ def _val_isinstance_of_union(val, union_type) -> builtins.bool: return hasattr(union_type, "__args__") and isinstance(val, union_type.__args__) -class BigInt: +class bigint: # an estimate of the itemsize of bigint (128 bytes) itemsize = 128 + name = "bigint" + ndim = 0 + shape = () def __init__(self): - self.name = "bigint" self.kind = "ui" def __str__(self): @@ -113,27 +121,36 @@ def __str__(self): def __repr__(self): return f"dtype({self.name})" + def __hash__(self): + return hash(str(self)) + + def __eq__(self, other): + if isinstance(dtype(other), bigint): + return True + return False + + def __neq__(self, other): + return not (self == other) + def type(self, x): return int(x) -uint8 = np.dtype(np.uint8) -uint16 = np.dtype(np.uint16) -uint32 = np.dtype(np.uint32) -uint64 = np.dtype(np.uint64) -int8 = np.dtype(np.int8) -int16 = np.dtype(np.int16) -int32 = np.dtype(np.int32) -int64 = np.dtype(np.int64) -float32 = np.dtype(np.float32) -float64 = np.dtype(np.float64) -complex64 = np.dtype(np.complex64) -complex128 = np.dtype(np.complex128) -bool_ = np.dtype(bool) -str_ = np.dtype(np.str_) -bigint = BigInt() -npstr = np.dtype(str) -intTypes = frozenset((int64, uint64, uint8)) +uint8 = np.uint8 +uint16 = np.uint16 +uint32 = np.uint32 +uint64 = np.uint64 +int8 = np.int8 +int16 = np.int16 +int32 = np.int32 +int64 = np.int64 +float32 = np.float32 +float64 = np.float64 +complex64 = np.complex64 +complex128 = np.complex128 +bool_ = np.bool_ +str_ = np.str_ +intTypes = frozenset((dtype("int64"), dtype("uint64"), dtype("uint8"))) bitType = uint64 # Union aliases used for static and runtime type checking @@ -219,8 +236,9 @@ def __repr__(self) -> str: np.uint16, np.uint32, np.uint64, - BigInt, + bigint, ) + ARKOUDA_SUPPORTED_FLOATS = (float, np.float64) ARKOUDA_SUPPORTED_NUMBERS = ( int, @@ -235,7 +253,7 @@ def __repr__(self) -> str: np.uint16, np.uint32, np.uint64, - BigInt, + bigint, ) # TODO: bring supported data types into parity with all numpy dtypes @@ -276,14 +294,8 @@ def isSupportedNumber(num): return isinstance(num, ARKOUDA_SUPPORTED_NUMBERS) -def _as_dtype(dt) -> Union[np.dtype, "BigInt"]: - if not isinstance(dt, np.dtype): - return dtype(dt) - return dt - - @typechecked -def check_np_dtype(dt: Union[np.dtype, "BigInt"]) -> None: +def check_np_dtype(dt: Union[np.dtype, "bigint"]) -> None: """ Assert that numpy dtype dt is one of the dtypes supported by arkouda, otherwise raise TypeError. @@ -295,7 +307,7 @@ def check_np_dtype(dt: Union[np.dtype, "BigInt"]) -> None: dt is not a np.dtype """ - if _as_dtype(dt).name not in DTypes: + if dtype(dt).name not in DTypes: raise TypeError(f"Unsupported type: {dt}") @@ -312,7 +324,7 @@ def translate_np_dtype(dt) -> Tuple[builtins.str, int]: dt is not a np.dtype """ # Assert that dt is one of the arkouda supported dtypes - dt = _as_dtype(dt) + dt = dtype(dt) check_np_dtype(dt) trans = {"i": "int", "f": "float", "b": "bool", "u": "uint", "U": "str", "c": "complex"} kind = trans[dt.kind] @@ -323,6 +335,7 @@ def resolve_scalar_dtype(val: object) -> str: """ Try to infer what dtype arkouda_server should treat val as. """ + # Python bool or np.bool if isinstance(val, builtins.bool) or ( hasattr(val, "dtype") and cast(np.bool_, val).dtype.kind == "b" diff --git a/arkouda/groupbyclass.py b/arkouda/groupbyclass.py index ef01d431ea..501e71d1da 100644 --- a/arkouda/groupbyclass.py +++ b/arkouda/groupbyclass.py @@ -14,6 +14,8 @@ no_type_check, ) +from arkouda.dtypes import dtype as akdtype + if TYPE_CHECKING: from arkouda.categorical import Categorical @@ -22,7 +24,6 @@ from arkouda.client import generic_msg from arkouda.dtypes import _val_isinstance_of_union, bigint -from arkouda.dtypes import dtype as to_numpy_dtype from arkouda.dtypes import float64 as akfloat64 from arkouda.dtypes import float_scalars from arkouda.dtypes import int64 as akint64 @@ -1802,7 +1803,7 @@ def sample( permuted_weights = "" random_state = default_rng(random_state) - gen_name = random_state._name_dict[to_numpy_dtype(akfloat64 if has_weights else akint64)] + gen_name = random_state._name_dict[akdtype(akfloat64 if has_weights else akint64)] has_seed = random_state._seed is not None diff --git a/arkouda/numeric.py b/arkouda/numeric.py index 4321c6f860..1fa0a9dae4 100644 --- a/arkouda/numeric.py +++ b/arkouda/numeric.py @@ -8,16 +8,14 @@ from typeguard import typechecked from arkouda.client import generic_msg +from arkouda.dtypes import DTypes, bigint +from arkouda.dtypes import dtype as akdtype from arkouda.dtypes import ( - BigInt, - DTypes, - _as_dtype, - bigint, int_scalars, isSupportedNumber, numeric_scalars, resolve_scalar_dtype, - str_ + str_, ) from arkouda.groupbyclass import GroupBy from arkouda.pdarrayclass import all as ak_all @@ -88,7 +86,7 @@ class ErrorMode(Enum): @typechecked def cast( pda: Union[pdarray, Strings, Categorical], # type: ignore - dt: Union[np.dtype, type, str, BigInt], + dt: Union[np.dtype, type, str, bigint], errors: ErrorMode = ErrorMode.strict, ) -> Union[Union[pdarray, Strings, Categorical], Tuple[pdarray, pdarray]]: # type: ignore """ @@ -142,7 +140,7 @@ def cast( from arkouda.categorical import Categorical # type: ignore if isinstance(pda, pdarray): - if dt is Strings or dt in ["Strings", "str"] or dt == str_: + if dt is Strings or akdtype(dt) == str_: if pda.ndim > 1: raise ValueError("Cannot cast a multi-dimensional pdarray to Strings") repMsg = generic_msg( @@ -151,7 +149,7 @@ def cast( ) return Strings.from_parts(*(type_cast(str, repMsg).split("+"))) else: - dt = _as_dtype(dt) + dt = akdtype(dt) return create_pdarray( generic_msg( cmd=f"cast<{pda.dtype},{dt},{pda.ndim}>", @@ -161,10 +159,10 @@ def cast( elif isinstance(pda, Strings): if dt is Categorical or dt == "Categorical": return Categorical(pda) # type: ignore - elif dt is Strings or dt in ["Strings", "str"] or dt == str_: + elif dt is Strings or akdtype(dt) == str_: return pda[:] else: - dt = _as_dtype(dt) + dt = akdtype(dt) repMsg = generic_msg( cmd=f"castStringsTo<{dt}>", args={ diff --git a/arkouda/pdarrayclass.py b/arkouda/pdarrayclass.py index 573cf55b33..b44e0fd351 100644 --- a/arkouda/pdarrayclass.py +++ b/arkouda/pdarrayclass.py @@ -12,9 +12,9 @@ from arkouda.client import generic_msg from arkouda.dtypes import NUMBER_FORMAT_STRINGS, DTypes, bigint from arkouda.dtypes import bool_ as akbool -from arkouda.dtypes import dtype, get_byteorder +from arkouda.dtypes import dtype from arkouda.dtypes import float64 as akfloat64 -from arkouda.dtypes import get_server_byteorder +from arkouda.dtypes import get_byteorder, get_server_byteorder from arkouda.dtypes import int64 as akint64 from arkouda.dtypes import ( int_scalars, @@ -495,7 +495,7 @@ def format_other(self, other) -> str: other = int(other) except Exception: raise TypeError(f"Unable to convert {other} to {self.dtype.name}") - if self.dtype == bool: + if self.dtype == "bool_": return str(other) fmt = NUMBER_FORMAT_STRINGS[self.dtype.name] return fmt.format(other) @@ -743,13 +743,13 @@ def __ge__(self, other): def __eq__(self, other): if other is None: return False - elif (self.dtype == bool) and (isinstance(other, pdarray) and (other.dtype == bool)): + elif (self.dtype == "bool_") and (isinstance(other, pdarray) and (other.dtype == "bool_")): return ~(self ^ other) else: return self._binop(other, "==") def __ne__(self, other): - if (self.dtype == bool) and (isinstance(other, pdarray) and (other.dtype == bool)): + if (self.dtype == "bool_") and (isinstance(other, pdarray) and (other.dtype == "bool_")): return self ^ other else: return self._binop(other, "!=") @@ -764,7 +764,7 @@ def __invert__(self): return self._binop(~0, "^") if self.dtype == akuint64: return self._binop(~np.uint(0), "^") - if self.dtype == bool: + if self.dtype == "bool_": return self._binop(True, "^") raise TypeError(f"Unhandled dtype: {self} ({self.dtype})") @@ -2802,7 +2802,7 @@ def prod(pda: pdarray) -> np.float64: repMsg = generic_msg( cmd=f"reduce{pda.ndim}D", args={"op": "prod", "x": pda, "nAxes": 0, "axis": [], "skipNan": False} ) - return parse_single_value(cast(str, repMsg)) + return np.float64(parse_single_value(cast(str, repMsg))) def min(pda: pdarray) -> numpy_scalars: diff --git a/arkouda/pdarraycreation.py b/arkouda/pdarraycreation.py index a7fb832f44..0a8990f008 100644 --- a/arkouda/pdarraycreation.py +++ b/arkouda/pdarraycreation.py @@ -1,5 +1,5 @@ import itertools -from typing import Iterable, List, Optional, Tuple, Union, cast, Any +from typing import Any, Iterable, List, Optional, Tuple, Union, cast import numpy as np import pandas as pd @@ -8,7 +8,6 @@ from arkouda.client import generic_msg from arkouda.dtypes import ( NUMBER_FORMAT_STRINGS, - BigInt, DTypes, NumericDTypes, SeriesDTypes, @@ -46,7 +45,7 @@ "from_series", "bigint_from_uint_arrays", "promote_to_common_dtype", - "scalar_array" + "scalar_array", ] @@ -432,7 +431,7 @@ def bigint_from_uint_arrays(arrays, max_bits=-1): @typechecked def zeros( size: Union[int_scalars, str], - dtype: Union[np.dtype, type, str, BigInt] = float64, + dtype: Union[np.dtype, type, str, bigint] = float64, max_bits: Optional[int] = None, ) -> pdarray: """ @@ -475,11 +474,10 @@ def zeros( """ if not np.isscalar(size): raise TypeError(f"size must be a scalar, not {size.__class__.__name__}") - dtype = akdtype(dtype) # normalize dtype - dtype_name = dtype.name if isinstance(dtype, BigInt) else cast(np.dtype, dtype).name + dtype_name = akdtype(dtype).name # check dtype for error if dtype_name not in NumericDTypes: - raise TypeError(f"unsupported dtype {dtype}") + raise TypeError(f"unsupported dtype {akdtype(dtype)}") repMsg = generic_msg(cmd=f"create<{dtype_name},1>", args={"shape": size}) return create_pdarray(repMsg, max_bits=max_bits) @@ -488,7 +486,7 @@ def zeros( @typechecked def ones( size: Union[int_scalars, str], - dtype: Union[np.dtype, type, str, BigInt] = float64, + dtype: Union[np.dtype, type, str, bigint] = float64, max_bits: Optional[int] = None, ) -> pdarray: """ @@ -532,7 +530,7 @@ def ones( if not np.isscalar(size): raise TypeError(f"size must be a scalar, not {size.__class__.__name__}") dtype = akdtype(dtype) # normalize dtype - dtype_name = dtype.name if isinstance(dtype, BigInt) else cast(np.dtype, dtype).name + dtype_name = dtype.name if isinstance(dtype, bigint) else cast(np.dtype, dtype).name # check dtype for error if dtype_name not in NumericDTypes: raise TypeError(f"unsupported dtype {dtype}") @@ -548,7 +546,7 @@ def ones( def full( size: Union[int_scalars, str], fill_value: Union[numeric_scalars, str], - dtype: Union[np.dtype, type, str, BigInt] = float64, + dtype: Union[np.dtype, type, str, bigint] = float64, max_bits: Optional[int] = None, ) -> Union[pdarray, Strings]: """ @@ -597,7 +595,7 @@ def full( return _full_string(size, fill_value) dtype = akdtype(dtype) # normalize dtype - dtype_name = dtype.name if isinstance(dtype, BigInt) else cast(np.dtype, dtype).name + dtype_name = dtype.name if isinstance(dtype, bigint) else cast(np.dtype, dtype).name # check dtype for error if dtype_name not in NumericDTypes: raise TypeError(f"unsupported dtype {dtype}") diff --git a/arkouda/random/_generator.py b/arkouda/random/_generator.py index 670376b1cc..d712828160 100644 --- a/arkouda/random/_generator.py +++ b/arkouda/random/_generator.py @@ -2,13 +2,12 @@ from arkouda.client import generic_msg from arkouda.dtypes import _val_isinstance_of_union -from arkouda.dtypes import bool_ as akbool +from arkouda.dtypes import dtype as akdtype from arkouda.dtypes import dtype as to_numpy_dtype from arkouda.dtypes import float64 as akfloat64 from arkouda.dtypes import float_scalars from arkouda.dtypes import int64 as akint64 from arkouda.dtypes import int_scalars, numeric_scalars -from arkouda.dtypes import uint64 as akuint64 from arkouda.pdarrayclass import create_pdarray, pdarray @@ -204,7 +203,7 @@ def standard_exponential(self, size=None, method="zig"): rep_msg = generic_msg( cmd="standardExponential", args={ - "name": self._name_dict[akfloat64], + "name": self._name_dict[akdtype("float64")], "size": size, "method": method.upper(), "has_seed": self._seed is not None, @@ -425,7 +424,7 @@ def standard_normal(self, size=None): rep_msg = generic_msg( cmd=f"standardNormalGenerator<{ndim}>", args={ - "name": self._name_dict[akfloat64], + "name": self._name_dict[akdtype("float64")], "shape": shape, "state": self._state, }, @@ -571,7 +570,7 @@ def poisson(self, lam=1.0, size=None): rep_msg = generic_msg( cmd="poissonGenerator", args={ - "name": self._name_dict[akfloat64], + "name": self._name_dict[akdtype("float64")], "lam": lam, "is_single_lambda": is_single_lambda, "size": size, @@ -629,7 +628,7 @@ def uniform(self, low=0.0, high=1.0, size=None): if full_size < 0: raise ValueError("The size parameter must be > 0") - dt = akfloat64 + dt = akdtype("float64") rep_msg = generic_msg( cmd=f"uniformGenerator<{dt.name},{ndim}>", args={ @@ -678,7 +677,7 @@ def default_rng(seed=None): # we declare a generator for each type and fast-forward the state name_dict = dict() - for dt in akint64, akuint64, akfloat64, akbool: + for dt in akdtype("int64"), akdtype("uint64"), akdtype("float64"), akdtype("bool"): name_dict[dt] = generic_msg( cmd=f"createGenerator<{dt.name}>", args={"has_seed": has_seed, "seed": seed, "state": state}, diff --git a/arkouda/series.py b/arkouda/series.py index c75c7feb1f..4c04731d40 100644 --- a/arkouda/series.py +++ b/arkouda/series.py @@ -266,7 +266,7 @@ def validate_key( if key.dtype == self.index.dtype: if any(~in1d(key, self.index.values)): raise KeyError("{} not in index".format(key[~in1d(key, self.index.values)])) - elif key.dtype == bool: + elif key.dtype == "bool_": if key.size != self.index.size: raise IndexError( "Boolean index has wrong length: {} instead of {}".format(key.size, self.size) @@ -314,7 +314,7 @@ def __getitem__(self, _key: Union[supported_scalars, pdarray, Strings, List, Ser if is_supported_scalar(key): return self[array([key])] assert isinstance(key, (pdarray, Strings)) - if key.dtype == bool: + if key.dtype == "bool_": # boolean array indexes without sorting return Series(index=self.index[key], data=self.values[key]) indices = indexof1d(key, self.index.values) @@ -1649,7 +1649,7 @@ def validate_key(self, key): if key.dtype != int64 and key.dtype != bool: raise TypeError(".{} requires integer keys".format(self.name)) - if key.dtype == bool and key.size != self.series.size: + if key.dtype == "bool_" and key.size != self.series.size: raise IndexError( "Boolean index has wrong length: {} instead of {}".format(key.size, self.series.size) ) diff --git a/arkouda/sorting.py b/arkouda/sorting.py index 3ff5d0493e..18eaf57ec9 100644 --- a/arkouda/sorting.py +++ b/arkouda/sorting.py @@ -6,12 +6,12 @@ from typeguard import check_type, typechecked from arkouda.client import generic_msg -from arkouda.dtypes import bigint, float64, int64, int_scalars, uint64 +from arkouda.dtypes import bigint, bool_, dtype, float64, int64, int_scalars, uint64 from arkouda.pdarrayclass import create_pdarray, pdarray from arkouda.pdarraycreation import zeros from arkouda.strings import Strings -numeric_dtypes = {int64, uint64, float64} +numeric_dtypes = {dtype(int64), dtype(uint64), dtype(float64)} __all__ = ["argsort", "coargsort", "sort"] @@ -145,7 +145,7 @@ def coargsort( atypes = [] expanded_arrays = [] for a in arrays: - if not isinstance(a, pdarray) or a.dtype not in [bigint, bool]: + if not isinstance(a, pdarray) or a.dtype not in [bigint, bool_]: expanded_arrays.append(a) elif a.dtype == bigint: expanded_arrays.extend(a.bigint_to_uint_arrays()) diff --git a/arkouda/strings.py b/arkouda/strings.py index 97c54b4835..ce7ae44d29 100644 --- a/arkouda/strings.py +++ b/arkouda/strings.py @@ -10,11 +10,12 @@ import arkouda.dtypes from arkouda.client import generic_msg +from arkouda.dtypes import NUMBER_FORMAT_STRINGS +from arkouda.dtypes import dtype as akdtype from arkouda.dtypes import ( - NUMBER_FORMAT_STRINGS, int_scalars, - npstr, resolve_scalar_dtype, + str_, str_scalars, translate_np_dtype, ) @@ -183,7 +184,7 @@ def __init__(self, strings_pdarray: pdarray, bytes_size: int_scalars) -> None: self._bytes: Optional[pdarray] = None self._offsets: Optional[pdarray] = None - self.dtype = npstr + self.dtype = akdtype(str_) self._regex_dict: Dict = dict() self.logger = getArkoudaLogger(name=__class__.__name__) # type: ignore @@ -2206,9 +2207,9 @@ def _comp_to_ndarray(self, comp: str) -> np.ndarray: # Total number of bytes in the array data array_bytes = ( - self.size * arkouda.dtypes.int64.itemsize + self.size * arkouda.dtypes.dtype(arkouda.dtypes.int64).itemsize if comp == "offsets" - else self.nbytes * arkouda.dtypes.uint8.itemsize + else self.nbytes * arkouda.dtypes.dtype(arkouda.dtypes.uint8).itemsize ) # Guard against overflowing client memory diff --git a/benchmarks/stream.py b/benchmarks/stream.py index ebcdfe2882..b2f8316313 100755 --- a/benchmarks/stream.py +++ b/benchmarks/stream.py @@ -6,6 +6,7 @@ import numpy as np import arkouda as ak +from arkouda.dtypes import dtype as akdtype TYPES = ("int64", "float64") @@ -131,15 +132,16 @@ def create_parser(): parser = create_parser() args = parser.parse_args() + if args.dtype not in TYPES: raise ValueError("Dtype must be {}, not {}".format("/".join(TYPES), args.dtype)) - args.alpha = getattr(ak, args.dtype).type(args.alpha) + args.alpha = akdtype(getattr(ak, args.dtype)).type(args.alpha) ak.verbose = False ak.connect(server=args.hostname, port=args.port) if args.correctness_only: for dtype in TYPES: - alpha = getattr(ak, dtype).type(args.alpha) + alpha = akdtype(getattr(ak, dtype)).type(args.alpha) check_correctness(alpha, dtype, args.randomize, args.seed) sys.exit(0) diff --git a/tests/deprecated/bigint_agg_test.py b/tests/deprecated/bigint_agg_test.py index b6603073c4..cec5a57fa9 100644 --- a/tests/deprecated/bigint_agg_test.py +++ b/tests/deprecated/bigint_agg_test.py @@ -4,6 +4,7 @@ SIZE = 5 + class BigIntTest(ArkoudaTest): def test_negative(self): # test with negative bigint values @@ -47,8 +48,9 @@ def test_change_size(self): bi_arr[:] = res self.assertListEqual(bi_arr.to_list(), res.to_list()) + def gather_scatter(a): - rev = ak.array(np.arange(len(a)-1, -1, -1)) + rev = ak.array(np.arange(len(a) - 1, -1, -1)) a2 = a[rev] res = ak.zeros(len(a), dtype=a.dtype) res[:] = a2 diff --git a/tests/message_test.py b/tests/message_test.py index a04528a20a..96e5560f7a 100644 --- a/tests/message_test.py +++ b/tests/message_test.py @@ -123,9 +123,7 @@ def testJSONArgs(self): size, args = _json_args_to_str({"list1": [3, 2, 4]}) self.assertEqual(size, 1) self.assertListEqual( - [ - '{"key": "list1", "dtype": "int64", "val": "[\\"3\\", \\"2\\", \\"4\\"]"}' - ], + ['{"key": "list1", "dtype": "int64", "val": "[\\"3\\", \\"2\\", \\"4\\"]"}'], json.loads(args), )