Closes Bears-R-Us#3395 align to numpy scalar types (Bears-R-Us#3396)

Co-authored-by: Amanda Potts <ajpotts@users.noreply.github.com>
ajpotts · Jul 30, 2024 · 5dbf619
1 parent 607fe1c
commit 5dbf619
Show file tree

Hide file tree

Showing 25 changed files with 194 additions and 186 deletions.
diff --git a/PROTO_tests/tests/dataframe_test.py b/PROTO_tests/tests/dataframe_test.py
@@ -242,7 +242,6 @@ def test_dataframe_creation(self, size):
  ak_to_pd = akdf.to_pandas()
  assert_frame_equal(pddf, ak_to_pd)
 
-
  @pytest.mark.parametrize("size", pytest.prob_size)
  @pytest.mark.parametrize("dtype", ["float64", "int64"])
  def test_from_pandas_with_index(self, size, dtype):
@@ -300,7 +299,6 @@ def test_to_pandas_categorical_column(self, size):
 
  pd_assert_frame_equal(df.to_pandas(retain_index=True), expected_df)
 
-
  def test_convenience_init(self):
  dict1 = {"0": [1, 2], "1": [True, False], "2": ["foo", "bar"], "3": [2.3, -1.8]}
  dict2 = {"0": (1, 2), "1": (True, False), "2": ("foo", "bar"), "3": (2.3, -1.8)}
@@ -1231,7 +1229,7 @@ def test_dropna(self):
 
  def test_memory_usage(self):
  dtypes = [ak.int64, ak.float64, ak.bool_]
- data = dict([(str(t), ak.ones(5000, dtype=ak.int64).astype(t)) for t in dtypes])
+ data = dict([(str(ak.dtype(t)), ak.ones(5000, dtype=ak.int64).astype(t)) for t in dtypes])
  df = ak.DataFrame(data)
  ak_memory_usage = df.memory_usage()
  pd_memory_usage = pd.Series(

diff --git a/PROTO_tests/tests/dtypes_test.py b/PROTO_tests/tests/dtypes_test.py
@@ -73,9 +73,10 @@ def test_resolve_scalar_dtype(self):
  assert "bigint" == dtypes.resolve_scalar_dtype(2**64)
 
  def test_is_dtype_in_union(self):
- from arkouda.dtypes import _is_dtype_in_union
  from typing import Union
 
+ from arkouda.dtypes import _is_dtype_in_union
+
  float_scalars = Union[float, np.float64, np.float32]
  assert _is_dtype_in_union(np.float64, float_scalars)
  # Test with a type not present in the union
@@ -84,24 +85,22 @@ def test_is_dtype_in_union(self):
  assert ~_is_dtype_in_union(np.float64, float)
 
  @pytest.mark.parametrize("size", pytest.prob_size)
- def test_nbytes(self, size):
- from arkouda.dtypes import BigInt
-
- a = ak.cast(ak.arange(size), dt="bigint")
- assert a.nbytes == size * BigInt.itemsize
-
- dtype_list = [
+ @pytest.mark.parametrize(
+ "dtype",
+ [
  ak.dtypes.uint8,
  ak.dtypes.uint64,
  ak.dtypes.int64,
  ak.dtypes.float64,
  ak.dtypes.bool_,
- ]
-
- for dt in dtype_list:
- a = ak.array(ak.arange(size), dtype=dt)
- assert a.nbytes == size * dt.itemsize
-
+ ak.dtypes.bigint,
+ ],
+ )
+ def test_nbytes(self, size, dtype):
+ a = ak.array(ak.arange(size), dtype=dtype)
+ assert a.nbytes == size * ak.dtype(dtype).itemsize
+
+ def test_nbytes_str(self):
  a = ak.array(["a", "b", "c"])
  c = ak.Categorical(a)
  assert c.nbytes == 82

diff --git a/PROTO_tests/tests/groupby_test.py b/PROTO_tests/tests/groupby_test.py
@@ -8,11 +8,10 @@
 from arkouda import sum as aksum
 from arkouda.groupbyclass import GroupByReductionType
 from arkouda.scipy import chisquare as akchisquare
-from arkouda.dtypes import npstr
 
 # block of variables and functions used in test_unique
 
-UNIQUE_TYPES = [ak.categorical, ak.int64, ak.float64, npstr]
+UNIQUE_TYPES = [ak.categorical, ak.int64, ak.float64, ak.str_]
 VOWELS_AND_SUCH = ["a", "e", "i", "o", "u", "AB", 47, 2, 3.14159]
 PICKS = np.array([f"base {i}" for i in range(10)])
 
@@ -804,7 +803,7 @@ def test_unique(self, data_type, prob_size):
  F = False
  np.random.seed(Jenny)
  arrays = {
- npstr: np.random.choice(VOWELS_AND_SUCH, prob_size),
+ ak.str_: np.random.choice(VOWELS_AND_SUCH, prob_size),
  ak.int64: np.random.randint(0, prob_size // 3, prob_size),
  ak.float64: np.random.uniform(0, prob_size // 3, prob_size),
  ak.categorical: np.random.choice(PICKS, prob_size),
@@ -832,7 +831,7 @@ def test_unique(self, data_type, prob_size):
  assert np.all(np_unique == np.sort(ak_TTF[0].to_ndarray()))
 
  # Check groups and indices. If data was sorted, the group ndarray
- # should just be list(range(len(nda))). 
+ # should just be list(range(len(nda))).
  # For unsorted data, a reordered copy of the pdarray is created
  # based on the returned permutation.
  # In both cases, broadcasting the unique values using the returned
@@ -842,7 +841,8 @@ def test_unique(self, data_type, prob_size):
 
  # sorted
 
- if data_type == ak.int64 : assert isSorted(ak_TFF[0].to_ndarray())
+ if data_type == ak.int64:
+ assert isSorted(ak_TFF[0].to_ndarray())
  srange = np.arange(len(nda))
  assert np.all(srange == ak_TTF[1].to_ndarray())
  indices = ak_TTF[2]
@@ -851,7 +851,8 @@ def test_unique(self, data_type, prob_size):
  # unsorted
 
  aku = ak.unique(us_pda).to_ndarray()
- if data_type == ak.int64 : assert isSorted(aku)
+ if data_type == ak.int64:
+ assert isSorted(aku)
  reordering = ak_TFF[1]
  reordered = us_pda[reordering]
  indices = ak_TFF[2]

diff --git a/PROTO_tests/tests/index_test.py b/PROTO_tests/tests/index_test.py
@@ -380,24 +380,25 @@ def test_get_level_values(self):
 
  @pytest.mark.parametrize("size", pytest.prob_size)
  def test_memory_usage(self, size):
- from arkouda.dtypes import BigInt
+ from arkouda.dtypes import bigint
  from arkouda.index import Index, MultiIndex
 
  idx = Index(ak.cast(ak.array([1, 2, 3]), dt="bigint"))
- assert idx.memory_usage() == 3 * BigInt.itemsize
+ assert idx.memory_usage() == 3 * bigint.itemsize
 
+ int64_size = ak.dtype(ak.int64).itemsize
  idx = Index(ak.cast(ak.arange(size), dt="int64"))
- assert idx.memory_usage(unit="GB") == size * ak.dtypes.int64.itemsize / (1024 * 1024 * 1024)
- assert idx.memory_usage(unit="MB") == size * ak.dtypes.int64.itemsize / (1024 * 1024)
- assert idx.memory_usage(unit="KB") == size * ak.dtypes.int64.itemsize / 1024
- assert idx.memory_usage(unit="B") == size * ak.dtypes.int64.itemsize
+ assert idx.memory_usage(unit="GB") == size * int64_size / (1024 * 1024 * 1024)
+ assert idx.memory_usage(unit="MB") == size * int64_size / (1024 * 1024)
+ assert idx.memory_usage(unit="KB") == size * int64_size / 1024
+ assert idx.memory_usage(unit="B") == size * int64_size
 
  midx = MultiIndex([ak.cast(ak.arange(size), dt="int64"), ak.cast(ak.arange(size), dt="int64")])
- assert midx.memory_usage(unit="GB") == 2 * size * ak.dtypes.int64.itemsize / (1024 * 1024 * 1024)
+ assert midx.memory_usage(unit="GB") == 2 * size * int64_size / (1024 * 1024 * 1024)
 
- assert midx.memory_usage(unit="MB") == 2 * size * ak.dtypes.int64.itemsize / (1024 * 1024)
- assert midx.memory_usage(unit="KB") == 2 * size * ak.dtypes.int64.itemsize / 1024
- assert midx.memory_usage(unit="B") == 2 * size * ak.dtypes.int64.itemsize
+ assert midx.memory_usage(unit="MB") == 2 * size * int64_size / (1024 * 1024)
+ assert midx.memory_usage(unit="KB") == 2 * size * int64_size / 1024
+ assert midx.memory_usage(unit="B") == 2 * size * int64_size
 
  def test_is_unique(self):
  i = ak.Index(ak.array([0, 1, 2]))

diff --git a/PROTO_tests/tests/numeric_test.py b/PROTO_tests/tests/numeric_test.py
@@ -1,9 +1,11 @@
+import subprocess
+from math import isclose
+
 import numpy as np
 import pytest
+
 import arkouda as ak
-from arkouda.dtypes import npstr
-from math import isclose
-import subprocess
+
 NUMERIC_TYPES = [ak.int64, ak.float64, ak.bool_, ak.uint64]
 NO_BOOL = [ak.int64, ak.float64, ak.uint64]
 NO_FLOAT = [ak.int64, ak.bool_, ak.uint64]
@@ -86,12 +88,12 @@ def alternatingTF(n):
  (ak.bool_, ak.bool_),
  (ak.int64, ak.int64),
  (ak.int64, ak.float64),
- (ak.int64, npstr),
+ (ak.int64, ak.str_),
  (ak.float64, ak.float64),
- (ak.float64, npstr),
+ (ak.float64, ak.str_),
  (ak.uint8, ak.int64),
  (ak.uint8, ak.float64),
- (ak.uint8, npstr),
+ (ak.uint8, ak.str_),
 ]
 
 # Most of the trigonometric and hyperbolic tests are identical, so they are combined

diff --git a/PROTO_tests/tests/pdarray_creation_test.py b/PROTO_tests/tests/pdarray_creation_test.py
@@ -38,7 +38,7 @@ def test_array_creation(self, dtype):
  ak.array(deque(range(fixed_size)), dtype),
  ak.array([f"{i}" for i in range(fixed_size)], dtype=dtype),
  ]:
- assert isinstance(pda, ak.pdarray if dtype != str else ak.Strings)
+ assert isinstance(pda, ak.pdarray if ak.dtype(dtype) != "str_" else ak.Strings)
  assert len(pda) == fixed_size
  assert dtype == pda.dtype
 

diff --git a/PROTO_tests/tests/series_test.py b/PROTO_tests/tests/series_test.py
@@ -235,21 +235,17 @@ def test_index_as_index_compat(self):
  @pytest.mark.parametrize("size", pytest.prob_size)
  def test_memory_usage(self, size):
  s = ak.Series(ak.arange(size))
- assert s.memory_usage(unit="GB", index=False) == size * ak.dtypes.int64.itemsize / (
- 1024 * 1024 * 1024
- )
- assert s.memory_usage(unit="MB", index=False) == size * ak.dtypes.int64.itemsize / (1024 * 1024)
- assert s.memory_usage(unit="KB", index=False) == size * ak.dtypes.int64.itemsize / 1024
- assert s.memory_usage(unit="B", index=False) == size * ak.dtypes.int64.itemsize
+ int64_size = ak.dtype(ak.int64).itemsize
 
- assert s.memory_usage(unit="GB", index=True) == 2 * size * ak.dtypes.int64.itemsize / (
- 1024 * 1024 * 1024
- )
- assert s.memory_usage(unit="MB", index=True) == 2 * size * ak.dtypes.int64.itemsize / (
- 1024 * 1024
- )
- assert s.memory_usage(unit="KB", index=True) == 2 * size * ak.dtypes.int64.itemsize / 1024
- assert s.memory_usage(unit="B", index=True) == 2 * size * ak.dtypes.int64.itemsize
+ assert s.memory_usage(unit="GB", index=False) == size * int64_size / (1024 * 1024 * 1024)
+ assert s.memory_usage(unit="MB", index=False) == size * int64_size / (1024 * 1024)
+ assert s.memory_usage(unit="KB", index=False) == size * int64_size / 1024
+ assert s.memory_usage(unit="B", index=False) == size * int64_size
+
+ assert s.memory_usage(unit="GB", index=True) == 2 * size * int64_size / (1024 * 1024 * 1024)
+ assert s.memory_usage(unit="MB", index=True) == 2 * size * int64_size / (1024 * 1024)
+ assert s.memory_usage(unit="KB", index=True) == 2 * size * int64_size / 1024
+ assert s.memory_usage(unit="B", index=True) == 2 * size * int64_size
 
  def test_map(self):
  a = ak.Series(ak.array(["1", "1", "4", "4", "4"]))

diff --git a/PROTO_tests/tests/setops_test.py b/PROTO_tests/tests/setops_test.py
@@ -22,7 +22,7 @@ def make_np_arrays(size, dtype):
  # only used for error handling tests
  a = np.random.random(size)
  b = np.random.random(size)
- elif dtype == bool:
+ elif dtype == ak.bool_:
  a = np.random.randint(0, 1, size=size, dtype=dtype)
  b = np.random.randint(0, 1, size=size, dtype=dtype)
  else:
@@ -697,7 +697,7 @@ def are_pdarrays_equal(pda1, pda2):
  ]
  for select_from in select_from_list:
  count += 1
- arr1 = select_from[ak.randint(0, select_from.size, 20, seed=seeds[2]+count)]
+ arr1 = select_from[ak.randint(0, select_from.size, 20, seed=seeds[2] + count)]
 
  # test unique search space, this should be identical to find
  # be sure to test when all items are present and when there are items missing
@@ -710,7 +710,9 @@ def are_pdarrays_equal(pda1, pda2):
  all_unique = ak.unique(arr2).size == arr2.size
  if all_unique:
  # ensure we match find
- if not are_pdarrays_equal(idx_of_first_in_second, ak.find(arr1, arr2, remove_missing=True)):
+ if not are_pdarrays_equal(
+ idx_of_first_in_second, ak.find(arr1, arr2, remove_missing=True)
+ ):
  print("failed to match find")
  print("second array all unique: ", all_unique)
  print(seeds)
@@ -725,7 +727,7 @@ def are_pdarrays_equal(pda1, pda2):
 
  # test duplicate items in search space, the easiest way I can think
  # of to do this is to compare against pandas series getitem
- arr2 = select_from[ak.randint(0, select_from.size, 20, seed=seeds[3]+count)]
+ arr2 = select_from[ak.randint(0, select_from.size, 20, seed=seeds[3] + count)]
  pd_s = pd.Series(index=arr1.to_ndarray(), data=arr2.to_ndarray())
  ak_s = ak.Series(index=arr1, data=arr2)
 

diff --git a/arkouda/array_api/__init__.py b/arkouda/array_api/__init__.py
@@ -44,7 +44,7 @@
  float64,
  complex64,
  complex128,
- bool,
+ bool_,
 )
 
 from .elementwise_functions import (
@@ -189,7 +189,7 @@
  "float64",
  "complex64",
  "complex128",
- "bool",
+ "bool_",
 ]
 
 __all__ += [

diff --git a/arkouda/array_api/_dtypes.py b/arkouda/array_api/_dtypes.py
@@ -2,20 +2,19 @@
 
 # Note: we use dtype objects instead of dtype classes. The spec does not
 # require any behavior on dtypes other than equality.
-int8 = np.dtype("int8")
-int16 = np.dtype("int16")
-int32 = np.dtype("int32")
-int64 = np.dtype("int64")
-uint8 = np.dtype("uint8")
-uint16 = np.dtype("uint16")
-uint32 = np.dtype("uint32")
-uint64 = np.dtype("uint64")
-float32 = np.dtype("float32")
-float64 = np.dtype("float64")
-complex64 = np.dtype("complex64")
-complex128 = np.dtype("complex128")
-# Note: This name is changed
-bool = np.dtype("bool")
+int8 = np.int8
+int16 = np.int16
+int32 = np.int32
+int64 = np.int64
+uint8 = np.uint8
+uint16 = np.uint16
+uint32 = np.uint32
+uint64 = np.uint64
+float32 = np.float32
+float64 = np.float64
+complex64 = np.complex64
+complex128 = np.complex128
+bool_ = np.bool_
 
 _all_dtypes = (
  int8,

diff --git a/arkouda/array_api/statistical_functions.py b/arkouda/array_api/statistical_functions.py
@@ -1,29 +1,29 @@
 from __future__ import annotations
 
-from ._dtypes import (
+from typing import TYPE_CHECKING, Optional, Tuple, Union
+
+from ._dtypes import ( # _complex_floating_dtypes,; complex128,
+ _numeric_dtypes,
  _real_floating_dtypes,
  _real_numeric_dtypes,
- _numeric_dtypes,
- # _complex_floating_dtypes,
  _signed_integer_dtypes,
- uint64,
- int64,
  float64,
- # complex128,
+ int64,
+ uint64,
 )
 from .array_object import Array, implements_numpy
 from .manipulation_functions import squeeze
 
-from typing import TYPE_CHECKING, Optional, Tuple, Union
-
 if TYPE_CHECKING:
  from ._typing import Dtype
 
-from arkouda.numeric import cast as akcast
+import numpy as np
+
 from arkouda.client import generic_msg
-from arkouda.pdarrayclass import parse_single_value, create_pdarray
+from arkouda.dtypes import dtype as akdtype
+from arkouda.numeric import cast as akcast
+from arkouda.pdarrayclass import create_pdarray, parse_single_value
 from arkouda.pdarraycreation import scalar_array
-import numpy as np
 
 
 def max(
@@ -411,15 +411,15 @@ def var(
 
 def _prod_sum_dtype(dtype: Dtype) -> Dtype:
  if dtype == uint64:
- return dtype
+ return akdtype(dtype)
  elif dtype in _real_floating_dtypes:
- return float64
+ return akdtype(float64)
  # elif dtype in _complex_floating_dtypes:
  # return complex128
  elif dtype in _signed_integer_dtypes:
- return int64
+ return akdtype(int64)
  else:
- return uint64
+ return akdtype(uint64)
 
 
 def cumulative_sum(