From 337b2e78aeca4fd0d4cb0366ab92856feaa5edcf Mon Sep 17 00:00:00 2001 From: ajpotts Date: Wed, 11 Sep 2024 07:18:43 -0400 Subject: [PATCH] Closes #3742: move numeric module to arkouda.numpy (#3743) Co-authored-by: Amanda Potts --- arkouda/__init__.py | 1 - arkouda/alignment.py | 5 +- arkouda/array_api/searching_functions.py | 17 +- arkouda/array_api/statistical_functions.py | 2 +- arkouda/categorical.py | 4 +- arkouda/client_dtypes.py | 6 +- arkouda/dataframe.py | 14 +- arkouda/groupbyclass.py | 8 +- arkouda/index.py | 4 +- arkouda/join.py | 4 +- arkouda/numeric.py | 2616 -------------------- arkouda/numeric/__init__.py | 3 + arkouda/numpy/__init__.py | 2 +- arkouda/numpy/_numeric.py | 2609 ++++++++++++++++++- arkouda/pdarrayclass.py | 22 +- arkouda/pdarraycreation.py | 7 +- arkouda/plotting.py | 8 +- arkouda/random/_generator.py | 6 +- arkouda/scipy/_stats_py.py | 2 +- arkouda/scipy/special/_math.py | 2 +- arkouda/segarray.py | 8 +- arkouda/series.py | 8 +- arkouda/sorting.py | 12 +- arkouda/strings.py | 13 +- arkouda/timeclass.py | 6 +- pydoc/preprocess/generate_import_stubs.py | 9 +- pytest.ini | 3 +- tests/{ => numpy}/numeric_test.py | 25 +- tests/numpy/numpy_numeric_test.py | 26 - 29 files changed, 2718 insertions(+), 2734 deletions(-) delete mode 100644 arkouda/numeric.py create mode 100644 arkouda/numeric/__init__.py rename tests/{ => numpy}/numeric_test.py (98%) delete mode 100644 tests/numpy/numpy_numeric_test.py diff --git a/arkouda/__init__.py b/arkouda/__init__.py index 2e706be58b..d0d960ef44 100644 --- a/arkouda/__init__.py +++ b/arkouda/__init__.py @@ -13,7 +13,6 @@ from arkouda.pdarraysetops import * from arkouda.pdarraycreation import * from arkouda.pdarraymanipulation import * -from arkouda.numeric import * from arkouda.groupbyclass import * from arkouda.strings import * from arkouda.join import * diff --git a/arkouda/alignment.py b/arkouda/alignment.py index bb8acf1129..60c8a1ac7a 100644 --- a/arkouda/alignment.py +++ b/arkouda/alignment.py @@ 
-6,12 +6,12 @@ from arkouda.categorical import Categorical from arkouda.client import generic_msg +from arkouda.groupbyclass import GroupBy, broadcast, unique +from arkouda.numpy import cumsum, where from arkouda.numpy.dtypes import bigint from arkouda.numpy.dtypes import float64 as akfloat64 from arkouda.numpy.dtypes import int64 as akint64 from arkouda.numpy.dtypes import uint64 as akuint64 -from arkouda.groupbyclass import GroupBy, broadcast, unique -from arkouda.numeric import cumsum, where from arkouda.pdarrayclass import create_pdarray, pdarray from arkouda.pdarraycreation import arange, full, ones, zeros from arkouda.pdarraysetops import concatenate, in1d @@ -236,7 +236,6 @@ def find(query, space, all_occurrences=False, remove_missing=False): # create a segarray which contains all the indices from query # in our search space, instead of just the min for each segment - # im not completely convinced there's not a better way to get this given the # amount of structure but this is not the bottleneck of the computation anymore min_k_vals = i[g.permutation][less_than] diff --git a/arkouda/array_api/searching_functions.py b/arkouda/array_api/searching_functions.py index eceabe9d03..e9f301bfd2 100644 --- a/arkouda/array_api/searching_functions.py +++ b/arkouda/array_api/searching_functions.py @@ -1,17 +1,16 @@ from __future__ import annotations -from .array_object import Array -from ._dtypes import _real_numeric_dtypes, _real_floating_dtypes - -from typing import Optional, Tuple, Literal, cast - -from .manipulation_functions import squeeze, reshape, broadcast_arrays +from typing import Literal, Optional, Tuple, cast +import arkouda as ak from arkouda.client import generic_msg -from arkouda.pdarrayclass import parse_single_value, create_pdarray, create_pdarrays +from arkouda.numpy import cast as akcast +from arkouda.pdarrayclass import create_pdarray, create_pdarrays, parse_single_value from arkouda.pdarraycreation import scalar_array -from arkouda.numeric import 
cast as akcast -import arkouda as ak + +from ._dtypes import _real_floating_dtypes, _real_numeric_dtypes +from .array_object import Array +from .manipulation_functions import broadcast_arrays, reshape, squeeze def argmax(x: Array, /, *, axis: Optional[int] = None, keepdims: bool = False) -> Array: diff --git a/arkouda/array_api/statistical_functions.py b/arkouda/array_api/statistical_functions.py index af559c31b4..737c29ad14 100644 --- a/arkouda/array_api/statistical_functions.py +++ b/arkouda/array_api/statistical_functions.py @@ -20,8 +20,8 @@ import numpy as np from arkouda.client import generic_msg +from arkouda.numpy import cast as akcast from arkouda.numpy.dtypes import dtype as akdtype -from arkouda.numeric import cast as akcast from arkouda.pdarrayclass import create_pdarray, parse_single_value from arkouda.pdarraycreation import scalar_array diff --git a/arkouda/categorical.py b/arkouda/categorical.py index 940b744f4c..1ccd33f68e 100644 --- a/arkouda/categorical.py +++ b/arkouda/categorical.py @@ -23,8 +23,8 @@ from arkouda.groupbyclass import GroupBy, unique from arkouda.infoclass import information from arkouda.logger import getArkoudaLogger -from arkouda.numeric import cast as akcast -from arkouda.numeric import where +from arkouda.numpy import cast as akcast +from arkouda.numpy import where from arkouda.numpy.dtypes import bool_ as akbool from arkouda.numpy.dtypes import dtype as akdtype from arkouda.numpy.dtypes import int64 as akint64 diff --git a/arkouda/client_dtypes.py b/arkouda/client_dtypes.py index 3d926a48e4..5a6ed9e860 100644 --- a/arkouda/client_dtypes.py +++ b/arkouda/client_dtypes.py @@ -5,11 +5,11 @@ import numpy as np from typeguard import typechecked +from arkouda.groupbyclass import GroupBy, broadcast +from arkouda.numpy import cast as akcast +from arkouda.numpy import where from arkouda.numpy.dtypes import bitType, intTypes, isSupportedInt from arkouda.numpy.dtypes import uint64 as akuint64 -from arkouda.groupbyclass import GroupBy, 
broadcast -from arkouda.numeric import cast as akcast -from arkouda.numeric import where from arkouda.pdarrayclass import RegistrationError, pdarray from arkouda.pdarraycreation import arange, array, create_pdarray, zeros from arkouda.strings import Strings diff --git a/arkouda/dataframe.py b/arkouda/dataframe.py index 785711f005..bdeed1b7a1 100644 --- a/arkouda/dataframe.py +++ b/arkouda/dataframe.py @@ -17,18 +17,18 @@ from arkouda.categorical import Categorical from arkouda.client import generic_msg, maxTransferBytes from arkouda.client_dtypes import BitVector, Fields, IPv4 -from arkouda.numpy.dtypes import bigint -from arkouda.numpy.dtypes import bool_ as akbool -from arkouda.numpy.dtypes import float64 as akfloat64 -from arkouda.numpy.dtypes import int64 as akint64 -from arkouda.numpy.dtypes import uint64 as akuint64 from arkouda.groupbyclass import GROUPBY_REDUCTION_TYPES from arkouda.groupbyclass import GroupBy as akGroupBy from arkouda.groupbyclass import unique from arkouda.index import Index, MultiIndex from arkouda.join import inner_join -from arkouda.numeric import cast as akcast -from arkouda.numeric import cumsum, where +from arkouda.numpy import cast as akcast +from arkouda.numpy import cumsum, where +from arkouda.numpy.dtypes import bigint +from arkouda.numpy.dtypes import bool_ as akbool +from arkouda.numpy.dtypes import float64 as akfloat64 +from arkouda.numpy.dtypes import int64 as akint64 +from arkouda.numpy.dtypes import uint64 as akuint64 from arkouda.pdarrayclass import RegistrationError, pdarray from arkouda.pdarraycreation import arange, array, create_pdarray, full, zeros from arkouda.pdarraysetops import concatenate, in1d, intersect1d diff --git a/arkouda/groupbyclass.py b/arkouda/groupbyclass.py index d330e22de4..a99278f575 100644 --- a/arkouda/groupbyclass.py +++ b/arkouda/groupbyclass.py @@ -23,13 +23,13 @@ from typeguard import typechecked from arkouda.client import generic_msg +from arkouda.logger import getArkoudaLogger from 
arkouda.numpy.dtypes import _val_isinstance_of_union, bigint from arkouda.numpy.dtypes import float64 as akfloat64 from arkouda.numpy.dtypes import float_scalars from arkouda.numpy.dtypes import int64 as akint64 from arkouda.numpy.dtypes import int_scalars from arkouda.numpy.dtypes import uint64 as akuint64 -from arkouda.logger import getArkoudaLogger from arkouda.pdarrayclass import RegistrationError, create_pdarray, is_sorted, pdarray from arkouda.pdarraycreation import arange, full from arkouda.random import default_rng @@ -262,7 +262,7 @@ def __init__( dropna: bool = True, **kwargs, ): - from arkouda.numeric import isnan + from arkouda.numpy import isnan def drop_na_keys(): if self.dropna is True: @@ -1733,8 +1733,8 @@ def sample( if return_indices is True, return the indices of the sampled values. Otherwise, return the sample values. """ - from arkouda.numeric import cast as akcast - from arkouda.numeric import round as akround + from arkouda.numpy import cast as akcast + from arkouda.numpy import round as akround if frac is not None and n is not None: raise ValueError("Please enter a value for `frac` OR `n`, not both") diff --git a/arkouda/index.py b/arkouda/index.py index 72d0cbc91d..91016eb00b 100644 --- a/arkouda/index.py +++ b/arkouda/index.py @@ -10,11 +10,11 @@ from typeguard import typechecked from arkouda import Categorical, Strings +from arkouda.groupbyclass import GroupBy, unique +from arkouda.numpy import cast as akcast from arkouda.numpy.dtypes import bool_ as akbool from arkouda.numpy.dtypes import float64 as akfloat64 from arkouda.numpy.dtypes import int64 as akint64 -from arkouda.groupbyclass import GroupBy, unique -from arkouda.numeric import cast as akcast from arkouda.pdarrayclass import RegistrationError, pdarray from arkouda.pdarraycreation import arange, array, create_pdarray, ones from arkouda.pdarraysetops import argsort, in1d diff --git a/arkouda/join.py b/arkouda/join.py index 6d78de03dc..e56110a982 100644 --- a/arkouda/join.py +++ 
b/arkouda/join.py @@ -6,11 +6,11 @@ from arkouda.alignment import right_align from arkouda.categorical import Categorical from arkouda.client import generic_msg +from arkouda.groupbyclass import GroupBy, broadcast +from arkouda.numpy import cumsum from arkouda.numpy.dtypes import NUMBER_FORMAT_STRINGS from arkouda.numpy.dtypes import int64 as akint64 from arkouda.numpy.dtypes import resolve_scalar_dtype -from arkouda.groupbyclass import GroupBy, broadcast -from arkouda.numeric import cumsum from arkouda.pdarrayclass import create_pdarray, pdarray from arkouda.pdarraycreation import arange, array, ones, zeros from arkouda.pdarraysetops import concatenate, in1d diff --git a/arkouda/numeric.py b/arkouda/numeric.py deleted file mode 100644 index 5326bf96f2..0000000000 --- a/arkouda/numeric.py +++ /dev/null @@ -1,2616 +0,0 @@ -import json -from enum import Enum -from typing import TYPE_CHECKING, List, Sequence, Tuple, TypeVar, Union -from typing import cast as type_cast -from typing import no_type_check - -import numpy as np -from typeguard import typechecked - -from arkouda.client import generic_msg -from arkouda.groupbyclass import GroupBy -from arkouda.numpy.dtypes import DTypes, bigint -from arkouda.numpy.dtypes import dtype as akdtype -from arkouda.numpy.dtypes import int64 as akint64 -from arkouda.numpy.dtypes import ( - int_scalars, - isSupportedNumber, - numeric_scalars, - resolve_scalar_dtype, -) -from arkouda.numpy.dtypes import str_ -from arkouda.numpy.dtypes import str_ as akstr_ -from arkouda.pdarrayclass import all as ak_all -from arkouda.pdarrayclass import any as ak_any -from arkouda.pdarrayclass import argmax, create_pdarray, pdarray, sum -from arkouda.pdarraycreation import array, linspace, scalar_array -from arkouda.sorting import sort -from arkouda.strings import Strings - -if TYPE_CHECKING: - from arkouda.categorical import Categorical - from arkouda.segarray import SegArray -else: - Categorical = TypeVar("Categorical") - SegArray = 
TypeVar("SegArray") - -__all__ = [ - "cast", - "abs", - "ceil", - "clip", - "count_nonzero", - "eye", - "floor", - "trunc", - "round", - "sign", - "isfinite", - "isinf", - "isnan", - "log", - "log2", - "log10", - "log1p", - "exp", - "expm1", - "square", - "matmul", - "triu", - "tril", - "transpose", - "vecdot", - "cumsum", - "cumprod", - "sin", - "cos", - "tan", - "arcsin", - "arccos", - "arctan", - "arctan2", - "sinh", - "cosh", - "tanh", - "arcsinh", - "arccosh", - "arctanh", - "rad2deg", - "deg2rad", - "hash", - "array_equal", - "putmask", - "where", - "histogram", - "histogram2d", - "histogramdd", - "median", - "value_counts", - "ErrorMode", -] - - -class ErrorMode(Enum): - strict = "strict" - ignore = "ignore" - return_validity = "return_validity" - - -@typechecked -def cast( - pda: Union[pdarray, Strings, Categorical], # type: ignore - dt: Union[np.dtype, type, str, bigint], - errors: ErrorMode = ErrorMode.strict, -) -> Union[Union[pdarray, Strings, Categorical], Tuple[pdarray, pdarray]]: # type: ignore - """ - Cast an array to another dtype. - - Parameters - ---------- - pda : pdarray or Strings - The array of values to cast - dt : np.dtype, type, or str - The target dtype to cast values to - errors : {strict, ignore, return_validity} - Controls how errors are handled when casting strings to a numeric type - (ignored for casts from numeric types). - - strict: raise RuntimeError if *any* string cannot be converted - - ignore: never raise an error. Uninterpretable strings get - converted to NaN (float64), -2**63 (int64), zero (uint64 and - uint8), or False (bool) - - return_validity: in addition to returning the same output as - "ignore", also return a bool array indicating where the cast - was successful. - - Returns - ------- - pdarray or Strings - Array of values cast to desired dtype - [validity : pdarray(bool)] - If errors="return_validity" and input is Strings, a second array is - returned with True where the cast succeeded and False where it failed. 
- - Notes - ----- - The cast is performed according to Chapel's casting rules and is NOT safe - from overflows or underflows. The user must ensure that the target dtype - has the precision and capacity to hold the desired result. - - Examples - -------- - >>> ak.cast(ak.linspace(1.0,5.0,5), dt=ak.int64) - array([1, 2, 3, 4, 5]) - - >>> ak.cast(ak.arange(0,5), dt=ak.float64).dtype - dtype('float64') - - >>> ak.cast(ak.arange(0,5), dt=ak.bool_) - array([False, True, True, True, True]) - - >>> ak.cast(ak.linspace(0,4,5), dt=ak.bool_) - array([False, True, True, True, True]) - """ - from arkouda.categorical import Categorical # type: ignore - - if isinstance(pda, pdarray): - if dt is Strings or akdtype(dt) == str_: - if pda.ndim > 1: - raise ValueError("Cannot cast a multi-dimensional pdarray to Strings") - repMsg = generic_msg( - cmd=f"castToStrings<{pda.dtype}>", - args={"name": pda}, - ) - return Strings.from_parts(*(type_cast(str, repMsg).split("+"))) - else: - dt = akdtype(dt) - return create_pdarray( - generic_msg( - cmd=f"cast<{pda.dtype},{dt},{pda.ndim}>", - args={"name": pda}, - ) - ) - elif isinstance(pda, Strings): - if dt is Categorical or dt == "Categorical": - return Categorical(pda) # type: ignore - elif dt is Strings or akdtype(dt) == str_: - return pda[:] - else: - dt = akdtype(dt) - repMsg = generic_msg( - cmd=f"castStringsTo<{dt}>", - args={ - "name": pda.entry.name, - "opt": errors.name, - }, - ) - if errors == ErrorMode.return_validity: - a, b = type_cast(str, repMsg).split("+") - return create_pdarray(type_cast(str, a)), create_pdarray(type_cast(str, b)) - else: - return create_pdarray(type_cast(str, repMsg)) - elif isinstance(pda, Categorical): # type: ignore - if dt is Strings or dt in ["Strings", "str"] or dt == str_: - return pda.categories[pda.codes] - else: - raise ValueError("Categoricals can only be casted to Strings") - else: - raise TypeError("pda must be a pdarray, Strings, or Categorical object") - - -@typechecked -def abs(pda: 
pdarray) -> pdarray: - """ - Return the element-wise absolute value of the array. - - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing absolute values of the input array elements - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - - Examples - -------- - >>> ak.abs(ak.arange(-5,-1)) - array([5, 4, 3, 2]) - - >>> ak.abs(ak.linspace(-5,-1,5)) - array([5, 4, 3, 2, 1]) - """ - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "abs", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def ceil(pda: pdarray) -> pdarray: - """ - Return the element-wise ceiling of the array. - - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing ceiling values of the input array elements - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - - Examples - -------- - >>> ak.ceil(ak.linspace(1.1,5.5,5)) - array([2, 3, 4, 5, 6]) - """ - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "ceil", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def floor(pda: pdarray) -> pdarray: - """ - Alias of arkouda.numpy.floor - """ - from arkouda.numpy import floor as ak_floor - - return ak_floor(pda) - - -@typechecked -def round(pda: pdarray) -> pdarray: - """ - Return the element-wise rounding of the array. 
- - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing input array elements rounded to the nearest integer - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - - Examples - -------- - >>> ak.round(ak.array([1.1, 2.5, 3.14159])) - array([1, 3, 3]) - """ - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "round", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def trunc(pda: pdarray) -> pdarray: - """ - Return the element-wise truncation of the array. - - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing input array elements truncated to the nearest integer - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - - Examples - -------- - >>> ak.trunc(ak.array([1.1, 2.5, 3.14159])) - array([1, 2, 3]) - """ - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "trunc", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def sign(pda: pdarray) -> pdarray: - """ - Return the element-wise sign of the array. - - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing sign values of the input array elements - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - - Examples - -------- - >>> ak.sign(ak.array([-10, -5, 0, 5, 10])) - array([-1, -1, 0, 1, 1]) - """ - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "sign", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def isfinite(pda: pdarray) -> pdarray: - """ - Return the element-wise isfinite check applied to the array. 
- - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing boolean values indicating whether the - input array elements are finite - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - RuntimeError - if the underlying pdarray is not float-based - - Examples - -------- - >>> ak.isfinite(ak.array[1.0, 2.0, ak.inf]) - array([True, True, False]) - """ - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "isfinite", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def isinf(pda: pdarray) -> pdarray: - """ - Return the element-wise isinf check applied to the array. - - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing boolean values indicating whether the - input array elements are infinite - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - RuntimeError - if the underlying pdarray is not float-based - - Examples - -------- - >>> ak.isinf(ak.array[1.0, 2.0, ak.inf]) - array([False, False, True]) - """ - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "isinf", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def isnan(pda: pdarray) -> pdarray: - """ - Return the element-wise isnan check applied to the array. 
- - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing boolean values indicating whether the - input array elements are NaN - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - RuntimeError - if the underlying pdarray is not float-based - - Examples - -------- - >>> ak.isnan(ak.array[1.0, 2.0, 1.0 / 0.0]) - array([False, False, True]) - """ - from arkouda.util import is_float, is_numeric - - if is_numeric(pda) and not is_float(pda): - from arkouda.pdarraycreation import full - - return full(pda.size, False, dtype=bool) - elif not is_numeric(pda): - raise TypeError("isnan only supports pdarray of numeric type.") - - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "isnan", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def log(pda: pdarray) -> pdarray: - """ - Return the element-wise natural log of the array. - - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing natural log values of the input - array elements - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - - Notes - ----- - Logarithms with other bases can be computed as follows: - - Examples - -------- - >>> A = ak.array([1, 10, 100]) - # Natural log - >>> ak.log(A) - array([0, 2.3025850929940459, 4.6051701859880918]) - # Log base 10 - >>> ak.log(A) / np.log(10) - array([0, 1, 2]) - # Log base 2 - >>> ak.log(A) / np.log(2) - array([0, 3.3219280948873626, 6.6438561897747253]) - """ - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "log", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def log10(x: pdarray) -> pdarray: - """ - Return the element-wise base 10 log of the array. 
- - Parameters - __________ - x : pdarray - array to compute on - - Returns - _______ - pdarray contain values of the base 10 log - """ - repMsg = generic_msg( - cmd=f"efunc{x.ndim}D", - args={ - "func": "log10", - "array": x, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def log2(x: pdarray) -> pdarray: - """ - Return the element-wise base 2 log of the array. - - Parameters - __________ - x : pdarray - array to compute on - - Returns - _______ - pdarray contain values of the base 2 log - """ - repMsg = generic_msg( - cmd=f"efunc{x.ndim}D", - args={ - "func": "log2", - "array": x, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def log1p(x: pdarray) -> pdarray: - """ - Return the element-wise natural log of one plus the array. - - Parameters - __________ - x : pdarray - array to compute on - - Returns - _______ - pdarray contain values of the natural log of one plus the array - """ - repMsg = generic_msg( - cmd=f"efunc{x.ndim}D", - args={ - "func": "log1p", - "array": x, - }, - ) - return create_pdarray(repMsg) - - -@typechecked -def exp(pda: pdarray) -> pdarray: - """ - Return the element-wise exponential of the array. - - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing exponential values of the input - array elements - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - - Examples - -------- - >>> ak.exp(ak.arange(1,5)) - array([2.7182818284590451, 7.3890560989306504, 20.085536923187668, 54.598150033144236]) - - >>> ak.exp(ak.uniform(5,1.0,5.0)) - array([11.84010843172504, 46.454368507659211, 5.5571769623557188, - 33.494295836924771, 13.478894913238722]) - """ - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "exp", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def expm1(pda: pdarray) -> pdarray: - """ - Return the element-wise exponential of the array minus one. 
- - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing exponential values of the input - array elements minus one - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - - Examples - -------- - >>> ak.exp1m(ak.arange(1,5)) - array([1.7182818284590451, 6.3890560989306504, 19.085536923187668, 53.598150033144236]) - - >>> ak.exp1m(ak.uniform(5,1.0,5.0)) - array([10.84010843172504, 45.454368507659211, 4.5571769623557188, - 32.494295836924771, 12.478894913238722]) - """ - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "expm1", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def square(pda: pdarray) -> pdarray: - """ - Return the element-wise square of the array. - - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing square values of the input - array elements - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - - Examples - -------- - >>> ak.square(ak.arange(1,5)) - array([1, 4, 9, 16]) - """ - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "square", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def cumsum(pda: pdarray) -> pdarray: - """ - Return the cumulative sum over the array. - - The sum is inclusive, such that the ``i`` th element of the - result is the sum of elements up to and including ``i``. 
- - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing cumulative sums for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - - Examples - -------- - >>> ak.cumsum(ak.arange([1,5])) - array([1, 3, 6]) - - >>> ak.cumsum(ak.uniform(5,1.0,5.0)) - array([3.1598310770203937, 5.4110385860243131, 9.1622479306453748, - 12.710615785506533, 13.945880905466208]) - - >>> ak.cumsum(ak.randint(0, 1, 5, dtype=ak.bool_)) - array([0, 1, 1, 2, 3]) - """ - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "cumsum", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def cumprod(pda: pdarray) -> pdarray: - """ - Return the cumulative product over the array. - - The product is inclusive, such that the ``i`` th element of the - result is the product of elements up to and including ``i``. - - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - A pdarray containing cumulative products for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - - Examples - -------- - >>> ak.cumprod(ak.arange(1,5)) - array([1, 2, 6, 24])) - - >>> ak.cumprod(ak.uniform(5,1.0,5.0)) - array([1.5728783400481925, 7.0472855509390593, 33.78523998586553, - 134.05309592737584, 450.21589865655358]) - """ - repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "cumprod", - "array": pda, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def sin(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Return the element-wise sine of the array. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, - the sine will be applied to the corresponding value. Elsewhere, it will retain - its original value. Default set to True. 
- - Returns - ------- - pdarray - A pdarray containing sin for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - return _trig_helper(pda, "sin", where) - - -@typechecked -def cos(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Return the element-wise cosine of the array. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, - the cosine will be applied to the corresponding value. Elsewhere, it will retain - its original value. Default set to True. - - Returns - ------- - pdarray - A pdarray containing cosine for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - return _trig_helper(pda, "cos", where) - - -@typechecked -def tan(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Return the element-wise tangent of the array. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, - the tangent will be applied to the corresponding value. Elsewhere, it will retain - its original value. Default set to True. - - Returns - ------- - pdarray - A pdarray containing tangent for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - return _trig_helper(pda, "tan", where) - - -@typechecked -def arcsin(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Return the element-wise inverse sine of the array. The result is between -pi/2 and pi/2. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, - the inverse sine will be applied to the corresponding value. Elsewhere, it will retain - its original value. 
Default set to True. - - Returns - ------- - pdarray - A pdarray containing inverse sine for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - return _trig_helper(pda, "arcsin", where) - - -@typechecked -def arccos(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Return the element-wise inverse cosine of the array. The result is between 0 and pi. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, - the inverse cosine will be applied to the corresponding value. Elsewhere, it will retain - its original value. Default set to True. - - Returns - ------- - pdarray - A pdarray containing inverse cosine for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - return _trig_helper(pda, "arccos", where) - - -@typechecked -def arctan(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Return the element-wise inverse tangent of the array. The result is between -pi/2 and pi/2. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, - the inverse tangent will be applied to the corresponding value. Elsewhere, it will retain - its original value. Default set to True. - - Returns - ------- - pdarray - A pdarray containing inverse tangent for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - return _trig_helper(pda, "arctan", where) - - -@typechecked -def arctan2( - num: Union[pdarray, numeric_scalars], - denom: Union[pdarray, numeric_scalars], - where: Union[bool, pdarray] = True, -) -> pdarray: - """ - Return the element-wise inverse tangent of the array pair. 
The result chosen is the - signed angle in radians between the ray ending at the origin and passing through the - point (1,0), and the ray ending at the origin and passing through the point (denom, num). - The result is between -pi and pi. - - Parameters - ---------- - num : Union[numeric_scalars, pdarray] - Numerator of the arctan2 argument. - denom : Union[numeric_scalars, pdarray] - Denominator of the arctan2 argument. - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, - the inverse tangent will be applied to the corresponding values. Elsewhere, it will retain - its original value. Default set to True. - - Returns - ------- - pdarray - A pdarray containing inverse tangent for each corresponding element pair - of the original pdarray, using the signed values or the numerator and - denominator to get proper placement on unit circle. - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - if not all(isSupportedNumber(arg) or isinstance(arg, pdarray) for arg in [num, denom]): - raise TypeError( - f"Unsupported types {type(num)} and/or {type(denom)}. Supported " - "types are numeric scalars and pdarrays. At least one argument must be a pdarray." - ) - if isSupportedNumber(num) and isSupportedNumber(denom): - raise TypeError( - f"Unsupported types {type(num)} and/or {type(denom)}. Supported " - "types are numeric scalars and pdarrays. At least one argument must be a pdarray." 
- ) - # TODO: handle shape broadcasting for multidimensional arrays - if isinstance(num, pdarray) or isinstance(denom, pdarray): - ndim = num.ndim if isinstance(num, pdarray) else denom.ndim # type: ignore[union-attr] - if where is True: - repMsg = type_cast( - str, - generic_msg( - cmd=f"efunc2Arg{ndim}D", - args={ - "func": "arctan2", - "A": num, - "B": denom, - }, - ), - ) - return create_pdarray(repMsg) - elif where is False: - return num / denom # type: ignore - else: - if where.dtype != bool: - raise TypeError(f"where must have dtype bool, got {where.dtype} instead") - if isinstance(num, pdarray) and isinstance(denom, pdarray): - # TODO: handle shape broadcasting for multidimensional arrays - repMsg = type_cast( - str, - generic_msg( - cmd=f"efunc2Arg{ndim}D", - args={ - "func": "arctan2", - "A": num[where], - "B": denom[where], - }, - ), - ) - if not isinstance(num, pdarray) or not isinstance(denom, pdarray): - repMsg = type_cast( - str, - generic_msg( - cmd=f"efunc2Arg{ndim}D", - args={ - "func": "arctan2", - "A": num if not isinstance(num, pdarray) else num[where], - "B": denom if not isinstance(denom, pdarray) else denom[where], - }, - ), - ) - new_pda = num / denom - ret = create_pdarray(repMsg) - new_pda = cast(new_pda, ret.dtype) - new_pda[where] = ret - return new_pda - else: - return scalar_array(arctan2(num, denom) if where else num / denom) - - -@typechecked -def sinh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Return the element-wise hyperbolic sine of the array. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, - the hyperbolic sine will be applied to the corresponding value. Elsewhere, it will retain - its original value. Default set to True. 
- - Returns - ------- - pdarray - A pdarray containing hyperbolic sine for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - return _trig_helper(pda, "sinh", where) - - -@typechecked -def cosh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Return the element-wise hyperbolic cosine of the array. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, - the hyperbolic cosine will be applied to the corresponding value. Elsewhere, it will retain - its original value. Default set to True. - - Returns - ------- - pdarray - A pdarray containing hyperbolic cosine for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - return _trig_helper(pda, "cosh", where) - - -@typechecked -def tanh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Return the element-wise hyperbolic tangent of the array. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, - the hyperbolic tangent will be applied to the corresponding value. Elsewhere, it will retain - its original value. Default set to True. - - Returns - ------- - pdarray - A pdarray containing hyperbolic tangent for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - return _trig_helper(pda, "tanh", where) - - -@typechecked -def arcsinh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Return the element-wise inverse hyperbolic sine of the array. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. 
At locations where the condition is True, - the inverse hyperbolic sine will be applied to the corresponding value. Elsewhere, it will retain - its original value. Default set to True. - - Returns - ------- - pdarray - A pdarray containing inverse hyperbolic sine for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - return _trig_helper(pda, "arcsinh", where) - - -@typechecked -def arccosh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Return the element-wise inverse hyperbolic cosine of the array. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, - the inverse hyperbolic cosine will be applied to the corresponding value. Elsewhere, it will - retain its original value. Default set to True. - - Returns - ------- - pdarray - A pdarray containing inverse hyperbolic cosine for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - return _trig_helper(pda, "arccosh", where) - - -@typechecked -def arctanh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Return the element-wise inverse hyperbolic tangent of the array. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, - the inverse hyperbolic tangent will be applied to the corresponding value. Elsewhere, - it will retain its original value. Default set to True. - - Returns - ------- - pdarray - A pdarray containing inverse hyperbolic tangent for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameters are not a pdarray or numeric scalar. 
- """ - return _trig_helper(pda, "arctanh", where) - - -def _trig_helper(pda: pdarray, func: str, where: Union[bool, pdarray] = True) -> pdarray: - """ - Returns the result of the input trig function acting element-wise on the array. - - Parameters - ---------- - pda : pdarray - func : string - The designated trig function that is passed in - where : Boolean or pdarray - This condition is applied over the input. At locations where the condition is True, the - corresponding value will be acted on by the respective trig function. Elsewhere, - it will retain its original value. Default set to True. - - Returns - ------- - pdarray - A pdarray with the trig function applied at each element of pda - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - TypeError - Raised if where condition is not type Boolean - """ - if where is True: - repMsg = type_cast( - str, - generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": func, - "array": pda, - }, - ), - ) - return create_pdarray(repMsg) - elif where is False: - return pda - else: - if where.dtype != bool: - raise TypeError(f"where must have dtype bool, got {where.dtype} instead") - repMsg = type_cast( - str, - generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": func, - "array": pda[where], - }, - ), - ) - new_pda = pda[:] - ret = create_pdarray(repMsg) - new_pda = cast(new_pda, ret.dtype) - new_pda[where] = ret - return new_pda - - -@typechecked -def rad2deg(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Converts angles element-wise from radians to degrees. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, the - corresponding value will be converted from radians to degrees. Elsewhere, it will retain its - original value. Default set to True. 
- - Returns - ------- - pdarray - A pdarray containing an angle converted to degrees, from radians, for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - if where is True: - return 180 * (pda / np.pi) - elif where is False: - return pda - else: - new_pda = pda - ret = 180 * (pda[where] / np.pi) - new_pda = cast(new_pda, ret.dtype) - new_pda[where] = ret - return new_pda - - -@typechecked -def deg2rad(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: - """ - Converts angles element-wise from degrees to radians. - - Parameters - ---------- - pda : pdarray - where : Boolean or pdarray - This condition is broadcast over the input. At locations where the condition is True, the - corresponding value will be converted from degrees to radians. Elsewhere, it will retain its - original value. Default set to True. - - Returns - ------- - pdarray - A pdarray containing an angle converted to radians, from degrees, for each element - of the original pdarray - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - """ - if where is True: - return np.pi * pda / 180 - elif where is False: - return pda - else: - new_pda = pda - ret = np.pi * pda[where] / 180 - new_pda = cast(new_pda, ret.dtype) - new_pda[where] = ret - return new_pda - - -def _hash_helper(a): - from arkouda import Categorical as Categorical_ - from arkouda import SegArray as SegArray_ - - if isinstance(a, SegArray_): - return json.dumps( - { - "segments": a.segments.name, - "values": a.values.name, - "valObjType": a.values.objType, - } - ) - elif isinstance(a, Categorical_): - return json.dumps({"categories": a.categories.name, "codes": a.codes.name}) - else: - return a.name - - -# this is # type: ignored and doesn't actually do any type checking -# the type hints are there as a reference to show which types are expected -# type validation is done within the function -def hash( - pda: Union[ # type: ignore - 
Union[pdarray, Strings, SegArray, Categorical], - List[Union[pdarray, Strings, SegArray, Categorical]], - ], - full: bool = True, -) -> Union[Tuple[pdarray, pdarray], pdarray]: - """ - Return an element-wise hash of the array or list of arrays. - - Parameters - ---------- - pda : Union[pdarray, Strings, Segarray, Categorical], - List[Union[pdarray, Strings, Segarray, Categorical]]] - - full : bool - This is only used when a single pdarray is passed into hash - By default, a 128-bit hash is computed and returned as - two int64 arrays. If full=False, then a 64-bit hash - is computed and returned as a single int64 array. - - Returns - ------- - hashes - If full=True or a list of pdarrays is passed, - a 2-tuple of pdarrays containing the high - and low 64 bits of each hash, respectively. - If full=False and a single pdarray is passed, - a single pdarray containing a 64-bit hash - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - - Notes - ----- - In the case of a single pdarray being passed, this function - uses the SIPhash algorithm, which can output either a 64-bit - or 128-bit hash. However, the 64-bit hash runs a significant - risk of collisions when applied to more than a few million - unique values. Unless the number of unique values is known to - be small, the 128-bit hash is strongly recommended. - - Note that this hash should not be used for security, or for - any cryptographic application. Not only is SIPhash not - intended for such uses, but this implementation employs a - fixed key for the hash, which makes it possible for an - adversary with control over input to engineer collisions. - - In the case of a list of pdrrays, Strings, Categoricals, or Segarrays - being passed, a non-linear function must be applied to each - array since hashes of subsequent arrays cannot be simply XORed - because equivalent values will cancel each other out, hence we - do a rotation by the ordinal of the array. 
- """ - from arkouda import Categorical as Categorical_ - from arkouda import SegArray as SegArray_ - - if isinstance(pda, (pdarray, Strings, SegArray_, Categorical_)): - return _hash_single(pda, full) if isinstance(pda, pdarray) else pda.hash() - elif isinstance(pda, List): - if any( - wrong_type := [not isinstance(a, (pdarray, Strings, SegArray_, Categorical_)) for a in pda] - ): - raise TypeError( - f"Unsupported type {type(pda[np.argmin(wrong_type)])}. Supported types are pdarray," - f" SegArray, Strings, Categoricals, and Lists of these types." - ) - # replace bigint pdarrays with the uint limbs - expanded_pda = [] - for a in pda: - if isinstance(a, pdarray) and a.dtype == bigint: - expanded_pda.extend(a.bigint_to_uint_arrays()) - else: - expanded_pda.append(a) - types_list = [a.objType for a in expanded_pda] - names_list = [_hash_helper(a) for a in expanded_pda] - rep_msg = type_cast( - str, - generic_msg( - cmd="hashList", - args={ - "nameslist": names_list, - "typeslist": types_list, - "length": len(expanded_pda), - "size": len(expanded_pda[0]), - }, - ), - ) - hashes = json.loads(rep_msg) - return create_pdarray(hashes["upperHash"]), create_pdarray(hashes["lowerHash"]) - else: - raise TypeError( - f"Unsupported type {type(pda)}. Supported types are pdarray," - f" SegArray, Strings, Categoricals, and Lists of these types." 
- ) - - -@typechecked -def _hash_single(pda: pdarray, full: bool = True): - if pda.dtype == bigint: - return hash(pda.bigint_to_uint_arrays()) - repMsg = type_cast( - str, - generic_msg( - cmd=f"efunc{pda.ndim}D", - args={ - "func": "hash128" if full else "hash64", - "array": pda, - }, - ), - ) - if full: - a, b = repMsg.split("+") - return create_pdarray(a), create_pdarray(b) - else: - return create_pdarray(repMsg) - - -@no_type_check -def _str_cat_where( - condition: pdarray, - A: Union[str, Strings, Categorical], - B: Union[str, Strings, Categorical], -) -> Union[Strings, Categorical]: - # added @no_type_check because mypy can't handle Categorical not being declared - # sooner, but there are circular dependencies preventing that - from arkouda.categorical import Categorical - from arkouda.pdarraysetops import concatenate - - if isinstance(A, str) and isinstance(B, (Categorical, Strings)): - # This allows us to assume if a str is present it is B - A, B, condition = B, A, ~condition - - # one cat and one str - if isinstance(A, Categorical) and isinstance(B, str): - is_in_categories = A.categories == B - if ak_any(is_in_categories): - new_categories = A.categories - b_code = argmax(is_in_categories) - else: - new_categories = concatenate([A.categories, array([B])]) - b_code = A.codes.size + 1 - new_codes = where(condition, A.codes, b_code) - return Categorical.from_codes(new_codes, new_categories, NAvalue=A.NAvalue).reset_categories() - - # both cat - if isinstance(A, Categorical) and isinstance(B, Categorical): - if A.codes.size != B.codes.size: - raise TypeError("Categoricals must be same length") - if A.categories.size != B.categories.size or not ak_all(A.categories == B.categories): - A, B = A.standardize_categories([A, B]) - new_codes = where(condition, A.codes, B.codes) - return Categorical.from_codes(new_codes, A.categories, NAvalue=A.NAvalue).reset_categories() - - # one strings and one str - if isinstance(A, Strings) and isinstance(B, str): - new_lens = 
where(condition, A.get_lengths(), len(B)) - repMsg = generic_msg( - cmd="segmentedWhere", - args={ - "seg_str": A, - "other": B, - "is_str_literal": True, - "new_lens": new_lens, - "condition": condition, - }, - ) - return Strings.from_return_msg(repMsg) - - # both strings - if isinstance(A, Strings) and isinstance(B, Strings): - if A.size != B.size: - raise TypeError("Strings must be same length") - new_lens = where(condition, A.get_lengths(), B.get_lengths()) - repMsg = generic_msg( - cmd="segmentedWhere", - args={ - "seg_str": A, - "other": B, - "is_str_literal": False, - "new_lens": new_lens, - "condition": condition, - }, - ) - return Strings.from_return_msg(repMsg) - - raise TypeError("ak.where is not supported between Strings and Categorical") - - -@typechecked -def where( - condition: pdarray, - A: Union[str, numeric_scalars, pdarray, Strings, Categorical], # type: ignore - B: Union[str, numeric_scalars, pdarray, Strings, Categorical], # type: ignore -) -> Union[pdarray, Strings, Categorical]: # type: ignore - """ - Returns an array with elements chosen from A and B based upon a - conditioning array. As is the case with numpy.where, the return array - consists of values from the first array (A) where the conditioning array - elements are True and from the second array (B) where the conditioning - array elements are False. 
- - Parameters - ---------- - condition : pdarray - Used to choose values from A or B - A : Union[numeric_scalars, str, pdarray, Strings, Categorical] - Value(s) used when condition is True - B : Union[numeric_scalars, str, pdarray, Strings, Categorical] - Value(s) used when condition is False - - Returns - ------- - pdarray - Values chosen from A where the condition is True and B where - the condition is False - - Raises - ------ - TypeError - Raised if the condition object is not a pdarray, if A or B is not - an int, np.int64, float, np.float64, pdarray, str, Strings, Categorical - if pdarray dtypes are not supported or do not match, or multiple - condition clauses (see Notes section) are applied - ValueError - Raised if the shapes of the condition, A, and B pdarrays are unequal - - Examples - -------- - >>> a1 = ak.arange(1,10) - >>> a2 = ak.ones(9, dtype=np.int64) - >>> cond = a1 < 5 - >>> ak.where(cond,a1,a2) - array([1, 2, 3, 4, 1, 1, 1, 1, 1]) - - >>> a1 = ak.arange(1,10) - >>> a2 = ak.ones(9, dtype=np.int64) - >>> cond = a1 == 5 - >>> ak.where(cond,a1,a2) - array([1, 1, 1, 1, 5, 1, 1, 1, 1]) - - >>> a1 = ak.arange(1,10) - >>> a2 = 10 - >>> cond = a1 < 5 - >>> ak.where(cond,a1,a2) - array([1, 2, 3, 4, 10, 10, 10, 10, 10]) - - >>> s1 = ak.array([f'str {i}' for i in range(10)]) - >>> s2 = 'str 21' - >>> cond = (ak.arange(10) % 2 == 0) - >>> ak.where(cond,s1,s2) - array(['str 0', 'str 21', 'str 2', 'str 21', 'str 4', 'str 21', 'str 6', 'str 21', 'str 8','str 21']) - - >>> c1 = ak.Categorical(ak.array([f'str {i}' for i in range(10)])) - >>> c2 = ak.Categorical(ak.array([f'str {i}' for i in range(9, -1, -1)])) - >>> cond = (ak.arange(10) % 2 == 0) - >>> ak.where(cond,c1,c2) - array(['str 0', 'str 8', 'str 2', 'str 6', 'str 4', 'str 4', 'str 6', 'str 2', 'str 8', 'str 0']) - - Notes - ----- - A and B must have the same dtype and only one conditional clause - is supported e.g., n < 5, n > 1, which is supported in numpy - is not currently supported in Arkouda - """ 
- if (not isSupportedNumber(A) and not isinstance(A, pdarray)) or ( - not isSupportedNumber(B) and not isinstance(B, pdarray) - ): - from arkouda.categorical import Categorical # type: ignore - - # fmt: off - if ( - not isinstance(A, (str, Strings, Categorical)) # type: ignore - or not isinstance(B, (str, Strings, Categorical)) # type: ignore - ): - # fmt:on - raise TypeError( - "both A and B must be an int, np.int64, float, np.float64, pdarray OR" - " both A and B must be an str, Strings, Categorical" - ) - return _str_cat_where(condition, A, B) - if isinstance(A, pdarray) and isinstance(B, pdarray): - # TODO: handle shape broadcasting for multidimensional arrays - repMsg = generic_msg( - cmd=f"efunc3vv{condition.ndim}D", - args={ - "func": "where", - "condition": condition, - "a": A, - "b": B, - }, - ) - # For scalars, try to convert it to the array's dtype - elif isinstance(A, pdarray) and np.isscalar(B): - repMsg = generic_msg( - cmd=f"efunc3vs{condition.ndim}D", - args={ - "func": "where", - "condition": condition, - "a": A, - "dtype": A.dtype.name, - "scalar": A.format_other(B), - }, - ) - elif isinstance(B, pdarray) and np.isscalar(A): - repMsg = generic_msg( - cmd=f"efunc3sv{condition.ndim}D", - args={ - "func": "where", - "condition": condition, - "dtype": B.dtype.name, - "scalar": B.format_other(A), - "b": B, - }, - ) - elif np.isscalar(A) and np.isscalar(B): - # Scalars must share a common dtype (or be cast) - dtA = resolve_scalar_dtype(A) - dtB = resolve_scalar_dtype(B) - # Make sure at least one of the dtypes is supported - if not (dtA in DTypes or dtB in DTypes): - raise TypeError(f"Not implemented for scalar types {dtA} and {dtB}") - # If the dtypes are the same, do not cast - if dtA == dtB: # type: ignore - dt = dtA - # If the dtypes are different, try casting one direction then the other - elif dtB in DTypes and np.can_cast(A, dtB): - A = np.dtype(dtB).type(A) # type: ignore - dt = dtB - elif dtA in DTypes and np.can_cast(B, dtA): - B = 
np.dtype(dtA).type(B) # type: ignore - dt = dtA - # Cannot safely cast - else: - raise TypeError(f"Cannot cast between scalars {str(A)} and {str(B)} to supported dtype") - repMsg = generic_msg( - cmd=f"efunc3ss{condition.ndim}D", - args={ - "func": "where", - "condition": condition, - "dtype": dt, - "a": A, - "b": B, - }, - ) - return create_pdarray(type_cast(str, repMsg)) - - -@typechecked -def histogram(pda: pdarray, bins: int_scalars = 10) -> Tuple[pdarray, pdarray]: - """ - Compute a histogram of evenly spaced bins over the range of an array. - - Parameters - ---------- - pda : pdarray - The values to histogram - - bins : int_scalars - The number of equal-size bins to use (default: 10) - - Returns - ------- - (pdarray, Union[pdarray, int64 or float64]) - Bin edges and The number of values present in each bin - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray or if bins is - not an int. - ValueError - Raised if bins < 1 - NotImplementedError - Raised if pdarray dtype is bool or uint8 - - See Also - -------- - value_counts, histogram2d - - Notes - ----- - The bins are evenly spaced in the interval [pda.min(), pda.max()]. 
- - Examples - -------- - >>> import matplotlib.pyplot as plt - >>> A = ak.arange(0, 10, 1) - >>> nbins = 3 - >>> h, b = ak.histogram(A, bins=nbins) - >>> h - array([3, 3, 4]) - >>> b - array([0., 3., 6., 9.]) - - # To plot, export the left edges and the histogram to NumPy - >>> plt.plot(b.to_ndarray()[::-1], h.to_ndarray()) - """ - if bins < 1: - raise ValueError("bins must be 1 or greater") - b = linspace(pda.min(), pda.max(), bins + 1) - repMsg = generic_msg(cmd="histogram", args={"array": pda, "bins": bins}) - return create_pdarray(type_cast(str, repMsg)), b - - -# Typechecking removed due to circular dependencies with arrayview -# @typechecked -def histogram2d( - x: pdarray, y: pdarray, bins: Union[int_scalars, Sequence[int_scalars]] = 10 -) -> Tuple[pdarray, pdarray, pdarray]: - """ - Compute the bi-dimensional histogram of two data samples with evenly spaced bins - - Parameters - ---------- - x : pdarray - A pdarray containing the x coordinates of the points to be histogrammed. - - y : pdarray - A pdarray containing the y coordinates of the points to be histogrammed. - - bins : int_scalars or [int, int] = 10 - The number of equal-size bins to use. - If int, the number of bins for the two dimensions (nx=ny=bins). - If [int, int], the number of bins in each dimension (nx, ny = bins). - Defaults to 10 - - Returns - ------- - hist : pdarray - shape(nx, ny) - The bi-dimensional histogram of samples x and y. - Values in x are histogrammed along the first dimension and - values in y are histogrammed along the second dimension. - - x_edges : pdarray - The bin edges along the first dimension. - - y_edges : pdarray - The bin edges along the second dimension. - - Raises - ------ - TypeError - Raised if x or y parameters are not pdarrays or if bins is - not an int or (int, int). 
- ValueError - Raised if bins < 1 - NotImplementedError - Raised if pdarray dtype is bool or uint8 - - See Also - -------- - histogram - - Notes - ----- - The x bins are evenly spaced in the interval [x.min(), x.max()] - and y bins are evenly spaced in the interval [y.min(), y.max()]. - - Examples - -------- - >>> x = ak.arange(0, 10, 1) - >>> y = ak.arange(9, -1, -1) - >>> nbins = 3 - >>> h, x_edges, y_edges = ak.histogram2d(x, y, bins=nbins) - >>> h - array([[0, 0, 3], - [0, 2, 1], - [3, 1, 0]]) - >>> x_edges - array([0.0 3.0 6.0 9.0]) - >>> x_edges - array([0.0 3.0 6.0 9.0]) - """ - if not isinstance(bins, Sequence): - x_bins, y_bins = bins, bins - else: - if len(bins) != 2: - raise ValueError("Sequences of bins must contain two elements (num_x_bins, num_y_bins)") - x_bins, y_bins = bins - if x_bins < 1 or y_bins < 1: - raise ValueError("bins must be 1 or greater") - x_bin_boundaries = linspace(x.min(), x.max(), x_bins + 1) - y_bin_boundaries = linspace(y.min(), y.max(), y_bins + 1) - repMsg = generic_msg(cmd="histogram2D", args={"x": x, "y": y, "xBins": x_bins, "yBins": y_bins}) - return ( - create_pdarray(type_cast(str, repMsg)).reshape(x_bins, y_bins), - x_bin_boundaries, - y_bin_boundaries, - ) - - -def histogramdd( - sample: Sequence[pdarray], bins: Union[int_scalars, Sequence[int_scalars]] = 10 -) -> Tuple[pdarray, Sequence[pdarray]]: - """ - Compute the multidimensional histogram of data in sample with evenly spaced bins. - - Parameters - ---------- - sample : Sequence[pdarray] - A sequence of pdarrays containing the coordinates of the points to be histogrammed. - - bins : int_scalars or Sequence[int_scalars] = 10 - The number of equal-size bins to use. - If int, the number of bins for all dimensions (nx=ny=...=bins). - If [int, int, ...], the number of bins in each dimension (nx, ny, ... = bins). - Defaults to 10 - - Returns - ------- - hist : pdarray - shape(nx, ny, ..., nd) - The multidimensional histogram of pdarrays in sample. 
- Values in first pdarray are histogrammed along the first dimension. - Values in second pdarray are histogrammed along the second dimension and so on. - - edges : List[pdarray] - A list of pdarrays containing the bin edges for each dimension. - - - Raises - ------ - ValueError - Raised if bins < 1 - NotImplementedError - Raised if pdarray dtype is bool or uint8 - - See Also - -------- - histogram - - Notes - ----- - The bins for each dimension, m, are evenly spaced in the interval [m.min(), m.max()] - - Examples - -------- - >>> x = ak.arange(0, 10, 1) - >>> y = ak.arange(9, -1, -1) - >>> z = ak.where(x % 2 == 0, x, y) - >>> h, edges = ak.histogramdd((x, y,z), bins=(2,2,5)) - >>> h - array([[[0, 0, 0, 0, 0], - [1, 1, 1, 1, 1]], - - [[1, 1, 1, 1, 1], - [0, 0, 0, 0, 0]]]) - >>> edges - [array([0.0 4.5 9.0]), - array([0.0 4.5 9.0]), - array([0.0 1.6 3.2 4.8 6.4 8.0])] - """ - if not isinstance(sample, Sequence): - raise ValueError("Sample must be a sequence of pdarrays") - if len(set(pda.dtype for pda in sample)) != 1: - raise ValueError("All pdarrays in sample must have same dtype") - - num_dims = len(sample) - if not isinstance(bins, Sequence): - bins = [bins] * num_dims - else: - if len(bins) != num_dims: - raise ValueError("Sequences of bins must contain same number of elements as the sample") - if any(b < 1 for b in bins): - raise ValueError("bins must be 1 or greater") - - bins = list(bins) if isinstance(bins, tuple) else bins - sample = list(sample) if isinstance(sample, tuple) else sample - bin_boundaries = [linspace(a.min(), a.max(), b + 1) for a, b in zip(sample, bins)] - bins_pda = array(bins)[::-1] - dim_prod = (cumprod(bins_pda) // bins_pda)[::-1] - repMsg = generic_msg( - cmd="histogramdD", - args={ - "sample": sample, - "num_dims": num_dims, - "bins": bins, - "dim_prod": dim_prod, - "num_samples": sample[0].size, - }, - ) - return create_pdarray(type_cast(str, repMsg)).reshape(bins), bin_boundaries - - -@typechecked -def value_counts( - pda: pdarray, 
-) -> Tuple[ - Union[Union[pdarray, Strings, Categorical], Sequence[Union[pdarray, Strings, Categorical]]], pdarray -]: - """ - Count the occurrences of the unique values of an array. - - Parameters - ---------- - pda : pdarray, int64 - The array of values to count - - Returns - ------- - unique_values : pdarray, int64 or Strings - The unique values, sorted in ascending order - - counts : pdarray, int64 - The number of times the corresponding unique value occurs - - Raises - ------ - TypeError - Raised if the parameter is not a pdarray - - See Also - -------- - unique, histogram - - Notes - ----- - This function differs from ``histogram()`` in that it only returns - counts for values that are present, leaving out empty "bins". This - function delegates all logic to the unique() method where the - return_counts parameter is set to True. - - Examples - -------- - >>> A = ak.array([2, 0, 2, 4, 0, 0]) - >>> ak.value_counts(A) - (array([0, 2, 4]), array([3, 2, 1])) - """ - return GroupBy(pda).size() - - -@typechecked -def clip( - pda: pdarray, - lo: Union[numeric_scalars, pdarray], - hi: Union[numeric_scalars, pdarray], -) -> pdarray: - """ - Clip (limit) the values in an array to a given range [lo,hi] - - Given an array a, values outside the range are clipped to the - range edges, such that all elements lie in the range. - - There is no check to enforce that lo < hi. If lo > hi, the corresponding - value of the array will be set to hi. - - If lo or hi (or both) are pdarrays, the check is by pairwise elements. - See examples. - - Parameters - ---------- - pda : pdarray, int64 or float64 - the array of values to clip - lo : scalar or pdarray, int64 or float64 - the lower value of the clipping range - hi : scalar or pdarray, int64 or float64 - the higher value of the clipping range - If lo or hi (or both) are pdarrays, the check is by pairwise elements. - See examples. 
- - Returns - ------- - arkouda.pdarrayclass.pdarray - A pdarray matching pda, except that element x remains x if lo <= x <= hi, - or becomes lo if x < lo, - or becomes hi if x > hi. - - Examples - -------- - >>> a = ak.array([1,2,3,4,5,6,7,8,9,10]) - >>> ak.clip(a,3,8) - array([3,3,3,4,5,6,7,8,8,8]) - >>> ak.clip(a,3,8.0) - array([3.00000000000000000 3.00000000000000000 3.00000000000000000 4.00000000000000000 - 5.00000000000000000 6.00000000000000000 7.00000000000000000 8.00000000000000000 - 8.00000000000000000 8.00000000000000000]) - >>> ak.clip(a,None,7) - array([1,2,3,4,5,6,7,7,7,7]) - >>> ak.clip(a,5,None) - array([5,5,5,5,5,6,7,8,9,10]) - >>> ak.clip(a,None,None) - ValueError : either min or max must be supplied - >>> ak.clip(a,ak.array([2,2,3,3,8,8,5,5,6,6],8)) - array([2,2,3,4,8,8,7,8,8,8]) - >>> ak.clip(a,4,ak.array([10,9,8,7,6,5,5,5,5,5])) - array([4,4,4,4,5,5,5,5,5,5]) - - Notes - ----- - Either lo or hi may be None, but not both. - If lo > hi, all x = hi. - If all inputs are int64, output is int64, but if any input is float64, output is float64. - - Raises - ------ - ValueError - Raised if both lo and hi are None - """ - - # Check that a range was actually supplied. - - if lo is None and hi is None: - raise ValueError("Either min or max must be supplied.") - - # If any of the inputs are float, then make everything float. - # Some type checking is needed, because scalars and pdarrays get cast differently. 
- - dataFloat = pda.dtype == float - minFloat = isinstance(lo, float) or (isinstance(lo, pdarray) and lo.dtype == float) - maxFloat = isinstance(hi, float) or (isinstance(hi, pdarray) and hi.dtype == float) - forceFloat = dataFloat or minFloat or maxFloat - if forceFloat: - if not dataFloat: - pda = cast(pda, np.float64) - if lo is not None and not minFloat: - lo = cast(lo, np.float64) if isinstance(lo, pdarray) else float(lo) - if hi is not None and not maxFloat: - hi = cast(hi, np.float64) if isinstance(hi, pdarray) else float(hi) - - # Now do the clipping. - - pda1 = pda - if lo is not None: - pda1 = where(pda < lo, lo, pda) - if hi is not None: - pda1 = where(pda1 > hi, hi, pda1) - return pda1 - - -def median(pda): - """ - Compute the median of a given array. 1d case only, for now. - - Parameters - ---------- - pda: pdarray - The input data, in pdarray form, numeric type or boolean - - Returns - ------- - np.float64 - The median of the entire pdarray - The array is sorted, and then if the number of elements is odd, - the return value is the middle element. If even, then the - mean of the two middle elements. - - Examples - -------- - >>> import arkouda as ak - >>> arkouda.connect() - >>> pda = ak.array ([0,4,7,8,1,3,5,2,-1]) - >>> ak.median(pda) - 3 - >>> pda = ak.array([0,1,3,3,1,2,3,4,2,3]) - 2.5 - - """ - - # Now do the computation - - if pda.dtype == bool: - pda_srtd = sort(cast(pda, dt=np.int64)) - else: - pda_srtd = sort(pda) - if len(pda_srtd) % 2 == 1: - return pda_srtd[len(pda_srtd) // 2].astype(np.float64) - else: - return ((pda_srtd[len(pda_srtd) // 2] + pda_srtd[len(pda_srtd) // 2 - 1]) / 2.0).astype( - np.float64 - ) - - -def count_nonzero(pda): - """ - Compute the nonzero count of a given array. 1D case only, for now. 
- - Parameters - ---------- - pda: pdarray - The input data, in pdarray form, numeric, bool, or str - - Returns - ------- - np.int64 - The nonzero count of the entire pdarray - - Examples - -------- - >>> pda = ak.array([0,4,7,8,1,3,5,2,-1]) - >>> ak.count_nonzero(pda) - 9 - >>> pda = ak.array([False,True,False,True,False]) - >>> ak.count_nonzero(pda) - 3 - >>> pda = ak.array(["hello","","there"]) - >>> ak.count_nonzero(pda) - 2 - - """ - - from arkouda.util import is_numeric - - # Handle different data types. - - if is_numeric(pda): - return sum((pda != 0).astype(np.int64)) - elif pda.dtype == bool: - return sum((pda).astype(np.int64)) - elif pda.dtype == str: - return sum((pda != "").astype(np.int64)) - - -def array_equal(pda_a: pdarray, pda_b: pdarray, equal_nan: bool = False): - """ - Compares two pdarrays for equality. - If neither array has any nan elements, then if all elements are pairwise equal, - it returns True. - If equal_Nan is False, then any nan element in either array gives a False return. - If equal_Nan is True, then pairwise-corresponding nans are considered equal. - - Parameters - ---------- - pda_a : pdarray - pda_b : pdarray - equal_nan : boolean to determine how to handle nans, default False - - Returns - ------- - boolean - With string data: - False if one array is type ak.str_ & the other isn't, True if both are ak.str_ & they match. - - With numeric data: - True if neither array has any nan elements, and all elements pairwise equal. - - True if equal_Nan True, all non-nans pairwise equal & nans in pda_a correspond to nans in pda_b - - False if equal_Nan False, & either array has any nan element. 
- - Examples - -------- - >>> a = ak.randint(0,10,10,dtype=ak.float64) - >>> b = a - >>> ak.array_equal(a,b) - True - >>> b[9] = np.nan - >>> ak.array_equal(a,b) - False - >>> a[9] = np.nan - >>> ak.array_equal(a,b) - False - >>> ak.array_equal(a,b,True) - True - """ - if (pda_a.shape != pda_b.shape) or ((pda_a.dtype == akstr_) ^ (pda_b.dtype == akstr_)): - return False - elif equal_nan: - return ak_all(where(isnan(pda_a), isnan(pda_b), pda_a == pda_b)) - else: - return ak_all(pda_a == pda_b) - - -def putmask(pda: pdarray, mask: Union[bool, pdarray], values: pdarray): - """ - Overwrite elements of a pdarray at indices where mask is True - - Parameters - ---------- - pda : pdarray, source data, also output data - pda = input where mask is False, = values where mask is True - mask : a scalar boolean, or a pdarray of booleans - values : pdarray, replacement data - - Returns - ------- - None - pda is modified in-place - - Notes - ----- - If values.size != a.size, values is repeated and/or pruned as needed to - make sizes match, because ak.where requires matching sizes. 
- - Examples - ------- - >>> a = ak.array(np.arange(10)) - >>> ak.putmask (a,a>2,a**2) - array ([0,1,2,9,16,25,36,49,64,81]) - - >>> values = ak.array([3,2]) - >>> ak.putmask (a,a>2,values) - array ([0,1,2,2,3,2,3,2,3,2]) - - Raises - ------ - TypeError - Raised if a and values are not the same type - - """ - - from arkouda.pdarraysetops import concatenate - - # check for matching types - - if values.dtype != pda.dtype: - raise TypeError("ak.putmask requires arrays of matching type") - - # if values is not the same size as pda, repeat it and/or prune it as needed - - growth = pda.size // values.size + (0 if pda.size % values.size == 0 else 1) - result = concatenate(growth * [values]) - if result.size > pda.size: - reduction = result.size % pda.size - result = result[:-(reduction)] - - pda[:] = where(mask, result, pda) # pda[:] = allows us to return modified value - - -def eye(rows: int_scalars, cols: int_scalars, diag: int_scalars = 0, dt: type = akint64): - """ - Return a pdarray with zeros everywhere except along a diagonal, which is all ones. - The matrix need not be square. - - Parameters - ---------- - rows : int_scalars - cols : int_scalars - diag : int_scalars - if diag = 0, zeros start at element [0,0] and proceed along diagonal - if diag > 0, zeros start at element [0,diag] and proceed along diagonal - if diag < 0, zeros start at element [diag,0] and proceed along diagonal - etc. 
- - Returns - ------- - pdarray - an array of zeros with ones along the specified diagonal - - Examples - -------- - >>> ak.eye(rows=4,cols=4,diag=0,dt=ak.int64) - array([array([1 0 0 0]) array([0 1 0 0]) array([0 0 1 0]) array([0 0 0 1])]) - >>> ak.eye(rows=3,cols=3,diag=1,dt=ak.float64) - array([array([0.00000000000000000 1.00000000000000000 0.00000000000000000]) - array([0.00000000000000000 0.00000000000000000 1.00000000000000000]) - array([0.00000000000000000 0.00000000000000000 0.00000000000000000])]) - >>> ak.eye(rows=4,cols=4,diag=-1,dt=ak.bool_) - array([array([False False False False]) array([True False False False]) - array([False True False False]) array([False False True False])] - - Notes - ----- - if rows = cols and diag = 0, the result is an identity matrix - Server returns an error if rank of pda < 2 - - """ - - cmd = f"eye<{akdtype(dt).name}>" - args = { - "rows": rows, - "cols": cols, - "diag": diag, - } - return create_pdarray( - generic_msg( - cmd=cmd, - args=args, - ) - ) - - -def triu(pda: pdarray, diag: int_scalars = 0): - """ - Return a copy of the pda with the lower triangle zeroed out - - Parameters - ---------- - pda : pdarray - diag : int_scalars - if diag = 0, zeros start just above the main diagonal - if diag = 1, zeros start at the main diagonal - if diag = 2, zeros start just below the main diagonal - etc. 
- - Returns - ------- - pdarray - a copy of pda with zeros in the lower triangle - - Examples - -------- - >>> a = ak.array([[1,2,3,4,5],[2,3,4,5,6],[3,4,5,6,7],[4,5,6,7,8],[5,6,7,8,9]]) - >>> ak.triu(a,diag=0) - array([array([1 2 3 4 5]) array([0 3 4 5 6]) array([0 0 5 6 7]) - array([0 0 0 7 8]) array([0 0 0 0 9])]) - >>> ak.triu(a,diag=1) - array([array([0 2 3 4 5]) array([0 0 4 5 6]) array([0 0 0 6 7]) - array([0 0 0 0 8]) array([0 0 0 0 0])]) - >>> ak.triu(a,diag=2) - array([array([0 0 3 4 5]) array([0 0 0 5 6]) array([0 0 0 0 7]) - array([0 0 0 0 0]) array([0 0 0 0 0])]) - >>> ak.triu(a,diag=3) - array([array([0 0 0 4 5]) array([0 0 0 0 6]) array([0 0 0 0 0]) - array([0 0 0 0 0]) array([0 0 0 0 0])]) - >>> ak.triu(a,diag=4) - array([array([0 0 0 0 5]) array([0 0 0 0 0]) array([0 0 0 0 0]) - array([0 0 0 0 0]) array([0 0 0 0 0])]) - - Notes - ----- - Server returns an error if rank of pda < 2 - - """ - - cmd = f"triu<{pda.dtype},{pda.ndim}>" - args = { - "array": pda, - "diag": diag, - } - return create_pdarray( - generic_msg( - cmd=cmd, - args=args, - ) - ) - - -def tril(pda: pdarray, diag: int_scalars = 0): - """ - Return a copy of the pda with the upper triangle zeroed out - - Parameters - ---------- - pda : pdarray - diag : int_scalars - if diag = 0, zeros start just below the main diagonal - if diag = 1, zeros start at the main diagonal - if diag = 2, zeros start just above the main diagonal - etc. 
- - Returns - ------- - pdarray - a copy of pda with zeros in the upper triangle - - Examples - -------- - >>> a = ak.array([[1,2,3,4,5],[2,3,4,5,6],[3,4,5,6,7],[4,5,6,7,8],[5,6,7,8,9]]) - >>> ak.tril(a,diag=4) - array([array([1 2 3 4 5]) array([2 3 4 5 6]) array([3 4 5 6 7]) - array([4 5 6 7 8]) array([5 6 7 8 9])]) - >>> ak.tril(a,diag=3) - array([array([1 2 3 4 0]) array([2 3 4 5 6]) array([3 4 5 6 7]) - array([4 5 6 7 8]) array([5 6 7 8 9])]) - >>> ak.tril(a,diag=2) - array([array([1 2 3 0 0]) array([2 3 4 5 0]) array([3 4 5 6 7]) - array([4 5 6 7 8]) array([5 6 7 8 9])]) - >>> ak.tril(a,diag=1) - array([array([1 2 0 0 0]) array([2 3 4 0 0]) array([3 4 5 6 0]) - array([4 5 6 7 8]) array([5 6 7 8 9])]) - >>> ak.tril(a,diag=0) - array([array([1 0 0 0 0]) array([2 3 0 0 0]) array([3 4 5 0 0]) - array([4 5 6 7 0]) array([5 6 7 8 9])]) - - Notes - ----- - Server returns an error if rank of pda < 2 - - """ - cmd = f"tril<{pda.dtype},{pda.ndim}>" - args = { - "array": pda, - "diag": diag, - } - return create_pdarray( - generic_msg( - cmd=cmd, - args=args, - ) - ) - - -def transpose(pda: pdarray): - """ - Compute the transpose of a matrix. - - Parameters - ---------- - pda : pdarray - - Returns - ------- - pdarray - the transpose of the input matrix - - Examples - -------- - >>> a = ak.array([[1,2,3,4,5],[1,2,3,4,5]]) - >>> ak.transpose(a) - array([array([1 1]) array([2 2]) array([3 3]) array([4 4]) array([5 5])]) - - - Notes - ----- - Server returns an error if rank of pda < 2 - - """ - cmd = f"transpose<{pda.dtype},{pda.ndim}>" - args = { - "array": pda, - } - return create_pdarray( - generic_msg( - cmd=cmd, - args=args, - ) - ) - - -def matmul(pdaLeft: pdarray, pdaRight: pdarray): - """ - Compute the product of two matrices. 
- - Parameters - ---------- - pdaLeft : pdarray - pdaRight : pdarray - - Returns - ------- - pdarray - the matrix product pdaLeft x pdaRight - - Examples - -------- - >>> a = ak.array([[1,2,3,4,5],[1,2,3,4,5]]) - >>> b = ak.array(([1,1],[2,2],[3,3],[4,4],[5,5]]) - >>> ak.matmul(a,b) - array([array([30 30]) array([45 45])]) - - >>> x = ak.array([[1,2,3],[1.1,2.1,3.1]]) - >>> y = ak.array([[1,1,1],[0,2,2],[0,0,3]]) - >>> ak.matmul(x,y) - array([array([1.00000000000000000 5.00000000000000000 14.00000000000000000]) - array([1.1000000000000001 5.3000000000000007 14.600000000000001])]) - - Notes - ----- - Server returns an error if shapes of pdaLeft and pdaRight - are incompatible with matrix multiplication. - - """ - if pdaLeft.ndim != pdaRight.ndim: - raise ValueError("matmul requires matrices of matching rank.") - cmd = f"matmul<{pdaLeft.dtype},{pdaRight.dtype},{pdaLeft.ndim}>" - args = { - "x1": pdaLeft, - "x2": pdaRight, - } - return create_pdarray( - generic_msg( - cmd=cmd, - args=args, - ) - ) - - -def vecdot(x1: pdarray, x2: pdarray): - """ - Compute the generalized dot product of two vectors along the given axis. - Assumes that both tensors have already been broadcast to the same shape. 
- - Parameters - ---------- - x1 : pdarray - x2 : pdarray - - Returns - ------- - pdarray - x1 vecdot x2 - - Examples - -------- - >>> a = ak.array([[1,2,3,4,5],[1,2,3,4,5]]) - >>> b = ak.array(([2,2,2,2,2],[2,2,2,2,2]]) - >>> ak.vecdot(a,b) - array([5 10 15 20 25]) - >>> ak.vecdot(b,a) - array([5 10 15 20 25]) - - Raises - ------ - ValueTypeError - Raised if x1 and x2 are not of matching shape or if rank of x1 < 2 - - """ - - if x1.shape != x2.shape: - raise ValueError("vecdot requires matrices of matching rank.") - if x1.ndim < 2: - raise ValueError("vector requires matrices of rank 2 or more.") - cmd = f"vecdot<{x1.dtype},{x2.dtype},{x1.ndim}>" - args = { - "x1": x1, - "x2": x2, - "bcShape": tuple(x1.shape), - "axis": 0, - } - return create_pdarray( - generic_msg( - cmd=cmd, - args=args, - ) - ) diff --git a/arkouda/numeric/__init__.py b/arkouda/numeric/__init__.py new file mode 100644 index 0000000000..b2e086ba84 --- /dev/null +++ b/arkouda/numeric/__init__.py @@ -0,0 +1,3 @@ +# flake8: noqa + +from arkouda.numpy._numeric import * diff --git a/arkouda/numpy/__init__.py b/arkouda/numpy/__init__.py index 5ac1ce3d6d..6d2c3bf2ae 100644 --- a/arkouda/numpy/__init__.py +++ b/arkouda/numpy/__init__.py @@ -95,4 +95,4 @@ from arkouda.numpy.polynomial import * from arkouda.numpy.rec import * -from ._numeric import floor +from ._numeric import * diff --git a/arkouda/numpy/_numeric.py b/arkouda/numpy/_numeric.py index b115240cd3..a26678b777 100644 --- a/arkouda/numpy/_numeric.py +++ b/arkouda/numpy/_numeric.py @@ -1,19 +1,278 @@ +import json +from enum import Enum +from typing import TYPE_CHECKING, List, Sequence, Tuple, TypeVar, Union from typing import cast as type_cast - +from typing import no_type_check +from arkouda.groupbyclass import groupable +import numpy as np from typeguard import typechecked from arkouda.client import generic_msg -from arkouda.pdarrayclass import create_pdarray, pdarray +from arkouda.dtypes import str_ as akstr_ +from arkouda.groupbyclass 
import GroupBy +from arkouda.numpy.dtypes import DTypes, bigint +from arkouda.numpy.dtypes import bool_ as ak_bool +from arkouda.numpy.dtypes import dtype as akdtype +from arkouda.numpy.dtypes import float64 as ak_float64 +from arkouda.numpy.dtypes import int64 as ak_int64 +from arkouda.numpy.dtypes import int64 as akint64 from arkouda.numpy.dtypes import ( - int64 as ak_int64, - float64 as ak_float64, - bool_ as ak_bool, - uint64 as ak_uint64, + int_scalars, + isSupportedNumber, + numeric_scalars, + resolve_scalar_dtype, + str_, ) +from arkouda.numpy.dtypes import uint64 as ak_uint64 +from arkouda.pdarrayclass import all as ak_all +from arkouda.pdarrayclass import any as ak_any +from arkouda.pdarrayclass import argmax, create_pdarray, pdarray, sum +from arkouda.pdarraycreation import array, linspace, scalar_array +from arkouda.sorting import sort +from arkouda.strings import Strings NUMERIC_TYPES = [ak_int64, ak_float64, ak_bool, ak_uint64] -__all__ = ["floor"] + +if TYPE_CHECKING: + from arkouda.categorical import Categorical + from arkouda.segarray import SegArray +else: + Categorical = TypeVar("Categorical") + SegArray = TypeVar("SegArray") + +__all__ = [ + "cast", + "abs", + "ceil", + "clip", + "count_nonzero", + "eye", + "floor", + "trunc", + "round", + "sign", + "isfinite", + "isinf", + "isnan", + "log", + "log2", + "log10", + "log1p", + "exp", + "expm1", + "square", + "matmul", + "triu", + "tril", + "transpose", + "vecdot", + "cumsum", + "cumprod", + "sin", + "cos", + "tan", + "arcsin", + "arccos", + "arctan", + "arctan2", + "sinh", + "cosh", + "tanh", + "arcsinh", + "arccosh", + "arctanh", + "rad2deg", + "deg2rad", + "hash", + "array_equal", + "putmask", + "where", + "histogram", + "histogram2d", + "histogramdd", + "median", + "value_counts", + "ErrorMode", +] + + +class ErrorMode(Enum): + strict = "strict" + ignore = "ignore" + return_validity = "return_validity" + + +@typechecked +def cast( + pda: Union[pdarray, Strings, Categorical], # type: ignore + 
dt: Union[np.dtype, type, str, bigint], + errors: ErrorMode = ErrorMode.strict, +) -> Union[Union[pdarray, Strings, Categorical], Tuple[pdarray, pdarray]]: # type: ignore + """ + Cast an array to another dtype. + + Parameters + ---------- + pda : pdarray or Strings + The array of values to cast + dt : np.dtype, type, or str + The target dtype to cast values to + errors : {strict, ignore, return_validity} + Controls how errors are handled when casting strings to a numeric type + (ignored for casts from numeric types). + - strict: raise RuntimeError if *any* string cannot be converted + - ignore: never raise an error. Uninterpretable strings get + converted to NaN (float64), -2**63 (int64), zero (uint64 and + uint8), or False (bool) + - return_validity: in addition to returning the same output as + "ignore", also return a bool array indicating where the cast + was successful. + + Returns + ------- + pdarray or Strings + Array of values cast to desired dtype + [validity : pdarray(bool)] + If errors="return_validity" and input is Strings, a second array is + returned with True where the cast succeeded and False where it failed. + + Notes + ----- + The cast is performed according to Chapel's casting rules and is NOT safe + from overflows or underflows. The user must ensure that the target dtype + has the precision and capacity to hold the desired result. 
+ + Examples + -------- + >>> ak.cast(ak.linspace(1.0,5.0,5), dt=ak.int64) + array([1, 2, 3, 4, 5]) + + >>> ak.cast(ak.arange(0,5), dt=ak.float64).dtype + dtype('float64') + + >>> ak.cast(ak.arange(0,5), dt=ak.bool_) + array([False, True, True, True, True]) + + >>> ak.cast(ak.linspace(0,4,5), dt=ak.bool_) + array([False, True, True, True, True]) + """ + from arkouda.categorical import Categorical # type: ignore + + if isinstance(pda, pdarray): + if dt is Strings or akdtype(dt) == str_: + if pda.ndim > 1: + raise ValueError("Cannot cast a multi-dimensional pdarray to Strings") + repMsg = generic_msg( + cmd=f"castToStrings<{pda.dtype}>", + args={"name": pda}, + ) + return Strings.from_parts(*(type_cast(str, repMsg).split("+"))) + else: + dt = akdtype(dt) + return create_pdarray( + generic_msg( + cmd=f"cast<{pda.dtype},{dt},{pda.ndim}>", + args={"name": pda}, + ) + ) + elif isinstance(pda, Strings): + if dt is Categorical or dt == "Categorical": + return Categorical(pda) # type: ignore + elif dt is Strings or akdtype(dt) == str_: + return pda[:] + else: + dt = akdtype(dt) + repMsg = generic_msg( + cmd=f"castStringsTo<{dt}>", + args={ + "name": pda.entry.name, + "opt": errors.name, + }, + ) + if errors == ErrorMode.return_validity: + a, b = type_cast(str, repMsg).split("+") + return create_pdarray(type_cast(str, a)), create_pdarray(type_cast(str, b)) + else: + return create_pdarray(type_cast(str, repMsg)) + elif isinstance(pda, Categorical): # type: ignore + if dt is Strings or dt in ["Strings", "str"] or dt == str_: + return pda.categories[pda.codes] + else: + raise ValueError("Categoricals can only be casted to Strings") + else: + raise TypeError("pda must be a pdarray, Strings, or Categorical object") + + +@typechecked +def abs(pda: pdarray) -> pdarray: + """ + Return the element-wise absolute value of the array. 
+ + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing absolute values of the input array elements + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + + Examples + -------- + >>> ak.abs(ak.arange(-5,-1)) + array([5, 4, 3, 2]) + + >>> ak.abs(ak.linspace(-5,-1,5)) + array([5, 4, 3, 2, 1]) + """ + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "abs", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def ceil(pda: pdarray) -> pdarray: + """ + Return the element-wise ceiling of the array. + + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing ceiling values of the input array elements + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + + Examples + -------- + >>> ak.ceil(ak.linspace(1.1,5.5,5)) + array([2, 3, 4, 5, 6]) + """ + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "ceil", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) @typechecked @@ -48,3 +307,2339 @@ def floor(pda: pdarray) -> pdarray: }, ) return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def round(pda: pdarray) -> pdarray: + """ + Return the element-wise rounding of the array. + + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing input array elements rounded to the nearest integer + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + + Examples + -------- + >>> ak.round(ak.array([1.1, 2.5, 3.14159])) + array([1, 3, 3]) + """ + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "round", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def trunc(pda: pdarray) -> pdarray: + """ + Return the element-wise truncation of the array. 
+ + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing input array elements truncated to the nearest integer + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + + Examples + -------- + >>> ak.trunc(ak.array([1.1, 2.5, 3.14159])) + array([1, 2, 3]) + """ + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "trunc", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def sign(pda: pdarray) -> pdarray: + """ + Return the element-wise sign of the array. + + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing sign values of the input array elements + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + + Examples + -------- + >>> ak.sign(ak.array([-10, -5, 0, 5, 10])) + array([-1, -1, 0, 1, 1]) + """ + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "sign", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def isfinite(pda: pdarray) -> pdarray: + """ + Return the element-wise isfinite check applied to the array. + + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing boolean values indicating whether the + input array elements are finite + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + RuntimeError + if the underlying pdarray is not float-based + + Examples + -------- + >>> ak.isfinite(ak.array[1.0, 2.0, ak.inf]) + array([True, True, False]) + """ + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "isfinite", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def isinf(pda: pdarray) -> pdarray: + """ + Return the element-wise isinf check applied to the array. 
+ + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing boolean values indicating whether the + input array elements are infinite + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + RuntimeError + if the underlying pdarray is not float-based + + Examples + -------- + >>> ak.isinf(ak.array[1.0, 2.0, ak.inf]) + array([False, False, True]) + """ + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "isinf", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def isnan(pda: pdarray) -> pdarray: + """ + Return the element-wise isnan check applied to the array. + + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing boolean values indicating whether the + input array elements are NaN + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + RuntimeError + if the underlying pdarray is not float-based + + Examples + -------- + >>> ak.isnan(ak.array[1.0, 2.0, 1.0 / 0.0]) + array([False, False, True]) + """ + from arkouda.util import is_float, is_numeric + + if is_numeric(pda) and not is_float(pda): + from arkouda.pdarraycreation import full + + return full(pda.size, False, dtype=bool) + elif not is_numeric(pda): + raise TypeError("isnan only supports pdarray of numeric type.") + + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "isnan", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def log(pda: pdarray) -> pdarray: + """ + Return the element-wise natural log of the array. 
+ + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing natural log values of the input + array elements + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + + Notes + ----- + Logarithms with other bases can be computed as follows: + + Examples + -------- + >>> A = ak.array([1, 10, 100]) + # Natural log + >>> ak.log(A) + array([0, 2.3025850929940459, 4.6051701859880918]) + # Log base 10 + >>> ak.log(A) / np.log(10) + array([0, 1, 2]) + # Log base 2 + >>> ak.log(A) / np.log(2) + array([0, 3.3219280948873626, 6.6438561897747253]) + """ + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "log", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def log10(x: pdarray) -> pdarray: + """ + Return the element-wise base 10 log of the array. + + Parameters + __________ + x : pdarray + array to compute on + + Returns + _______ + pdarray contain values of the base 10 log + """ + repMsg = generic_msg( + cmd=f"efunc{x.ndim}D", + args={ + "func": "log10", + "array": x, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def log2(x: pdarray) -> pdarray: + """ + Return the element-wise base 2 log of the array. + + Parameters + __________ + x : pdarray + array to compute on + + Returns + _______ + pdarray contain values of the base 2 log + """ + repMsg = generic_msg( + cmd=f"efunc{x.ndim}D", + args={ + "func": "log2", + "array": x, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def log1p(x: pdarray) -> pdarray: + """ + Return the element-wise natural log of one plus the array. 
+ + Parameters + __________ + x : pdarray + array to compute on + + Returns + _______ + pdarray contain values of the natural log of one plus the array + """ + repMsg = generic_msg( + cmd=f"efunc{x.ndim}D", + args={ + "func": "log1p", + "array": x, + }, + ) + return create_pdarray(repMsg) + + +@typechecked +def exp(pda: pdarray) -> pdarray: + """ + Return the element-wise exponential of the array. + + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing exponential values of the input + array elements + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + + Examples + -------- + >>> ak.exp(ak.arange(1,5)) + array([2.7182818284590451, 7.3890560989306504, 20.085536923187668, 54.598150033144236]) + + >>> ak.exp(ak.uniform(5,1.0,5.0)) + array([11.84010843172504, 46.454368507659211, 5.5571769623557188, + 33.494295836924771, 13.478894913238722]) + """ + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "exp", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def expm1(pda: pdarray) -> pdarray: + """ + Return the element-wise exponential of the array minus one. + + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing exponential values of the input + array elements minus one + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + + Examples + -------- + >>> ak.exp1m(ak.arange(1,5)) + array([1.7182818284590451, 6.3890560989306504, 19.085536923187668, 53.598150033144236]) + + >>> ak.exp1m(ak.uniform(5,1.0,5.0)) + array([10.84010843172504, 45.454368507659211, 4.5571769623557188, + 32.494295836924771, 12.478894913238722]) + """ + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "expm1", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def square(pda: pdarray) -> pdarray: + """ + Return the element-wise square of the array. 
+ + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing square values of the input + array elements + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + + Examples + -------- + >>> ak.square(ak.arange(1,5)) + array([1, 4, 9, 16]) + """ + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "square", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def cumsum(pda: pdarray) -> pdarray: + """ + Return the cumulative sum over the array. + + The sum is inclusive, such that the ``i`` th element of the + result is the sum of elements up to and including ``i``. + + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing cumulative sums for each element + of the original pdarray + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + + Examples + -------- + >>> ak.cumsum(ak.arange([1,5])) + array([1, 3, 6]) + + >>> ak.cumsum(ak.uniform(5,1.0,5.0)) + array([3.1598310770203937, 5.4110385860243131, 9.1622479306453748, + 12.710615785506533, 13.945880905466208]) + + >>> ak.cumsum(ak.randint(0, 1, 5, dtype=ak.bool_)) + array([0, 1, 1, 2, 3]) + """ + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "cumsum", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def cumprod(pda: pdarray) -> pdarray: + """ + Return the cumulative product over the array. + + The product is inclusive, such that the ``i`` th element of the + result is the product of elements up to and including ``i``. 
+ + Parameters + ---------- + pda : pdarray + + Returns + ------- + pdarray + A pdarray containing cumulative products for each element + of the original pdarray + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + + Examples + -------- + >>> ak.cumprod(ak.arange(1,5)) + array([1, 2, 6, 24])) + + >>> ak.cumprod(ak.uniform(5,1.0,5.0)) + array([1.5728783400481925, 7.0472855509390593, 33.78523998586553, + 134.05309592737584, 450.21589865655358]) + """ + repMsg = generic_msg( + cmd=f"efunc{pda.ndim}D", + args={ + "func": "cumprod", + "array": pda, + }, + ) + return create_pdarray(type_cast(str, repMsg)) + + +@typechecked +def sin(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: + """ + Return the element-wise sine of the array. + + Parameters + ---------- + pda : pdarray + where : Boolean or pdarray + This condition is broadcast over the input. At locations where the condition is True, + the sine will be applied to the corresponding value. Elsewhere, it will retain + its original value. Default set to True. + + Returns + ------- + pdarray + A pdarray containing sin for each element + of the original pdarray + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + """ + return _trig_helper(pda, "sin", where) + + +@typechecked +def cos(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: + """ + Return the element-wise cosine of the array. + + Parameters + ---------- + pda : pdarray + where : Boolean or pdarray + This condition is broadcast over the input. At locations where the condition is True, + the cosine will be applied to the corresponding value. Elsewhere, it will retain + its original value. Default set to True. 
+ + Returns + ------- + pdarray + A pdarray containing cosine for each element + of the original pdarray + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + """ + return _trig_helper(pda, "cos", where) + + +@typechecked +def tan(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: + """ + Return the element-wise tangent of the array. + + Parameters + ---------- + pda : pdarray + where : Boolean or pdarray + This condition is broadcast over the input. At locations where the condition is True, + the tangent will be applied to the corresponding value. Elsewhere, it will retain + its original value. Default set to True. + + Returns + ------- + pdarray + A pdarray containing tangent for each element + of the original pdarray + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + """ + return _trig_helper(pda, "tan", where) + + +@typechecked +def arcsin(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: + """ + Return the element-wise inverse sine of the array. The result is between -pi/2 and pi/2. + + Parameters + ---------- + pda : pdarray + where : Boolean or pdarray + This condition is broadcast over the input. At locations where the condition is True, + the inverse sine will be applied to the corresponding value. Elsewhere, it will retain + its original value. Default set to True. + + Returns + ------- + pdarray + A pdarray containing inverse sine for each element + of the original pdarray + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + """ + return _trig_helper(pda, "arcsin", where) + + +@typechecked +def arccos(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: + """ + Return the element-wise inverse cosine of the array. The result is between 0 and pi. + + Parameters + ---------- + pda : pdarray + where : Boolean or pdarray + This condition is broadcast over the input. 
@typechecked
def arctan2(
    num: Union[pdarray, numeric_scalars],
    denom: Union[pdarray, numeric_scalars],
    where: Union[bool, pdarray] = True,
) -> pdarray:
    """
    Return the element-wise inverse tangent of the array pair. The result chosen is the
    signed angle in radians between the ray ending at the origin and passing through the
    point (1,0), and the ray ending at the origin and passing through the point (denom, num).
    The result is between -pi and pi.

    Parameters
    ----------
    num : Union[numeric_scalars, pdarray]
        Numerator of the arctan2 argument.
    denom : Union[numeric_scalars, pdarray]
        Denominator of the arctan2 argument.
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the inverse tangent will be applied to the corresponding values. Elsewhere, the result
        holds num / denom. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing inverse tangent for each corresponding element pair
        of the original pdarray, using the signed values of the numerator and
        denominator to get proper placement on unit circle.

    Raises
    ------
    TypeError
        Raised if either argument is neither a supported numeric scalar nor a pdarray,
        if both arguments are scalars (at least one must be a pdarray), or if where
        is a pdarray whose dtype is not bool
    """
    operands = (num, denom)
    all_supported = all(isSupportedNumber(arg) or isinstance(arg, pdarray) for arg in operands)
    # Both failure modes (unsupported type, or no pdarray at all) share one message.
    if not all_supported or not any(isinstance(arg, pdarray) for arg in operands):
        raise TypeError(
            f"Unsupported types {type(num)} and/or {type(denom)}. Supported "
            "types are numeric scalars and pdarrays. At least one argument must be a pdarray."
        )

    # At least one operand is a pdarray from here on, so the rank is always available.
    # TODO: handle shape broadcasting for multidimensional arrays
    ndim = num.ndim if isinstance(num, pdarray) else denom.ndim  # type: ignore[union-attr]

    def _request(a, b) -> str:
        # Single place that issues the two-argument element-wise server command.
        return type_cast(
            str,
            generic_msg(
                cmd=f"efunc2Arg{ndim}D",
                args={
                    "func": "arctan2",
                    "A": a,
                    "B": b,
                },
            ),
        )

    if where is True:
        return create_pdarray(_request(num, denom))
    if where is False:
        return num / denom  # type: ignore

    if where.dtype != bool:
        raise TypeError(f"where must have dtype bool, got {where.dtype} instead")

    # Only pdarray operands are masked; scalar operands are sent unchanged.
    masked_num = num[where] if isinstance(num, pdarray) else num
    masked_denom = denom[where] if isinstance(denom, pdarray) else denom
    repMsg = _request(masked_num, masked_denom)

    # Unselected positions keep the plain quotient, cast to the dtype of the arctan2 result.
    new_pda = num / denom
    ret = create_pdarray(repMsg)
    new_pda = cast(new_pda, ret.dtype)
    new_pda[where] = ret
    return new_pda
At locations where the condition is True, + the hyperbolic tangent will be applied to the corresponding value. Elsewhere, it will retain + its original value. Default set to True. + + Returns + ------- + pdarray + A pdarray containing hyperbolic tangent for each element + of the original pdarray + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + """ + return _trig_helper(pda, "tanh", where) + + +@typechecked +def arcsinh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: + """ + Return the element-wise inverse hyperbolic sine of the array. + + Parameters + ---------- + pda : pdarray + where : Boolean or pdarray + This condition is broadcast over the input. At locations where the condition is True, + the inverse hyperbolic sine will be applied to the corresponding value. Elsewhere, it will retain + its original value. Default set to True. + + Returns + ------- + pdarray + A pdarray containing inverse hyperbolic sine for each element + of the original pdarray + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + """ + return _trig_helper(pda, "arcsinh", where) + + +@typechecked +def arccosh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: + """ + Return the element-wise inverse hyperbolic cosine of the array. + + Parameters + ---------- + pda : pdarray + where : Boolean or pdarray + This condition is broadcast over the input. At locations where the condition is True, + the inverse hyperbolic cosine will be applied to the corresponding value. Elsewhere, it will + retain its original value. Default set to True. 
def _trig_helper(pda: pdarray, func: str, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Apply the named element-wise function to a pdarray, optionally under a mask.

    Parameters
    ----------
    pda : pdarray
        Input array.
    func : string
        Name of the function, forwarded as the "func" argument of the
        ``efunc{ndim}D`` server command.
    where : Boolean or pdarray
        True applies the function to every element. False returns pda itself,
        untouched. A boolean pdarray applies the function only at the selected
        positions; the remaining positions keep their original values (cast to
        the dtype of the computed result). Default set to True.

    Returns
    -------
    pdarray
        A pdarray with the function applied at each selected element of pda

    Raises
    ------
    TypeError
        Raised if where is a pdarray whose dtype is not bool
    """
    if where is False:
        return pda

    if where is True:
        reply = generic_msg(
            cmd=f"efunc{pda.ndim}D",
            args={
                "func": func,
                "array": pda,
            },
        )
        return create_pdarray(type_cast(str, reply))

    if where.dtype != bool:
        raise TypeError(f"where must have dtype bool, got {where.dtype} instead")

    # Compute the function on the selected elements only.
    reply = generic_msg(
        cmd=f"efunc{pda.ndim}D",
        args={
            "func": func,
            "array": pda[where],
        },
    )
    # Take a full slice of the input, then scatter the computed values into it.
    result = pda[:]
    computed = create_pdarray(type_cast(str, reply))
    result = cast(result, computed.dtype)
    result[where] = computed
    return result
At locations where the condition is True, the + corresponding value will be converted from degrees to radians. Elsewhere, it will retain its + original value. Default set to True. + + Returns + ------- + pdarray + A pdarray containing an angle converted to radians, from degrees, for each element + of the original pdarray + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + """ + if where is True: + return np.pi * pda / 180 + elif where is False: + return pda + else: + new_pda = pda + ret = np.pi * pda[where] / 180 + new_pda = cast(new_pda, ret.dtype) + new_pda[where] = ret + return new_pda + + +def _hash_helper(a): + from arkouda import Categorical as Categorical_ + from arkouda import SegArray as SegArray_ + + if isinstance(a, SegArray_): + return json.dumps( + { + "segments": a.segments.name, + "values": a.values.name, + "valObjType": a.values.objType, + } + ) + elif isinstance(a, Categorical_): + return json.dumps({"categories": a.categories.name, "codes": a.codes.name}) + else: + return a.name + + +# this is # type: ignored and doesn't actually do any type checking +# the type hints are there as a reference to show which types are expected +# type validation is done within the function +def hash( + pda: Union[ # type: ignore + Union[pdarray, Strings, SegArray, Categorical], + List[Union[pdarray, Strings, SegArray, Categorical]], + ], + full: bool = True, +) -> Union[Tuple[pdarray, pdarray], pdarray]: + """ + Return an element-wise hash of the array or list of arrays. + + Parameters + ---------- + pda : Union[pdarray, Strings, Segarray, Categorical], + List[Union[pdarray, Strings, Segarray, Categorical]]] + + full : bool + This is only used when a single pdarray is passed into hash + By default, a 128-bit hash is computed and returned as + two int64 arrays. If full=False, then a 64-bit hash + is computed and returned as a single int64 array. 
+ + Returns + ------- + hashes + If full=True or a list of pdarrays is passed, + a 2-tuple of pdarrays containing the high + and low 64 bits of each hash, respectively. + If full=False and a single pdarray is passed, + a single pdarray containing a 64-bit hash + + Raises + ------ + TypeError + Raised if the parameter is not a pdarray + + Notes + ----- + In the case of a single pdarray being passed, this function + uses the SIPhash algorithm, which can output either a 64-bit + or 128-bit hash. However, the 64-bit hash runs a significant + risk of collisions when applied to more than a few million + unique values. Unless the number of unique values is known to + be small, the 128-bit hash is strongly recommended. + + Note that this hash should not be used for security, or for + any cryptographic application. Not only is SIPhash not + intended for such uses, but this implementation employs a + fixed key for the hash, which makes it possible for an + adversary with control over input to engineer collisions. + + In the case of a list of pdrrays, Strings, Categoricals, or Segarrays + being passed, a non-linear function must be applied to each + array since hashes of subsequent arrays cannot be simply XORed + because equivalent values will cancel each other out, hence we + do a rotation by the ordinal of the array. + """ + from arkouda import Categorical as Categorical_ + from arkouda import SegArray as SegArray_ + + if isinstance(pda, (pdarray, Strings, SegArray_, Categorical_)): + return _hash_single(pda, full) if isinstance(pda, pdarray) else pda.hash() + elif isinstance(pda, List): + if any( + wrong_type := [not isinstance(a, (pdarray, Strings, SegArray_, Categorical_)) for a in pda] + ): + raise TypeError( + f"Unsupported type {type(pda[np.argmin(wrong_type)])}. Supported types are pdarray," + f" SegArray, Strings, Categoricals, and Lists of these types." 
@no_type_check
def _str_cat_where(
    condition: pdarray,
    A: Union[str, Strings, Categorical],
    B: Union[str, Strings, Categorical],
) -> Union[Strings, Categorical]:
    """
    Implementation of ``where`` for string-like operands.

    Parameters
    ----------
    condition : pdarray
        Selects from A where True and from B where False.
    A : Union[str, Strings, Categorical]
        Value(s) used where condition is True.
    B : Union[str, Strings, Categorical]
        Value(s) used where condition is False.

    Returns
    -------
    Union[Strings, Categorical]
        A Categorical when a Categorical operand is involved, otherwise a Strings.

    Raises
    ------
    TypeError
        Raised if two Categoricals or two Strings differ in length, or if the
        operand combination is not handled (e.g. Strings mixed with Categorical)
    """
    # added @no_type_check because mypy can't handle Categorical not being declared
    # sooner, but there are circular dependencies preventing that
    from arkouda.categorical import Categorical
    from arkouda.pdarraysetops import concatenate

    if isinstance(A, str) and isinstance(B, (Categorical, Strings)):
        # This allows us to assume if a str is present it is B
        A, B, condition = B, A, ~condition

    # one cat and one str
    if isinstance(A, Categorical) and isinstance(B, str):
        is_in_categories = A.categories == B
        if ak_any(is_in_categories):
            new_categories = A.categories
            b_code = argmax(is_in_categories)
        else:
            new_categories = concatenate([A.categories, array([B])])
            # B was appended after the existing categories, so its code is the
            # old category count (not a function of the number of codes).
            b_code = A.categories.size
        new_codes = where(condition, A.codes, b_code)
        return Categorical.from_codes(new_codes, new_categories, NAvalue=A.NAvalue).reset_categories()

    # both cat
    if isinstance(A, Categorical) and isinstance(B, Categorical):
        if A.codes.size != B.codes.size:
            raise TypeError("Categoricals must be same length")
        # Codes are only comparable once both operands share one category list.
        if A.categories.size != B.categories.size or not ak_all(A.categories == B.categories):
            A, B = A.standardize_categories([A, B])
        new_codes = where(condition, A.codes, B.codes)
        return Categorical.from_codes(new_codes, A.categories, NAvalue=A.NAvalue).reset_categories()

    # one strings and one str
    if isinstance(A, Strings) and isinstance(B, str):
        new_lens = where(condition, A.get_lengths(), len(B))
        repMsg = generic_msg(
            cmd="segmentedWhere",
            args={
                "seg_str": A,
                "other": B,
                "is_str_literal": True,
                "new_lens": new_lens,
                "condition": condition,
            },
        )
        return Strings.from_return_msg(repMsg)

    # both strings
    if isinstance(A, Strings) and isinstance(B, Strings):
        if A.size != B.size:
            raise TypeError("Strings must be same length")
        new_lens = where(condition, A.get_lengths(), B.get_lengths())
        repMsg = generic_msg(
            cmd="segmentedWhere",
            args={
                "seg_str": A,
                "other": B,
                "is_str_literal": False,
                "new_lens": new_lens,
                "condition": condition,
            },
        )
        return Strings.from_return_msg(repMsg)

    raise TypeError("ak.where is not supported between Strings and Categorical")
As is the case with numpy.where, the return array + consists of values from the first array (A) where the conditioning array + elements are True and from the second array (B) where the conditioning + array elements are False. + + Parameters + ---------- + condition : pdarray + Used to choose values from A or B + A : Union[numeric_scalars, str, pdarray, Strings, Categorical] + Value(s) used when condition is True + B : Union[numeric_scalars, str, pdarray, Strings, Categorical] + Value(s) used when condition is False + + Returns + ------- + pdarray + Values chosen from A where the condition is True and B where + the condition is False + + Raises + ------ + TypeError + Raised if the condition object is not a pdarray, if A or B is not + an int, np.int64, float, np.float64, pdarray, str, Strings, Categorical + if pdarray dtypes are not supported or do not match, or multiple + condition clauses (see Notes section) are applied + ValueError + Raised if the shapes of the condition, A, and B pdarrays are unequal + + Examples + -------- + >>> a1 = ak.arange(1,10) + >>> a2 = ak.ones(9, dtype=np.int64) + >>> cond = a1 < 5 + >>> ak.where(cond,a1,a2) + array([1, 2, 3, 4, 1, 1, 1, 1, 1]) + + >>> a1 = ak.arange(1,10) + >>> a2 = ak.ones(9, dtype=np.int64) + >>> cond = a1 == 5 + >>> ak.where(cond,a1,a2) + array([1, 1, 1, 1, 5, 1, 1, 1, 1]) + + >>> a1 = ak.arange(1,10) + >>> a2 = 10 + >>> cond = a1 < 5 + >>> ak.where(cond,a1,a2) + array([1, 2, 3, 4, 10, 10, 10, 10, 10]) + + >>> s1 = ak.array([f'str {i}' for i in range(10)]) + >>> s2 = 'str 21' + >>> cond = (ak.arange(10) % 2 == 0) + >>> ak.where(cond,s1,s2) + array(['str 0', 'str 21', 'str 2', 'str 21', 'str 4', 'str 21', 'str 6', 'str 21', 'str 8','str 21']) + + >>> c1 = ak.Categorical(ak.array([f'str {i}' for i in range(10)])) + >>> c2 = ak.Categorical(ak.array([f'str {i}' for i in range(9, -1, -1)])) + >>> cond = (ak.arange(10) % 2 == 0) + >>> ak.where(cond,c1,c2) + array(['str 0', 'str 8', 'str 2', 'str 6', 'str 4', 'str 4', 'str 
6', 'str 2', 'str 8', 'str 0']) + + Notes + ----- + A and B must have the same dtype and only one conditional clause + is supported e.g., n < 5, n > 1, which is supported in numpy + is not currently supported in Arkouda + """ + if (not isSupportedNumber(A) and not isinstance(A, pdarray)) or ( + not isSupportedNumber(B) and not isinstance(B, pdarray) + ): + from arkouda.categorical import Categorical # type: ignore + + # fmt: off + if ( + not isinstance(A, (str, Strings, Categorical)) # type: ignore + or not isinstance(B, (str, Strings, Categorical)) # type: ignore + ): + # fmt:on + raise TypeError( + "both A and B must be an int, np.int64, float, np.float64, pdarray OR" + " both A and B must be an str, Strings, Categorical" + ) + return _str_cat_where(condition, A, B) + if isinstance(A, pdarray) and isinstance(B, pdarray): + # TODO: handle shape broadcasting for multidimensional arrays + repMsg = generic_msg( + cmd=f"efunc3vv{condition.ndim}D", + args={ + "func": "where", + "condition": condition, + "a": A, + "b": B, + }, + ) + # For scalars, try to convert it to the array's dtype + elif isinstance(A, pdarray) and np.isscalar(B): + repMsg = generic_msg( + cmd=f"efunc3vs{condition.ndim}D", + args={ + "func": "where", + "condition": condition, + "a": A, + "dtype": A.dtype.name, + "scalar": A.format_other(B), + }, + ) + elif isinstance(B, pdarray) and np.isscalar(A): + repMsg = generic_msg( + cmd=f"efunc3sv{condition.ndim}D", + args={ + "func": "where", + "condition": condition, + "dtype": B.dtype.name, + "scalar": B.format_other(A), + "b": B, + }, + ) + elif np.isscalar(A) and np.isscalar(B): + # Scalars must share a common dtype (or be cast) + dtA = resolve_scalar_dtype(A) + dtB = resolve_scalar_dtype(B) + # Make sure at least one of the dtypes is supported + if not (dtA in DTypes or dtB in DTypes): + raise TypeError(f"Not implemented for scalar types {dtA} and {dtB}") + # If the dtypes are the same, do not cast + if dtA == dtB: # type: ignore + dt = dtA + # If the 
@typechecked
def histogram(pda: pdarray, bins: int_scalars = 10) -> Tuple[pdarray, pdarray]:
    """
    Compute a histogram of evenly spaced bins over the range of an array.

    Parameters
    ----------
    pda : pdarray
        The values to histogram

    bins : int_scalars
        The number of equal-size bins to use (default: 10)

    Returns
    -------
    (pdarray, pdarray)
        The number of values present in each bin, followed by the bin edges
        (``bins + 1`` values spanning [pda.min(), pda.max()])

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray or if bins is
        not an int.
    ValueError
        Raised if bins < 1
    NotImplementedError
        Raised if pdarray dtype is bool or uint8

    See Also
    --------
    value_counts, histogram2d

    Notes
    -----
    The bins are evenly spaced in the interval [pda.min(), pda.max()].

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> A = ak.arange(0, 10, 1)
    >>> nbins = 3
    >>> h, b = ak.histogram(A, bins=nbins)
    >>> h
    array([3, 3, 4])
    >>> b
    array([0., 3., 6., 9.])

    # To plot, export the left edges and the histogram to NumPy
    >>> plt.plot(b.to_ndarray()[:-1], h.to_ndarray())
    """
    if bins < 1:
        raise ValueError("bins must be 1 or greater")
    # The edges are computed client-side; the counts come from the server.
    b = linspace(pda.min(), pda.max(), bins + 1)
    repMsg = generic_msg(cmd="histogram", args={"array": pda, "bins": bins})
    return create_pdarray(type_cast(str, repMsg)), b
def histogramdd(
    sample: Sequence[pdarray], bins: Union[int_scalars, Sequence[int_scalars]] = 10
) -> Tuple[pdarray, Sequence[pdarray]]:
    """
    Compute the multidimensional histogram of data in sample with evenly spaced bins.

    Parameters
    ----------
    sample : Sequence[pdarray]
        A sequence of pdarrays containing the coordinates of the points to be histogrammed.

    bins : int_scalars or Sequence[int_scalars] = 10
        The number of equal-size bins to use.
        If int, the number of bins for all dimensions (nx=ny=...=bins).
        If [int, int, ...], the number of bins in each dimension (nx, ny, ... = bins).
        Defaults to 10

    Returns
    -------
    hist : pdarray
        shape(nx, ny, ..., nd)
        The multidimensional histogram of pdarrays in sample.
        Values in first pdarray are histogrammed along the first dimension.
        Values in second pdarray are histogrammed along the second dimension and so on.

    edges : List[pdarray]
        A list of pdarrays containing the bin edges for each dimension.

    Raises
    ------
    ValueError
        Raised if sample is not a sequence, if the pdarrays in sample do not
        all share one dtype, if a sequence of bins does not have one entry
        per sample array, or if any bin count is < 1
    NotImplementedError
        Raised if pdarray dtype is bool or uint8

    See Also
    --------
    histogram

    Notes
    -----
    The bins for each dimension, m, are evenly spaced in the interval [m.min(), m.max()]

    Examples
    --------
    >>> x = ak.arange(0, 10, 1)
    >>> y = ak.arange(9, -1, -1)
    >>> z = ak.where(x % 2 == 0, x, y)
    >>> h, edges = ak.histogramdd((x, y,z), bins=(2,2,5))
    >>> h
    array([[[0, 0, 0, 0, 0],
            [1, 1, 1, 1, 1]],

           [[1, 1, 1, 1, 1],
            [0, 0, 0, 0, 0]]])
    >>> edges
    [array([0.0 4.5 9.0]),
     array([0.0 4.5 9.0]),
     array([0.0 1.6 3.2 4.8 6.4 8.0])]
    """
    if not isinstance(sample, Sequence):
        raise ValueError("Sample must be a sequence of pdarrays")
    distinct_dtypes = {pda.dtype for pda in sample}
    if len(distinct_dtypes) != 1:
        raise ValueError("All pdarrays in sample must have same dtype")

    num_dims = len(sample)
    if isinstance(bins, Sequence):
        if len(bins) != num_dims:
            raise ValueError("Sequences of bins must contain same number of elements as the sample")
    else:
        # A single bin count applies to every dimension.
        bins = [bins] * num_dims
    for count in bins:
        if count < 1:
            raise ValueError("bins must be 1 or greater")

    # Tuples are converted to lists before being used below.
    if isinstance(bins, tuple):
        bins = list(bins)
    if isinstance(sample, tuple):
        sample = list(sample)

    bin_boundaries = []
    for coords, count in zip(sample, bins):
        bin_boundaries.append(linspace(coords.min(), coords.max(), count + 1))

    # Running product of the reversed bin counts, divided by the counts
    # themselves, then flipped back into dimension order.
    reversed_bins = array(bins)[::-1]
    dim_prod = (cumprod(reversed_bins) // reversed_bins)[::-1]

    reply = generic_msg(
        cmd="histogramdD",
        args={
            "sample": sample,
            "num_dims": num_dims,
            "bins": bins,
            "dim_prod": dim_prod,
            "num_samples": sample[0].size,
        },
    )
    counts = create_pdarray(type_cast(str, reply))
    return counts.reshape(bins), bin_boundaries
@typechecked
def clip(
    pda: pdarray,
    lo: Union[numeric_scalars, pdarray, None],
    hi: Union[numeric_scalars, pdarray, None],
) -> pdarray:
    """
    Clip (limit) the values in an array to a given range [lo,hi]

    Given an array a, values outside the range are clipped to the
    range edges, such that all elements lie in the range.

    There is no check to enforce that lo < hi.  If lo > hi, the corresponding
    value of the array will be set to hi.

    If lo or hi (or both) are pdarrays, the check is by pairwise elements.
    See examples.

    Parameters
    ----------
    pda : pdarray, int64 or float64
        the array of values to clip
    lo : scalar, pdarray or None, int64 or float64
        the lower value of the clipping range; None disables the lower bound
    hi : scalar, pdarray or None, int64 or float64
        the higher value of the clipping range; None disables the upper bound
        If lo or hi (or both) are pdarrays, the check is by pairwise elements.
        See examples.

    Returns
    -------
    arkouda.pdarrayclass.pdarray
        A pdarray matching pda, except that element x remains x if lo <= x <= hi,
                                                or becomes lo if x < lo,
                                                or becomes hi if x > hi.

    Examples
    --------
    >>> a = ak.array([1,2,3,4,5,6,7,8,9,10])
    >>> ak.clip(a,3,8)
    array([3,3,3,4,5,6,7,8,8,8])
    >>> ak.clip(a,3,8.0)
    array([3.00000000000000000 3.00000000000000000 3.00000000000000000 4.00000000000000000
           5.00000000000000000 6.00000000000000000 7.00000000000000000 8.00000000000000000
           8.00000000000000000 8.00000000000000000])
    >>> ak.clip(a,None,7)
    array([1,2,3,4,5,6,7,7,7,7])
    >>> ak.clip(a,5,None)
    array([5,5,5,5,5,6,7,8,9,10])
    >>> ak.clip(a,None,None)
    ValueError : Either min or max must be supplied.
    >>> ak.clip(a,ak.array([2,2,3,3,8,8,5,5,6,6]),8)
    array([2,2,3,4,8,8,7,8,8,8])
    >>> ak.clip(a,4,ak.array([10,9,8,7,6,5,5,5,5,5]))
    array([4,4,4,4,5,5,5,5,5,5])

    Notes
    -----
    Either lo or hi may be None, but not both.
    If lo > hi, all x = hi.
    If all inputs are int64, output is int64, but if any input is float64, output is float64.

    Raises
    ------
    ValueError
        Raised if both lo and hi are None
    """

    # Check that a range was actually supplied.

    if lo is None and hi is None:
        raise ValueError("Either min or max must be supplied.")

    # If any of the inputs are float, then make everything float.
    # Some type checking is needed, because scalars and pdarrays get cast differently.

    dataFloat = pda.dtype == float
    minFloat = isinstance(lo, float) or (isinstance(lo, pdarray) and lo.dtype == float)
    maxFloat = isinstance(hi, float) or (isinstance(hi, pdarray) and hi.dtype == float)
    forceFloat = dataFloat or minFloat or maxFloat
    if forceFloat:
        if not dataFloat:
            pda = cast(pda, np.float64)
        if lo is not None and not minFloat:
            lo = cast(lo, np.float64) if isinstance(lo, pdarray) else float(lo)
        if hi is not None and not maxFloat:
            hi = cast(hi, np.float64) if isinstance(hi, pdarray) else float(hi)

    # Now do the clipping.  The upper bound is applied last, which is why
    # hi wins whenever lo > hi.

    pda1 = pda
    if lo is not None:
        pda1 = where(pda < lo, lo, pda)
    if hi is not None:
        pda1 = where(pda1 > hi, hi, pda1)
    return pda1
def array_equal(pda_a: pdarray, pda_b: pdarray, equal_nan: bool = False):
    """
    Compares two pdarrays for equality.
    If neither array has any nan elements, then if all elements are pairwise equal,
    it returns True.
    If equal_nan is False, then any nan element in either array gives a False return.
    If equal_nan is True, then pairwise-corresponding nans are considered equal.

    Parameters
    ----------
    pda_a : pdarray
    pda_b : pdarray
    equal_nan : boolean to determine how to handle nans, default False

    Returns
    -------
    boolean
      With string data:
         False if one array is type ak.str_ & the other isn't, True if both are ak.str_ & they match.

      With numeric data:
         True if neither array has any nan elements, and all elements pairwise equal.

         True if equal_nan True, all non-nans pairwise equal & nans in pda_a correspond to nans in pda_b

         False if equal_nan False, & either array has any nan element.

      In every case, False if the two arrays differ in shape.

    Examples
    --------
    >>> a = ak.randint(0,10,10,dtype=ak.float64)
    >>> b = a
    >>> ak.array_equal(a,b)
    True
    >>> b[9] = np.nan
    >>> ak.array_equal(a,b)
    False
    >>> a[9] = np.nan
    >>> ak.array_equal(a,b)
    False
    >>> ak.array_equal(a,b,True)
    True
    """
    # Arrays of different shape are never equal.
    if pda_a.shape != pda_b.shape:
        return False

    # A string array is never equal to a non-string array.
    a_is_str = pda_a.dtype == akstr_
    b_is_str = pda_b.dtype == akstr_
    if a_is_str != b_is_str:
        return False

    if not equal_nan:
        return ak_all(pda_a == pda_b)

    # Where pda_a is nan, require pda_b to be nan too; elsewhere compare values.
    nan_in_a = isnan(pda_a)
    nan_in_b = isnan(pda_b)
    return ak_all(where(nan_in_a, nan_in_b, pda_a == pda_b))
def putmask(pda: pdarray, mask: Union[bool, pdarray], values: pdarray):
    """
    Overwrite elements of a pdarray at indices where mask is True

    Parameters
    ----------
    pda : pdarray, source data, also output data
        pda = input where mask is False, = values where mask is True
    mask : a scalar boolean, or a pdarray of booleans
    values : pdarray, replacement data

    Returns
    -------
    None - pda is modified in-place

    Notes
    -----
    If values.size != pda.size, values is repeated and/or pruned as needed to
    make sizes match, because ak.where requires matching sizes.

    Examples
    --------
    >>> a = ak.array(np.arange(10))
    >>> ak.putmask (a,a>2,a**2)
    >>> a
    array ([0,1,2,9,16,25,36,49,64,81])

    >>> values = ak.array([3,2])
    >>> ak.putmask (a,a>2,values)
    >>> a
    array ([0,1,2,2,3,2,3,2,3,2])

    Raises
    ------
    TypeError
        Raised if pda and values are not the same type

    """

    from arkouda.pdarraysetops import concatenate

    # check for matching types

    if values.dtype != pda.dtype:
        raise TypeError("ak.putmask requires arrays of matching type")

    # Repeat values often enough to cover pda (ceiling division) ...

    growth = pda.size // values.size + (0 if pda.size % values.size == 0 else 1)
    result = concatenate(growth * [values])

    # ... then keep exactly the first pda.size elements.  The previous pruning
    # removed only result.size % pda.size trailing elements, which left the
    # wrong length (or an empty array) whenever values was at least twice as
    # long as pda.

    if result.size > pda.size:
        result = result[: pda.size]

    pda[:] = where(mask, result, pda)  # pda[:] = writes into the caller's array in place
def eye(rows: int_scalars, cols: int_scalars, diag: int_scalars = 0, dt: type = akint64):
    """
    Return a pdarray with zeros everywhere except along a diagonal, which is all ones.
    The matrix need not be square.

    Parameters
    ----------
    rows : int_scalars
        number of rows
    cols : int_scalars
        number of columns
    diag : int_scalars
        selects the diagonal that holds the ones:
        if diag = 0, ones start at element [0,0] and proceed along diagonal
        if diag > 0, ones start at element [0,diag] and proceed along diagonal
        if diag < 0, ones start at element [-diag,0] and proceed along diagonal
    dt : type
        element type of the result, default ak.int64

    Returns
    -------
    pdarray
        an array of zeros with ones along the specified diagonal

    Examples
    --------
    >>> ak.eye(rows=4,cols=4,diag=0,dt=ak.int64)
    array([array([1 0 0 0]) array([0 1 0 0]) array([0 0 1 0]) array([0 0 0 1])])
    >>> ak.eye(rows=3,cols=3,diag=1,dt=ak.float64)
    array([array([0.00000000000000000 1.00000000000000000 0.00000000000000000])
    array([0.00000000000000000 0.00000000000000000 1.00000000000000000])
    array([0.00000000000000000 0.00000000000000000 0.00000000000000000])])
    >>> ak.eye(rows=4,cols=4,diag=-1,dt=ak.bool_)
    array([array([False False False False]) array([True False False False])
    array([False True False False]) array([False False True False])])

    Notes
    -----
    if rows = cols and diag = 0, the result is an identity matrix

    """

    # The element type is part of the server command name.
    cmd = f"eye<{akdtype(dt).name}>"
    args = {"rows": rows, "cols": cols, "diag": diag}
    return create_pdarray(generic_msg(cmd=cmd, args=args))
def triu(pda: pdarray, diag: int_scalars = 0):
    """
    Return a copy of the pda with the lower triangle zeroed out

    Parameters
    ----------
    pda : pdarray
    diag : int_scalars
        selects where the zeroed region ends (see Examples):
        if diag = 0, zeros start just below the main diagonal
        if diag = 1, zeros start at the main diagonal
        if diag = 2, zeros start just above the main diagonal
        etc.

    Returns
    -------
    pdarray
        a copy of pda with zeros in the lower triangle

    Examples
    --------
    >>> a = ak.array([[1,2,3,4,5],[2,3,4,5,6],[3,4,5,6,7],[4,5,6,7,8],[5,6,7,8,9]])
    >>> ak.triu(a,diag=0)
    array([array([1 2 3 4 5]) array([0 3 4 5 6]) array([0 0 5 6 7])
    array([0 0 0 7 8]) array([0 0 0 0 9])])
    >>> ak.triu(a,diag=1)
    array([array([0 2 3 4 5]) array([0 0 4 5 6]) array([0 0 0 6 7])
    array([0 0 0 0 8]) array([0 0 0 0 0])])
    >>> ak.triu(a,diag=2)
    array([array([0 0 3 4 5]) array([0 0 0 5 6]) array([0 0 0 0 7])
    array([0 0 0 0 0]) array([0 0 0 0 0])])
    >>> ak.triu(a,diag=3)
    array([array([0 0 0 4 5]) array([0 0 0 0 6]) array([0 0 0 0 0])
    array([0 0 0 0 0]) array([0 0 0 0 0])])
    >>> ak.triu(a,diag=4)
    array([array([0 0 0 0 5]) array([0 0 0 0 0]) array([0 0 0 0 0])
    array([0 0 0 0 0]) array([0 0 0 0 0])])

    Notes
    -----
    Server returns an error if rank of pda < 2

    """

    # Element type and rank of the input are part of the server command name.
    cmd = f"triu<{pda.dtype},{pda.ndim}>"
    args = {"array": pda, "diag": diag}
    return create_pdarray(generic_msg(cmd=cmd, args=args))
def tril(pda: pdarray, diag: int_scalars = 0):
    """
    Return a copy of the pda with the upper triangle zeroed out

    Parameters
    ----------
    pda : pdarray
    diag : int_scalars
        selects where the zeroed region begins (see Examples):
        if diag = 0, zeros start just above the main diagonal
        if diag = 1, zeros start two diagonals above the main diagonal
        if diag = 2, zeros start three diagonals above the main diagonal
        etc.

    Returns
    -------
    pdarray
        a copy of pda with zeros in the upper triangle

    Examples
    --------
    >>> a = ak.array([[1,2,3,4,5],[2,3,4,5,6],[3,4,5,6,7],[4,5,6,7,8],[5,6,7,8,9]])
    >>> ak.tril(a,diag=4)
    array([array([1 2 3 4 5]) array([2 3 4 5 6]) array([3 4 5 6 7])
    array([4 5 6 7 8]) array([5 6 7 8 9])])
    >>> ak.tril(a,diag=3)
    array([array([1 2 3 4 0]) array([2 3 4 5 6]) array([3 4 5 6 7])
    array([4 5 6 7 8]) array([5 6 7 8 9])])
    >>> ak.tril(a,diag=2)
    array([array([1 2 3 0 0]) array([2 3 4 5 0]) array([3 4 5 6 7])
    array([4 5 6 7 8]) array([5 6 7 8 9])])
    >>> ak.tril(a,diag=1)
    array([array([1 2 0 0 0]) array([2 3 4 0 0]) array([3 4 5 6 0])
    array([4 5 6 7 8]) array([5 6 7 8 9])])
    >>> ak.tril(a,diag=0)
    array([array([1 0 0 0 0]) array([2 3 0 0 0]) array([3 4 5 0 0])
    array([4 5 6 7 0]) array([5 6 7 8 9])])

    Notes
    -----
    Server returns an error if rank of pda < 2

    """
    # Element type and rank of the input are part of the server command name.
    cmd = f"tril<{pda.dtype},{pda.ndim}>"
    args = {"array": pda, "diag": diag}
    return create_pdarray(generic_msg(cmd=cmd, args=args))
def transpose(pda: pdarray):
    """
    Compute the transpose of a matrix.

    Parameters
    ----------
    pda : pdarray

    Returns
    -------
    pdarray
        the transpose of the input matrix

    Examples
    --------
    >>> a = ak.array([[1,2,3,4,5],[1,2,3,4,5]])
    >>> ak.transpose(a)
    array([array([1 1]) array([2 2]) array([3 3]) array([4 4]) array([5 5])])


    Notes
    -----
    Server returns an error if rank of pda < 2

    """
    # Element type and rank of the input are part of the server command name.
    cmd = f"transpose<{pda.dtype},{pda.ndim}>"
    args = {"array": pda}
    return create_pdarray(generic_msg(cmd=cmd, args=args))


def matmul(pdaLeft: pdarray, pdaRight: pdarray):
    """
    Compute the product of two matrices.

    Parameters
    ----------
    pdaLeft : pdarray
    pdaRight : pdarray

    Returns
    -------
    pdarray
        the matrix product pdaLeft x pdaRight

    Examples
    --------
    >>> a = ak.array([[1,2,3,4,5],[1,2,3,4,5]])
    >>> b = ak.array([[1,1],[2,2],[3,3],[4,4],[5,5]])
    >>> ak.matmul(a,b)
    array([array([55 55]) array([55 55])])

    >>> x = ak.array([[1,2,3],[1.1,2.1,3.1]])
    >>> y = ak.array([[1,1,1],[0,2,2],[0,0,3]])
    >>> ak.matmul(x,y)
    array([array([1.00000000000000000 5.00000000000000000 14.00000000000000000])
    array([1.1000000000000001 5.3000000000000007 14.600000000000001])])

    Raises
    ------
    ValueError
        Raised if pdaLeft and pdaRight do not have the same rank

    Notes
    -----
    Server returns an error if shapes of pdaLeft and pdaRight
    are incompatible with matrix multiplication.

    """
    # Only the rank is checked here; shape compatibility is left to the server.
    if pdaLeft.ndim != pdaRight.ndim:
        raise ValueError("matmul requires matrices of matching rank.")

    # Both element types and the shared rank are part of the server command name.
    cmd = f"matmul<{pdaLeft.dtype},{pdaRight.dtype},{pdaLeft.ndim}>"
    args = {"x1": pdaLeft, "x2": pdaRight}
    return create_pdarray(generic_msg(cmd=cmd, args=args))
def vecdot(x1: pdarray, x2: pdarray):
    """
    Compute the generalized dot product of two arrays of identical shape.
    Assumes that both tensors have already been broadcast to the same shape.
    The axis sent to the server is fixed at 0.

    Parameters
    ----------
    x1 : pdarray
    x2 : pdarray

    Returns
    -------
    pdarray
        x1 vecdot x2

    Examples
    --------
    >>> a = ak.array([[1,2,3,4,5],[1,2,3,4,5]])
    >>> b = ak.array([[2,2,2,2,2],[2,2,2,2,2]])
    >>> ak.vecdot(a,b)
    array([5 10 15 20 25])
    >>> ak.vecdot(b,a)
    array([5 10 15 20 25])

    Raises
    ------
    ValueError
        Raised if x1 and x2 are not of matching shape or if rank of x1 < 2

    Notes
    -----
    NOTE(review): the example outputs above are carried over unchanged from the
    earlier documentation and should be re-checked against a running server.

    """

    if x1.shape != x2.shape:
        raise ValueError("vecdot requires matrices of matching shape.")
    if x1.ndim < 2:
        raise ValueError("vecdot requires matrices of rank 2 or more.")

    # Both element types and the shared rank are part of the server command name.
    cmd = f"vecdot<{x1.dtype},{x2.dtype},{x1.ndim}>"
    args = {
        "x1": x1,
        "x2": x2,
        # Shapes are already equal, so x1's shape doubles as the broadcast shape.
        "bcShape": tuple(x1.shape),
        "axis": 0,
    }
    return create_pdarray(generic_msg(cmd=cmd, args=args))
+ + Parameters + ---------- + x1 : pdarray + x2 : pdarray + + Returns + ------- + pdarray + x1 vecdot x2 + + Examples + -------- + >>> a = ak.array([[1,2,3,4,5],[1,2,3,4,5]]) + >>> b = ak.array(([2,2,2,2,2],[2,2,2,2,2]]) + >>> ak.vecdot(a,b) + array([5 10 15 20 25]) + >>> ak.vecdot(b,a) + array([5 10 15 20 25]) + + Raises + ------ + ValueTypeError + Raised if x1 and x2 are not of matching shape or if rank of x1 < 2 + + """ + + if x1.shape != x2.shape: + raise ValueError("vecdot requires matrices of matching rank.") + if x1.ndim < 2: + raise ValueError("vector requires matrices of rank 2 or more.") + cmd = f"vecdot<{x1.dtype},{x2.dtype},{x1.ndim}>" + args = { + "x1": x1, + "x2": x2, + "bcShape": tuple(x1.shape), + "axis": 0, + } + return create_pdarray( + generic_msg( + cmd=cmd, + args=args, + ) + ) diff --git a/arkouda/pdarrayclass.py b/arkouda/pdarrayclass.py index 01bf0292ed..e9fe63dc4c 100644 --- a/arkouda/pdarrayclass.py +++ b/arkouda/pdarrayclass.py @@ -10,6 +10,8 @@ from typeguard import typechecked from arkouda.client import generic_msg +from arkouda.infoclass import information, pretty_print_information +from arkouda.logger import getArkoudaLogger from arkouda.numpy.dtypes import NUMBER_FORMAT_STRINGS, DTypes, bigint from arkouda.numpy.dtypes import bool_ as akbool from arkouda.numpy.dtypes import dtype @@ -27,8 +29,6 @@ ) from arkouda.numpy.dtypes import str_ as akstr_ from arkouda.numpy.dtypes import uint64 as akuint64 -from arkouda.infoclass import information, pretty_print_information -from arkouda.logger import getArkoudaLogger __all__ = [ "pdarray", @@ -1620,7 +1620,7 @@ def value_counts(self): >>> ak.array([2, 0, 2, 4, 0, 0]).value_counts() (array([0, 2, 4]), array([3, 2, 1])) """ - from arkouda.numeric import value_counts + from arkouda.numpy import value_counts return value_counts(self) @@ -1642,7 +1642,7 @@ def astype(self, dtype) -> pdarray: _____ This is essentially shorthand for ak.cast(x, '') where x is a pdarray. 
""" - from arkouda.numeric import cast as akcast + from arkouda.numpy import cast as akcast return akcast(self, dtype) @@ -2482,7 +2482,7 @@ def _get_grouping_keys(self) -> List[pdarray]: must return a list of arrays that can be (co)argsorted. """ if self.dtype == akbool: - from arkouda.numeric import cast as akcast + from arkouda.numpy import cast as akcast return [akcast(self, akint64)] elif self.dtype in (akint64, akuint64): @@ -3170,8 +3170,8 @@ def divmod( >>> ak.divmod(x,y, x % 2 == 0) (array([5 6 7 1 9]), array([5 0 7 3 9])) """ - from arkouda.numeric import cast as akcast - from arkouda.numeric import where as akwhere + from arkouda.numpy import cast as akcast + from arkouda.numpy import where as akwhere from arkouda.pdarraycreation import full if not isinstance(x, pdarray) and not isinstance(y, pdarray): @@ -3530,7 +3530,7 @@ def clz(pda: pdarray) -> pdarray: if pda.dtype == bigint: if pda.max_bits == -1: raise ValueError("max_bits must be set to count leading zeros") - from arkouda.numeric import where + from arkouda.numpy import where from arkouda.pdarraycreation import zeros uint_arrs = pda.bigint_to_uint_arrays() @@ -3602,7 +3602,7 @@ def ctz(pda: pdarray) -> pdarray: if pda.dtype == bigint: # we don't need max_bits to be set because that only limits the high bits # which is only relevant when ctz(0) which is defined to be 0 - from arkouda.numeric import where + from arkouda.numpy import where from arkouda.pdarraycreation import zeros # reverse the list, so we visit low bits first @@ -3750,8 +3750,8 @@ def power(pda: pdarray, pwr: Union[int, float, pdarray], where: Union[bool, pdar >>> ak.power(a), 3, a % 2 == 0) array([0, 1, 8, 3, 64]) """ - from arkouda.numeric import cast as akcast - from arkouda.numeric import where as akwhere + from arkouda.numpy import cast as akcast + from arkouda.numpy import where as akwhere if where is True: return pda**pwr diff --git a/arkouda/pdarraycreation.py b/arkouda/pdarraycreation.py index 81e063f867..5a17da21cf 
100644 --- a/arkouda/pdarraycreation.py +++ b/arkouda/pdarraycreation.py @@ -27,7 +27,6 @@ from arkouda.pdarrayclass import create_pdarray, pdarray from arkouda.strings import Strings - __all__ = [ "array", "zeros", @@ -205,7 +204,7 @@ def array( >>> type(strings) """ - from arkouda.numeric import cast as akcast + from arkouda.numpy import cast as akcast # If a is already a pdarray, do nothing if isinstance(a, pdarray): @@ -284,6 +283,7 @@ def array( raise RuntimeError(f"Unhandled dtype {a.dtype}") else: from arkouda.util import _infer_shape_from_size + shape, ndim, full_size = _infer_shape_from_size(a.shape) # Do not allow arrays that are too large @@ -478,6 +478,7 @@ def zeros( if dtype_name not in NumericDTypes: raise TypeError(f"unsupported dtype {dtype}") from arkouda.util import _infer_shape_from_size + shape, ndim, full_size = _infer_shape_from_size(size) if ndim > get_max_array_rank(): @@ -538,6 +539,7 @@ def ones( if dtype_name not in NumericDTypes: raise TypeError(f"unsupported dtype {dtype}") from arkouda.util import _infer_shape_from_size + shape, ndim, full_size = _infer_shape_from_size(size) if ndim > get_max_array_rank(): @@ -607,6 +609,7 @@ def full( if dtype_name not in NumericDTypes: raise TypeError(f"unsupported dtype {dtype}") from arkouda.util import _infer_shape_from_size + shape, ndim, full_size = _infer_shape_from_size(size) if ndim > get_max_array_rank(): diff --git a/arkouda/plotting.py b/arkouda/plotting.py index 6447f7f460..312eb573a1 100644 --- a/arkouda/plotting.py +++ b/arkouda/plotting.py @@ -1,12 +1,14 @@ import math + import numpy as np from matplotlib import pyplot as plt + from arkouda.dataframe import DataFrame -from arkouda.timeclass import Datetime, Timedelta, date_range, timedelta_range +from arkouda.groupbyclass import GroupBy +from arkouda.numpy import histogram, isnan from arkouda.pdarrayclass import skew from arkouda.pdarraycreation import arange -from arkouda.numeric import histogram, isnan -from arkouda.groupbyclass 
import GroupBy +from arkouda.timeclass import Datetime, Timedelta, date_range, timedelta_range def plot_dist(b, h, log=True, xlabel=None, newfig=True): diff --git a/arkouda/random/_generator.py b/arkouda/random/_generator.py index aa0b170d8f..654c8bc509 100644 --- a/arkouda/random/_generator.py +++ b/arkouda/random/_generator.py @@ -87,7 +87,7 @@ def choice(self, a, size=None, replace=True, p=None): else: ret_scalar = False - from arkouda.numeric import cast as akcast + from arkouda.numpy import cast as akcast if _val_isinstance_of_union(a, int_scalars): is_domain = True @@ -416,7 +416,7 @@ def lognormal(self, mean=0.0, sigma=1.0, size=None, method="zig"): >>> ak.random.default_rng(17).lognormal(3, 2.5, 3) array([7.3866978126031091 106.20159494048757 4.5424399190667666]) """ - from arkouda.numeric import exp + from arkouda.numpy import exp norm_arr = self.normal(loc=mean, scale=sigma, size=size, method=method) return exp(norm_arr) if size is not None else np.exp(norm_arr) @@ -822,7 +822,7 @@ def float_array_or_scalar_helper(func_name, var_name, var, size): if size != var.size: raise TypeError(f"array of {var_name} must have same size as return size") if var.dtype != akfloat64: - from arkouda.numeric import cast as akcast + from arkouda.numpy import cast as akcast var = akcast(var, akfloat64) else: diff --git a/arkouda/scipy/_stats_py.py b/arkouda/scipy/_stats_py.py index 78d12a2faf..ea962650b8 100644 --- a/arkouda/scipy/_stats_py.py +++ b/arkouda/scipy/_stats_py.py @@ -5,8 +5,8 @@ from scipy.stats import chi2 # type: ignore import arkouda as ak -from arkouda.scipy.special import xlogy from arkouda.numpy.dtypes import float64 as akfloat64 +from arkouda.scipy.special import xlogy __all__ = ["power_divergence", "chisquare", "Power_divergenceResult"] diff --git a/arkouda/scipy/special/_math.py b/arkouda/scipy/special/_math.py index 02b640c1cd..8b9dba21ab 100644 --- a/arkouda/scipy/special/_math.py +++ b/arkouda/scipy/special/_math.py @@ -3,7 +3,7 @@ import numpy as np 
-from arkouda.numeric import log +from arkouda.numpy import log from arkouda.pdarrayclass import pdarray diff --git a/arkouda/segarray.py b/arkouda/segarray.py index 96876c550a..d9a894d826 100644 --- a/arkouda/segarray.py +++ b/arkouda/segarray.py @@ -9,14 +9,14 @@ import numpy as np from arkouda.client import generic_msg +from arkouda.groupbyclass import GroupBy, broadcast +from arkouda.join import gen_ranges +from arkouda.logger import getArkoudaLogger +from arkouda.numpy import cumsum from arkouda.numpy.dtypes import bool_ as akbool from arkouda.numpy.dtypes import int64 as akint64 from arkouda.numpy.dtypes import int_scalars, isSupportedInt, str_ from arkouda.numpy.dtypes import uint64 as akuint64 -from arkouda.groupbyclass import GroupBy, broadcast -from arkouda.join import gen_ranges -from arkouda.logger import getArkoudaLogger -from arkouda.numeric import cumsum from arkouda.pdarrayclass import RegistrationError, create_pdarray, is_sorted, pdarray from arkouda.pdarraycreation import arange, array, ones, zeros from arkouda.pdarraysetops import concatenate diff --git a/arkouda/series.py b/arkouda/series.py index 8e762ac673..a9ee672ee9 100644 --- a/arkouda/series.py +++ b/arkouda/series.py @@ -12,11 +12,11 @@ from arkouda.accessor import CachedAccessor, DatetimeAccessor, StringAccessor from arkouda.alignment import lookup from arkouda.categorical import Categorical -from arkouda.numpy.dtypes import dtype, float64, int64 from arkouda.groupbyclass import GroupBy, groupable_element_type from arkouda.index import Index, MultiIndex -from arkouda.numeric import cast as akcast -from arkouda.numeric import isnan, value_counts +from arkouda.numpy import cast as akcast +from arkouda.numpy import isnan, value_counts +from arkouda.numpy.dtypes import dtype, float64, int64 from arkouda.pdarrayclass import ( RegistrationError, any, @@ -1562,7 +1562,7 @@ def fillna(self, value) -> Series: +----+-----+ """ - from arkouda.numeric import where + from arkouda.numpy import where 
if isinstance(value, Series): value = value.values diff --git a/arkouda/sorting.py b/arkouda/sorting.py index 8af267011c..a45aadf79c 100644 --- a/arkouda/sorting.py +++ b/arkouda/sorting.py @@ -6,7 +6,15 @@ from typeguard import check_type, typechecked from arkouda.client import generic_msg -from arkouda.numpy.dtypes import bigint, bool_, dtype, float64, int64, int_scalars, uint64 +from arkouda.numpy.dtypes import ( + bigint, + bool_, + dtype, + float64, + int64, + int_scalars, + uint64, +) from arkouda.pdarrayclass import create_pdarray, pdarray from arkouda.pdarraycreation import zeros from arkouda.strings import Strings @@ -146,7 +154,7 @@ def coargsort( array([0, 1, 0, 1]) """ from arkouda.categorical import Categorical - from arkouda.numeric import cast as akcast + from arkouda.numpy import cast as akcast check_type( argname="coargsort", value=arrays, expected_type=Sequence[Union[pdarray, Strings, Categorical]] diff --git a/arkouda/strings.py b/arkouda/strings.py index 872afd4a6f..5f619bf80f 100644 --- a/arkouda/strings.py +++ b/arkouda/strings.py @@ -10,17 +10,12 @@ import arkouda.numpy.dtypes from arkouda.client import generic_msg -from arkouda.numpy.dtypes import NUMBER_FORMAT_STRINGS -from arkouda.numpy.dtypes import dtype as akdtype -from arkouda.numpy.dtypes import ( - int_scalars, - resolve_scalar_dtype, - str_, - str_scalars, -) from arkouda.infoclass import information, list_symbol_table from arkouda.logger import getArkoudaLogger from arkouda.match import Match, MatchType +from arkouda.numpy.dtypes import NUMBER_FORMAT_STRINGS +from arkouda.numpy.dtypes import dtype as akdtype +from arkouda.numpy.dtypes import int_scalars, resolve_scalar_dtype, str_, str_scalars from arkouda.pdarrayclass import RegistrationError from arkouda.pdarrayclass import all as akall from arkouda.pdarrayclass import create_pdarray, parse_single_value, pdarray @@ -2255,7 +2250,7 @@ def astype(self, dtype) -> pdarray: _____ This is essentially shorthand for ak.cast(x, '') where 
x is a pdarray. """ - from arkouda.numeric import cast as akcast + from arkouda.numpy import cast as akcast return akcast(self, dtype) diff --git a/arkouda/timeclass.py b/arkouda/timeclass.py index 0cf392f55f..301abe6e7d 100644 --- a/arkouda/timeclass.py +++ b/arkouda/timeclass.py @@ -4,16 +4,16 @@ import numpy as np from pandas import Series as pdSeries -from pandas import Timestamp as pdTimestamp from pandas import Timedelta as pdTimedelta +from pandas import Timestamp as pdTimestamp from pandas import date_range as pd_date_range from pandas import timedelta_range as pd_timedelta_range from pandas import to_datetime, to_timedelta from arkouda.client import generic_msg +from arkouda.numpy import abs as akabs +from arkouda.numpy import cast from arkouda.numpy.dtypes import int64, int_scalars, intTypes, isSupportedInt -from arkouda.numeric import abs as akabs -from arkouda.numeric import cast from arkouda.pdarrayclass import RegistrationError, create_pdarray, pdarray from arkouda.pdarraycreation import from_series diff --git a/pydoc/preprocess/generate_import_stubs.py b/pydoc/preprocess/generate_import_stubs.py index 3baabe441b..de671fa401 100644 --- a/pydoc/preprocess/generate_import_stubs.py +++ b/pydoc/preprocess/generate_import_stubs.py @@ -28,6 +28,12 @@ def insert_spaces_after_newlines(input_string, spaces): return None +def reformat_signature(signature_string: str): + signature_string = re.sub(r"\<([\w.]+):[ \'\w\>]+", "\\1", signature_string) + signature_string = re.sub(r"\", "\\1", signature_string) + return signature_string + + def get_parent_class_str(obj): if hasattr(obj, "__class__"): if inspect.isclass(obj): @@ -81,7 +87,8 @@ def write_stub(module, filename, all_only=False, allow_arkouda=False): elif inspect.isfunction(obj): if not name.startswith("__"): try: - f.write(f"def {name}{inspect.signature(obj)}:\n") + signature_string = reformat_signature(str(inspect.signature(obj))) + f.write(f"def {name}{signature_string}:\n") except: f.write(f"def 
{name}(self, *args, **kwargs):\n") diff --git a/pytest.ini b/pytest.ini index 551bf33042..26088e57fe 100644 --- a/pytest.ini +++ b/pytest.ini @@ -30,9 +30,8 @@ testpaths = tests/join_test.py tests/logger_test.py tests/message_test.py - tests/numeric_test.py tests/numpy/dtypes_test.py - tests/numpy/numpy_numeric_test.py + tests/numpy/numeric_test.py tests/numpy/numpy_test.py tests/operator_test.py tests/pdarray_creation_test.py diff --git a/tests/numeric_test.py b/tests/numpy/numeric_test.py similarity index 98% rename from tests/numeric_test.py rename to tests/numpy/numeric_test.py index 67310e03bf..b2735aea76 100644 --- a/tests/numeric_test.py +++ b/tests/numpy/numeric_test.py @@ -137,6 +137,21 @@ def _infinity_edge_case_helper(np_func, ak_func): class TestNumeric: + @pytest.mark.parametrize("prob_size", pytest.prob_size) + def test_floor_float(self, prob_size): + from arkouda import all as akall + from arkouda.numpy import floor as ak_floor + + a = 0.5 * ak.arange(prob_size, dtype="float64") + a_floor = ak_floor(a) + + expected_size = np.floor((prob_size + 1) / 2).astype("int64") + expected = ak.array(np.repeat(ak.arange(expected_size, dtype="float64").to_ndarray(), 2)) + # To deal with prob_size as an odd number: + expected = expected[0:prob_size] + + assert akall(a_floor == expected) + @pytest.mark.parametrize("numeric_type", NUMERIC_TYPES) @pytest.mark.parametrize("prob_size", pytest.prob_size) def test_seeded_rng_typed(self, prob_size, numeric_type): @@ -1009,7 +1024,7 @@ def test_array_equal(self, prob_size, data_type, same_size, matching, nan_handli pda_b = ak.array(temp) assert ak.array_equal(pda_a, pda_b) # matching string arrays pda_c = pda_b[:-1] - assert not (ak.array_equal(pda_a,pda_c)) # matching except c is shorter by 1 + assert not (ak.array_equal(pda_a, pda_c)) # matching except c is shorter by 1 temp = np.random.choice(VOWELS_AND_SUCH, prob_size) pda_b = ak.array(temp) assert not (ak.array_equal(pda_a, pda_b)) # mismatching string arrays @@ 
-1029,12 +1044,14 @@ def test_array_equal(self, prob_size, data_type, same_size, matching, nan_handli pda_a = ak.random.randint(0, 100, prob_size, dtype=data_type) if matching: # known to match? pda_b = pda_a if same_size else pda_a[:-1] - assert (ak.array_equal(pda_a, pda_b) == (matching and same_size)) + assert ak.array_equal(pda_a, pda_b) == (matching and same_size) elif same_size: # not matching, but same size? pda_b = ak.random.randint(0, 100, prob_size, dtype=data_type) assert not (ak.array_equal(pda_a, pda_b)) - else: - pda_b = ak.random.randint(0, 100, (prob_size if same_size else prob_size-1), dtype=data_type) + else: + pda_b = ak.random.randint( + 0, 100, (prob_size if same_size else prob_size - 1), dtype=data_type + ) assert not (ak.array_equal(pda_a, pda_b)) # Notes about median: diff --git a/tests/numpy/numpy_numeric_test.py b/tests/numpy/numpy_numeric_test.py deleted file mode 100644 index 54d328bfd4..0000000000 --- a/tests/numpy/numpy_numeric_test.py +++ /dev/null @@ -1,26 +0,0 @@ -import numpy as np -import pytest - -import arkouda as ak - -NUMERIC_TYPES = [ak.int64, ak.float64, ak.bool_, ak.uint64] -NO_BOOL = [ak.int64, ak.float64, ak.uint64] -NO_FLOAT = [ak.int64, ak.bool_, ak.uint64] -INT_FLOAT = [ak.int64, ak.float64] - - -class TestNumeric: - @pytest.mark.parametrize("prob_size", pytest.prob_size) - def test_floor_float(self, prob_size): - from arkouda import all as akall - from arkouda.numpy import floor as ak_floor - - a = 0.5 * ak.arange(prob_size, dtype="float64") - a_floor = ak_floor(a) - - expected_size = np.floor((prob_size + 1) / 2).astype("int64") - expected = ak.array(np.repeat(ak.arange(expected_size, dtype="float64").to_ndarray(), 2)) - # To deal with prob_size as an odd number: - expected = expected[0:prob_size] - - assert akall(a_floor == expected)