def get_server_max_array_dims():
    """
    Return the server's maximum supported array rank.

    The value is read from the 'max_array_dims' entry of 'serverConfig.json',
    looked up relative to the current working directory.

    Returns
    -------
    The value stored under 'max_array_dims', or 1 when the file is missing,
    is not valid JSON, or does not contain that key.
    """
    try:
        # Use a context manager so the file handle is closed deterministically
        # (the bare open() call leaked it and triggered a ResourceWarning).
        with open("serverConfig.json", "r") as config_file:
            return json.load(config_file)["max_array_dims"]
    except (ValueError, FileNotFoundError, TypeError, KeyError):
        return 1


class TestManipulationFunctions:
    """Tests for the array manipulation functions exposed by arkouda."""

    @pytest.mark.skipif(
        get_server_max_array_dims() < 2,
        reason="vstack requires server with 'max_array_dims' >= 2",
    )
    def test_vstack(self):
        """Check that ak.vstack agrees with np.vstack on four 1-D arrays."""
        a = [ak.random.randint(0, 10, 25) for _ in range(4)]
        n = [x.to_ndarray() for x in a]

        n_vstack = np.vstack(n)
        a_vstack = ak.vstack(a)

        assert n_vstack.tolist() == a_vstack.to_list()
@typechecked
def vstack(
    tup: Union[Tuple[pdarray, ...], List[pdarray]],
    *,
    dtype: Optional[Union[type, str]] = None,
    casting: Literal["no", "equiv", "safe", "same_kind", "unsafe"] = "same_kind",
) -> pdarray:
    """
    Stack a sequence of arrays vertically (row-wise).

    This is equivalent to concatenation along the first axis after 1-D arrays of
    shape `(N,)` have been reshaped to `(1,N)`.

    Parameters
    ----------
    tup : Tuple[pdarray, ...] or List[pdarray]
        The arrays to be stacked. Must contain at least one array, and all
        arrays must have the same number of dimensions.
    dtype : Optional[Union[type, str]], optional
        The data-type of the output array. If not provided, the output dtype
        is determined by applying `np.result_type` to the dtypes of the input
        arrays. Defaults to None.
    casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional
        Controls what kind of data casting may occur. Only "same_kind" (the
        default) is currently supported.

    Returns
    -------
    pdarray
        The stacked array

    Raises
    ------
    NotImplementedError
        If `casting` is anything other than "same_kind".
    ValueError
        If `tup` is empty, or if the input arrays do not all have the same
        number of dimensions.
    """

    if casting != "same_kind":
        # TODO: wasn't clear from the docs what each of the casting options does
        raise NotImplementedError(f"casting={casting} is not yet supported")

    # tup[0] below would otherwise fail with an uninformative IndexError
    if not tup:
        raise ValueError("need at least one array to stack")

    # ensure all arrays have the same number of dimensions
    ndim = tup[0].ndim
    if any(a.ndim != ndim for a in tup):
        raise ValueError("all input arrays must have the same number of dimensions")

    # establish the dtype of the output array
    if dtype is None:
        # np.result_type applies NumPy's promotion rules and keeps integer
        # inputs integral; np.common_type always yields a floating type and
        # rejects non-numeric (e.g. bool) inputs.
        dtype_ = np.result_type(*(a.dtype for a in tup))
    else:
        dtype_ = akdtype(dtype)

    # cast the input arrays to the output dtype if necessary
    arrays = [a.astype(dtype_) if a.dtype != dtype_ else a for a in tup]

    # stack the arrays along the first axis
    return create_pdarray(
        generic_msg(
            cmd=f"stack{ndim}D",
            args={
                "names": arrays,
                "n": len(arrays),
                "axis": 0,
            },
        )
    )