From f0e39cc3cb2f61bbd8294119f4ad9f39545b99f4 Mon Sep 17 00:00:00 2001 From: Mustafa Akur <33904309+akurmustafa@users.noreply.github.com> Date: Sun, 29 Sep 2024 02:54:17 -0700 Subject: [PATCH] Add take_arrays util for getting entries from 2d arrays (#6475) * Add take_arrays util function * Update comments * Minor changes --- arrow-select/src/take.rs | 59 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index ed7179fd36ce..6d037fc41984 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -94,6 +94,65 @@ pub fn take( } } +/// For each [ArrayRef] in the [`Vec`], take elements by index and create a new +/// [`Vec`] from those indices. +/// +/// ```text +/// ┌────────┬────────┐ +/// │ │ │ ┌────────┐ ┌────────┬────────┐ +/// │ A │ 1 │ │ │ │ │ │ +/// ├────────┼────────┤ │ 0 │ │ A │ 1 │ +/// │ │ │ ├────────┤ ├────────┼────────┤ +/// │ D │ 4 │ │ │ │ │ │ +/// ├────────┼────────┤ │ 2 │ take_arrays(values,indices) │ B │ 2 │ +/// │ │ │ ├────────┤ ├────────┼────────┤ +/// │ B │ 2 │ │ │ ───────────────────────────► │ │ │ +/// ├────────┼────────┤ │ 3 │ │ C │ 3 │ +/// │ │ │ ├────────┤ ├────────┼────────┤ +/// │ C │ 3 │ │ │ │ │ │ +/// ├────────┼────────┤ │ 1 │ │ D │ 4 │ +/// │ │ │ └────────┘ └────────┼────────┘ +/// │ E │ 5 │ +/// └────────┴────────┘ +/// values arrays indices array result +/// ``` +/// +/// # Errors +/// This function errors whenever: +/// * An index cannot be casted to `usize` (typically 32 bit architectures) +/// * An index is out of bounds and `options` is set to check bounds. +/// +/// # Safety +/// +/// When `options` is not set to check bounds, taking indexes after `len` will panic. +/// +/// # Examples +/// ``` +/// # use std::sync::Arc; +/// # use arrow_array::{StringArray, UInt32Array, cast::AsArray}; +/// # use arrow_select::take::{take, take_arrays}; +/// let string_values = Arc::new(StringArray::from(vec!["zero", "one", "two"])); +/// let values = Arc::new(UInt32Array::from(vec![0, 1, 2])); +/// +/// // Take items at index 2, and 1: +/// let indices = UInt32Array::from(vec![2, 1]); +/// let taken_arrays = take_arrays(&[string_values, values], &indices, None).unwrap(); +/// let taken_string = taken_arrays[0].as_string::(); +/// assert_eq!(*taken_string, StringArray::from(vec!["two", "one"])); +/// let taken_values = taken_arrays[1].as_primitive(); +/// assert_eq!(*taken_values, UInt32Array::from(vec![2, 1])); +/// ``` +pub fn take_arrays( + arrays: &[ArrayRef], + indices: &dyn Array, + options: Option, +) -> Result, ArrowError> { + arrays + .iter() + .map(|array| take(array.as_ref(), indices, options.clone())) + .collect() +} + /// Verifies that the non-null values of `indices` are all `< len` fn check_bounds( len: usize,