Skip to content

Commit

Permalink
Refine documentation for unary_mut and binary_mut,
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed May 24, 2024
1 parent bd5d4a5 commit a32ee76
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 42 deletions.
2 changes: 1 addition & 1 deletion arrow-arith/src/arity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

//! Defines kernels suitable to perform operations to primitive arrays.
//! Kernels for operating on [`PrimitiveArray`]s
use arrow_array::builder::BufferBuilder;
use arrow_array::types::ArrowDictionaryKeyType;
Expand Down
123 changes: 88 additions & 35 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ pub type Decimal256Array = PrimitiveArray<Decimal256Type>;

pub use crate::types::ArrowPrimitiveType;

/// An array of [primitive values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
/// An array of primtive values, of type [`ArrowPrimitiveType`]
///
/// # Example: From a Vec
///
Expand Down Expand Up @@ -480,6 +480,19 @@ pub use crate::types::ArrowPrimitiveType;
/// assert_eq!(array.values(), &[1, 0, 2]);
/// assert!(array.is_null(1));
/// ```
///
/// # Example: Get a `PrimitiveArray` from an [`ArrayRef`]
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{Array, ArrayRef, Float32Array, PrimitiveArray};
/// # use arrow_array::types::{Float32Type};
/// # use arrow_schema::DataType;
/// # let array: ArrayRef = Arc::new(Float32Array::from(vec![1.2, 2.3]));
/// // will panic if the array is not a Float32Array
/// assert_eq!(&DataType::Float32, array.data_type());
/// let f32_array = PrimitiveArray::<Float32Type>::from(array.into_data());
/// assert_eq!(f32_array, Float32Array::from(vec![1.2, 2.3]));
/// ```
pub struct PrimitiveArray<T: ArrowPrimitiveType> {
data_type: DataType,
/// Values data
Expand Down Expand Up @@ -732,22 +745,30 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
PrimitiveArray::from(unsafe { d.build_unchecked() })
}

/// Applies an unary and infallible function to a primitive array.
/// This is the fastest way to perform an operation on a primitive array when
/// the benefits of a vectorized operation outweigh the cost of branching nulls and non-nulls.
/// Applies a unary infallible function to a primitive array, producing a
/// new array of potentially different type.
///
/// This is the fastest way to perform an operation on a primitive array
/// when the benefits of a vectorized operation outweigh the cost of
/// branching nulls and non-nulls.
///
/// # Implementation
/// See [`Self::unary_mut`] for in place modification.
///
/// # Null Handling
///
/// Applies the function for all values, including those on null slots. This
/// will often allow the compiler to generate faster vectorized code, but
/// requires that the operation must be infallible (not error/panic) for any
/// value of the corresponding type or this function may panic.
///
/// This will apply the function for all values, including those on null slots.
/// This implies that the operation must be infallible for any value of the corresponding type
/// or this function may panic.
/// # Example
/// ```rust
/// # use arrow_array::{Int32Array, types::Int32Type};
/// # use arrow_array::{Int32Array, Float32Array, types::Int32Type};
/// # fn main() {
/// let array = Int32Array::from(vec![Some(5), Some(7), None]);
/// let c = array.unary(|x| x * 2 + 1);
/// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None]));
/// // Create a new array with the value of applying sqrt
/// let c = array.unary(|x| f32::sqrt(x as f32));
/// assert_eq!(c, Float32Array::from(vec![Some(2.236068), Some(2.6457512), None]));
/// # }
/// ```
pub fn unary<F, O>(&self, op: F) -> PrimitiveArray<O>
Expand All @@ -766,23 +787,43 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
PrimitiveArray::new(buffer.into(), nulls)
}

/// Applies an unary and infallible function to a mutable primitive array.
/// Mutable primitive array means that the buffer is not shared with other arrays.
/// As a result, this mutates the buffer directly without allocating new buffer.
/// Applies a unary and infallible function to the array in place if possible.
///
/// # Buffer Reuse
///
/// If the underlying buffers are not shared with other arrays, mutates the
/// underlying buffer in place, without allocating a new buffer.
///
/// # Null Handling
///
/// # Implementation
/// See [`Self::unary`] for more information on null handling.
///
/// This will apply the function for all values, including those on null slots.
/// This implies that the operation must be infallible for any value of the corresponding type
/// or this function may panic.
/// # Example
///
/// ```rust
/// # use arrow_array::{Int32Array, types::Int32Type};
/// # fn main() {
/// let array = Int32Array::from(vec![Some(5), Some(7), None]);
/// // Apply x*2+1 to the data in place, no allocations
/// let c = array.unary_mut(|x| x * 2 + 1).unwrap();
/// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None]));
/// ```
///
/// # Example: modify [`ArrayRef`] in place, if not shared
///
/// It is also possible to modify an [`ArrayRef`] if there are no other
/// references to the underlying buffer.
///
/// ```rust
/// # use std::sync::Arc;
/// # use arrow_array::{Array, ArrayRef, Int32Array, PrimitiveArray, types::Int32Type};
/// # let array: ArrayRef = Arc::new(Int32Array::from(vec![Some(5), Some(7), None]));
/// // Convert to Int32Array (panic's if array.data_type is not Int32)
/// let array = PrimitiveArray::<Int32Type>::from(array.into_data());
/// // Apply x*2+1 to the data in place, no allocations if
/// // there are no other references to the underlying buffer
/// // will create a new buffer if there are references.
/// let c = array.unary_mut(|x| x * 2 + 1).unwrap();
/// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None]));
/// # }
/// ```
pub fn unary_mut<F>(self, op: F) -> Result<PrimitiveArray<T>, PrimitiveArray<T>>
where
Expand All @@ -796,11 +837,12 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
Ok(builder.finish())
}

/// Applies a unary and fallible function to all valid values in a primitive array
/// Applies a unary fallible function to all valid values in a primitive
/// array, producing a new array of potentially different type.
///
/// This is unlike [`Self::unary`] which will apply an infallible function to all rows
/// regardless of validity, in many cases this will be significantly faster and should
/// be preferred if `op` is infallible.
/// Applies `op` to only rows that are valid, which is often significantly
/// slower than [`Self::unary`], which should be preferred if `op` is
/// fallible.
///
/// Note: LLVM is currently unable to effectively vectorize fallible operations
pub fn try_unary<F, O, E>(&self, op: F) -> Result<PrimitiveArray<O>, E>
Expand Down Expand Up @@ -829,13 +871,16 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
Ok(PrimitiveArray::new(values, nulls))
}

/// Applies an unary and fallible function to all valid values in a mutable primitive array.
/// Mutable primitive array means that the buffer is not shared with other arrays.
/// As a result, this mutates the buffer directly without allocating new buffer.
/// Applies a unary fallible function to all valid values in a mutable
/// primitive array.
///
/// This is unlike [`Self::unary_mut`] which will apply an infallible function to all rows
/// regardless of validity, in many cases this will be significantly faster and should
/// be preferred if `op` is infallible.
/// # Null Handling
///
/// See [`Self::try_unary`] for more information on null handling.
///
/// # Buffer Reuse
///
/// See [`Self::unary_mut`] for more information on buffer reuse.
///
/// This returns an `Err` when the input array is shared buffer with other
/// array. In the case, returned `Err` wraps input array. If the function
Expand Down Expand Up @@ -870,9 +915,9 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {

/// Applies a unary and nullable function to all valid values in a primitive array
///
/// This is unlike [`Self::unary`] which will apply an infallible function to all rows
/// regardless of validity, in many cases this will be significantly faster and should
/// be preferred if `op` is infallible.
/// Applies `op` to only rows that are valid, which is often significantly
/// slower than [`Self::unary`], which should be preferred if `op` is
/// fallible.
///
/// Note: LLVM is currently unable to effectively vectorize fallible operations
pub fn unary_opt<F, O>(&self, op: F) -> PrimitiveArray<O>
Expand Down Expand Up @@ -915,8 +960,16 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
PrimitiveArray::new(values, Some(nulls))
}

/// Returns `PrimitiveBuilder` of this primitive array for mutating its values if the underlying
/// data buffer is not shared by others.
/// Returns a `PrimitiveBuilder` for this array, suitable for mutating values
/// in place.
///
/// # Buffer Reuse
///
/// If the underlying data buffer has no other outstanding references, the
/// buffer is used without copying.
///
/// If the underlying data buffer does have outstanding references, the
/// buffer is cloned.
pub fn into_builder(self) -> Result<PrimitiveBuilder<T>, Self> {
let len = self.len();
let data = self.into_data();
Expand Down
6 changes: 4 additions & 2 deletions arrow-array/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@ impl BooleanType {
pub const DATA_TYPE: DataType = DataType::Boolean;
}

/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the
/// static-typed nature of rust types ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`].
/// Trait for [primitive values], bridging the dynamic-typed nature of Arrow
/// (via [`DataType`]) with the static-typed nature of rust types
/// ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`].
///
/// [primitive values]: https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout
/// [`ArrowNativeType`]: arrow_buffer::ArrowNativeType
pub trait ArrowPrimitiveType: primitive::PrimitiveTypeSealed + 'static {
/// Corresponding Rust native type for the primitive type.
Expand Down
11 changes: 7 additions & 4 deletions arrow-buffer/src/native.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,14 @@ mod private {
pub trait Sealed {}
}

/// Trait expressing a Rust type that has the same in-memory representation
/// as Arrow. This includes `i16`, `f32`, but excludes `bool` (which in arrow is represented in bits).
/// Trait expressing a Rust type that has the same in-memory representation as
/// Arrow.
///
/// In little endian machines, types that implement [`ArrowNativeType`] can be memcopied to arrow buffers
/// as is.
/// This includes `i16`, `f32`, but excludes `bool` (which in arrow is
/// represented in bits).
///
/// In little endian machines, types that implement [`ArrowNativeType`] can be
/// memcopied to arrow buffers as is.
///
/// # Transmute Safety
///
Expand Down

0 comments on commit a32ee76

Please sign in to comment.