diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 9c2d4af8c45..22af62471b4 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -352,6 +352,18 @@ impl ArrayAccessor for &BooleanArray { } } +impl ArrayAccessor for BooleanArray { + type Item = bool; + + fn value(&self, index: usize) -> Self::Item { + self.value(index) + } + + unsafe fn value_unchecked(&self, index: usize) -> Self::Item { + self.value_unchecked(index) + } +} + impl From> for BooleanArray { fn from(data: Vec) -> Self { let mut mut_buf = MutableBuffer::new_null(data.len()); diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index bed0bdf889b..34becd35b66 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -537,6 +537,18 @@ impl ArrayAccessor for &GenericListArray ArrayAccessor for GenericListArray { + type Item = ArrayRef; + + fn value(&self, index: usize) -> Self::Item { + self.value(index) + } + + unsafe fn value_unchecked(&self, index: usize) -> Self::Item { + self.value_unchecked(index) + } +} + impl std::fmt::Debug for GenericListArray { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let prefix = OffsetSize::PREFIX; diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 57aa23bf904..fc9ad155ef9 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -1197,6 +1197,19 @@ impl ArrayAccessor for &PrimitiveArray { } } +impl ArrayAccessor for PrimitiveArray { + type Item = T::Native; + + fn value(&self, index: usize) -> Self::Item { + self.value(index) + } + + #[inline] + unsafe fn value_unchecked(&self, index: usize) -> Self::Item { + self.value_unchecked(index) + } +} + impl PrimitiveArray where i64: From, diff --git a/arrow-array/src/builder/boolean_builder.rs b/arrow-array/src/builder/boolean_builder.rs index 60ed86ce80b..f630d4ea89b 100644 --- a/arrow-array/src/builder/boolean_builder.rs +++ b/arrow-array/src/builder/boolean_builder.rs @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -use crate::builder::{ArrayBuilder, BooleanBufferBuilder}; -use crate::{ArrayRef, BooleanArray}; +use crate::builder::{SpecificArrayBuilder, ArrayBuilder, BooleanBufferBuilder}; +use crate::{Array, ArrayRef, BooleanArray}; use arrow_buffer::Buffer; use arrow_buffer::NullBufferBuilder; use arrow_data::ArrayData; @@ -219,6 +219,49 @@ impl ArrayBuilder for BooleanBuilder { } } + +impl SpecificArrayBuilder for BooleanBuilder { + type Output = BooleanArray; + type Item<'a> = bool; + + /// Builds the array and reset this builder. + fn finish(&mut self) -> Arc { + Arc::new(self.finish()) + } + + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> Arc { + Arc::new(self.finish_cloned()) + } + + fn append_value(&mut self, value: bool) { + self.append_value(value) + } + + fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) { + self.append_value(*value) + } + + fn append_null(&mut self) { + self.append_null() + } + + fn append_nulls(&mut self, n: usize) { + self.append_nulls(n) + } + + fn append_output<'a>(&'a mut self, output: &'a Self::Output) { + // TODO - if iterator exists try it? + for i in 0..output.len() { + if output.is_null(i) { + self.append_null(); + } else { + self.append_value(output.value(i)); + } + } + } +} + impl Extend> for BooleanBuilder { #[inline] fn extend>>(&mut self, iter: T) { diff --git a/arrow-array/src/builder/fixed_size_binary_builder.rs b/arrow-array/src/builder/fixed_size_binary_builder.rs index 65072a09f60..d742cb7dccc 100644 --- a/arrow-array/src/builder/fixed_size_binary_builder.rs +++ b/arrow-array/src/builder/fixed_size_binary_builder.rs @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -use crate::builder::{ArrayBuilder, UInt8BufferBuilder}; -use crate::{ArrayRef, FixedSizeBinaryArray}; +use crate::builder::{ArrayBuilder, SpecificArrayBuilder, UInt8BufferBuilder}; +use crate::{Array, ArrayRef, FixedSizeBinaryArray}; use arrow_buffer::Buffer; use arrow_buffer::NullBufferBuilder; use arrow_data::ArrayData; @@ -154,6 +154,41 @@ impl ArrayBuilder for FixedSizeBinaryBuilder { } } +impl SpecificArrayBuilder for FixedSizeBinaryBuilder { + type Output = FixedSizeBinaryArray; + type Item<'a> = &'a [u8]; + + fn finish(&mut self) -> Arc { + Arc::new(self.finish()) + } + + fn finish_cloned(&self) -> Arc { + Arc::new(self.finish_cloned()) + } + + fn append_value<'a>(&'a mut self, value: Self::Item<'a>) { + self.append_value(value).unwrap() + } + + fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) { + self.append_value(value).unwrap() + } + + fn append_null(&mut self) { + self.append_null() + } + + fn append_output<'a>(&'a mut self, output: &'a Self::Output) { + for i in 0..output.len() { + if output.is_null(i) { + self.append_null(); + } else { + self.append_value(output.value(i)).unwrap(); + } + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs index 5c142b277d1..0e82fa4ede9 100644 --- a/arrow-array/src/builder/fixed_size_list_builder.rs +++ b/arrow-array/src/builder/fixed_size_list_builder.rs @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -use crate::builder::ArrayBuilder; -use crate::{ArrayRef, FixedSizeListArray}; +use crate::builder::{ArrayBuilder, SpecificArrayBuilder}; +use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray}; use arrow_buffer::NullBufferBuilder; use arrow_schema::{Field, FieldRef}; use std::any::Any; @@ -215,6 +215,57 @@ where } } + +impl SpecificArrayBuilder for FixedSizeListBuilder +where + ValuesOutput: Array + 'static, + T: SpecificArrayBuilder, + for<'a> &'a ValuesOutput: ArrayAccessor, + for<'a> ::Item<'a>: From<<&'a ValuesOutput as ArrayAccessor>::Item> +{ + type Output = FixedSizeListArray; + type Item<'a> = T::Output; + + /// Builds the array and reset this builder. + fn finish(&mut self) -> Arc { + Arc::new(self.finish()) + } + + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> Arc { + Arc::new(self.finish_cloned()) + } + + fn append_value<'a>(&'a mut self, value: Self::Item<'a>) { + // our item is their output + self.values_builder.append_output(value.as_any().downcast_ref::().unwrap()); + self.append(true); + } + + fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) { + self.values_builder.append_output(value.as_any().downcast_ref::().unwrap()); + self.append(true); + } + + fn append_null(&mut self) { + // TODO - make sure we should append nulls to the values builder + self.values_builder.append_nulls(self.list_len as usize); + self.append(false); + } + + fn append_output<'a>(&'a mut self, output: &'a Self::Output) { + // TODO - if iterator exists try it? + for i in 0..output.len() { + if output.is_null(i) { + self.append_null(); + } else { + self.values_builder.append_output(output.value(i).as_any().downcast_ref::().unwrap()); + self.append(true); + } + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/arrow-array/src/builder/generic_byte_run_builder.rs b/arrow-array/src/builder/generic_byte_run_builder.rs index 0bf5658b297..66d7d979b3d 100644 --- a/arrow-array/src/builder/generic_byte_run_builder.rs +++ b/arrow-array/src/builder/generic_byte_run_builder.rs @@ -18,12 +18,9 @@ use crate::types::bytes::ByteArrayNativeType; use std::{any::Any, sync::Arc}; -use crate::{ - types::{BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType, Utf8Type}, - ArrayRef, ArrowPrimitiveType, RunArray, -}; +use crate::{types::{BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType, Utf8Type}, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, RunArray}; -use super::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder}; +use super::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder, SpecificArrayBuilder}; use arrow_buffer::ArrowNativeType; diff --git a/arrow-array/src/builder/generic_bytes_builder.rs b/arrow-array/src/builder/generic_bytes_builder.rs index e2be96615b6..9e265d22c5e 100644 --- a/arrow-array/src/builder/generic_bytes_builder.rs +++ b/arrow-array/src/builder/generic_bytes_builder.rs @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder}; +use crate::builder::{ArrayBuilder, BufferBuilder, SpecificArrayBuilder, UInt8BufferBuilder}; use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType}; -use crate::{ArrayRef, GenericByteArray, OffsetSizeTrait}; +use crate::{Array, ArrayRef, GenericByteArray, OffsetSizeTrait}; use arrow_buffer::NullBufferBuilder; use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer}; use arrow_data::ArrayDataBuilder; @@ -228,6 +228,44 @@ impl ArrayBuilder for GenericByteBuilder { } } +impl SpecificArrayBuilder for GenericByteBuilder { + type Output = GenericByteArray; + type Item<'a> = &'a T::Native; + + /// Builds the array and reset this builder. + fn finish(&mut self) -> Arc> { + Arc::new(self.finish()) + } + + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> Arc> { + Arc::new(self.finish_cloned()) + } + + fn append_value(&mut self, value: &T::Native) { + self.append_value(value) + } + + fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) { + self.append_value(value) + } + + fn append_null(&mut self) { + self.append_null() + } + + fn append_output<'a>(&'a mut self, output: &'a Self::Output) { + // TODO - if iterator exists try it? + for i in 0..output.len() { + if output.is_null(i) { + self.append_null(); + } else { + self.append_value(output.value(i)); + } + } + } +} + impl> Extend> for GenericByteBuilder { #[inline] fn extend>>(&mut self, iter: I) { diff --git a/arrow-array/src/builder/generic_bytes_view_builder.rs b/arrow-array/src/builder/generic_bytes_view_builder.rs index 7268e751b14..c242ba22090 100644 --- a/arrow-array/src/builder/generic_bytes_view_builder.rs +++ b/arrow-array/src/builder/generic_bytes_view_builder.rs @@ -25,10 +25,10 @@ use arrow_schema::ArrowError; use hashbrown::hash_table::Entry; use hashbrown::HashTable; -use crate::builder::ArrayBuilder; +use crate::builder::{ArrayBuilder, SpecificArrayBuilder}; use crate::types::bytes::ByteArrayNativeType; use crate::types::{BinaryViewType, ByteViewType, StringViewType}; -use crate::{ArrayRef, GenericByteViewArray}; +use crate::{Array, ArrayRef, GenericByteViewArray}; const STARTING_BLOCK_SIZE: u32 = 8 * 1024; // 8KiB const MAX_BLOCK_SIZE: u32 = 2 * 1024 * 1024; // 2MiB @@ -452,6 +452,42 @@ impl ArrayBuilder for GenericByteViewBuilder { } } +impl SpecificArrayBuilder for GenericByteViewBuilder { + type Output = GenericByteViewArray; + type Item<'a> = &'a T::Native; + + fn finish(&mut self) -> Arc> { + Arc::new(self.finish()) + } + + fn finish_cloned(&self) -> Arc> { + Arc::new(self.finish_cloned()) + } + + fn append_value(&mut self, value: &T::Native) { + self.append_value(value) + } + + fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) { + self.append_value(value) + } + + fn append_null(&mut self) { + self.append_null() + } + + fn append_output<'a>(&'a mut self, output: &'a Self::Output) { + // TODO - if iterator exists try it? + for i in 0..output.len() { + if output.is_null(i) { + self.append_null(); + } else { + self.append_value(output.value(i)); + } + } + } +} + impl> Extend> for GenericByteViewBuilder { diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs index a9c88ec6c58..0d2d6f89338 100644 --- a/arrow-array/src/builder/generic_list_builder.rs +++ b/arrow-array/src/builder/generic_list_builder.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::builder::{ArrayBuilder, BufferBuilder}; +use crate::builder::{ArrayBuilder, BufferBuilder, SpecificArrayBuilder}; use crate::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow_buffer::NullBufferBuilder; use arrow_buffer::{Buffer, OffsetBuffer}; @@ -168,6 +168,59 @@ where } } +impl SpecificArrayBuilder for GenericListBuilder +where + OffsetSize: OffsetSizeTrait, +{ + type Output = GenericListArray; + type Item<'a> = T::Output; + + /// Builds the array and reset this builder. + fn finish(&mut self) -> Arc> { + Arc::new(self.finish()) + } + + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> Arc> { + Arc::new(self.finish_cloned()) + } + + fn append_value<'a>(&'a mut self, value: Self::Item<'a>) { + // our item is their output + self.values_builder + .append_output(value.as_any().downcast_ref::>().unwrap()); + self.append(true); + } + + fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) { + self.values_builder + .append_output(value.as_any().downcast_ref::>().unwrap()); + self.append(true); + } + + fn append_null(&mut self) { + self.append(false); + } + + fn append_output<'a>(&'a mut self, output: &'a Self::Output) { + // TODO - if iterator exists try it? + for i in 0..output.len() { + if output.is_null(i) { + self.append_null(); + } else { + let current_value = output.value(i); + self.values_builder.append_output( + current_value + .as_any() + .downcast_ref::>() + .unwrap(), + ); + self.append(true); + } + } + } +} + impl GenericListBuilder where T: 'static, @@ -353,11 +406,12 @@ where #[cfg(test)] mod tests { + use arrow_buffer::ArrowNativeType; use super::*; - use crate::builder::{make_builder, Int32Builder, ListBuilder}; + use crate::builder::{make_builder, Int32Builder, ListBuilder, PrimitiveBuilder}; use crate::cast::AsArray; use crate::types::Int32Type; - use crate::Int32Array; + use crate::{Int32Array, ListArray}; use arrow_schema::DataType; fn _test_generic_list_array_builder() { @@ -803,4 +857,72 @@ mod tests { builder.append_value([Some(1)]); builder.finish(); } + + #[test] + fn should_be_able_to_add_from_list_as_is() { + let from: Arc = { + let primitive_builder = Int32Builder::with_capacity(10); + let values_builder = ListBuilder::new(primitive_builder); + let mut builder = ListBuilder::new(values_builder); + + // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] + builder.values().values().append_value(1); + builder.values().values().append_value(2); + builder.values().append(true); + builder.values().values().append_value(3); + builder.values().values().append_value(4); + builder.values().append(true); + builder.append(true); + + builder.values().values().append_value(5); + builder.values().values().append_value(6); + builder.values().values().append_value(7); + builder.values().append(true); + builder.values().append(false); + builder.values().values().append_value(8); + builder.values().append(true); + builder.append(true); + + builder.append(false); + + builder.values().values().append_value(9); + builder.values().values().append_value(10); + builder.values().append(true); + builder.append(true); + + Arc::new(builder.finish()) + }; + let mut to = ListBuilder::new(ListBuilder::new(Int32Builder::new())); + + for i in 0..from.len() { + if from.is_valid(i) { + let item = from.value(i); + let inner_list = item + .as_any() + .downcast_ref::>() + .unwrap(); + SpecificArrayBuilder::append_value_ref(&mut to, inner_list); + } else { + to.append_null(); + } + } + + // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] + let l1 = to.finish(); + + assert_eq!(4, l1.len()); + assert_eq!(1, l1.null_count()); + + assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6].map(i32::usize_as)); + let l2 = l1.values().as_list::(); + + assert_eq!(6, l2.len()); + assert_eq!(1, l2.null_count()); + assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10].map(i32::usize_as)); + + let i1 = l2.values().as_primitive::(); + assert_eq!(10, i1.len()); + assert_eq!(0, i1.null_count()); + assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + } } diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs index 89a96280eb8..ad5c28c97cc 100644 --- a/arrow-array/src/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -184,8 +184,9 @@ mod union_builder; pub use union_builder::*; -use crate::ArrayRef; +use crate::{Array, ArrayRef}; use std::any::Any; +use std::sync::Arc; /// Trait for dealing with different array builders at runtime /// @@ -298,6 +299,46 @@ impl ArrayBuilder for Box { } } +pub trait SpecificArrayBuilder: Any + Send + Sync + ArrayBuilder { + type Output: Array; + type Item<'a>; + + /// Builds the array + fn finish(&mut self) -> Arc; + + /// Builds the array without resetting the underlying builder. + fn finish_cloned(&self) -> Arc; + + // Append a value to the builder + fn append_value<'a>(&'a mut self, value: Self::Item<'a>); + + // Append a value to the builder + fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>); + + /// Appends a null slot into the builder + fn append_null(&mut self); + + /// Appends `n` `null`s into the builder. + #[inline] + fn append_nulls(&mut self, n: usize) { + for _ in 0..n { + self.append_null(); + } + } + + /// Appends an `Option` into the builder + #[inline] + fn append_option<'a>(&'a mut self, v: Option>) { + match v { + None => self.append_null(), + Some(v) => self.append_value(v), + }; + } + + #[inline] + fn append_output<'a>(&'a mut self, output: &'a Self::Output); +} + /// Builder for [`ListArray`](crate::array::ListArray) pub type ListBuilder = GenericListBuilder; diff --git a/arrow-array/src/builder/null_builder.rs b/arrow-array/src/builder/null_builder.rs index 59086dffa90..1da3fcd64af 100644 --- a/arrow-array/src/builder/null_builder.rs +++ b/arrow-array/src/builder/null_builder.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::builder::ArrayBuilder; +use crate::builder::{SpecificArrayBuilder, ArrayBuilder}; use crate::{ArrayRef, NullArray}; use arrow_data::ArrayData; use arrow_schema::DataType; @@ -146,6 +146,43 @@ impl ArrayBuilder for NullBuilder { } } +impl SpecificArrayBuilder for NullBuilder { + type Output = NullArray; + type Item<'a> = (); + + fn finish(&mut self) -> Arc { + Arc::new(self.finish()) + } + + fn finish_cloned(&self) -> Arc { + Arc::new(self.finish_cloned()) + } + + fn append_value<'a>(&'a mut self, value: Self::Item<'a>) { + self.append_null(); + } + + fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) { + self.append_null(); + } + + fn append_null(&mut self) { + self.append_null(); + } + + fn append_output<'a>(&'a mut self, output: &'a Self::Output) { + self.len += output.len(); + } + + fn append_nulls(&mut self, n: usize) { + self.append_nulls(n) + } + + fn append_option<'a>(&'a mut self, v: Option>) { + self.append_null() + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/arrow-array/src/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs index 3191fea6e40..102ebc77ad0 100644 --- a/arrow-array/src/builder/primitive_builder.rs +++ b/arrow-array/src/builder/primitive_builder.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::builder::{ArrayBuilder, BufferBuilder}; +use crate::builder::{ArrayBuilder, BufferBuilder, SpecificArrayBuilder}; use crate::types::*; use crate::{ArrayRef, PrimitiveArray}; use arrow_buffer::NullBufferBuilder; @@ -132,6 +132,42 @@ impl ArrayBuilder for PrimitiveBuilder { } } + +impl SpecificArrayBuilder for PrimitiveBuilder { + type Output = PrimitiveArray; + type Item<'a> = T::Native; + + /// Builds the array and reset this builder. + fn finish(&mut self) -> Arc> { + Arc::new(self.finish()) + } + + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> Arc> { + Arc::new(self.finish_cloned()) + } + + fn append_value(&mut self, value: T::Native) { + self.append_value(value) + } + + fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) { + self.append_value(*value) + } + + fn append_null(&mut self) { + self.append_null() + } + + fn append_nulls(&mut self, n: usize) { + self.append_nulls(n) + } + + fn append_output<'a>(&'a mut self, output: &'a PrimitiveArray) { + self.extend(output) + } +} + impl Default for PrimitiveBuilder { fn default() -> Self { Self::new()