Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create Specific builder for typed builder trait and easier to use for lists #6863

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,18 @@ impl ArrayAccessor for &BooleanArray {
}
}

impl ArrayAccessor for BooleanArray {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't be necessary as this is already implemented for &BooleanArray

/// Each slot of the array is read out as a plain `bool`.
type Item = bool;

/// Returns the value stored at `index` by forwarding to `value` on the array itself.
///
/// NOTE(review): this presumably resolves to the inherent `BooleanArray::value`
/// (inherent methods take precedence over trait methods in method-call
/// resolution) rather than recursing into this trait method — confirm the
/// inherent method exists.
fn value(&self, index: usize) -> Self::Item {
self.value(index)
}

/// Returns the value stored at `index` by forwarding to `value_unchecked` on the
/// array itself.
///
/// # Safety
///
/// NOTE(review): presumably the caller must guarantee `index` is in bounds, as
/// the `_unchecked` suffix suggests — confirm against the `ArrayAccessor` contract.
unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
self.value_unchecked(index)
}
}

impl From<Vec<bool>> for BooleanArray {
fn from(data: Vec<bool>) -> Self {
let mut mut_buf = MutableBuffer::new_null(data.len());
Expand Down
12 changes: 12 additions & 0 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,18 @@ impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize
}
}

// `ArrayAccessor` implemented on the owned list array type (the hunk context
// above shows an existing implementation for `&GenericListArray<OffsetSize>`).
impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for GenericListArray<OffsetSize> {
/// Each slot of the list array is read out as an `ArrayRef`.
type Item = ArrayRef;

/// Returns the element at `index` by forwarding to `value` on the array itself.
///
/// NOTE(review): this presumably resolves to the inherent
/// `GenericListArray::value` (inherent methods take precedence over trait
/// methods in method-call resolution) rather than recursing — confirm.
fn value(&self, index: usize) -> Self::Item {
self.value(index)
}

/// Returns the element at `index` by forwarding to `value_unchecked` on the
/// array itself.
///
/// # Safety
///
/// NOTE(review): presumably the caller must guarantee `index` is in bounds, as
/// the `_unchecked` suffix suggests — confirm against the `ArrayAccessor` contract.
unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
self.value_unchecked(index)
}
}

impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let prefix = OffsetSize::PREFIX;
Expand Down
13 changes: 13 additions & 0 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1197,6 +1197,19 @@ impl<T: ArrowPrimitiveType> ArrayAccessor for &PrimitiveArray<T> {
}
}

// `ArrayAccessor` implemented on the owned primitive array type (the hunk
// context above shows an existing implementation for `&PrimitiveArray<T>`).
impl<T: ArrowPrimitiveType> ArrayAccessor for PrimitiveArray<T> {
/// Each slot is read out as the primitive type's native representation.
type Item = T::Native;

/// Returns the value at `index` by forwarding to `value` on the array itself.
///
/// NOTE(review): this presumably resolves to the inherent
/// `PrimitiveArray::value` (inherent methods take precedence over trait
/// methods in method-call resolution) rather than recursing — confirm.
fn value(&self, index: usize) -> Self::Item {
self.value(index)
}

/// Returns the value at `index` by forwarding to `value_unchecked` on the
/// array itself.
///
/// # Safety
///
/// NOTE(review): presumably the caller must guarantee `index` is in bounds, as
/// the `_unchecked` suffix suggests — confirm against the `ArrayAccessor` contract.
#[inline]
unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
self.value_unchecked(index)
}
}

impl<T: ArrowTemporalType> PrimitiveArray<T>
where
i64: From<T::Native>,
Expand Down
47 changes: 45 additions & 2 deletions arrow-array/src/builder/boolean_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.

use crate::builder::{ArrayBuilder, BooleanBufferBuilder};
use crate::{ArrayRef, BooleanArray};
use crate::builder::{SpecificArrayBuilder, ArrayBuilder, BooleanBufferBuilder};
use crate::{Array, ArrayRef, BooleanArray};
use arrow_buffer::Buffer;
use arrow_buffer::NullBufferBuilder;
use arrow_data::ArrayData;
Expand Down Expand Up @@ -219,6 +219,49 @@ impl ArrayBuilder for BooleanBuilder {
}
}


impl SpecificArrayBuilder for BooleanBuilder {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The functionality of this trait seems to overlap with the existing `Extend` functionality; I'm not sure what it adds.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nothing. The only thing it adds is that the list builder can use those functions, since it expects a `SpecificArrayBuilder` trait.

type Output = BooleanArray;
type Item<'a> = bool;

/// Produces the finished [`BooleanArray`] behind an [`Arc`] and resets this builder.
fn finish(&mut self) -> Arc<BooleanArray> {
    let array = self.finish();
    Arc::new(array)
}

/// Produces the [`BooleanArray`] behind an [`Arc`], leaving the builder as it was.
fn finish_cloned(&self) -> Arc<BooleanArray> {
    let array = self.finish_cloned();
    Arc::new(array)
}

/// Forwards a single value to the builder's own `append_value`.
fn append_value(&mut self, value: bool) {
    self.append_value(value)
}

/// Same as `append_value`, but the value is handed over by reference and copied out.
fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) {
    let &flag = value;
    self.append_value(flag)
}

/// Forwards to the builder's own `append_null`.
fn append_null(&mut self) {
    self.append_null()
}

/// Forwards to the builder's own `append_nulls` with the same count `n`.
fn append_nulls(&mut self, n: usize) {
    self.append_nulls(n)
}

/// Copies every slot of `output` into this builder, in order, keeping nulls as nulls.
fn append_output<'a>(&'a mut self, output: &'a Self::Output) {
    // TODO: consider driving this from an array iterator instead of indices.
    let rows = output.len();
    for row in 0..rows {
        if output.is_valid(row) {
            self.append_value(output.value(row));
        } else {
            self.append_null();
        }
    }
}
}

impl Extend<Option<bool>> for BooleanBuilder {
#[inline]
fn extend<T: IntoIterator<Item = Option<bool>>>(&mut self, iter: T) {
Expand Down
39 changes: 37 additions & 2 deletions arrow-array/src/builder/fixed_size_binary_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.

use crate::builder::{ArrayBuilder, UInt8BufferBuilder};
use crate::{ArrayRef, FixedSizeBinaryArray};
use crate::builder::{ArrayBuilder, SpecificArrayBuilder, UInt8BufferBuilder};
use crate::{Array, ArrayRef, FixedSizeBinaryArray};
use arrow_buffer::Buffer;
use arrow_buffer::NullBufferBuilder;
use arrow_data::ArrayData;
Expand Down Expand Up @@ -154,6 +154,41 @@ impl ArrayBuilder for FixedSizeBinaryBuilder {
}
}

/// Typed-builder facade over [`FixedSizeBinaryBuilder`]: every method forwards to the
/// builder's own method of the same name, unwrapping fallible appends.
impl SpecificArrayBuilder for FixedSizeBinaryBuilder {
    type Output = FixedSizeBinaryArray;
    type Item<'a> = &'a [u8];

    /// Wraps the array returned by the builder's own `finish` in an [`Arc`].
    fn finish(&mut self) -> Arc<Self::Output> {
        let array = self.finish();
        Arc::new(array)
    }

    /// Wraps the array returned by the builder's own `finish_cloned` in an [`Arc`].
    fn finish_cloned(&self) -> Arc<Self::Output> {
        let array = self.finish_cloned();
        Arc::new(array)
    }

    /// Appends one byte slice.
    ///
    /// # Panics
    ///
    /// Panics if the builder's own `append_value` returns an error (presumably when
    /// the slice length does not match the builder's fixed width — confirm).
    fn append_value<'a>(&'a mut self, value: Self::Item<'a>) {
        let outcome = self.append_value(value);
        outcome.unwrap()
    }

    /// Appends one byte slice handed over by reference.
    ///
    /// # Panics
    ///
    /// Panics under the same condition as `append_value`.
    fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) {
        let outcome = self.append_value(value);
        outcome.unwrap()
    }

    /// Forwards to the builder's own `append_null`.
    fn append_null(&mut self) {
        self.append_null()
    }

    /// Copies every slot of `output` into this builder, in order, keeping nulls as nulls.
    ///
    /// # Panics
    ///
    /// Panics if appending any non-null slot fails.
    fn append_output<'a>(&'a mut self, output: &'a Self::Output) {
        for row in 0..output.len() {
            match output.is_null(row) {
                true => self.append_null(),
                false => self.append_value(output.value(row)).unwrap(),
            }
        }
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
55 changes: 53 additions & 2 deletions arrow-array/src/builder/fixed_size_list_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.

use crate::builder::ArrayBuilder;
use crate::{ArrayRef, FixedSizeListArray};
use crate::builder::{ArrayBuilder, SpecificArrayBuilder};
use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray};
use arrow_buffer::NullBufferBuilder;
use arrow_schema::{Field, FieldRef};
use std::any::Any;
Expand Down Expand Up @@ -215,6 +215,57 @@ where
}
}


/// Typed-builder facade over [`FixedSizeListBuilder`].
///
/// One list *item* is a whole child array (`T::Output`); appending an item copies
/// that child array into the nested values builder and then marks the list slot valid.
impl<ValuesOutput, T> SpecificArrayBuilder for FixedSizeListBuilder<T>
where
    ValuesOutput: Array + 'static,
    T: SpecificArrayBuilder<Output = ValuesOutput>,
    for<'a> &'a ValuesOutput: ArrayAccessor,
    for<'a> <T as SpecificArrayBuilder>::Item<'a>: From<<&'a ValuesOutput as ArrayAccessor>::Item>,
{
    type Output = FixedSizeListArray;
    type Item<'a> = T::Output;

    /// Builds the array and resets this builder.
    fn finish(&mut self) -> Arc<FixedSizeListArray> {
        Arc::new(self.finish())
    }

    /// Builds the array without resetting the builder.
    fn finish_cloned(&self) -> Arc<FixedSizeListArray> {
        Arc::new(self.finish_cloned())
    }

    /// Appends one valid list slot whose elements are the contents of `value`.
    fn append_value<'a>(&'a mut self, value: Self::Item<'a>) {
        // `Self::Item` is `T::Output`, i.e. exactly the type the values builder
        // consumes, so it can be passed straight through without a runtime downcast.
        self.values_builder.append_output(&value);
        self.append(true);
    }

    /// Appends one valid list slot whose elements are the contents of `*value`.
    fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) {
        // Same static type as in `append_value`; no downcast required.
        self.values_builder.append_output(value);
        self.append(true);
    }

    /// Appends one null list slot.
    fn append_null(&mut self) {
        // In the Arrow fixed-size list layout a null slot still occupies `list_len`
        // entries in the child array, so the values builder is padded with nulls.
        self.values_builder.append_nulls(self.list_len as usize);
        self.append(false);
    }

    /// Copies every slot of `output` into this builder, in order, keeping nulls as nulls.
    ///
    /// # Panics
    ///
    /// Panics if the child values of `output` are not of type `ValuesOutput`.
    fn append_output<'a>(&'a mut self, output: &'a Self::Output) {
        for i in 0..output.len() {
            if output.is_null(i) {
                self.append_null();
            } else {
                // `value(i)` yields a type-erased child slice, so this downcast is
                // the one place where a runtime type check is genuinely needed.
                let slot = output.value(i);
                let typed = slot
                    .as_any()
                    .downcast_ref::<ValuesOutput>()
                    .expect("FixedSizeListArray values do not match the values builder's output type");
                self.values_builder.append_output(typed);
                self.append(true);
            }
        }
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
7 changes: 2 additions & 5 deletions arrow-array/src/builder/generic_byte_run_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,9 @@
use crate::types::bytes::ByteArrayNativeType;
use std::{any::Any, sync::Arc};

use crate::{
types::{BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType, Utf8Type},
ArrayRef, ArrowPrimitiveType, RunArray,
};
use crate::{types::{BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType, Utf8Type}, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, RunArray};

use super::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder};
use super::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder, SpecificArrayBuilder};

use arrow_buffer::ArrowNativeType;

Expand Down
42 changes: 40 additions & 2 deletions arrow-array/src/builder/generic_bytes_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
// specific language governing permissions and limitations
// under the License.

use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder};
use crate::builder::{ArrayBuilder, BufferBuilder, SpecificArrayBuilder, UInt8BufferBuilder};
use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
use crate::{ArrayRef, GenericByteArray, OffsetSizeTrait};
use crate::{Array, ArrayRef, GenericByteArray, OffsetSizeTrait};
use arrow_buffer::NullBufferBuilder;
use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
use arrow_data::ArrayDataBuilder;
Expand Down Expand Up @@ -228,6 +228,44 @@ impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
}
}

/// Typed-builder facade over [`GenericByteBuilder`]: every method forwards to the
/// builder's own method of the same name.
impl<T: ByteArrayType> SpecificArrayBuilder for GenericByteBuilder<T> {
    type Output = GenericByteArray<T>;
    type Item<'a> = &'a T::Native;

    /// Produces the finished array behind an [`Arc`] and resets this builder.
    fn finish(&mut self) -> Arc<GenericByteArray<T>> {
        let array = self.finish();
        Arc::new(array)
    }

    /// Produces the array behind an [`Arc`], leaving the builder as it was.
    fn finish_cloned(&self) -> Arc<GenericByteArray<T>> {
        let array = self.finish_cloned();
        Arc::new(array)
    }

    /// Forwards one borrowed native value to the builder's own `append_value`.
    fn append_value(&mut self, value: &T::Native) {
        self.append_value(value)
    }

    /// Same as `append_value`, with the borrowed value passed behind one more reference.
    fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) {
        self.append_value(value)
    }

    /// Forwards to the builder's own `append_null`.
    fn append_null(&mut self) {
        self.append_null()
    }

    /// Copies every slot of `output` into this builder, in order, keeping nulls as nulls.
    fn append_output<'a>(&'a mut self, output: &'a Self::Output) {
        // TODO: consider driving this from an array iterator instead of indices.
        let rows = output.len();
        for row in 0..rows {
            if output.is_valid(row) {
                self.append_value(output.value(row));
            } else {
                self.append_null();
            }
        }
    }
}

impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBuilder<T> {
#[inline]
fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
Expand Down
40 changes: 38 additions & 2 deletions arrow-array/src/builder/generic_bytes_view_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ use arrow_schema::ArrowError;
use hashbrown::hash_table::Entry;
use hashbrown::HashTable;

use crate::builder::ArrayBuilder;
use crate::builder::{ArrayBuilder, SpecificArrayBuilder};
use crate::types::bytes::ByteArrayNativeType;
use crate::types::{BinaryViewType, ByteViewType, StringViewType};
use crate::{ArrayRef, GenericByteViewArray};
use crate::{Array, ArrayRef, GenericByteViewArray};

const STARTING_BLOCK_SIZE: u32 = 8 * 1024; // 8KiB
const MAX_BLOCK_SIZE: u32 = 2 * 1024 * 1024; // 2MiB
Expand Down Expand Up @@ -452,6 +452,42 @@ impl<T: ByteViewType + ?Sized> ArrayBuilder for GenericByteViewBuilder<T> {
}
}

/// Typed-builder facade over [`GenericByteViewBuilder`]: every method forwards to the
/// builder's own method of the same name.
impl<T: ByteViewType + ?Sized> SpecificArrayBuilder for GenericByteViewBuilder<T> {
    type Output = GenericByteViewArray<T>;
    type Item<'a> = &'a T::Native;

    /// Wraps the array returned by the builder's own `finish` in an [`Arc`].
    fn finish(&mut self) -> Arc<GenericByteViewArray<T>> {
        let array = self.finish();
        Arc::new(array)
    }

    /// Wraps the array returned by the builder's own `finish_cloned` in an [`Arc`].
    fn finish_cloned(&self) -> Arc<GenericByteViewArray<T>> {
        let array = self.finish_cloned();
        Arc::new(array)
    }

    /// Forwards one borrowed native value to the builder's own `append_value`.
    fn append_value(&mut self, value: &T::Native) {
        self.append_value(value)
    }

    /// Same as `append_value`, with the borrowed value passed behind one more reference.
    fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) {
        self.append_value(value)
    }

    /// Forwards to the builder's own `append_null`.
    fn append_null(&mut self) {
        self.append_null()
    }

    /// Copies every slot of `output` into this builder, in order, keeping nulls as nulls.
    fn append_output<'a>(&'a mut self, output: &'a Self::Output) {
        // TODO: consider driving this from an array iterator instead of indices.
        for row in 0..output.len() {
            match output.is_null(row) {
                true => self.append_null(),
                false => self.append_value(output.value(row)),
            }
        }
    }
}

impl<T: ByteViewType + ?Sized, V: AsRef<T::Native>> Extend<Option<V>>
for GenericByteViewBuilder<T>
{
Expand Down
Loading
Loading