From 416958aa99eb946d49bf197edb635e07d7fe92ad Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Mon, 13 May 2024 19:03:33 +0100 Subject: [PATCH 1/4] Structured interval type (#3125) (#5654) --- arrow-arith/src/numeric.rs | 43 +- arrow-array/src/arithmetic.rs | 14 +- arrow-array/src/array/dictionary_array.rs | 2 +- arrow-array/src/array/primitive_array.rs | 52 ++- arrow-array/src/types.rs | 77 +--- arrow-buffer/src/arith.rs | 61 +++ arrow-buffer/src/bigint/mod.rs | 55 +-- arrow-buffer/src/interval.rs | 424 ++++++++++++++++++ arrow-buffer/src/lib.rs | 11 +- arrow-buffer/src/native.rs | 70 ++- arrow-cast/src/cast/mod.rs | 111 +++-- arrow-cast/src/display.rs | 25 +- arrow-cast/src/pretty.rs | 40 +- arrow-data/src/data.rs | 10 +- arrow-integration-test/src/lib.rs | 8 +- arrow-ord/src/comparison.rs | 71 ++- arrow-ord/src/ord.rs | 20 +- arrow-row/src/fixed.rs | 42 +- arrow-select/src/take.rs | 15 +- arrow/benches/comparison_kernels.rs | 10 +- arrow/src/util/bench_util.rs | 20 +- arrow/tests/array_cast.rs | 12 +- .../array_reader/fixed_len_byte_array.rs | 11 +- parquet/src/arrow/arrow_reader/mod.rs | 10 +- parquet/src/arrow/arrow_writer/mod.rs | 26 +- 25 files changed, 928 insertions(+), 312 deletions(-) create mode 100644 arrow-buffer/src/arith.rs create mode 100644 arrow-buffer/src/interval.rs diff --git a/arrow-arith/src/numeric.rs b/arrow-arith/src/numeric.rs index b2c87bba5143..17b794762b9f 100644 --- a/arrow-arith/src/numeric.rs +++ b/arrow-arith/src/numeric.rs @@ -25,7 +25,7 @@ use arrow_array::cast::AsArray; use arrow_array::timezone::Tz; use arrow_array::types::*; use arrow_array::*; -use arrow_buffer::ArrowNativeType; +use arrow_buffer::{ArrowNativeType, IntervalDayTime, IntervalMonthDayNano}; use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit}; use crate::arity::{binary, try_binary}; @@ -343,12 +343,12 @@ trait TimestampOp: ArrowTimestampType { type Duration: ArrowPrimitiveType; fn add_year_month(timestamp: i64, delta: i32, tz: Tz) -> 
Option; - fn add_day_time(timestamp: i64, delta: i64, tz: Tz) -> Option; - fn add_month_day_nano(timestamp: i64, delta: i128, tz: Tz) -> Option; + fn add_day_time(timestamp: i64, delta: IntervalDayTime, tz: Tz) -> Option; + fn add_month_day_nano(timestamp: i64, delta: IntervalMonthDayNano, tz: Tz) -> Option; fn sub_year_month(timestamp: i64, delta: i32, tz: Tz) -> Option; - fn sub_day_time(timestamp: i64, delta: i64, tz: Tz) -> Option; - fn sub_month_day_nano(timestamp: i64, delta: i128, tz: Tz) -> Option; + fn sub_day_time(timestamp: i64, delta: IntervalDayTime, tz: Tz) -> Option; + fn sub_month_day_nano(timestamp: i64, delta: IntervalMonthDayNano, tz: Tz) -> Option; } macro_rules! timestamp { @@ -360,11 +360,11 @@ macro_rules! timestamp { Self::add_year_months(left, right, tz) } - fn add_day_time(left: i64, right: i64, tz: Tz) -> Option { + fn add_day_time(left: i64, right: IntervalDayTime, tz: Tz) -> Option { Self::add_day_time(left, right, tz) } - fn add_month_day_nano(left: i64, right: i128, tz: Tz) -> Option { + fn add_month_day_nano(left: i64, right: IntervalMonthDayNano, tz: Tz) -> Option { Self::add_month_day_nano(left, right, tz) } @@ -372,11 +372,11 @@ macro_rules! 
timestamp { Self::subtract_year_months(left, right, tz) } - fn sub_day_time(left: i64, right: i64, tz: Tz) -> Option { + fn sub_day_time(left: i64, right: IntervalDayTime, tz: Tz) -> Option { Self::subtract_day_time(left, right, tz) } - fn sub_month_day_nano(left: i64, right: i128, tz: Tz) -> Option { + fn sub_month_day_nano(left: i64, right: IntervalMonthDayNano, tz: Tz) -> Option { Self::subtract_month_day_nano(left, right, tz) } } @@ -506,12 +506,12 @@ fn timestamp_op( /// Note: these should be fallible (#4456) trait DateOp: ArrowTemporalType { fn add_year_month(timestamp: Self::Native, delta: i32) -> Self::Native; - fn add_day_time(timestamp: Self::Native, delta: i64) -> Self::Native; - fn add_month_day_nano(timestamp: Self::Native, delta: i128) -> Self::Native; + fn add_day_time(timestamp: Self::Native, delta: IntervalDayTime) -> Self::Native; + fn add_month_day_nano(timestamp: Self::Native, delta: IntervalMonthDayNano) -> Self::Native; fn sub_year_month(timestamp: Self::Native, delta: i32) -> Self::Native; - fn sub_day_time(timestamp: Self::Native, delta: i64) -> Self::Native; - fn sub_month_day_nano(timestamp: Self::Native, delta: i128) -> Self::Native; + fn sub_day_time(timestamp: Self::Native, delta: IntervalDayTime) -> Self::Native; + fn sub_month_day_nano(timestamp: Self::Native, delta: IntervalMonthDayNano) -> Self::Native; } macro_rules! date { @@ -521,11 +521,11 @@ macro_rules! date { Self::add_year_months(left, right) } - fn add_day_time(left: Self::Native, right: i64) -> Self::Native { + fn add_day_time(left: Self::Native, right: IntervalDayTime) -> Self::Native { Self::add_day_time(left, right) } - fn add_month_day_nano(left: Self::Native, right: i128) -> Self::Native { + fn add_month_day_nano(left: Self::Native, right: IntervalMonthDayNano) -> Self::Native { Self::add_month_day_nano(left, right) } @@ -533,11 +533,11 @@ macro_rules! 
date { Self::subtract_year_months(left, right) } - fn sub_day_time(left: Self::Native, right: i64) -> Self::Native { + fn sub_day_time(left: Self::Native, right: IntervalDayTime) -> Self::Native { Self::subtract_day_time(left, right) } - fn sub_month_day_nano(left: Self::Native, right: i128) -> Self::Native { + fn sub_month_day_nano(left: Self::Native, right: IntervalMonthDayNano) -> Self::Native { Self::subtract_month_day_nano(left, right) } } @@ -1346,13 +1346,10 @@ mod tests { IntervalMonthDayNanoType::make_value(35, -19, 41899000000000000) ]) ); - let a = IntervalMonthDayNanoArray::from(vec![i64::MAX as i128]); - let b = IntervalMonthDayNanoArray::from(vec![1]); + let a = IntervalMonthDayNanoArray::from(vec![IntervalMonthDayNano::MAX]); + let b = IntervalMonthDayNanoArray::from(vec![IntervalMonthDayNano::ONE]); let err = add(&a, &b).unwrap_err().to_string(); - assert_eq!( - err, - "Compute error: Overflow happened on: 9223372036854775807 + 1" - ); + assert_eq!(err, "Compute error: Overflow happened on: 2147483647 + 1"); } fn test_duration_impl>() { diff --git a/arrow-array/src/arithmetic.rs b/arrow-array/src/arithmetic.rs index 590536190309..72989ad7d5ef 100644 --- a/arrow-array/src/arithmetic.rs +++ b/arrow-array/src/arithmetic.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use arrow_buffer::{i256, ArrowNativeType}; +use arrow_buffer::{i256, ArrowNativeType, IntervalDayTime, IntervalMonthDayNano}; use arrow_schema::ArrowError; use half::f16; use num::complex::ComplexFloat; @@ -139,7 +139,10 @@ pub trait ArrowNativeTypeOp: ArrowNativeType { macro_rules! 
native_type_op { ($t:tt) => { - native_type_op!($t, 0, 1, $t::MIN, $t::MAX); + native_type_op!($t, 0, 1); + }; + ($t:tt, $zero:expr, $one: expr) => { + native_type_op!($t, $zero, $one, $t::MIN, $t::MAX); }; ($t:tt, $zero:expr, $one: expr, $min: expr, $max: expr) => { impl ArrowNativeTypeOp for $t { @@ -284,6 +287,13 @@ native_type_op!(u32); native_type_op!(u64); native_type_op!(i256, i256::ZERO, i256::ONE, i256::MIN, i256::MAX); +native_type_op!(IntervalDayTime, IntervalDayTime::ZERO, IntervalDayTime::ONE); +native_type_op!( + IntervalMonthDayNano, + IntervalMonthDayNano::ZERO, + IntervalMonthDayNano::ONE +); + macro_rules! native_type_float_op { ($t:tt, $zero:expr, $one:expr, $min:expr, $max:expr) => { impl ArrowNativeTypeOp for $t { diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index 763e340b792b..045917a1bfb8 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -946,7 +946,7 @@ where /// return Ok(d.with_values(r)); /// } /// downcast_primitive_array! 
{ -/// a => Ok(Arc::new(a.iter().map(|x| x.map(|x| x.to_string())).collect::())), +/// a => Ok(Arc::new(a.iter().map(|x| x.map(|x| format!("{x:?}"))).collect::())), /// d => Err(ArrowError::InvalidArgumentError(format!("{d:?} not supported"))) /// } /// } diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 924cab1ac839..919a1010116b 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -1502,6 +1502,7 @@ mod tests { use crate::builder::{Decimal128Builder, Decimal256Builder}; use crate::cast::downcast_array; use crate::BooleanArray; + use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; use arrow_schema::TimeUnit; #[test] @@ -1624,33 +1625,46 @@ mod tests { assert_eq!(-5, arr.value(2)); assert_eq!(-5, arr.values()[2]); - // a day_time interval contains days and milliseconds, but we do not yet have accessors for the values - let arr = IntervalDayTimeArray::from(vec![Some(1), None, Some(-5)]); + let v0 = IntervalDayTime { + days: 34, + milliseconds: 1, + }; + let v2 = IntervalDayTime { + days: -2, + milliseconds: -5, + }; + + let arr = IntervalDayTimeArray::from(vec![Some(v0), None, Some(v2)]); + assert_eq!(3, arr.len()); assert_eq!(0, arr.offset()); assert_eq!(1, arr.null_count()); - assert_eq!(1, arr.value(0)); - assert_eq!(1, arr.values()[0]); + assert_eq!(v0, arr.value(0)); + assert_eq!(v0, arr.values()[0]); assert!(arr.is_null(1)); - assert_eq!(-5, arr.value(2)); - assert_eq!(-5, arr.values()[2]); + assert_eq!(v2, arr.value(2)); + assert_eq!(v2, arr.values()[2]); - // a month_day_nano interval contains months, days and nanoseconds, - // but we do not yet have accessors for the values. - // TODO: implement month, day, and nanos access method for month_day_nano. 
- let arr = IntervalMonthDayNanoArray::from(vec![ - Some(100000000000000000000), - None, - Some(-500000000000000000000), - ]); + let v0 = IntervalMonthDayNano { + months: 2, + days: 34, + nanoseconds: -1, + }; + let v2 = IntervalMonthDayNano { + months: -3, + days: -2, + nanoseconds: 4, + }; + + let arr = IntervalMonthDayNanoArray::from(vec![Some(v0), None, Some(v2)]); assert_eq!(3, arr.len()); assert_eq!(0, arr.offset()); assert_eq!(1, arr.null_count()); - assert_eq!(100000000000000000000, arr.value(0)); - assert_eq!(100000000000000000000, arr.values()[0]); + assert_eq!(v0, arr.value(0)); + assert_eq!(v0, arr.values()[0]); assert!(arr.is_null(1)); - assert_eq!(-500000000000000000000, arr.value(2)); - assert_eq!(-500000000000000000000, arr.values()[2]); + assert_eq!(v2, arr.value(2)); + assert_eq!(v2, arr.values()[2]); } #[test] @@ -2460,7 +2474,7 @@ mod tests { expected = "PrimitiveArray expected data type Interval(MonthDayNano) got Interval(DayTime)" )] fn test_invalid_interval_type() { - let array = IntervalDayTimeArray::from(vec![1, 2, 3]); + let array = IntervalDayTimeArray::from(vec![IntervalDayTime::ZERO]); let _ = IntervalMonthDayNanoArray::from(array.into_data()); } diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index 038b2a291f58..198a11cb6974 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -23,7 +23,7 @@ use crate::delta::{ use crate::temporal_conversions::as_datetime_with_timezone; use crate::timezone::Tz; use crate::{ArrowNativeTypeOp, OffsetSizeTrait}; -use arrow_buffer::{i256, Buffer, OffsetBuffer}; +use arrow_buffer::{i256, Buffer, IntervalDayTime, IntervalMonthDayNano, OffsetBuffer}; use arrow_data::decimal::{validate_decimal256_precision, validate_decimal_precision}; use arrow_data::{validate_binary_view, validate_string_view}; use arrow_schema::{ @@ -220,7 +220,7 @@ make_type!( ); make_type!( IntervalDayTimeType, - i64, + IntervalDayTime, DataType::Interval(IntervalUnit::DayTime), r#"A “calendar” interval 
type in days and milliseconds. @@ -247,7 +247,7 @@ which can lead to surprising results. Please see the description of ordering on ); make_type!( IntervalMonthDayNanoType, - i128, + IntervalMonthDayNano, DataType::Interval(IntervalUnit::MonthDayNano), r#"A “calendar” interval type in months, days, and nanoseconds. @@ -264,11 +264,11 @@ Each field is independent (e.g. there is no constraint that the quantity of nanoseconds represents less than a day's worth of time). ```text -┌──────────────────────────────┬─────────────┬──────────────┐ -│ Nanos │ Days │ Months │ -│ (64 bits) │ (32 bits) │ (32 bits) │ -└──────────────────────────────┴─────────────┴──────────────┘ - 0 63 95 127 bit offset +┌───────────────┬─────────────┬─────────────────────────────┐ +│ Months │ Days │ Nanos │ +│ (32 bits) │ (32 bits) │ (64 bits) │ +└───────────────┴─────────────┴─────────────────────────────┘ + 0 32 64 128 bit offset ``` Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L409-L415) for more details @@ -917,25 +917,8 @@ impl IntervalDayTimeType { /// * `days` - The number of days (+/-) represented in this interval /// * `millis` - The number of milliseconds (+/-) represented in this interval #[inline] - pub fn make_value( - days: i32, - millis: i32, - ) -> ::Native { - /* - https://github.com/apache/arrow/blob/02c8598d264c839a5b5cf3109bfd406f3b8a6ba5/cpp/src/arrow/type.h#L1433 - struct DayMilliseconds { - int32_t days = 0; - int32_t milliseconds = 0; - ... 
- } - 64 56 48 40 32 24 16 8 0 - +-------+-------+-------+-------+-------+-------+-------+-------+ - | days | milliseconds | - +-------+-------+-------+-------+-------+-------+-------+-------+ - */ - let m = millis as u64 & u32::MAX as u64; - let d = (days as u64 & u32::MAX as u64) << 32; - (m | d) as ::Native + pub fn make_value(days: i32, milliseconds: i32) -> IntervalDayTime { + IntervalDayTime { days, milliseconds } } /// Turns a IntervalDayTimeType into a tuple of (days, milliseconds) @@ -944,10 +927,8 @@ impl IntervalDayTimeType { /// /// * `i` - The IntervalDayTimeType to convert #[inline] - pub fn to_parts(i: ::Native) -> (i32, i32) { - let days = (i >> 32) as i32; - let ms = i as i32; - (days, ms) + pub fn to_parts(i: IntervalDayTime) -> (i32, i32) { + (i.days, i.milliseconds) } } @@ -960,27 +941,12 @@ impl IntervalMonthDayNanoType { /// * `days` - The number of days (+/-) represented in this interval /// * `nanos` - The number of nanoseconds (+/-) represented in this interval #[inline] - pub fn make_value( - months: i32, - days: i32, - nanos: i64, - ) -> ::Native { - /* - https://github.com/apache/arrow/blob/02c8598d264c839a5b5cf3109bfd406f3b8a6ba5/cpp/src/arrow/type.h#L1475 - struct MonthDayNanos { - int32_t months; - int32_t days; - int64_t nanoseconds; + pub fn make_value(months: i32, days: i32, nanoseconds: i64) -> IntervalMonthDayNano { + IntervalMonthDayNano { + months, + days, + nanoseconds, } - 128 112 96 80 64 48 32 16 0 - +-------+-------+-------+-------+-------+-------+-------+-------+ - | months | days | nanos | - +-------+-------+-------+-------+-------+-------+-------+-------+ - */ - let m = (months as u128 & u32::MAX as u128) << 96; - let d = (days as u128 & u32::MAX as u128) << 64; - let n = nanos as u128 & u64::MAX as u128; - (m | d | n) as ::Native } /// Turns a IntervalMonthDayNanoType into a tuple of (months, days, nanos) @@ -989,13 +955,8 @@ impl IntervalMonthDayNanoType { /// /// * `i` - The IntervalMonthDayNanoType to convert 
#[inline] - pub fn to_parts( - i: ::Native, - ) -> (i32, i32, i64) { - let months = (i >> 96) as i32; - let days = (i >> 64) as i32; - let nanos = i as i64; - (months, days, nanos) + pub fn to_parts(i: IntervalMonthDayNano) -> (i32, i32, i64) { + (i.months, i.days, i.nanoseconds) } } diff --git a/arrow-buffer/src/arith.rs b/arrow-buffer/src/arith.rs new file mode 100644 index 000000000000..2ad75f3472b4 --- /dev/null +++ b/arrow-buffer/src/arith.rs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +macro_rules! 
derive_arith { + ($ty:ty, $t:ident, $op:ident, $wrapping:ident, $checked:ident) => { + impl std::ops::$t for $ty { + type Output = $ty; + + #[cfg(debug_assertions)] + fn $op(self, rhs: Self) -> Self::Output { + self.$checked(rhs) + .expect(concat!(stringify!($ty), " overflow")) + } + + #[cfg(not(debug_assertions))] + fn $op(self, rhs: Self) -> Self::Output { + self.$wrapping(rhs) + } + } + + impl<'a> std::ops::$t<$ty> for &'a $ty { + type Output = $ty; + + fn $op(self, rhs: $ty) -> Self::Output { + (*self).$op(rhs) + } + } + + impl<'a> std::ops::$t<&'a $ty> for $ty { + type Output = $ty; + + fn $op(self, rhs: &'a $ty) -> Self::Output { + self.$op(*rhs) + } + } + + impl<'a, 'b> std::ops::$t<&'b $ty> for &'a $ty { + type Output = $ty; + + fn $op(self, rhs: &'b $ty) -> Self::Output { + (*self).$op(*rhs) + } + } + }; +} + +pub(crate) use derive_arith; diff --git a/arrow-buffer/src/bigint/mod.rs b/arrow-buffer/src/bigint/mod.rs index a8aaff13cd27..bbe65b073aa6 100644 --- a/arrow-buffer/src/bigint/mod.rs +++ b/arrow-buffer/src/bigint/mod.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use crate::arith::derive_arith; use crate::bigint::div::div_rem; use num::cast::AsPrimitive; use num::{BigInt, FromPrimitive, ToPrimitive}; @@ -638,55 +639,13 @@ fn mulx(a: u128, b: u128) -> (u128, u128) { (low, high) } -macro_rules! 
derive_op { - ($t:ident, $op:ident, $wrapping:ident, $checked:ident) => { - impl std::ops::$t for i256 { - type Output = i256; +derive_arith!(i256, Add, add, wrapping_add, checked_add); +derive_arith!(i256, Sub, sub, wrapping_sub, checked_sub); +derive_arith!(i256, Mul, mul, wrapping_mul, checked_mul); +derive_arith!(i256, Div, div, wrapping_div, checked_div); +derive_arith!(i256, Rem, rem, wrapping_rem, checked_rem); - #[cfg(debug_assertions)] - fn $op(self, rhs: Self) -> Self::Output { - self.$checked(rhs).expect("i256 overflow") - } - - #[cfg(not(debug_assertions))] - fn $op(self, rhs: Self) -> Self::Output { - self.$wrapping(rhs) - } - } - - impl<'a> std::ops::$t for &'a i256 { - type Output = i256; - - fn $op(self, rhs: i256) -> Self::Output { - (*self).$op(rhs) - } - } - - impl<'a> std::ops::$t<&'a i256> for i256 { - type Output = i256; - - fn $op(self, rhs: &'a i256) -> Self::Output { - self.$op(*rhs) - } - } - - impl<'a, 'b> std::ops::$t<&'b i256> for &'a i256 { - type Output = i256; - - fn $op(self, rhs: &'b i256) -> Self::Output { - (*self).$op(*rhs) - } - } - }; -} - -derive_op!(Add, add, wrapping_add, checked_add); -derive_op!(Sub, sub, wrapping_sub, checked_sub); -derive_op!(Mul, mul, wrapping_mul, checked_mul); -derive_op!(Div, div, wrapping_div, checked_div); -derive_op!(Rem, rem, wrapping_rem, checked_rem); - -impl std::ops::Neg for i256 { +impl Neg for i256 { type Output = i256; #[cfg(debug_assertions)] diff --git a/arrow-buffer/src/interval.rs b/arrow-buffer/src/interval.rs new file mode 100644 index 000000000000..7e8043e9a724 --- /dev/null +++ b/arrow-buffer/src/interval.rs @@ -0,0 +1,424 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::arith::derive_arith; +use std::ops::Neg; + +/// Value of an IntervalMonthDayNano array +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] +#[repr(C)] +pub struct IntervalMonthDayNano { + pub months: i32, + pub days: i32, + pub nanoseconds: i64, +} + +impl IntervalMonthDayNano { + /// The additive identity, i.e. `0`. + pub const ZERO: Self = Self::new(0, 0, 0); + + /// The multiplicative identity, i.e. `1`. + pub const ONE: Self = Self::new(1, 1, 1); + + /// The additive inverse of [`Self::ONE`], i.e. `-1`. 
+ pub const MINUS_ONE: Self = Self::new(-1, -1, -1); + + /// The maximum value that can be represented + pub const MAX: Self = Self::new(i32::MAX, i32::MAX, i64::MAX); + + /// The minimum value that can be represented + pub const MIN: Self = Self::new(i32::MIN, i32::MIN, i64::MIN); + + /// Create a new [`IntervalMonthDayNano`] + #[inline] + pub const fn new(months: i32, days: i32, nanoseconds: i64) -> Self { + Self { + months, + days, + nanoseconds, + } + } + + /// Computes the absolute value + #[inline] + pub fn wrapping_abs(self) -> Self { + Self { + months: self.months.wrapping_abs(), + days: self.days.wrapping_abs(), + nanoseconds: self.nanoseconds.wrapping_abs(), + } + } + + /// Computes the absolute value + #[inline] + pub fn checked_abs(self) -> Option { + Some(Self { + months: self.months.checked_abs()?, + days: self.days.checked_abs()?, + nanoseconds: self.nanoseconds.checked_abs()?, + }) + } + + /// Negates the value + #[inline] + pub fn wrapping_neg(self) -> Self { + Self { + months: self.months.wrapping_neg(), + days: self.days.wrapping_neg(), + nanoseconds: self.nanoseconds.wrapping_neg(), + } + } + + /// Negates the value + #[inline] + pub fn checked_neg(self) -> Option { + Some(Self { + months: self.months.checked_neg()?, + days: self.days.checked_neg()?, + nanoseconds: self.nanoseconds.checked_neg()?, + }) + } + + /// Performs wrapping addition + #[inline] + pub fn wrapping_add(self, other: Self) -> Self { + Self { + months: self.months.wrapping_add(other.months), + days: self.days.wrapping_add(other.days), + nanoseconds: self.nanoseconds.wrapping_add(other.nanoseconds), + } + } + + /// Performs checked addition + #[inline] + pub fn checked_add(self, other: Self) -> Option { + Some(Self { + months: self.months.checked_add(other.months)?, + days: self.days.checked_add(other.days)?, + nanoseconds: self.nanoseconds.checked_add(other.nanoseconds)?, + }) + } + + /// Performs wrapping subtraction + #[inline] + pub fn wrapping_sub(self, other: Self) -> 
Self { + Self { + months: self.months.wrapping_sub(other.months), + days: self.days.wrapping_sub(other.days), + nanoseconds: self.nanoseconds.wrapping_sub(other.nanoseconds), + } + } + + /// Performs checked subtraction + #[inline] + pub fn checked_sub(self, other: Self) -> Option { + Some(Self { + months: self.months.checked_sub(other.months)?, + days: self.days.checked_sub(other.days)?, + nanoseconds: self.nanoseconds.checked_sub(other.nanoseconds)?, + }) + } + + /// Performs wrapping multiplication + #[inline] + pub fn wrapping_mul(self, other: Self) -> Self { + Self { + months: self.months.wrapping_mul(other.months), + days: self.days.wrapping_mul(other.days), + nanoseconds: self.nanoseconds.wrapping_mul(other.nanoseconds), + } + } + + /// Performs checked multiplication + pub fn checked_mul(self, other: Self) -> Option { + Some(Self { + months: self.months.checked_mul(other.months)?, + days: self.days.checked_mul(other.days)?, + nanoseconds: self.nanoseconds.checked_mul(other.nanoseconds)?, + }) + } + + /// Performs wrapping division + #[inline] + pub fn wrapping_div(self, other: Self) -> Self { + Self { + months: self.months.wrapping_div(other.months), + days: self.days.wrapping_div(other.days), + nanoseconds: self.nanoseconds.wrapping_div(other.nanoseconds), + } + } + + /// Performs checked division + pub fn checked_div(self, other: Self) -> Option { + Some(Self { + months: self.months.checked_div(other.months)?, + days: self.days.checked_div(other.days)?, + nanoseconds: self.nanoseconds.checked_div(other.nanoseconds)?, + }) + } + + /// Performs wrapping remainder + #[inline] + pub fn wrapping_rem(self, other: Self) -> Self { + Self { + months: self.months.wrapping_rem(other.months), + days: self.days.wrapping_rem(other.days), + nanoseconds: self.nanoseconds.wrapping_rem(other.nanoseconds), + } + } + + /// Performs checked remainder + pub fn checked_rem(self, other: Self) -> Option { + Some(Self { + months: self.months.checked_rem(other.months)?, + days: 
self.days.checked_rem(other.days)?, + nanoseconds: self.nanoseconds.checked_rem(other.nanoseconds)?, + }) + } + + /// Performs wrapping exponentiation + #[inline] + pub fn wrapping_pow(self, exp: u32) -> Self { + Self { + months: self.months.wrapping_pow(exp), + days: self.days.wrapping_pow(exp), + nanoseconds: self.nanoseconds.wrapping_pow(exp), + } + } + + /// Performs checked exponentiation + #[inline] + pub fn checked_pow(self, exp: u32) -> Option { + Some(Self { + months: self.months.checked_pow(exp)?, + days: self.days.checked_pow(exp)?, + nanoseconds: self.nanoseconds.checked_pow(exp)?, + }) + } +} + +impl Neg for IntervalMonthDayNano { + type Output = Self; + + #[cfg(debug_assertions)] + fn neg(self) -> Self::Output { + self.checked_neg().expect("IntervalMonthDayNano overflow") + } + + #[cfg(not(debug_assertions))] + fn neg(self) -> Self::Output { + self.wrapping_neg() + } +} + +derive_arith!(IntervalMonthDayNano, Add, add, wrapping_add, checked_add); +derive_arith!(IntervalMonthDayNano, Sub, sub, wrapping_sub, checked_sub); +derive_arith!(IntervalMonthDayNano, Mul, mul, wrapping_mul, checked_mul); +derive_arith!(IntervalMonthDayNano, Div, div, wrapping_div, checked_div); +derive_arith!(IntervalMonthDayNano, Rem, rem, wrapping_rem, checked_rem); + +/// Value of an IntervalDayTime array +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] +#[repr(C)] +pub struct IntervalDayTime { + pub days: i32, + pub milliseconds: i32, +} + +impl IntervalDayTime { + /// The additive identity, i.e. `0`. + pub const ZERO: Self = Self::new(0, 0); + + /// The multiplicative identity, i.e. `1`. + pub const ONE: Self = Self::new(1, 1); + + /// The additive inverse of [`Self::ONE`], i.e. `-1`. 
+ pub const MINUS_ONE: Self = Self::new(-1, -1); + + /// The maximum value that can be represented + pub const MAX: Self = Self::new(i32::MAX, i32::MAX); + + /// The minimum value that can be represented + pub const MIN: Self = Self::new(i32::MIN, i32::MIN); + + /// Create a new [`IntervalDayTime`] + #[inline] + pub const fn new(days: i32, milliseconds: i32) -> Self { + Self { days, milliseconds } + } + + /// Computes the absolute value + #[inline] + pub fn wrapping_abs(self) -> Self { + Self { + days: self.days.wrapping_abs(), + milliseconds: self.milliseconds.wrapping_abs(), + } + } + + /// Computes the absolute value + #[inline] + pub fn checked_abs(self) -> Option { + Some(Self { + days: self.days.checked_abs()?, + milliseconds: self.milliseconds.checked_abs()?, + }) + } + + /// Negates the value + #[inline] + pub fn wrapping_neg(self) -> Self { + Self { + days: self.days.wrapping_neg(), + milliseconds: self.milliseconds.wrapping_neg(), + } + } + + /// Negates the value + #[inline] + pub fn checked_neg(self) -> Option { + Some(Self { + days: self.days.checked_neg()?, + milliseconds: self.milliseconds.checked_neg()?, + }) + } + + /// Performs wrapping addition + #[inline] + pub fn wrapping_add(self, other: Self) -> Self { + Self { + days: self.days.wrapping_add(other.days), + milliseconds: self.milliseconds.wrapping_add(other.milliseconds), + } + } + + /// Performs checked addition + #[inline] + pub fn checked_add(self, other: Self) -> Option { + Some(Self { + days: self.days.checked_add(other.days)?, + milliseconds: self.milliseconds.checked_add(other.milliseconds)?, + }) + } + + /// Performs wrapping subtraction + #[inline] + pub fn wrapping_sub(self, other: Self) -> Self { + Self { + days: self.days.wrapping_sub(other.days), + milliseconds: self.milliseconds.wrapping_sub(other.milliseconds), + } + } + + /// Performs checked subtraction + #[inline] + pub fn checked_sub(self, other: Self) -> Option { + Some(Self { + days: self.days.checked_sub(other.days)?, + 
milliseconds: self.milliseconds.checked_sub(other.milliseconds)?, + }) + } + + /// Performs wrapping multiplication + #[inline] + pub fn wrapping_mul(self, other: Self) -> Self { + Self { + days: self.days.wrapping_mul(other.days), + milliseconds: self.milliseconds.wrapping_mul(other.milliseconds), + } + } + + /// Performs checked multiplication + pub fn checked_mul(self, other: Self) -> Option { + Some(Self { + days: self.days.checked_mul(other.days)?, + milliseconds: self.milliseconds.checked_mul(other.milliseconds)?, + }) + } + + /// Performs wrapping division + #[inline] + pub fn wrapping_div(self, other: Self) -> Self { + Self { + days: self.days.wrapping_div(other.days), + milliseconds: self.milliseconds.wrapping_div(other.milliseconds), + } + } + + /// Performs checked division + pub fn checked_div(self, other: Self) -> Option { + Some(Self { + days: self.days.checked_div(other.days)?, + milliseconds: self.milliseconds.checked_div(other.milliseconds)?, + }) + } + + /// Performs wrapping remainder + #[inline] + pub fn wrapping_rem(self, other: Self) -> Self { + Self { + days: self.days.wrapping_rem(other.days), + milliseconds: self.milliseconds.wrapping_rem(other.milliseconds), + } + } + + /// Performs checked remainder + pub fn checked_rem(self, other: Self) -> Option { + Some(Self { + days: self.days.checked_rem(other.days)?, + milliseconds: self.milliseconds.checked_rem(other.milliseconds)?, + }) + } + + /// Performs wrapping exponentiation + #[inline] + pub fn wrapping_pow(self, exp: u32) -> Self { + Self { + days: self.days.wrapping_pow(exp), + milliseconds: self.milliseconds.wrapping_pow(exp), + } + } + + /// Performs checked exponentiation + #[inline] + pub fn checked_pow(self, exp: u32) -> Option { + Some(Self { + days: self.days.checked_pow(exp)?, + milliseconds: self.milliseconds.checked_pow(exp)?, + }) + } +} + +impl Neg for IntervalDayTime { + type Output = Self; + + #[cfg(debug_assertions)] + fn neg(self) -> Self::Output { + 
self.checked_neg().expect("IntervalDayMillisecond overflow") + } + + #[cfg(not(debug_assertions))] + fn neg(self) -> Self::Output { + self.wrapping_neg() + } +} + +derive_arith!(IntervalDayTime, Add, add, wrapping_add, checked_add); +derive_arith!(IntervalDayTime, Sub, sub, wrapping_sub, checked_sub); +derive_arith!(IntervalDayTime, Mul, mul, wrapping_mul, checked_mul); +derive_arith!(IntervalDayTime, Div, div, wrapping_div, checked_div); +derive_arith!(IntervalDayTime, Rem, rem, wrapping_rem, checked_rem); diff --git a/arrow-buffer/src/lib.rs b/arrow-buffer/src/lib.rs index 612897af9bed..a7bf93ed0c16 100644 --- a/arrow-buffer/src/lib.rs +++ b/arrow-buffer/src/lib.rs @@ -28,10 +28,17 @@ pub mod builder; pub use builder::*; mod bigint; -mod bytes; -mod native; pub use bigint::i256; +mod bytes; + +mod native; pub use native::*; + mod util; pub use util::*; + +mod interval; +pub use interval::*; + +mod arith; diff --git a/arrow-buffer/src/native.rs b/arrow-buffer/src/native.rs index de665d4e3874..c903057ae98f 100644 --- a/arrow-buffer/src/native.rs +++ b/arrow-buffer/src/native.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use crate::i256; +use crate::{i256, IntervalDayTime, IntervalMonthDayNano}; use half::f16; mod private { @@ -239,6 +239,60 @@ impl ArrowNativeType for i256 { } } +impl private::Sealed for IntervalMonthDayNano {} +impl ArrowNativeType for IntervalMonthDayNano { + fn from_usize(_: usize) -> Option { + None + } + + fn as_usize(self) -> usize { + (self.months as usize) | ((self.days as usize) << 32) + } + + fn usize_as(i: usize) -> Self { + Self::new(i as _, (i >> 32) as _, 0) + } + + fn to_usize(self) -> Option { + None + } + + fn to_isize(self) -> Option { + None + } + + fn to_i64(self) -> Option { + None + } +} + +impl private::Sealed for IntervalDayTime {} +impl ArrowNativeType for IntervalDayTime { + fn from_usize(_: usize) -> Option { + None + } + + fn as_usize(self) -> usize { + (self.days as usize) | ((self.milliseconds as usize) << 32) + } + + fn usize_as(i: usize) -> Self { + Self::new(i as _, (i >> 32) as _) + } + + fn to_usize(self) -> Option { + None + } + + fn to_isize(self) -> Option { + None + } + + fn to_i64(self) -> Option { + None + } +} + /// Allows conversion from supported Arrow types to a byte slice. 
pub trait ToByteSlice { /// Converts this instance into a byte slice @@ -282,4 +336,18 @@ mod tests { assert!(a.to_usize().is_none()); assert_eq!(a.to_isize().unwrap(), -1); } + + #[test] + fn test_interval_usize() { + assert_eq!(IntervalDayTime::new(1, 0).as_usize(), 1); + assert_eq!(IntervalMonthDayNano::new(1, 0, 0).as_usize(), 1); + + let a = IntervalDayTime::new(23, 53); + let b = IntervalDayTime::usize_as(a.as_usize()); + assert_eq!(a, b); + + let a = IntervalMonthDayNano::new(23, 53, 0); + let b = IntervalMonthDayNano::usize_as(a.as_usize()); + assert_eq!(a, b); + } } diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 171267f80543..294d7a6e4d31 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -46,7 +46,7 @@ use crate::cast::dictionary::*; use crate::cast::list::*; use crate::cast::string::*; -use arrow_buffer::ScalarBuffer; +use arrow_buffer::{IntervalMonthDayNano, ScalarBuffer}; use arrow_data::ByteView; use chrono::{NaiveTime, Offset, TimeZone, Utc}; use std::cmp::Ordering; @@ -275,11 +275,6 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { DayTime => false, MonthDayNano => false, }, - (Int64, Interval(to_type)) => match to_type { - YearMonth => false, - DayTime => true, - MonthDayNano => false, - }, (Duration(_), Interval(MonthDayNano)) => true, (Interval(MonthDayNano), Duration(_)) => true, (Interval(YearMonth), Interval(MonthDayNano)) => true, @@ -392,9 +387,9 @@ fn cast_month_day_nano_to_duration>( }; if cast_options.safe { - let iter = array - .iter() - .map(|v| v.and_then(|v| (v >> 64 == 0).then_some((v as i64) / scale))); + let iter = array.iter().map(|v| { + v.and_then(|v| (v.days == 0 && v.months == 0).then_some(v.nanoseconds / scale)) + }); Ok(Arc::new(unsafe { PrimitiveArray::::from_trusted_len_iter(iter) })) @@ -402,8 +397,8 @@ fn cast_month_day_nano_to_duration>( let vec = array .iter() .map(|v| { - v.map(|v| match v >> 64 { - 0 => Ok((v as i64) / scale), + v.map(|v| 
match v.days == 0 && v.months == 0 { + true => Ok((v.nanoseconds) / scale), _ => Err(ArrowError::ComputeError( "Cannot convert interval containing non-zero months or days to duration" .to_string(), @@ -442,9 +437,12 @@ fn cast_duration_to_interval>( }; if cast_options.safe { - let iter = array - .iter() - .map(|v| v.and_then(|v| v.checked_mul(scale).map(|v| v as i128))); + let iter = array.iter().map(|v| { + v.and_then(|v| { + v.checked_mul(scale) + .map(|v| IntervalMonthDayNano::new(0, 0, v)) + }) + }); Ok(Arc::new(unsafe { PrimitiveArray::::from_trusted_len_iter(iter) })) @@ -454,7 +452,7 @@ fn cast_duration_to_interval>( .map(|v| { v.map(|v| { if let Ok(v) = v.mul_checked(scale) { - Ok(v as i128) + Ok(IntervalMonthDayNano::new(0, 0, v)) } else { Err(ArrowError::ComputeError(format!( "Cannot cast to {:?}. Overflowing on {:?}", @@ -1959,18 +1957,9 @@ pub fn cast_with_options( (Interval(IntervalUnit::DayTime), Interval(IntervalUnit::MonthDayNano)) => { cast_interval_day_time_to_interval_month_day_nano(array, cast_options) } - (Interval(IntervalUnit::YearMonth), Int64) => { - cast_numeric_arrays::(array, cast_options) - } - (Interval(IntervalUnit::DayTime), Int64) => { - cast_reinterpret_arrays::(array) - } (Int32, Interval(IntervalUnit::YearMonth)) => { cast_reinterpret_arrays::(array) } - (Int64, Interval(IntervalUnit::DayTime)) => { - cast_reinterpret_arrays::(array) - } (_, _) => Err(ArrowError::CastError(format!( "Casting from {from_type:?} to {to_type:?} not supported", ))), @@ -2335,7 +2324,7 @@ where #[cfg(test)] mod tests { - use arrow_buffer::{Buffer, NullBuffer}; + use arrow_buffer::{Buffer, IntervalDayTime, NullBuffer}; use chrono::NaiveDate; use half::f16; @@ -5059,25 +5048,6 @@ mod tests { } } - #[test] - fn test_cast_interval_to_i64() { - let base = vec![5, 6, 7, 8]; - - let interval_arrays = vec![ - Arc::new(IntervalDayTimeArray::from(base.clone())) as ArrayRef, - Arc::new(IntervalYearMonthArray::from( - base.iter().map(|x| *x as i32).collect::>(), - 
)) as ArrayRef, - ]; - - for arr in interval_arrays { - assert!(can_cast_types(arr.data_type(), &DataType::Int64)); - let result = cast(&arr, &DataType::Int64).unwrap(); - let result = result.as_primitive::(); - assert_eq!(base.as_slice(), result.values()); - } - } - #[test] fn test_cast_to_strings() { let a = Int32Array::from(vec![1, 2, 3]); @@ -8379,7 +8349,10 @@ mod tests { casted_array.data_type(), &DataType::Interval(IntervalUnit::MonthDayNano) ); - assert_eq!(casted_array.value(0), 1234567000000000); + assert_eq!( + casted_array.value(0), + IntervalMonthDayNano::new(0, 0, 1234567000000000) + ); let array = vec![i64::MAX]; let casted_array = cast_from_duration_to_interval::( @@ -8409,7 +8382,10 @@ mod tests { casted_array.data_type(), &DataType::Interval(IntervalUnit::MonthDayNano) ); - assert_eq!(casted_array.value(0), 1234567000000); + assert_eq!( + casted_array.value(0), + IntervalMonthDayNano::new(0, 0, 1234567000000) + ); let array = vec![i64::MAX]; let casted_array = cast_from_duration_to_interval::( @@ -8439,7 +8415,10 @@ mod tests { casted_array.data_type(), &DataType::Interval(IntervalUnit::MonthDayNano) ); - assert_eq!(casted_array.value(0), 1234567000); + assert_eq!( + casted_array.value(0), + IntervalMonthDayNano::new(0, 0, 1234567000) + ); let array = vec![i64::MAX]; let casted_array = cast_from_duration_to_interval::( @@ -8469,7 +8448,10 @@ mod tests { casted_array.data_type(), &DataType::Interval(IntervalUnit::MonthDayNano) ); - assert_eq!(casted_array.value(0), 1234567); + assert_eq!( + casted_array.value(0), + IntervalMonthDayNano::new(0, 0, 1234567) + ); let array = vec![i64::MAX]; let casted_array = cast_from_duration_to_interval::( @@ -8480,7 +8462,10 @@ mod tests { }, ) .unwrap(); - assert_eq!(casted_array.value(0), 9223372036854775807); + assert_eq!( + casted_array.value(0), + IntervalMonthDayNano::new(0, 0, i64::MAX) + ); } /// helper function to test casting from interval to duration @@ -8505,14 +8490,15 @@ mod tests { safe: false, 
format_options: FormatOptions::default(), }; + let v = IntervalMonthDayNano::new(0, 0, 1234567); // from interval month day nano to duration second - let array = vec![1234567].into(); + let array = vec![v].into(); let casted_array: DurationSecondArray = cast_from_interval_to_duration(&array, &nullable).unwrap(); assert_eq!(casted_array.value(0), 0); - let array = vec![i128::MAX].into(); + let array = vec![IntervalMonthDayNano::MAX].into(); let casted_array: DurationSecondArray = cast_from_interval_to_duration(&array, &nullable).unwrap(); assert!(!casted_array.is_valid(0)); @@ -8521,12 +8507,12 @@ mod tests { assert!(res.is_err()); // from interval month day nano to duration millisecond - let array = vec![1234567].into(); + let array = vec![v].into(); let casted_array: DurationMillisecondArray = cast_from_interval_to_duration(&array, &nullable).unwrap(); assert_eq!(casted_array.value(0), 1); - let array = vec![i128::MAX].into(); + let array = vec![IntervalMonthDayNano::MAX].into(); let casted_array: DurationMillisecondArray = cast_from_interval_to_duration(&array, &nullable).unwrap(); assert!(!casted_array.is_valid(0)); @@ -8535,12 +8521,12 @@ mod tests { assert!(res.is_err()); // from interval month day nano to duration microsecond - let array = vec![1234567].into(); + let array = vec![v].into(); let casted_array: DurationMicrosecondArray = cast_from_interval_to_duration(&array, &nullable).unwrap(); assert_eq!(casted_array.value(0), 1234); - let array = vec![i128::MAX].into(); + let array = vec![IntervalMonthDayNano::MAX].into(); let casted_array = cast_from_interval_to_duration::(&array, &nullable).unwrap(); assert!(!casted_array.is_valid(0)); @@ -8550,12 +8536,12 @@ mod tests { assert!(casted_array.is_err()); // from interval month day nano to duration nanosecond - let array = vec![1234567].into(); + let array = vec![v].into(); let casted_array: DurationNanosecondArray = cast_from_interval_to_duration(&array, &nullable).unwrap(); assert_eq!(casted_array.value(0), 
1234567); - let array = vec![i128::MAX].into(); + let array = vec![IntervalMonthDayNano::MAX].into(); let casted_array: DurationNanosecondArray = cast_from_interval_to_duration(&array, &nullable).unwrap(); assert!(!casted_array.is_valid(0)); @@ -8618,12 +8604,15 @@ mod tests { casted_array.data_type(), &DataType::Interval(IntervalUnit::MonthDayNano) ); - assert_eq!(casted_array.value(0), 97812474910747780469848774134464512); + assert_eq!( + casted_array.value(0), + IntervalMonthDayNano::new(1234567, 0, 0) + ); } /// helper function to test casting from interval day time to interval month day nano fn cast_from_interval_day_time_to_interval_month_day_nano( - array: Vec, + array: Vec, cast_options: &CastOptions, ) -> Result, ArrowError> { let array = PrimitiveArray::::from(array); @@ -8641,7 +8630,7 @@ mod tests { #[test] fn test_cast_from_interval_day_time_to_interval_month_day_nano() { // from interval day time to interval month day nano - let array = vec![123]; + let array = vec![IntervalDayTime::new(123, 0)]; let casted_array = cast_from_interval_day_time_to_interval_month_day_nano(array, &CastOptions::default()) .unwrap(); @@ -8649,7 +8638,7 @@ mod tests { casted_array.data_type(), &DataType::Interval(IntervalUnit::MonthDayNano) ); - assert_eq!(casted_array.value(0), 123000000); + assert_eq!(casted_array.value(0), IntervalMonthDayNano::new(0, 123, 0)); } #[test] diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs index a5f69b660944..edde288e9c35 100644 --- a/arrow-cast/src/display.rs +++ b/arrow-cast/src/display.rs @@ -660,19 +660,16 @@ impl<'a> DisplayIndex for &'a PrimitiveArray { impl<'a> DisplayIndex for &'a PrimitiveArray { fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { - let value: u64 = self.value(idx) as u64; + let value = self.value(idx); - let days_parts: i32 = ((value & 0xFFFFFFFF00000000) >> 32) as i32; - let milliseconds_part: i32 = (value & 0xFFFFFFFF) as i32; - - let secs = milliseconds_part / 1_000; + let secs = 
value.milliseconds / 1_000; let mins = secs / 60; let hours = mins / 60; let secs = secs - (mins * 60); let mins = mins - (hours * 60); - let milliseconds = milliseconds_part % 1_000; + let milliseconds = value.milliseconds % 1_000; let secs_sign = if secs < 0 || milliseconds < 0 { "-" @@ -683,7 +680,7 @@ impl<'a> DisplayIndex for &'a PrimitiveArray { write!( f, "0 years 0 mons {} days {} hours {} mins {}{}.{:03} secs", - days_parts, + value.days, hours, mins, secs_sign, @@ -696,28 +693,24 @@ impl<'a> DisplayIndex for &'a PrimitiveArray { impl<'a> DisplayIndex for &'a PrimitiveArray { fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { - let value: u128 = self.value(idx) as u128; - - let months_part: i32 = ((value & 0xFFFFFFFF000000000000000000000000) >> 96) as i32; - let days_part: i32 = ((value & 0xFFFFFFFF0000000000000000) >> 64) as i32; - let nanoseconds_part: i64 = (value & 0xFFFFFFFFFFFFFFFF) as i64; + let value = self.value(idx); - let secs = nanoseconds_part / 1_000_000_000; + let secs = value.nanoseconds / 1_000_000_000; let mins = secs / 60; let hours = mins / 60; let secs = secs - (mins * 60); let mins = mins - (hours * 60); - let nanoseconds = nanoseconds_part % 1_000_000_000; + let nanoseconds = value.nanoseconds % 1_000_000_000; let secs_sign = if secs < 0 || nanoseconds < 0 { "-" } else { "" }; write!( f, "0 years {} mons {} days {} hours {} mins {}{}.{:09} secs", - months_part, - days_part, + value.months, + value.days, hours, mins, secs_sign, diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index 00bba928114f..49fb359b9d42 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -142,7 +142,7 @@ mod tests { use arrow_array::builder::*; use arrow_array::types::*; use arrow_array::*; - use arrow_buffer::ScalarBuffer; + use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer}; use arrow_schema::*; use crate::display::array_value_to_string; @@ -963,12 +963,12 @@ mod tests { #[test] fn 
test_pretty_format_interval_day_time() { let arr = Arc::new(arrow_array::IntervalDayTimeArray::from(vec![ - Some(-600000), - Some(4294966295), - Some(4294967295), - Some(1), - Some(10), - Some(100), + Some(IntervalDayTime::new(-1, -600_000)), + Some(IntervalDayTime::new(0, -1001)), + Some(IntervalDayTime::new(0, -1)), + Some(IntervalDayTime::new(0, 1)), + Some(IntervalDayTime::new(0, 10)), + Some(IntervalDayTime::new(0, 100)), ])); let schema = Arc::new(Schema::new(vec![Field::new( @@ -1002,19 +1002,19 @@ mod tests { #[test] fn test_pretty_format_interval_month_day_nano_array() { let arr = Arc::new(arrow_array::IntervalMonthDayNanoArray::from(vec![ - Some(-600000000000), - Some(18446744072709551615), - Some(18446744073709551615), - Some(1), - Some(10), - Some(100), - Some(1_000), - Some(10_000), - Some(100_000), - Some(1_000_000), - Some(10_000_000), - Some(100_000_000), - Some(1_000_000_000), + Some(IntervalMonthDayNano::new(-1, -1, -600_000_000_000)), + Some(IntervalMonthDayNano::new(0, 0, -1_000_000_001)), + Some(IntervalMonthDayNano::new(0, 0, -1)), + Some(IntervalMonthDayNano::new(0, 0, 1)), + Some(IntervalMonthDayNano::new(0, 0, 10)), + Some(IntervalMonthDayNano::new(0, 0, 100)), + Some(IntervalMonthDayNano::new(0, 0, 1_000)), + Some(IntervalMonthDayNano::new(0, 0, 10_000)), + Some(IntervalMonthDayNano::new(0, 0, 100_000)), + Some(IntervalMonthDayNano::new(0, 0, 1_000_000)), + Some(IntervalMonthDayNano::new(0, 0, 10_000_000)), + Some(IntervalMonthDayNano::new(0, 0, 100_000_000)), + Some(IntervalMonthDayNano::new(0, 0, 1_000_000_000)), ])); let schema = Arc::new(Schema::new(vec![Field::new( diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index d092fd049d77..5ee966394882 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -20,7 +20,9 @@ use crate::bit_iterator::BitSliceIterator; use arrow_buffer::buffer::{BooleanBuffer, NullBuffer}; -use arrow_buffer::{bit_util, i256, ArrowNativeType, Buffer, MutableBuffer}; +use arrow_buffer::{ + 
bit_util, i256, ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer, +}; use arrow_schema::{ArrowError, DataType, UnionMode}; use std::ops::Range; use std::sync::Arc; @@ -1568,8 +1570,10 @@ pub fn layout(data_type: &DataType) -> DataTypeLayout { DataType::Time32(_) => DataTypeLayout::new_fixed_width::(), DataType::Time64(_) => DataTypeLayout::new_fixed_width::(), DataType::Interval(YearMonth) => DataTypeLayout::new_fixed_width::(), - DataType::Interval(DayTime) => DataTypeLayout::new_fixed_width::(), - DataType::Interval(MonthDayNano) => DataTypeLayout::new_fixed_width::(), + DataType::Interval(DayTime) => DataTypeLayout::new_fixed_width::(), + DataType::Interval(MonthDayNano) => { + DataTypeLayout::new_fixed_width::() + } DataType::Duration(_) => DataTypeLayout::new_fixed_width::(), DataType::Decimal128(_, _) => DataTypeLayout::new_fixed_width::(), DataType::Decimal256(_, _) => DataTypeLayout::new_fixed_width::(), diff --git a/arrow-integration-test/src/lib.rs b/arrow-integration-test/src/lib.rs index 30f0ccfbe12d..8ea788aa4dd3 100644 --- a/arrow-integration-test/src/lib.rs +++ b/arrow-integration-test/src/lib.rs @@ -21,7 +21,7 @@ //! //! 
This is not a canonical format, but provides a human-readable way of verifying language implementations -use arrow_buffer::ScalarBuffer; +use arrow_buffer::{IntervalMonthDayNano, ScalarBuffer}; use hex::decode; use num::BigInt; use num::Signed; @@ -523,11 +523,7 @@ pub fn array_from_json( let months = months.as_i64().unwrap() as i32; let days = days.as_i64().unwrap() as i32; let nanoseconds = nanoseconds.as_i64().unwrap(); - let months_days_ns: i128 = - ((nanoseconds as i128) & 0xFFFFFFFFFFFFFFFF) << 64 - | ((days as i128) & 0xFFFFFFFF) << 32 - | ((months as i128) & 0xFFFFFFFF); - months_days_ns + IntervalMonthDayNano::new(months, days, nanoseconds) } (_, _, _) => { panic!("Unable to parse {v:?} as MonthDayNano") diff --git a/arrow-ord/src/comparison.rs b/arrow-ord/src/comparison.rs index 4f56883eaebe..4197b610e7ac 100644 --- a/arrow-ord/src/comparison.rs +++ b/arrow-ord/src/comparison.rs @@ -119,7 +119,7 @@ mod tests { ListBuilder, PrimitiveDictionaryBuilder, StringBuilder, StringDictionaryBuilder, }; use arrow_array::types::*; - use arrow_buffer::{i256, ArrowNativeType, Buffer}; + use arrow_buffer::{i256, ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano}; use arrow_data::ArrayData; use arrow_schema::{DataType, Field}; use half::f16; @@ -856,26 +856,48 @@ mod tests { #[test] fn test_interval_array() { - let a = IntervalDayTimeArray::from(vec![Some(0), Some(6), Some(834), None, Some(3), None]); - let b = - IntervalDayTimeArray::from(vec![Some(70), Some(6), Some(833), Some(6), Some(3), None]); + let a = IntervalDayTimeArray::from(vec![ + Some(IntervalDayTime::new(0, 1)), + Some(IntervalDayTime::new(0, 6)), + Some(IntervalDayTime::new(4, 834)), + None, + Some(IntervalDayTime::new(2, 3)), + None + ]); + let b = IntervalDayTimeArray::from(vec![ + Some(IntervalDayTime::new(0, 4)), + Some(IntervalDayTime::new(0, 6)), + Some(IntervalDayTime::new(0, 834)), + None, + Some(IntervalDayTime::new(2, 3)), + None + ]); let res = crate::cmp::eq(&a, &b).unwrap(); 
assert_eq!( &res, &BooleanArray::from(vec![Some(false), Some(true), Some(false), None, Some(true), None]) ); - let a = - IntervalMonthDayNanoArray::from(vec![Some(0), Some(6), Some(834), None, Some(3), None]); - let b = IntervalMonthDayNanoArray::from( - vec![Some(86), Some(5), Some(8), Some(6), Some(3), None], - ); + let a = IntervalMonthDayNanoArray::from(vec![ + Some(IntervalMonthDayNano::new(0, 0, 6)), + Some(IntervalMonthDayNano::new(2, 0, 0)), + Some(IntervalMonthDayNano::new(2, -5, 0)), + None, + Some(IntervalMonthDayNano::new(0, 0, 2)), + Some(IntervalMonthDayNano::new(5, 0, -23)), + ]); + let b = IntervalMonthDayNanoArray::from(vec![ + Some(IntervalMonthDayNano::new(0, 0, 6)), + Some(IntervalMonthDayNano::new(2, 3, 0)), + Some(IntervalMonthDayNano::new(5, -5, 0)), + None, + Some(IntervalMonthDayNano::new(-1, 0, 2)), + None, + ]); let res = crate::cmp::lt(&a, &b).unwrap(); assert_eq!( &res, - &BooleanArray::from( - vec![Some(true), Some(false), Some(false), None, Some(false), None] - ) + &BooleanArray::from(vec![Some(false), Some(true), Some(true), None, Some(false), None]) ); let a = @@ -1421,10 +1443,22 @@ mod tests { #[test] fn test_interval_dyn_scalar() { - let array = IntervalDayTimeArray::from(vec![Some(1), None, Some(8), None, Some(10)]); + let array = IntervalDayTimeArray::from(vec![ + Some(IntervalDayTime::new(1, 0)), + None, + Some(IntervalDayTime::new(8, 0)), + None, + Some(IntervalDayTime::new(10, 0)), + ]); test_primitive_dyn_scalar(array); - let array = IntervalMonthDayNanoArray::from(vec![Some(1), None, Some(8), None, Some(10)]); + let array = IntervalMonthDayNanoArray::from(vec![ + Some(IntervalMonthDayNano::new(1, 0, 0)), + None, + Some(IntervalMonthDayNano::new(8, 0, 0)), + None, + Some(IntervalMonthDayNano::new(10, 0, 0)), + ]); test_primitive_dyn_scalar(array); let array = IntervalYearMonthArray::from(vec![Some(1), None, Some(8), None, Some(10)]); @@ -2054,11 +2088,16 @@ mod tests { #[test] fn 
test_eq_dyn_neq_dyn_dictionary_interval_array() { - let values = IntervalDayTimeArray::from(vec![1, 6, 10, 2, 3, 5]); + let values = IntervalDayTimeArray::from(vec![ + Some(IntervalDayTime::new(0, 1)), + Some(IntervalDayTime::new(0, 1)), + Some(IntervalDayTime::new(0, 6)), + Some(IntervalDayTime::new(4, 10)), + ]); let values = Arc::new(values) as ArrayRef; let keys1 = UInt64Array::from_iter_values([1_u64, 0, 3]); - let keys2 = UInt64Array::from_iter_values([2_u64, 0, 3]); + let keys2 = UInt64Array::from_iter_values([2_u64, 1, 3]); let dict_array1 = DictionaryArray::new(keys1, values.clone()); let dict_array2 = DictionaryArray::new(keys2, values.clone()); diff --git a/arrow-ord/src/ord.rs b/arrow-ord/src/ord.rs index e793038de929..8f21cd7c498d 100644 --- a/arrow-ord/src/ord.rs +++ b/arrow-ord/src/ord.rs @@ -131,7 +131,7 @@ pub fn build_compare(left: &dyn Array, right: &dyn Array) -> Result Self::Encoded { + let mut out = [0_u8; 8]; + out[..4].copy_from_slice(&self.days.encode()); + out[4..].copy_from_slice(&self.milliseconds.encode()); + out + } + + fn decode(encoded: Self::Encoded) -> Self { + Self { + days: i32::decode(encoded[..4].try_into().unwrap()), + milliseconds: i32::decode(encoded[4..].try_into().unwrap()), + } + } +} + +impl FixedLengthEncoding for IntervalMonthDayNano { + type Encoded = [u8; 16]; + + fn encode(self) -> Self::Encoded { + let mut out = [0_u8; 16]; + out[..4].copy_from_slice(&self.months.encode()); + out[4..8].copy_from_slice(&self.days.encode()); + out[8..].copy_from_slice(&self.nanoseconds.encode()); + out + } + + fn decode(encoded: Self::Encoded) -> Self { + Self { + months: i32::decode(encoded[..4].try_into().unwrap()), + days: i32::decode(encoded[4..8].try_into().unwrap()), + nanoseconds: i64::decode(encoded[8..].try_into().unwrap()), + } + } +} + /// Returns the total encoded length (including null byte) for a value of type `T::Native` pub const fn encoded_len(_col: &PrimitiveArray) -> usize where diff --git 
a/arrow-select/src/take.rs b/arrow-select/src/take.rs index a4dd2470ab6d..b8d59142db7d 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -845,6 +845,7 @@ pub fn take_record_batch( mod tests { use super::*; use arrow_array::builder::*; + use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; use arrow_schema::{Field, Fields, TimeUnit}; fn test_take_decimal_arrays( @@ -1158,20 +1159,26 @@ mod tests { .unwrap(); // interval_day_time + let v1 = IntervalDayTime::new(0, 0); + let v2 = IntervalDayTime::new(2, 0); + let v3 = IntervalDayTime::new(-15, 0); test_take_primitive_arrays::( - vec![Some(0), None, Some(2), Some(-15), None], + vec![Some(v1), None, Some(v2), Some(v3), None], &index, None, - vec![Some(-15), None, None, Some(-15), Some(2)], + vec![Some(v3), None, None, Some(v3), Some(v2)], ) .unwrap(); // interval_month_day_nano + let v1 = IntervalMonthDayNano::new(0, 0, 0); + let v2 = IntervalMonthDayNano::new(2, 0, 0); + let v3 = IntervalMonthDayNano::new(-15, 0, 0); test_take_primitive_arrays::( - vec![Some(0), None, Some(2), Some(-15), None], + vec![Some(v1), None, Some(v2), Some(v3), None], &index, None, - vec![Some(-15), None, None, Some(-15), Some(2)], + vec![Some(v3), None, None, Some(v3), Some(v2)], ) .unwrap(); diff --git a/arrow/benches/comparison_kernels.rs b/arrow/benches/comparison_kernels.rs index a272144b52e0..f330e1386cc4 100644 --- a/arrow/benches/comparison_kernels.rs +++ b/arrow/benches/comparison_kernels.rs @@ -22,9 +22,9 @@ use criterion::Criterion; extern crate arrow; use arrow::compute::kernels::cmp::*; -use arrow::datatypes::IntervalMonthDayNanoType; use arrow::util::bench_util::*; use arrow::{array::*, datatypes::Float32Type, datatypes::Int32Type}; +use arrow_buffer::IntervalMonthDayNano; use arrow_string::like::*; use arrow_string::regexp::regexp_is_match_utf8_scalar; @@ -59,10 +59,8 @@ fn add_benchmark(c: &mut Criterion) { let arr_a = create_primitive_array_with_seed::(SIZE, 0.0, 42); let arr_b = 
create_primitive_array_with_seed::(SIZE, 0.0, 43); - let arr_month_day_nano_a = - create_primitive_array_with_seed::(SIZE, 0.0, 43); - let arr_month_day_nano_b = - create_primitive_array_with_seed::(SIZE, 0.0, 43); + let arr_month_day_nano_a = create_month_day_nano_array_with_seed(SIZE, 0.0, 43); + let arr_month_day_nano_b = create_month_day_nano_array_with_seed(SIZE, 0.0, 43); let arr_string = create_string_array::(SIZE, 0.0); let scalar = Float32Array::from(vec![1.0]); @@ -134,7 +132,7 @@ fn add_benchmark(c: &mut Criterion) { c.bench_function("eq MonthDayNano", |b| { b.iter(|| eq(&arr_month_day_nano_a, &arr_month_day_nano_b)) }); - let scalar = IntervalMonthDayNanoArray::new_scalar(123); + let scalar = IntervalMonthDayNanoArray::new_scalar(IntervalMonthDayNano::new(123, 0, 0)); c.bench_function("eq scalar MonthDayNano", |b| { b.iter(|| eq(&arr_month_day_nano_b, &scalar).unwrap()) diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs index 140c5bc9259d..9fae8e6bab38 100644 --- a/arrow/src/util/bench_util.rs +++ b/arrow/src/util/bench_util.rs @@ -20,7 +20,7 @@ use crate::array::*; use crate::datatypes::*; use crate::util::test_util::seedable_rng; -use arrow_buffer::Buffer; +use arrow_buffer::{Buffer, IntervalMonthDayNano}; use rand::distributions::uniform::SampleUniform; use rand::thread_rng; use rand::Rng; @@ -72,6 +72,24 @@ where .collect() } +pub fn create_month_day_nano_array_with_seed( + size: usize, + null_density: f32, + seed: u64, +) -> IntervalMonthDayNanoArray { + let mut rng = StdRng::seed_from_u64(seed); + + (0..size) + .map(|_| { + if rng.gen::() < null_density { + None + } else { + Some(IntervalMonthDayNano::new(rng.gen(), rng.gen(), rng.gen())) + } + }) + .collect() +} + /// Creates an random (but fixed-seeded) array of a given size and null density pub fn create_boolean_array(size: usize, null_density: f32, true_density: f32) -> BooleanArray where diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs index 
2d3167c928d0..0fd89cc2bff4 100644 --- a/arrow/tests/array_cast.rs +++ b/arrow/tests/array_cast.rs @@ -32,7 +32,7 @@ use arrow_array::{ TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, UnionArray, }; -use arrow_buffer::{i256, Buffer}; +use arrow_buffer::{i256, Buffer, IntervalDayTime, IntervalMonthDayNano}; use arrow_cast::pretty::pretty_format_columns; use arrow_cast::{can_cast_types, cast}; use arrow_data::ArrayData; @@ -249,8 +249,14 @@ fn get_arrays_of_all_types() -> Vec { Arc::new(Time64MicrosecondArray::from(vec![1000, 2000])), Arc::new(Time64NanosecondArray::from(vec![1000, 2000])), Arc::new(IntervalYearMonthArray::from(vec![1000, 2000])), - Arc::new(IntervalDayTimeArray::from(vec![1000, 2000])), - Arc::new(IntervalMonthDayNanoArray::from(vec![1000, 2000])), + Arc::new(IntervalDayTimeArray::from(vec![ + IntervalDayTime::new(0, 1000), + IntervalDayTime::new(0, 2000), + ])), + Arc::new(IntervalMonthDayNanoArray::from(vec![ + IntervalMonthDayNano::new(0, 0, 1000), + IntervalMonthDayNano::new(0, 0, 1000), + ])), Arc::new(DurationSecondArray::from(vec![1000, 2000])), Arc::new(DurationMillisecondArray::from(vec![1000, 2000])), Arc::new(DurationMicrosecondArray::from(vec![1000, 2000])), diff --git a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs index a0d25d403c1b..a9159bb47125 100644 --- a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs +++ b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs @@ -30,7 +30,7 @@ use arrow_array::{ ArrayRef, Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array, IntervalDayTimeArray, IntervalYearMonthArray, }; -use arrow_buffer::{i256, Buffer}; +use arrow_buffer::{i256, Buffer, IntervalDayTime}; use arrow_data::ArrayDataBuilder; use arrow_schema::{DataType as ArrowType, IntervalUnit}; use bytes::Bytes; @@ -195,7 +195,14 @@ impl ArrayReader for 
FixedLenByteArrayReader { IntervalUnit::DayTime => Arc::new( binary .iter() - .map(|o| o.map(|b| i64::from_le_bytes(b[4..12].try_into().unwrap()))) + .map(|o| { + o.map(|b| { + IntervalDayTime::new( + i32::from_le_bytes(b[4..8].try_into().unwrap()), + i32::from_le_bytes(b[8..12].try_into().unwrap()), + ) + }) + }) .collect::(), ) as ArrayRef, IntervalUnit::MonthDayNano => { diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index a30bf168619f..db75c54bf5d0 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -750,7 +750,7 @@ mod tests { Decimal128Type, Decimal256Type, DecimalType, Float16Type, Float32Type, Float64Type, }; use arrow_array::*; - use arrow_buffer::{i256, ArrowNativeType, Buffer}; + use arrow_buffer::{i256, ArrowNativeType, Buffer, IntervalDayTime}; use arrow_data::ArrayDataBuilder; use arrow_schema::{ArrowError, DataType as ArrowDataType, Field, Fields, Schema}; use arrow_select::concat::concat_batches; @@ -1060,8 +1060,12 @@ mod tests { Arc::new( vals.iter() .map(|x| { - x.as_ref() - .map(|b| i64::from_le_bytes(b.as_ref()[4..12].try_into().unwrap())) + x.as_ref().map(|b| IntervalDayTime { + days: i32::from_le_bytes(b.as_ref()[4..8].try_into().unwrap()), + milliseconds: i32::from_le_bytes( + b.as_ref()[8..12].try_into().unwrap(), + ), + }) }) .collect::(), ) diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index bf4b88ac52d4..60feda69e841 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -942,11 +942,11 @@ fn get_interval_dt_array_slice( ) -> Vec { let mut values = Vec::with_capacity(indices.len()); for i in indices { - let mut prefix = vec![0; 4]; - let mut value = array.value(*i).to_le_bytes().to_vec(); - prefix.append(&mut value); - debug_assert_eq!(prefix.len(), 12); - values.push(FixedLenByteArray::from(ByteArray::from(prefix))); + let mut out = [0; 12]; + let value = 
array.value(*i); + out[4..8].copy_from_slice(&value.days.to_le_bytes()); + out[8..12].copy_from_slice(&value.milliseconds.to_le_bytes()); + values.push(FixedLenByteArray::from(ByteArray::from(out.to_vec()))); } values } @@ -1016,7 +1016,7 @@ mod tests { use arrow::error::Result as ArrowResult; use arrow::util::pretty::pretty_format_batches; use arrow::{array::*, buffer::Buffer}; - use arrow_buffer::NullBuffer; + use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, NullBuffer}; use arrow_schema::Fields; use crate::basic::Encoding; @@ -2057,7 +2057,12 @@ mod tests { #[test] fn interval_day_time_single_column() { - required_and_optional::(0..SMALL_SIZE as i64); + required_and_optional::(vec![ + IntervalDayTime::new(0, 1), + IntervalDayTime::new(0, 3), + IntervalDayTime::new(3, -2), + IntervalDayTime::new(-200, 4), + ]); } #[test] @@ -2065,7 +2070,12 @@ mod tests { expected = "Attempting to write an Arrow interval type MonthDayNano to parquet that is not yet implemented" )] fn interval_month_day_nano_single_column() { - required_and_optional::(0..SMALL_SIZE as i128); + required_and_optional::(vec![ + IntervalMonthDayNano::new(0, 1, 5), + IntervalMonthDayNano::new(0, 3, 2), + IntervalMonthDayNano::new(3, -2, -5), + IntervalMonthDayNano::new(-200, 4, -1), + ]); } #[test] From 25bf96a88b33fa89af13f7224687a01266e8a4ce Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 15 May 2024 20:03:03 +0100 Subject: [PATCH 2/4] Update integration-test --- arrow-integration-test/src/lib.rs | 61 ++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/arrow-integration-test/src/lib.rs b/arrow-integration-test/src/lib.rs index 8ea788aa4dd3..66fa9f3320e0 100644 --- a/arrow-integration-test/src/lib.rs +++ b/arrow-integration-test/src/lib.rs @@ -21,7 +21,7 @@ //! //! 
This is not a canonical format, but provides a human-readable way of verifying language implementations -use arrow_buffer::{IntervalMonthDayNano, ScalarBuffer}; +use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer}; use hex::decode; use num::BigInt; use num::Signed; @@ -32,7 +32,6 @@ use std::sync::Arc; use arrow::array::*; use arrow::buffer::{Buffer, MutableBuffer}; -use arrow::compute; use arrow::datatypes::*; use arrow::error::{ArrowError, Result}; use arrow::util::bit_util; @@ -349,10 +348,7 @@ pub fn array_from_json( } Ok(Arc::new(b.finish())) } - DataType::Int32 - | DataType::Date32 - | DataType::Time32(_) - | DataType::Interval(IntervalUnit::YearMonth) => { + DataType::Int32 | DataType::Date32 | DataType::Time32(_) => { let mut b = Int32Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity @@ -367,14 +363,29 @@ pub fn array_from_json( }; } let array = Arc::new(b.finish()) as ArrayRef; - compute::cast(&array, field.data_type()) + arrow::compute::cast(&array, field.data_type()) + } + DataType::Interval(IntervalUnit::YearMonth) => { + let mut b = IntervalYearMonthBuilder::with_capacity(json_col.count); + for (is_valid, value) in json_col + .validity + .as_ref() + .unwrap() + .iter() + .zip(json_col.data.unwrap()) + { + match is_valid { + 1 => b.append_value(value.as_i64().unwrap() as i32), + _ => b.append_null(), + }; + } + Ok(Arc::new(b.finish())) } DataType::Int64 | DataType::Date64 | DataType::Time64(_) | DataType::Timestamp(_, _) - | DataType::Duration(_) - | DataType::Interval(IntervalUnit::DayTime) => { + | DataType::Duration(_) => { let mut b = Int64Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity @@ -387,6 +398,25 @@ pub fn array_from_json( 1 => b.append_value(match value { Value::Number(n) => n.as_i64().unwrap(), Value::String(s) => s.parse().expect("Unable to parse string as i64"), + _ => panic!("Unable to parse {value:?} as number"), + }), + _ => b.append_null(), + }; 
+ } + let array = Arc::new(b.finish()) as ArrayRef; + arrow::compute::cast(&array, field.data_type()) + } + DataType::Interval(IntervalUnit::DayTime) => { + let mut b = IntervalDayTimeBuilder::with_capacity(json_col.count); + for (is_valid, value) in json_col + .validity + .as_ref() + .unwrap() + .iter() + .zip(json_col.data.unwrap()) + { + match is_valid { + 1 => b.append_value(match value { Value::Object(ref map) if map.contains_key("days") && map.contains_key("milliseconds") => { @@ -397,13 +427,9 @@ pub fn array_from_json( match (days, milliseconds) { (Value::Number(d), Value::Number(m)) => { - let mut bytes = [0_u8; 8]; - let m = (m.as_i64().unwrap() as i32).to_le_bytes(); - let d = (d.as_i64().unwrap() as i32).to_le_bytes(); - - let c = [d, m].concat(); - bytes.copy_from_slice(c.as_slice()); - i64::from_le_bytes(bytes) + let days = d.as_i64().unwrap() as _; + let millis = m.as_i64().unwrap() as _; + IntervalDayTime::new(days, millis) } _ => { panic!("Unable to parse {value:?} as interval daytime") @@ -418,8 +444,7 @@ pub fn array_from_json( _ => b.append_null(), }; } - let array = Arc::new(b.finish()) as ArrayRef; - compute::cast(&array, field.data_type()) + Ok(Arc::new(b.finish())) } DataType::UInt8 => { let mut b = UInt8Builder::with_capacity(json_col.count); From 3dce0f63fac03b33f4672628d4372eac41bc2aa3 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Thu, 16 May 2024 11:02:22 +0100 Subject: [PATCH 3/4] Fix 32-bit build --- arrow-buffer/src/native.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arrow-buffer/src/native.rs b/arrow-buffer/src/native.rs index c903057ae98f..e05c1311ff3c 100644 --- a/arrow-buffer/src/native.rs +++ b/arrow-buffer/src/native.rs @@ -246,11 +246,11 @@ impl ArrowNativeType for IntervalMonthDayNano { } fn as_usize(self) -> usize { - (self.months as usize) | ((self.days as usize) << 32) + ((self.months as u64) | ((self.days as u64) << 32)) as usize } fn usize_as(i: usize) -> Self { - Self::new(i 
as _, (i >> 32) as _, 0) + Self::new(i as _, ((i as u64) >> 32) as _, 0) } fn to_usize(self) -> Option { @@ -273,11 +273,11 @@ impl ArrowNativeType for IntervalDayTime { } fn as_usize(self) -> usize { - (self.days as usize) | ((self.milliseconds as usize) << 32) + ((self.days as u64) | ((self.milliseconds as u64) << 32)) as usize } fn usize_as(i: usize) -> Self { - Self::new(i as _, (i >> 32) as _) + Self::new(i as _, ((i as u64) >> 32) as _) } fn to_usize(self) -> Option { From 6cfd7cd618e7bb9ad922068a34bada7e44e0493d Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Mon, 20 May 2024 11:29:50 +0100 Subject: [PATCH 4/4] Review feedback --- arrow-buffer/src/arith.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arrow-buffer/src/arith.rs b/arrow-buffer/src/arith.rs index 2ad75f3472b4..ca693c3607dc 100644 --- a/arrow-buffer/src/arith.rs +++ b/arrow-buffer/src/arith.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +/// Derives `std::ops::$t` for `$ty`, calling the `$wrapping` or `$checked` variant +/// depending on whether `debug_assertions` is enabled macro_rules! derive_arith { ($ty:ty, $t:ident, $op:ident, $wrapping:ident, $checked:ident) => { impl std::ops::$t for $ty {