Skip to content

Commit

Permalink
Add time dictionary coercions (#6208)
Browse files Browse the repository at this point in the history
* Add time dictionary coercions

* format

* Pass through primitive values
  • Loading branch information
adriangb authored Aug 8, 2024
1 parent 79ffdc4 commit 3e02689
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 0 deletions.
59 changes: 59 additions & 0 deletions arrow-cast/src/cast/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,26 @@ where
take(cast_dict_values.as_ref(), dict_array.keys(), None)
}

/// Encodes `array` as a dictionary array keyed by `K` whose values have type
/// `dict_value_type`, routing the data through an intermediate primitive type.
///
/// The conversion is a chain of three casts, all using `cast_options`:
///
/// 1. `array` is cast to `primitive_type`;
/// 2. that result is cast to `Dictionary(K, primitive_type)`;
/// 3. the dictionary is cast to `Dictionary(K, dict_value_type)`.
///
/// # Errors
///
/// Returns the `ArrowError` produced by the first cast in the chain that fails.
pub(crate) fn pack_array_to_dictionary_via_primitive<K: ArrowDictionaryKeyType>(
    array: &dyn Array,
    primitive_type: DataType,
    dict_value_type: &DataType,
    cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError> {
    // Both dictionary types share the same key type; only the value type differs.
    let key_type = || Box::new(K::DATA_TYPE);

    // Step 1: flatten the input into its primitive representation.
    let flat = cast_with_options(array, &primitive_type, cast_options)?;

    // Step 2: dictionary-encode the primitive values.
    let intermediate_type = DataType::Dictionary(key_type(), Box::new(primitive_type));
    let encoded = cast_with_options(flat.as_ref(), &intermediate_type, cast_options)?;

    // Step 3: reinterpret the dictionary values as the requested value type.
    let target_type = DataType::Dictionary(key_type(), Box::new(dict_value_type.clone()));
    cast_with_options(encoded.as_ref(), &target_type, cast_options)
}

/// Attempts to encode an array into an `ArrayDictionary` with index
/// type K and value (dictionary) type value_type
///
Expand All @@ -188,6 +208,45 @@ pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
Decimal256(_, _) => {
pack_numeric_to_dictionary::<K, Decimal256Type>(array, dict_value_type, cast_options)
}
Float16 => {
pack_numeric_to_dictionary::<K, Float16Type>(array, dict_value_type, cast_options)
}
Float32 => {
pack_numeric_to_dictionary::<K, Float32Type>(array, dict_value_type, cast_options)
}
Float64 => {
pack_numeric_to_dictionary::<K, Float64Type>(array, dict_value_type, cast_options)
}
Date32 => pack_array_to_dictionary_via_primitive::<K>(
array,
DataType::Int32,
dict_value_type,
cast_options,
),
Date64 => pack_array_to_dictionary_via_primitive::<K>(
array,
DataType::Int64,
dict_value_type,
cast_options,
),
Time32(_) => pack_array_to_dictionary_via_primitive::<K>(
array,
DataType::Int32,
dict_value_type,
cast_options,
),
Time64(_) => pack_array_to_dictionary_via_primitive::<K>(
array,
DataType::Int64,
dict_value_type,
cast_options,
),
Timestamp(_, _) => pack_array_to_dictionary_via_primitive::<K>(
array,
DataType::Int64,
dict_value_type,
cast_options,
),
Utf8 => {
// If the input is a view type, we can avoid casting (thus copying) the data
if array.data_type() == &DataType::Utf8View {
Expand Down
30 changes: 30 additions & 0 deletions arrow-cast/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6768,6 +6768,36 @@ mod tests {
assert_eq!(array_to_strings(&cast_array), expected);
}

/// Casting a `Date32` array (including a null slot) to a `UInt8`-keyed
/// dictionary of `Date32` must yield the requested type and render the same
/// date strings.
#[test]
fn test_cast_time_array_to_dict() {
    use DataType::*;

    let target = Dictionary(Box::new(UInt8), Box::new(Date32));
    let input: ArrayRef = Arc::new(Date32Array::from(vec![Some(1000), None, Some(2000)]));

    let result = cast(&input, &target).expect("cast failed");

    // The output must carry exactly the requested dictionary type ...
    assert_eq!(result.data_type(), &target);
    // ... and its logical values, null included, must be unchanged.
    assert_eq!(
        array_to_strings(&result),
        vec!["1972-09-27", "null", "1975-06-24"]
    );
}

/// Casting a UTC second-resolution timestamp array (including a null slot) to
/// a `UInt8`-keyed dictionary of timezone-less `Timestamp(Second)` must yield
/// the requested type and render the expected timestamp strings.
#[test]
fn test_cast_timestamp_array_to_dict() {
    use DataType::*;

    let target = Dictionary(Box::new(UInt8), Box::new(Timestamp(TimeUnit::Second, None)));

    let seconds = TimestampSecondArray::from(vec![Some(1000), None, Some(2000)]);
    let input: ArrayRef = Arc::new(seconds.with_timezone_utc());

    let result = cast(&input, &target).expect("cast failed");

    // The output must carry exactly the requested dictionary type ...
    assert_eq!(result.data_type(), &target);
    // ... and its logical values, null included, must render as expected.
    assert_eq!(
        array_to_strings(&result),
        vec!["1970-01-01T00:16:40", "null", "1970-01-01T00:33:20"]
    );
}

#[test]
fn test_cast_string_array_to_dict() {
use DataType::*;
Expand Down

0 comments on commit 3e02689

Please sign in to comment.