Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: call try_merge recursively for list field #5852

Merged
merged 1 commit into from
Jun 13, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 111 additions & 2 deletions arrow-schema/src/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,30 @@ impl Field {
));
}
},
DataType::List(field) => match &from.data_type {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should also add support for DataType::LargeList https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#variant.LargeList

If you don't want to add it in this PR, I'll file a ticket to track it and add it in a follow-on PR.

DataType::List(from_field) => {
let mut f = (**field).clone();
f.try_merge(from_field)?;
(*field) = Arc::new(f);
},
_ => {
return Err(ArrowError::SchemaError(
format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
self.name, from.data_type)
))}
},
DataType::LargeList(field) => match &from.data_type {
DataType::LargeList(from_field) => {
let mut f = (**field).clone();
f.try_merge(from_field)?;
(*field) = Arc::new(f);
},
_ => {
return Err(ArrowError::SchemaError(
format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
self.name, from.data_type)
))}
},
DataType::Null => {
self.nullable = true;
self.data_type = from.data_type.clone();
Expand All @@ -509,9 +533,7 @@ impl Field {
| DataType::LargeBinary
| DataType::BinaryView
| DataType::Interval(_)
| DataType::LargeList(_)
| DataType::LargeListView(_)
| DataType::List(_)
| DataType::ListView(_)
| DataType::Map(_, _)
| DataType::Dictionary(_, _)
Expand Down Expand Up @@ -623,6 +645,93 @@ mod test {
assert_eq!(Field::new("c2", DataType::Utf8, true), field2);
}

// Checks `Field::try_merge` when the incoming field has the same container
// type (Struct, List, LargeList) but its child field is `DataType::Null`.
// In each case the assertion expects the merged child to keep `Float32`
// and become nullable, and the outer field to become nullable as well.
#[test]
fn test_merge_with_nested_null() {
// Case 1: Struct with a non-nullable Float32 child, merged with a nullable
// Struct whose child is Null.
let mut struct1 = Field::new(
"s1",
DataType::Struct(Fields::from(vec![Field::new(
"inner",
DataType::Float32,
false,
)])),
false,
);

let struct2 = Field::new(
"s2",
DataType::Struct(Fields::from(vec![Field::new(
"inner",
DataType::Null,
false,
)])),
true,
);

struct1
.try_merge(&struct2)
.expect("should widen inner field's type to nullable float");
// The expected field keeps the name "s1"; both the outer field and the
// child are now nullable.
assert_eq!(
Field::new(
"s1",
DataType::Struct(Fields::from(vec![Field::new(
"inner",
DataType::Float32,
true,
)])),
true,
),
struct1
);

// Case 2: the same scenario with DataType::List.
let mut list1 = Field::new(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any chance we could add a test for LargeList too?

"l1",
DataType::List(Field::new("inner", DataType::Float32, false).into()),
false,
);

let list2 = Field::new(
"l2",
DataType::List(Field::new("inner", DataType::Null, false).into()),
true,
);

list1
.try_merge(&list2)
.expect("should widen inner field's type to nullable float");
assert_eq!(
Field::new(
"l1",
DataType::List(Field::new("inner", DataType::Float32, true).into()),
true,
),
list1
);

// Case 3: the same scenario with DataType::LargeList.
let mut large_list1 = Field::new(
"ll1",
DataType::LargeList(Field::new("inner", DataType::Float32, false).into()),
false,
);

let large_list2 = Field::new(
"ll2",
DataType::LargeList(Field::new("inner", DataType::Null, false).into()),
true,
);

large_list1
.try_merge(&large_list2)
.expect("should widen inner field's type to nullable float");
assert_eq!(
Field::new(
"ll1",
DataType::LargeList(Field::new("inner", DataType::Float32, true).into()),
true,
),
large_list1
);
}

#[test]
fn test_fields_with_dict_id() {
let dict1 = Field::new_dict(
Expand Down
Loading