Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: JSON encoding of FixedSizeList #5646

Merged
merged 1 commit into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 84 additions & 1 deletion arrow-json/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,8 @@ mod tests {
use serde_json::json;

use arrow_array::builder::{
FixedSizeBinaryBuilder, Int32Builder, Int64Builder, MapBuilder, StringBuilder,
FixedSizeBinaryBuilder, FixedSizeListBuilder, Int32Builder, Int64Builder, MapBuilder,
StringBuilder,
};
use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
use arrow_data::ArrayData;
Expand Down Expand Up @@ -2215,4 +2216,86 @@ mod tests {
);
}
}

/// Checks JSON output for a nullable `FixedSizeList` column of `Int32` with
/// list size 3, both with explicit nulls enabled and with the default
/// `ArrayWriter` settings.
#[test]
fn test_writer_fixed_size_list() {
let size = 3;
let field = FieldRef::new(Field::new("item", DataType::Int32, true));
let schema = SchemaRef::new(Schema::new(vec![Field::new(
"list",
DataType::FixedSizeList(field, size),
true,
)]));

let values_builder = Int32Builder::new();
let mut list_builder = FixedSizeListBuilder::new(values_builder, size);
// Three valid lists (each containing one null element) followed by one null list.
let lists = [
Some([Some(1), Some(2), None]),
Some([Some(3), None, Some(4)]),
Some([None, Some(5), Some(6)]),
None,
];
for list in lists {
match list {
Some(l) => {
for value in l {
match value {
Some(v) => list_builder.values().append_value(v),
None => list_builder.values().append_null(),
}
}
list_builder.append(true);
}
None => {
// A null list slot still gets `size` child entries appended.
// NOTE(review): presumably the builder needs the child length to stay a
// multiple of `size` -- confirm against the FixedSizeListBuilder docs.
for _ in 0..size {
list_builder.values().append_null();
}
list_builder.append(false);
}
}
}
let array = Arc::new(list_builder.finish()) as ArrayRef;
let batch = RecordBatch::try_new(schema, vec![array]).unwrap();

// Encode and check JSON with explicit nulls: the null list is expected as `"list": null`.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️

{
let json_value: Value = {
let mut buf = Vec::new();
let mut writer = WriterBuilder::new()
.with_explicit_nulls(true)
.build::<_, JsonArray>(&mut buf);
writer.write(&batch).unwrap();
writer.close().unwrap();
serde_json::from_slice(&buf).unwrap()
};
assert_eq!(
json!([
{"list": [1, 2, null]},
{"list": [3, null, 4]},
{"list": [null, 5, 6]},
{"list": null},
]),
json_value
);
}
// Encode and check JSON with no explicit nulls: null elements inside a valid list are
// still expected as `null`, while the null list's field is expected to be left out.
{
let json_value: Value = {
let mut buf = Vec::new();
let mut writer = ArrayWriter::new(&mut buf);
writer.write(&batch).unwrap();
writer.close().unwrap();
serde_json::from_slice(&buf).unwrap()
};
assert_eq!(
json!([
{"list": [1, 2, null]},
{"list": [3, null, 4]},
{"list": [null, 5, 6]},
{}, // empty because nulls are omitted
]),
json_value
);
}
}
}
53 changes: 52 additions & 1 deletion arrow-json/src/writer/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ fn make_encoder_impl<'a>(
let array = array.as_list::<i64>();
(Box::new(ListEncoder::try_new(array, options)?) as _, array.nulls().cloned())
}
DataType::FixedSizeList(_, _) => {
let array = array.as_fixed_size_list();
(Box::new(FixedSizeListEncoder::try_new(array, options)?) as _, array.nulls().cloned())
}

DataType::Dictionary(_, _) => downcast_dictionary_array! {
array => (Box::new(DictionaryEncoder::try_new(array, options)?) as _, array.logical_nulls()),
Expand All @@ -100,7 +104,7 @@ fn make_encoder_impl<'a>(
}

DataType::FixedSizeBinary(_) => {
let array = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
let array = array.as_fixed_size_binary();
(Box::new(FixedSizeBinaryEncoder::new(array)) as _, array.nulls().cloned())
}

Expand Down Expand Up @@ -329,6 +333,53 @@ impl<'a, O: OffsetSizeTrait> Encoder for ListEncoder<'a, O> {
}
}

/// Encoder that writes each entry of a `FixedSizeListArray` as a JSON array,
/// delegating the individual elements to an encoder built for the child values.
struct FixedSizeListEncoder<'a> {
/// Number of child values per list entry (the array's `value_length()`).
value_length: usize,
/// Null buffer returned alongside the child encoder; when present it is
/// consulted per child index and null children are written as `null`.
nulls: Option<NullBuffer>,
/// Encoder for the child values, addressed by flat child index.
encoder: Box<dyn Encoder + 'a>,
}

impl<'a> FixedSizeListEncoder<'a> {
    /// Creates an encoder for `array`.
    ///
    /// The child values array is handed to `make_encoder_impl`; the encoder and
    /// null buffer it returns are stored together with the list's fixed length.
    ///
    /// # Errors
    ///
    /// Returns whatever error `make_encoder_impl` reports for the child values.
    fn try_new(
        array: &'a FixedSizeListArray,
        options: &EncoderOptions,
    ) -> Result<Self, ArrowError> {
        make_encoder_impl(array.values().as_ref(), options).map(|(child_encoder, child_nulls)| {
            Self {
                value_length: array.value_length().as_usize(),
                nulls: child_nulls,
                encoder: child_encoder,
            }
        })
    }
}

impl<'a> Encoder for FixedSizeListEncoder<'a> {
    /// Appends list entry `idx` to `out` as a JSON array.
    ///
    /// The entry covers child indices `idx * value_length` up to (but not
    /// including) `(idx + 1) * value_length`. Elements are comma-separated, and
    /// a child flagged in the stored null buffer is written as `null` instead of
    /// being passed to the child encoder.
    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
        let first = idx * self.value_length;
        let children = first..first + self.value_length;

        out.push(b'[');
        if let Some(child_nulls) = self.nulls.as_ref() {
            // Null-aware path: check validity before delegating each element.
            for child in children {
                if child != first {
                    out.push(b',');
                }
                if child_nulls.is_null(child) {
                    out.extend_from_slice(b"null");
                } else {
                    self.encoder.encode(child, out);
                }
            }
        } else {
            // No null buffer: every element goes straight to the child encoder.
            for child in children {
                if child != first {
                    out.push(b',');
                }
                self.encoder.encode(child, out);
            }
        }
        out.push(b']');
    }
}

struct DictionaryEncoder<'a, K: ArrowDictionaryKeyType> {
keys: ScalarBuffer<K::Native>,
encoder: Box<dyn Encoder + 'a>,
Expand Down
Loading