Skip to content

Commit

Permalink
fix arrow-json encoding with dictionary including nulls (#6503)
Browse files Browse the repository at this point in the history
* fix arrow-json encoding with dictionary including nulls

* linting

* fmt, and rename
  • Loading branch information
samuelcolvin authored Oct 7, 2024
1 parent 1a9ac2c commit a117eed
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 3 deletions.
2 changes: 1 addition & 1 deletion arrow-json/src/writer/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ impl<'a, K: ArrowDictionaryKeyType> DictionaryEncoder<'a, K> {
array: &'a DictionaryArray<K>,
options: &EncoderOptions,
) -> Result<Self, ArrowError> {
let encoder = make_encoder(array.values().as_ref(), options)?;
let (encoder, _) = make_encoder_impl(array.values().as_ref(), options)?;

Ok(Self {
keys: array.keys().values().clone(),
Expand Down
31 changes: 29 additions & 2 deletions arrow-json/src/writer.rs → arrow-json/src/writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,7 @@ use std::{fmt::Debug, io::Write};
use arrow_array::*;
use arrow_schema::*;

use crate::writer::encoder::EncoderOptions;
use encoder::make_encoder;
use encoder::{make_encoder, EncoderOptions};

/// This trait defines how to format a sequence of JSON objects to a
/// byte stream.
Expand Down Expand Up @@ -1806,4 +1805,32 @@ mod tests {
);
}
}

/// Regression test for JSON encoding of a nullable dictionary column: one row
/// has a null key, and another row's key points at a null dictionary value.
/// With explicit nulls enabled, both rows are expected to be written as `null`.
#[test]
fn test_writer_null_dict() {
    // Row 0 -> key 0 -> "a"; row 1 -> null key; row 2 -> key 1 -> null value.
    let dict_values = Arc::new(StringArray::from_iter(vec![Some("a"), None]));
    let dict_keys = Int32Array::from_iter(vec![Some(0), None, Some(1)]);
    let column = Arc::new(DictionaryArray::new(dict_keys, dict_values)) as ArrayRef;

    // Single nullable field typed as Dictionary<Int32, Utf8>.
    let dict_type = DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into());
    let field = Field::new("my_dict", dict_type, true);
    let schema = SchemaRef::new(Schema::new(vec![field]));
    let batch = RecordBatch::try_new(schema, vec![column]).unwrap();

    let mut buffer = Vec::new();
    {
        // Scope the writer so its mutable borrow of `buffer` ends before we read it.
        let mut writer = WriterBuilder::new()
            .with_explicit_nulls(true)
            .build::<_, JsonArray>(&mut buffer);
        writer.write(&batch).unwrap();
        writer.close().unwrap();
    }

    let expected = r#"[{"my_dict":"a"},{"my_dict":null},{"my_dict":null}]"#;
    assert_eq!(str::from_utf8(&buffer).unwrap(), expected);
}
}

0 comments on commit a117eed

Please sign in to comment.