From a117eedd2d65ebb61b506ae8a24c0aff67efe332 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Mon, 7 Oct 2024 19:21:44 +0100 Subject: [PATCH] fix arrow-json encoding with dictionary including nulls (#6503) * fix arrow-json encoding with dictionary including nulls * linting * fmt, and rename --- arrow-json/src/writer/encoder.rs | 2 +- arrow-json/src/{writer.rs => writer/mod.rs} | 31 +++++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) rename arrow-json/src/{writer.rs => writer/mod.rs} (98%) diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs index 9b6c9418fa01..dfe62852123a 100644 --- a/arrow-json/src/writer/encoder.rs +++ b/arrow-json/src/writer/encoder.rs @@ -409,7 +409,7 @@ impl<'a, K: ArrowDictionaryKeyType> DictionaryEncoder<'a, K> { array: &'a DictionaryArray, options: &EncoderOptions, ) -> Result { - let encoder = make_encoder(array.values().as_ref(), options)?; + let (encoder, _) = make_encoder_impl(array.values().as_ref(), options)?; Ok(Self { keys: array.keys().values().clone(), diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer/mod.rs similarity index 98% rename from arrow-json/src/writer.rs rename to arrow-json/src/writer/mod.rs index d973206ccf74..9a93cf0410e5 100644 --- a/arrow-json/src/writer.rs +++ b/arrow-json/src/writer/mod.rs @@ -111,8 +111,7 @@ use std::{fmt::Debug, io::Write}; use arrow_array::*; use arrow_schema::*; -use crate::writer::encoder::EncoderOptions; -use encoder::make_encoder; +use encoder::{make_encoder, EncoderOptions}; /// This trait defines how to format a sequence of JSON objects to a /// byte stream. @@ -1806,4 +1805,32 @@ mod tests { ); } } + + #[test] + fn test_writer_null_dict() { + let keys = Int32Array::from_iter(vec![Some(0), None, Some(1)]); + let values = Arc::new(StringArray::from_iter(vec![Some("a"), None])); + let dict = DictionaryArray::new(keys, values); + + let schema = SchemaRef::new(Schema::new(vec![Field::new( + "my_dict", + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), + true, + )])); + + let array = Arc::new(dict) as ArrayRef; + let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); + + let mut json = Vec::new(); + let write_builder = WriterBuilder::new().with_explicit_nulls(true); + let mut writer = write_builder.build::<_, JsonArray>(&mut json); + writer.write(&batch).unwrap(); + writer.close().unwrap(); + + let json_str = str::from_utf8(&json).unwrap(); + assert_eq!( + json_str, + r#"[{"my_dict":"a"},{"my_dict":null},{"my_dict":null}]"# + ) + } }