From efad407d4d8d940fda5df6b3d3f768bc5ccf7568 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 4 Feb 2024 08:09:33 -0500 Subject: [PATCH 1/4] Add example of converting RecordBatches to JSON objects --- arrow-json/src/writer.rs | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs index dd77328cb7b5..f6508b2ce4df 100644 --- a/arrow-json/src/writer.rs +++ b/arrow-json/src/writer.rs @@ -74,7 +74,34 @@ //! [`LineDelimitedWriter`] and [`ArrayWriter`] will omit writing keys with null values. //! In order to explicitly write null values for keys, configure a custom [`Writer`] by //! using a [`WriterBuilder`] to construct a [`Writer`]. - +//! +//! ## Writing to [serde_json] JSON Objects +//! +//! To serialize [`RecordBatch`]es into an array of +//! [JSON](https://docs.serde.rs/serde_json/) objects, use the [RawValue] api +//! +//! [RawValue]: https://docs.rs/serde_json/latest/serde_json/value/struct.RawValue.html +//! +//! ``` +//! # use std::sync::Arc; +//! # use arrow_array::{Int32Array, RecordBatch}; +//! # use arrow_schema::{DataType, Field, Schema}; +//! +//! let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); +//! let a = Int32Array::from(vec![1, 2, 3]); +//! let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap(); +//! +//! todo!("How do we do this?"); +//! // let json_rows = arrow_json::writer::record_batches_to_json_rows(&[&batch]).unwrap(); +//! assert_eq!( +//! serde_json::Value::Object(json_rows[1].clone()), +//! serde_json::json!({"a": 2}), +//! ); +//! ``` +//! +//! +//! +//! 
mod encoder; use std::iter; From 4f320edc69bd7b6c6319719243b04826295644bf Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 4 Feb 2024 08:11:56 -0500 Subject: [PATCH 2/4] twea --- arrow-json/src/writer.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs index f6508b2ce4df..b8750351ece9 100644 --- a/arrow-json/src/writer.rs +++ b/arrow-json/src/writer.rs @@ -86,12 +86,13 @@ //! # use std::sync::Arc; //! # use arrow_array::{Int32Array, RecordBatch}; //! # use arrow_schema::{DataType, Field, Schema}; +//! # use serde_json::{Map, Value}; //! //! let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); //! let a = Int32Array::from(vec![1, 2, 3]); //! let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap(); //! -//! todo!("How do we do this?"); +//! let json_rows: Vec<Map<String, Value>> = todo!("How do we do this?"); //! // let json_rows = arrow_json::writer::record_batches_to_json_rows(&[&batch]).unwrap(); //! assert_eq!( //! serde_json::Value::Object(json_rows[1].clone()), //! serde_json::json!({"a": 2}), From ec7ea689ed42ff8e1b25773775636e0a28b6d000 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 4 Feb 2024 08:43:33 -0500 Subject: [PATCH 3/4] Update docs --- arrow-json/src/writer.rs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs index b8750351ece9..9c218e37d2d4 100644 --- a/arrow-json/src/writer.rs +++ b/arrow-json/src/writer.rs @@ -78,9 +78,8 @@ //! ## Writing to [serde_json] JSON Objects //! //! To serialize [`RecordBatch`]es into an array of -//! [JSON](https://docs.serde.rs/serde_json/) objects, use the [RawValue] api -//! -//! [RawValue]: https://docs.rs/serde_json/latest/serde_json/value/struct.RawValue.html +//! [JSON](https://docs.serde.rs/serde_json/) objects you can reparse the resulting JSON string. +//! Note that this is less efficient than using the `Writer` API. //! //! ``` //! 
# use std::sync::Arc; @@ -92,17 +91,22 @@ //! let a = Int32Array::from(vec![1, 2, 3]); //! let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap(); //! -//! let json_rows: Vec<Map<String, Value>> = todo!("How do we do this?"); -//! // let json_rows = arrow_json::writer::record_batches_to_json_rows(&[&batch]).unwrap(); +//! // Write the record batch out as a JSON array +//! let json_string = { +//! let buf = Vec::new(); +//! let mut writer = arrow_json::ArrayWriter::new(buf); +//! writer.write_batches(&vec![&batch]).unwrap(); +//! writer.finish().unwrap(); +//! String::from_utf8(writer.into_inner()).unwrap() +//! }; +//! +//! // Parse the string using serde_json +//! let json_rows: Vec<Map<String, Value>> = serde_json::from_str(&json_string).unwrap(); //! assert_eq!( //! serde_json::Value::Object(json_rows[1].clone()), //! serde_json::json!({"a": 2}), //! ); //! ``` -//! -//! -//! -//! mod encoder; use std::iter; From 341e06b19344049bb01e4685cc0d7a8fa0546006 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 4 Feb 2024 09:15:01 -0500 Subject: [PATCH 4/4] Use from_reader and simplify example --- arrow-json/src/writer.rs | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs index 9c218e37d2d4..acca702907ea 100644 --- a/arrow-json/src/writer.rs +++ b/arrow-json/src/writer.rs @@ -85,23 +85,20 @@ //! # use std::sync::Arc; //! # use arrow_array::{Int32Array, RecordBatch}; //! # use arrow_schema::{DataType, Field, Schema}; -//! # use serde_json::{Map, Value}; -//! //! let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); //! let a = Int32Array::from(vec![1, 2, 3]); //! let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap(); //! -//! // Write the record batch out as a JSON array -//! let json_string = { -//! let buf = Vec::new(); -//! let mut writer = arrow_json::ArrayWriter::new(buf); -//! writer.write_batches(&vec![&batch]).unwrap(); -//! 
writer.finish().unwrap(); -//! String::from_utf8(writer.into_inner()).unwrap() -//! }; +//! // Write the record batch out as json bytes (string) +//! let buf = Vec::new(); +//! let mut writer = arrow_json::ArrayWriter::new(buf); +//! writer.write_batches(&vec![&batch]).unwrap(); +//! writer.finish().unwrap(); +//! let json_data = writer.into_inner(); //! //! // Parse the string using serde_json -//! let json_rows: Vec<Map<String, Value>> = serde_json::from_str(&json_string).unwrap(); +//! use serde_json::{Map, Value}; +//! let json_rows: Vec<Map<String, Value>> = serde_json::from_reader(json_data.as_slice()).unwrap(); //! assert_eq!( //! serde_json::Value::Object(json_rows[1].clone()), //! serde_json::json!({"a": 2}),