diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml
index dd232f197ead..1609c807c61a 100644
--- a/arrow-json/Cargo.toml
+++ b/arrow-json/Cargo.toml
@@ -34,11 +34,17 @@ path = "src/lib.rs"
 bench = false
 
 [dependencies]
-arrow-array = { workspace = true }
-arrow-buffer = { workspace = true }
-arrow-cast = { workspace = true }
-arrow-data = { workspace = true }
-arrow-schema = { workspace = true }
+arrow-array = { version = "49" }
+# arrow-buffer = { workspace = true }
+# arrow-cast = { workspace = true }
+# arrow-data = { workspace = true }
+# arrow-schema = { workspace = true }
+
+arrow-buffer = { version = "49" }
+arrow-cast = { version = "49" }
+arrow-data = { version = "49" }
+arrow-schema = { version = "49" }
+
 half = { version = "2.1", default-features = false }
 indexmap = { version = "2.0", default-features = false, features = ["std"] }
 num = { version = "0.4", default-features = false, features = ["std"] }
@@ -49,15 +55,19 @@ lexical-core = { version = "0.8", default-features = false }
 
 [dev-dependencies]
 tempfile = "3.3"
-flate2 = { version = "1", default-features = false, features = ["rust_backend"] }
+flate2 = { version = "1", default-features = false, features = [
+    "rust_backend",
+] }
 serde = { version = "1.0", default-features = false, features = ["derive"] }
 futures = "0.3"
 tokio = { version = "1.27", default-features = false, features = ["io-util"] }
 bytes = "1.4"
 criterion = { version = "0.5", default-features = false }
-rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
+rand = { version = "0.8", default-features = false, features = [
+    "std",
+    "std_rng",
+] }
 
 [[bench]]
 name = "serde"
 harness = false
-
diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs
index cabda5e2dca8..adb9de765d93 100644
--- a/arrow-json/src/writer.rs
+++ b/arrow-json/src/writer.rs
@@ -469,11 +469,58 @@ fn set_column_for_json_rows(
                 row.insert(col_name.to_string(), serde_json::Value::Object(obj));
             }
         }
+        DataType::Decimal128(_precision, _scale) | DataType::Decimal256(_precision, _scale) => {
+            to_json_float(rows, array, col_name, explicit_nulls)?;
+        }
         _ => {
             return Err(ArrowError::JsonError(format!(
-                "data type {:?} not supported in nested map for json writer",
+                "data type {:?} not supported for json writer",
                 array.data_type()
-            )))
+            )));
+        }
+    }
+    Ok(())
+}
+
+/// Writes the values of `array` into `rows` under `col_name` as JSON numbers.
+///
+/// Each valid value is rendered with the default [`FormatOptions`] and parsed back as
+/// an `f64`, so digits beyond `f64` precision are rounded. Entries of `rows` that are
+/// `None` are skipped. A null value is written as `Value::Null` when `explicit_nulls`
+/// is set and is otherwise left out of the row.
+///
+/// # Errors
+///
+/// Fails if no formatter can be built for `array`, if the rendered text does not
+/// parse as `f64`, or if the parsed number has no JSON representation.
+fn to_json_float(
+    rows: &mut [Option<serde_json::Map<String, Value>>],
+    array: &ArrayRef,
+    col_name: &str,
+    explicit_nulls: bool,
+) -> Result<(), ArrowError> {
+    let options = FormatOptions::default();
+    let formatter = ArrayFormatter::try_new(array.as_ref(), &options)?;
+    let nulls = array.nulls();
+    let rows = rows
+        .iter_mut()
+        .enumerate()
+        .filter_map(|(idx, maybe_row)| maybe_row.as_mut().map(|row| (idx, row)));
+
+    for (idx, row) in rows {
+        if nulls.map_or(true, |n| n.is_valid(idx)) {
+            // Render once; the same text feeds the parse and any error message.
+            let text = formatter.value(idx).to_string();
+            let parsed = text.parse::<f64>().map_err(|e| {
+                ArrowError::ParseError(format!("Cannot convert {} to f64: {}", text, e))
+            })?;
+            // `from_f64` returns `None` for values JSON cannot hold (NaN, infinities).
+            let number = serde_json::Number::from_f64(parsed).ok_or_else(|| {
+                ArrowError::CastError(format!("Cannot convert {} to f64", text))
+            })?;
+            row.insert(col_name.to_string(), Value::Number(number));
+        } else if explicit_nulls {
+            row.insert(col_name.to_string(), Value::Null);
         }
     }
     Ok(())