diff --git a/parquet-testing b/parquet-testing
index 1ba34478f535..9b48ff4f94dc 160000
--- a/parquet-testing
+++ b/parquet-testing
@@ -1 +1 @@
-Subproject commit 1ba34478f535c89382263c42c675a9af4f57f2dd
+Subproject commit 9b48ff4f94dc5e89592d46a119884dbb88100884
diff --git a/parquet/src/arrow/arrow_reader/statistics.rs b/parquet/src/arrow/arrow_reader/statistics.rs
index c42f92838c8c..369ea4a47e57 100644
--- a/parquet/src/arrow/arrow_reader/statistics.rs
+++ b/parquet/src/arrow/arrow_reader/statistics.rs
@@ -17,6 +17,8 @@
//! [`StatisticsConverter`] to convert statistics in parquet format to arrow [`ArrayRef`].
+/// Note: the tests for this module live in
+/// `arrow-rs/parquet/tests/arrow_reader/statistics.rs`.
use crate::arrow::buffer::bit_util::sign_extend_be;
use crate::arrow::parquet_column;
use crate::data_type::{ByteArray, FixedLenByteArray};
@@ -1568,1130 +1570,3 @@ impl<'a> StatisticsConverter<'a> {
new_null_array(data_type, num_row_groups)
}
}
-
-#[cfg(test)]
-mod test {
- use super::*;
- use crate::arrow::arrow_reader::ArrowReaderBuilder;
- use crate::arrow::arrow_writer::ArrowWriter;
- use crate::file::metadata::{ParquetMetaData, RowGroupMetaData};
- use crate::file::properties::{EnabledStatistics, WriterProperties};
- use arrow::compute::kernels::cast_utils::Parser;
- use arrow::datatypes::{i256, Date32Type, Date64Type};
- use arrow::util::test_util::parquet_test_data;
- use arrow_array::{
- new_empty_array, new_null_array, Array, ArrayRef, BinaryArray, BinaryViewArray,
- BooleanArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array, Float32Array,
- Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, RecordBatch,
- StringArray, StringViewArray, StructArray, TimestampNanosecondArray,
- };
- use arrow_schema::{DataType, Field, SchemaRef};
- use bytes::Bytes;
- use std::path::PathBuf;
- use std::sync::Arc;
- // TODO error cases (with parquet statistics that are mismatched in expected type)
-
- #[test]
- fn roundtrip_empty() {
- let empty_bool_array = new_empty_array(&DataType::Boolean);
- Test {
- input: empty_bool_array.clone(),
- expected_min: empty_bool_array.clone(),
- expected_max: empty_bool_array.clone(),
- }
- .run()
- }
-
- #[test]
- fn roundtrip_bool() {
- Test {
- input: bool_array([
- // row group 1
- Some(true),
- None,
- Some(true),
- // row group 2
- Some(true),
- Some(false),
- None,
- // row group 3
- None,
- None,
- None,
- ]),
- expected_min: bool_array([Some(true), Some(false), None]),
- expected_max: bool_array([Some(true), Some(true), None]),
- }
- .run()
- }
-
- #[test]
- fn roundtrip_int32() {
- Test {
- input: i32_array([
- // row group 1
- Some(1),
- None,
- Some(3),
- // row group 2
- Some(0),
- Some(5),
- None,
- // row group 3
- None,
- None,
- None,
- ]),
- expected_min: i32_array([Some(1), Some(0), None]),
- expected_max: i32_array([Some(3), Some(5), None]),
- }
- .run()
- }
-
- #[test]
- fn roundtrip_int64() {
- Test {
- input: i64_array([
- // row group 1
- Some(1),
- None,
- Some(3),
- // row group 2
- Some(0),
- Some(5),
- None,
- // row group 3
- None,
- None,
- None,
- ]),
- expected_min: i64_array([Some(1), Some(0), None]),
- expected_max: i64_array(vec![Some(3), Some(5), None]),
- }
- .run()
- }
-
- #[test]
- fn roundtrip_f32() {
- Test {
- input: f32_array([
- // row group 1
- Some(1.0),
- None,
- Some(3.0),
- // row group 2
- Some(-1.0),
- Some(5.0),
- None,
- // row group 3
- None,
- None,
- None,
- ]),
- expected_min: f32_array([Some(1.0), Some(-1.0), None]),
- expected_max: f32_array([Some(3.0), Some(5.0), None]),
- }
- .run()
- }
-
- #[test]
- fn roundtrip_f64() {
- Test {
- input: f64_array([
- // row group 1
- Some(1.0),
- None,
- Some(3.0),
- // row group 2
- Some(-1.0),
- Some(5.0),
- None,
- // row group 3
- None,
- None,
- None,
- ]),
- expected_min: f64_array([Some(1.0), Some(-1.0), None]),
- expected_max: f64_array([Some(3.0), Some(5.0), None]),
- }
- .run()
- }
-
- #[test]
- fn roundtrip_timestamp() {
- Test {
- input: timestamp_seconds_array(
- [
- // row group 1
- Some(1),
- None,
- Some(3),
- // row group 2
- Some(9),
- Some(5),
- None,
- // row group 3
- None,
- None,
- None,
- ],
- None,
- ),
- expected_min: timestamp_seconds_array([Some(1), Some(5), None], None),
- expected_max: timestamp_seconds_array([Some(3), Some(9), None], None),
- }
- .run();
-
- Test {
- input: timestamp_milliseconds_array(
- [
- // row group 1
- Some(1),
- None,
- Some(3),
- // row group 2
- Some(9),
- Some(5),
- None,
- // row group 3
- None,
- None,
- None,
- ],
- None,
- ),
- expected_min: timestamp_milliseconds_array([Some(1), Some(5), None], None),
- expected_max: timestamp_milliseconds_array([Some(3), Some(9), None], None),
- }
- .run();
-
- Test {
- input: timestamp_microseconds_array(
- [
- // row group 1
- Some(1),
- None,
- Some(3),
- // row group 2
- Some(9),
- Some(5),
- None,
- // row group 3
- None,
- None,
- None,
- ],
- None,
- ),
- expected_min: timestamp_microseconds_array([Some(1), Some(5), None], None),
- expected_max: timestamp_microseconds_array([Some(3), Some(9), None], None),
- }
- .run();
-
- Test {
- input: timestamp_nanoseconds_array(
- [
- // row group 1
- Some(1),
- None,
- Some(3),
- // row group 2
- Some(9),
- Some(5),
- None,
- // row group 3
- None,
- None,
- None,
- ],
- None,
- ),
- expected_min: timestamp_nanoseconds_array([Some(1), Some(5), None], None),
- expected_max: timestamp_nanoseconds_array([Some(3), Some(9), None], None),
- }
- .run()
- }
-
- #[test]
- fn roundtrip_timestamp_timezoned() {
- Test {
- input: timestamp_seconds_array(
- [
- // row group 1
- Some(1),
- None,
- Some(3),
- // row group 2
- Some(9),
- Some(5),
- None,
- // row group 3
- None,
- None,
- None,
- ],
- Some("UTC"),
- ),
- expected_min: timestamp_seconds_array([Some(1), Some(5), None], Some("UTC")),
- expected_max: timestamp_seconds_array([Some(3), Some(9), None], Some("UTC")),
- }
- .run();
-
- Test {
- input: timestamp_milliseconds_array(
- [
- // row group 1
- Some(1),
- None,
- Some(3),
- // row group 2
- Some(9),
- Some(5),
- None,
- // row group 3
- None,
- None,
- None,
- ],
- Some("UTC"),
- ),
- expected_min: timestamp_milliseconds_array([Some(1), Some(5), None], Some("UTC")),
- expected_max: timestamp_milliseconds_array([Some(3), Some(9), None], Some("UTC")),
- }
- .run();
-
- Test {
- input: timestamp_microseconds_array(
- [
- // row group 1
- Some(1),
- None,
- Some(3),
- // row group 2
- Some(9),
- Some(5),
- None,
- // row group 3
- None,
- None,
- None,
- ],
- Some("UTC"),
- ),
- expected_min: timestamp_microseconds_array([Some(1), Some(5), None], Some("UTC")),
- expected_max: timestamp_microseconds_array([Some(3), Some(9), None], Some("UTC")),
- }
- .run();
-
- Test {
- input: timestamp_nanoseconds_array(
- [
- // row group 1
- Some(1),
- None,
- Some(3),
- // row group 2
- Some(9),
- Some(5),
- None,
- // row group 3
- None,
- None,
- None,
- ],
- Some("UTC"),
- ),
- expected_min: timestamp_nanoseconds_array([Some(1), Some(5), None], Some("UTC")),
- expected_max: timestamp_nanoseconds_array([Some(3), Some(9), None], Some("UTC")),
- }
- .run()
- }
-
- #[test]
- fn roundtrip_decimal() {
- Test {
- input: Arc::new(
- Decimal128Array::from(vec![
- // row group 1
- Some(100),
- None,
- Some(22000),
- // row group 2
- Some(500000),
- Some(330000),
- None,
- // row group 3
- None,
- None,
- None,
- ])
- .with_precision_and_scale(9, 2)
- .unwrap(),
- ),
- expected_min: Arc::new(
- Decimal128Array::from(vec![Some(100), Some(330000), None])
- .with_precision_and_scale(9, 2)
- .unwrap(),
- ),
- expected_max: Arc::new(
- Decimal128Array::from(vec![Some(22000), Some(500000), None])
- .with_precision_and_scale(9, 2)
- .unwrap(),
- ),
- }
- .run();
-
- Test {
- input: Arc::new(
- Decimal256Array::from(vec![
- // row group 1
- Some(i256::from(100)),
- None,
- Some(i256::from(22000)),
- // row group 2
- Some(i256::MAX),
- Some(i256::MIN),
- None,
- // row group 3
- None,
- None,
- None,
- ])
- .with_precision_and_scale(76, 76)
- .unwrap(),
- ),
- expected_min: Arc::new(
- Decimal256Array::from(vec![Some(i256::from(100)), Some(i256::MIN), None])
- .with_precision_and_scale(76, 76)
- .unwrap(),
- ),
- expected_max: Arc::new(
- Decimal256Array::from(vec![Some(i256::from(22000)), Some(i256::MAX), None])
- .with_precision_and_scale(76, 76)
- .unwrap(),
- ),
- }
- .run()
- }
-
- #[test]
- fn roundtrip_utf8() {
- Test {
- input: utf8_array([
- // row group 1
- Some("A"),
- None,
- Some("Q"),
- // row group 2
- Some("ZZ"),
- Some("AA"),
- None,
- // row group 3
- None,
- None,
- None,
- ]),
- expected_min: utf8_array([Some("A"), Some("AA"), None]),
- expected_max: utf8_array([Some("Q"), Some("ZZ"), None]),
- }
- .run()
- }
-
- #[test]
- fn roundtrip_string_view() {
- Test {
- input: string_view_array([
- // row group 1
- Some("A"),
- None,
- Some("Q"),
- // row group 2
- Some("ZZ"),
- Some("A_longerthan12"),
- None,
- // row group 3
- Some("A_longerthan12"),
- None,
- None,
- ]),
- expected_min: string_view_array([
- Some("A"),
- Some("A_longerthan12"),
- Some("A_longerthan12"),
- ]),
- expected_max: string_view_array([Some("Q"), Some("ZZ"), Some("A_longerthan12")]),
- }
- .run()
- }
-
- #[test]
- fn roundtrip_binary_view() {
- let input: Vec