From f9bb8cf6d021442cd0adbd59133df262a08f6699 Mon Sep 17 00:00:00 2001
From: Ye Yuan
Date: Sun, 18 Aug 2024 00:28:49 +0800
Subject: [PATCH] add pruned parquet reading test

---
 parquet_derive_test/src/lib.rs | 79 +++++++++++++++++++++++++++++-----
 1 file changed, 68 insertions(+), 11 deletions(-)

diff --git a/parquet_derive_test/src/lib.rs b/parquet_derive_test/src/lib.rs
index e7c7896cb7f3..04c4c8fe9ee7 100644
--- a/parquet_derive_test/src/lib.rs
+++ b/parquet_derive_test/src/lib.rs
@@ -70,12 +70,12 @@ struct APartiallyCompleteRecord {
 // If these fields are guaranteed to be valid
 // we can load this struct into APartiallyCompleteRecord
 #[derive(PartialEq, ParquetRecordWriter, Debug)]
-struct APartiallyOptionalRecord {
+struct AnOptionalRecord {
     pub bool: bool,
     pub string: String,
-    pub maybe_i16: Option<i16>,
-    pub maybe_i32: Option<i32>,
-    pub maybe_u64: Option<u64>,
+    pub i16: Option<i16>,
+    pub i32: Option<i32>,
+    pub u64: Option<u64>,
     pub isize: isize,
     pub float: f32,
     pub double: f64,
@@ -85,6 +85,22 @@ struct APartiallyOptionalRecord {
     pub byte_vec: Vec<u8>,
 }
 
+// This struct drops several fields of "APartiallyCompleteRecord"
+// and declares the remaining ones in a different order.
+// Data written from APartiallyCompleteRecord can still be loaded into this struct
+#[derive(PartialEq, ParquetRecordReader, Debug)]
+struct APrunedRecord {
+    pub bool: bool,
+    pub string: String,
+    pub byte_vec: Vec<u8>,
+    pub float: f32,
+    pub double: f64,
+    pub i16: i16,
+    pub i32: i32,
+    pub u64: u64,
+    pub isize: isize,
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -240,12 +256,12 @@ mod tests {
     #[test]
     fn test_parquet_derive_read_optional_but_valid_column() {
         let file = get_temp_file("test_parquet_derive_read_optional", &[]);
-        let drs: Vec<APartiallyOptionalRecord> = vec![APartiallyOptionalRecord {
+        let drs = vec![AnOptionalRecord {
             bool: true,
             string: "a string".into(),
-            maybe_i16: Some(-45),
-            maybe_i32: Some(456),
-            maybe_u64: Some(4563424),
+            i16: Some(-45),
+            i32: Some(456),
+            u64: Some(4563424),
             isize: -365,
             float: 3.5,
             double: f64::NAN,
@@ -273,9 +289,50 @@ mod tests {
         let mut row_group = reader.get_row_group(0).unwrap();
         out.read_from_row_group(&mut *row_group, 1).unwrap();
 
-        assert_eq!(drs[0].maybe_i16.unwrap(), out[0].i16);
-        assert_eq!(drs[0].maybe_i32.unwrap(), out[0].i32);
-        assert_eq!(drs[0].maybe_u64.unwrap(), out[0].u64);
+        assert_eq!(drs[0].i16.unwrap(), out[0].i16);
+        assert_eq!(drs[0].i32.unwrap(), out[0].i32);
+        assert_eq!(drs[0].u64.unwrap(), out[0].u64);
+    }
+
+    #[test]
+    fn test_parquet_derive_read_pruned_and_reordered_columns() {
+        let file = get_temp_file("test_parquet_derive_read_pruned", &[]);
+        let drs = vec![APartiallyCompleteRecord {
+            bool: true,
+            string: "a string".into(),
+            i16: -45,
+            i32: 456,
+            u64: 4563424,
+            isize: -365,
+            float: 3.5,
+            double: f64::NAN,
+            now: chrono::Utc::now().naive_local(),
+            date: chrono::naive::NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
+            uuid: uuid::Uuid::new_v4(),
+            byte_vec: vec![0x65, 0x66, 0x67],
+        }];
+
+        let generated_schema = drs.as_slice().schema().unwrap();
+
+        let props = Default::default();
+        let mut writer =
+            SerializedFileWriter::new(file.try_clone().unwrap(), generated_schema, props).unwrap();
+
+        let mut row_group = writer.next_row_group().unwrap();
+        drs.as_slice().write_to_row_group(&mut row_group).unwrap();
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        use parquet::file::{reader::FileReader, serialized_reader::SerializedFileReader};
+        let reader = SerializedFileReader::new(file).unwrap();
+        let mut out: Vec<APrunedRecord> = Vec::new();
+
+        let mut row_group = reader.get_row_group(0).unwrap();
+        out.read_from_row_group(&mut *row_group, 1).unwrap();
+
+        assert_eq!(drs[0].i16, out[0].i16);
+        assert_eq!(drs[0].i32, out[0].i32);
+        assert_eq!(drs[0].u64, out[0].u64);
     }
 
     /// Returns file handle for a temp file in 'target' directory with a provided content