From f9bb8cf6d021442cd0adbd59133df262a08f6699 Mon Sep 17 00:00:00 2001
From: Ye Yuan <yuanye_ptr@qq.com>
Date: Sun, 18 Aug 2024 00:28:49 +0800
Subject: [PATCH] add pruned parquet reading test

---
 parquet_derive_test/src/lib.rs | 79 +++++++++++++++++++++++++++++-----
 1 file changed, 68 insertions(+), 11 deletions(-)
diff --git a/parquet_derive_test/src/lib.rs b/parquet_derive_test/src/lib.rs
index e7c7896cb7f3..04c4c8fe9ee7 100644
--- a/parquet_derive_test/src/lib.rs
+++ b/parquet_derive_test/src/lib.rs
@@ -70,12 +70,12 @@ struct APartiallyCompleteRecord {
 // If these fields are guaranteed to be valid
 // we can load this struct into APartiallyCompleteRecord
 #[derive(PartialEq, ParquetRecordWriter, Debug)]
-struct APartiallyOptionalRecord {
+struct AnOptionalRecord {
     pub bool: bool,
     pub string: String,
-    pub maybe_i16: Option<i16>,
-    pub maybe_i32: Option<i32>,
-    pub maybe_u64: Option<u64>,
+    pub i16: Option<i16>,
+    pub i32: Option<i32>,
+    pub u64: Option<u64>,
     pub isize: isize,
     pub float: f32,
     pub double: f64,
@@ -85,6 +85,22 @@ struct APartiallyOptionalRecord {
     pub byte_vec: Vec<u8>,
 }
 
+// This struct removes several fields from the "APartiallyCompleteRecord",
+// and it sorts the field in another order.
+// we can load this struct into APartiallyCompleteRecord
+#[derive(PartialEq, ParquetRecordReader, Debug)]
+struct APrunedRecord {
+    pub bool: bool,
+    pub string: String,
+    pub byte_vec: Vec<u8>,
+    pub float: f32,
+    pub double: f64,
+    pub i16: i16,
+    pub i32: i32,
+    pub u64: u64,
+    pub isize: isize,
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -240,12 +256,12 @@ mod tests {
     #[test]
     fn test_parquet_derive_read_optional_but_valid_column() {
         let file = get_temp_file("test_parquet_derive_read_optional", &[]);
-        let drs: Vec<APartiallyOptionalRecord> = vec![APartiallyOptionalRecord {
+        let drs = vec![AnOptionalRecord {
             bool: true,
             string: "a string".into(),
-            maybe_i16: Some(-45),
-            maybe_i32: Some(456),
-            maybe_u64: Some(4563424),
+            i16: Some(-45),
+            i32: Some(456),
+            u64: Some(4563424),
             isize: -365,
             float: 3.5,
             double: f64::NAN,
@@ -273,9 +289,50 @@ mod tests {
         let mut row_group = reader.get_row_group(0).unwrap();
         out.read_from_row_group(&mut *row_group, 1).unwrap();
 
-        assert_eq!(drs[0].maybe_i16.unwrap(), out[0].i16);
-        assert_eq!(drs[0].maybe_i32.unwrap(), out[0].i32);
-        assert_eq!(drs[0].maybe_u64.unwrap(), out[0].u64);
+        assert_eq!(drs[0].i16.unwrap(), out[0].i16);
+        assert_eq!(drs[0].i32.unwrap(), out[0].i32);
+        assert_eq!(drs[0].u64.unwrap(), out[0].u64);
+    }
+
+    #[test]
+    fn test_parquet_derive_read_pruned_and_reordered_columns() {
+        let file = get_temp_file("test_parquet_derive_read_pruned", &[]);
+        let drs = vec![APartiallyCompleteRecord {
+            bool: true,
+            string: "a string".into(),
+            i16: -45,
+            i32: 456,
+            u64: 4563424,
+            isize: -365,
+            float: 3.5,
+            double: f64::NAN,
+            now: chrono::Utc::now().naive_local(),
+            date: chrono::naive::NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
+            uuid: uuid::Uuid::new_v4(),
+            byte_vec: vec![0x65, 0x66, 0x67],
+        }];
+
+        let generated_schema = drs.as_slice().schema().unwrap();
+
+        let props = Default::default();
+        let mut writer =
+            SerializedFileWriter::new(file.try_clone().unwrap(), generated_schema, props).unwrap();
+
+        let mut row_group = writer.next_row_group().unwrap();
+        drs.as_slice().write_to_row_group(&mut row_group).unwrap();
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        use parquet::file::{reader::FileReader, serialized_reader::SerializedFileReader};
+        let reader = SerializedFileReader::new(file).unwrap();
+        let mut out: Vec<APrunedRecord> = Vec::new();
+
+        let mut row_group = reader.get_row_group(0).unwrap();
+        out.read_from_row_group(&mut *row_group, 1).unwrap();
+
+        assert_eq!(drs[0].i16, out[0].i16);
+        assert_eq!(drs[0].i32, out[0].i32);
+        assert_eq!(drs[0].u64, out[0].u64);
     }
 
     /// Returns file handle for a temp file in 'target' directory with a provided content