Skip to content

Commit

Permalink
add pruned parquet reading test
Browse files Browse the repository at this point in the history
  • Loading branch information
Ye Yuan committed Aug 17, 2024
1 parent b767af5 commit f9bb8cf
Showing 1 changed file with 68 additions and 11 deletions.
79 changes: 68 additions & 11 deletions parquet_derive_test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,12 @@ struct APartiallyCompleteRecord {
// If these fields are guaranteed to be valid
// we can load this struct into APartiallyCompleteRecord
#[derive(PartialEq, ParquetRecordWriter, Debug)]
struct APartiallyOptionalRecord {
struct AnOptionalRecord {
pub bool: bool,
pub string: String,
pub maybe_i16: Option<i16>,
pub maybe_i32: Option<i32>,
pub maybe_u64: Option<u64>,
pub i16: Option<i16>,
pub i32: Option<i32>,
pub u64: Option<u64>,
pub isize: isize,
pub float: f32,
pub double: f64,
Expand All @@ -85,6 +85,22 @@ struct APartiallyOptionalRecord {
pub byte_vec: Vec<u8>,
}

// This struct removes several fields from the "APartiallyCompleteRecord",
// and it sorts the field in another order.
// we can load this struct into APartiallyCompleteRecord
#[derive(PartialEq, ParquetRecordReader, Debug)]
struct APrunedRecord {
pub bool: bool,
pub string: String,
pub byte_vec: Vec<u8>,
pub float: f32,
pub double: f64,
pub i16: i16,
pub i32: i32,
pub u64: u64,
pub isize: isize,
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -240,12 +256,12 @@ mod tests {
#[test]
fn test_parquet_derive_read_optional_but_valid_column() {
let file = get_temp_file("test_parquet_derive_read_optional", &[]);
let drs: Vec<APartiallyOptionalRecord> = vec![APartiallyOptionalRecord {
let drs = vec![AnOptionalRecord {
bool: true,
string: "a string".into(),
maybe_i16: Some(-45),
maybe_i32: Some(456),
maybe_u64: Some(4563424),
i16: Some(-45),
i32: Some(456),
u64: Some(4563424),
isize: -365,
float: 3.5,
double: f64::NAN,
Expand Down Expand Up @@ -273,9 +289,50 @@ mod tests {
let mut row_group = reader.get_row_group(0).unwrap();
out.read_from_row_group(&mut *row_group, 1).unwrap();

assert_eq!(drs[0].maybe_i16.unwrap(), out[0].i16);
assert_eq!(drs[0].maybe_i32.unwrap(), out[0].i32);
assert_eq!(drs[0].maybe_u64.unwrap(), out[0].u64);
assert_eq!(drs[0].i16.unwrap(), out[0].i16);
assert_eq!(drs[0].i32.unwrap(), out[0].i32);
assert_eq!(drs[0].u64.unwrap(), out[0].u64);
}

#[test]
fn test_parquet_derive_read_pruned_and_reordered_columns() {
let file = get_temp_file("test_parquet_derive_read_pruned", &[]);
let drs = vec![APartiallyCompleteRecord {
bool: true,
string: "a string".into(),
i16: -45,
i32: 456,
u64: 4563424,
isize: -365,
float: 3.5,
double: f64::NAN,
now: chrono::Utc::now().naive_local(),
date: chrono::naive::NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
uuid: uuid::Uuid::new_v4(),
byte_vec: vec![0x65, 0x66, 0x67],
}];

let generated_schema = drs.as_slice().schema().unwrap();

let props = Default::default();
let mut writer =
SerializedFileWriter::new(file.try_clone().unwrap(), generated_schema, props).unwrap();

let mut row_group = writer.next_row_group().unwrap();
drs.as_slice().write_to_row_group(&mut row_group).unwrap();
row_group.close().unwrap();
writer.close().unwrap();

use parquet::file::{reader::FileReader, serialized_reader::SerializedFileReader};
let reader = SerializedFileReader::new(file).unwrap();
let mut out: Vec<APrunedRecord> = Vec::new();

let mut row_group = reader.get_row_group(0).unwrap();
out.read_from_row_group(&mut *row_group, 1).unwrap();

assert_eq!(drs[0].i16, out[0].i16);
assert_eq!(drs[0].i32, out[0].i32);
assert_eq!(drs[0].u64, out[0].u64);
}

/// Returns file handle for a temp file in 'target' directory with a provided content
Expand Down

0 comments on commit f9bb8cf

Please sign in to comment.