Skip to content

Commit

Permalink
Update data checks in test
Browse files Browse the repository at this point in the history
  • Loading branch information
adamreeve committed Dec 19, 2024
1 parent 8243d38 commit 42695b4
Showing 1 changed file with 46 additions and 12 deletions.
58 changes: 46 additions & 12 deletions parquet/src/arrow/arrow_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1737,7 +1737,7 @@ mod tests {
assert_eq!(file_metadata.schema_descr().num_columns(), 8);
assert_eq!(
file_metadata.created_by().unwrap(),
"parquet-cpp-arrow version 14.0.0-SNAPSHOT"
"parquet-cpp-arrow version 19.0.0-SNAPSHOT"
);

metadata.metadata.row_groups().iter().for_each(|rg| {
Expand All @@ -1746,7 +1746,6 @@ mod tests {
assert_eq!(rg.total_byte_size(), 4172);
});

// todo: decrypting data
let decryption_properties = Some(
ciphers::FileDecryptionProperties::builder()
.with_footer_key(key_code.to_vec())
Expand All @@ -1758,24 +1757,59 @@ mod tests {
decryption_properties.as_ref(),
)
.unwrap();
// todo check contents

let mut row_count = 0;
for batch in record_reader {
let batch = batch.unwrap();
row_count += batch.num_rows();
let f32_col = batch.column(0).as_primitive::<Float32Type>();
let f64_col = batch.column(1).as_primitive::<Float64Type>();

// This file contains floats from a standard normal distribution
for &x in f32_col.values() {
assert!(x > -10.0);
assert!(x < 10.0);
let bool_col = batch.column(0).as_boolean();
let time_col = batch
.column(1)
.as_primitive::<types::Time32MillisecondType>();
let list_col = batch.column(2).as_list::<i32>();
let timestamp_col = batch
.column(3)
.as_primitive::<types::TimestampNanosecondType>();
let f32_col = batch.column(4).as_primitive::<types::Float32Type>();
let f64_col = batch.column(5).as_primitive::<types::Float64Type>();
let binary_col = batch.column(6).as_binary::<i32>();
let fixed_size_binary_col = batch.column(7).as_fixed_size_binary();

for (i, x) in bool_col.iter().enumerate() {
assert_eq!(x.unwrap(), i % 2 == 0);
}
for &x in f64_col.values() {
assert!(x > -10.0);
assert!(x < 10.0);
for (i, x) in time_col.iter().enumerate() {
assert_eq!(x.unwrap(), i as i32);
}
for (i, list_item) in list_col.iter().enumerate() {
let list_item = list_item.unwrap();
let list_item = list_item.as_primitive::<types::Int64Type>();
assert_eq!(list_item.len(), 2);
assert_eq!(list_item.value(0), ((i * 2) * 1000000000000) as i64);
assert_eq!(list_item.value(1), ((i * 2 + 1) * 1000000000000) as i64);
}
for x in timestamp_col.iter() {
assert!(x.is_some());
}
for (i, x) in f32_col.iter().enumerate() {
assert_eq!(x.unwrap(), i as f32 * 1.1f32);
}
for (i, x) in f64_col.iter().enumerate() {
assert_eq!(x.unwrap(), i as f64 * 1.1111111f64);
}
for (i, x) in binary_col.iter().enumerate() {
assert_eq!(x.is_some(), i % 2 == 0);
if let Some(x) = x {
assert_eq!(&x[0..7], b"parquet");
}
}
for (i, x) in fixed_size_binary_col.iter().enumerate() {
assert_eq!(x.unwrap(), &[i as u8; 10]);
}
}

assert_eq!(row_count, file_metadata.num_rows() as usize);
}

#[test]
Expand Down

0 comments on commit 42695b4

Please sign in to comment.