Skip to content

Commit

Permalink
Pass empty vectors as min/max for all null pages when building Column…
Browse files Browse the repository at this point in the history
…Index (#6316)

* pass empty vecs as min/max for all null pages

* add test

* add some comments to test
  • Loading branch information
etseidl authored Aug 31, 2024
1 parent 6785170 commit 1336973
Showing 1 changed file with 28 additions and 2 deletions.
30 changes: 28 additions & 2 deletions parquet/src/column/writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -756,8 +756,8 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
if null_page && self.column_index_builder.valid() {
self.column_index_builder.append(
null_page,
vec![0; 1],
vec![0; 1],
vec![],
vec![],
self.page_metrics.num_page_nulls as i64,
);
} else if self.column_index_builder.valid() {
Expand Down Expand Up @@ -2668,6 +2668,32 @@ mod tests {
),);
}

#[test]
fn test_column_index_with_null_pages() {
    // Emit exactly one page whose four entries are all null: no values are
    // supplied and every definition level is 0.
    let props = Default::default();
    let page_writer = get_test_page_writer();
    let mut writer = get_test_column_writer::<Int32Type>(page_writer, 1, 0, props);
    writer.write_batch(&[], Some(&[0, 0, 0, 0]), None).unwrap();

    let close_result = writer.close().unwrap();
    let column_index = close_result
        .column_index
        .expect("a column index should be produced for an all-null page");

    // Page 0 must be flagged as a null page.
    assert!(column_index.null_pages[0]);

    // A null page carries empty byte arrays for both min and max.
    assert!(column_index.min_values[0].is_empty());
    assert!(column_index.max_values[0].is_empty());

    // The null count must be recorded, and page 0 holds 4 nulls.
    let null_counts = column_index
        .null_counts
        .as_ref()
        .expect("null_counts should be defined");
    assert_eq!(null_counts[0], 4);

    // The column has no repetition, so no repetition level histogram is expected.
    assert!(column_index.repetition_level_histograms.is_none());

    // The definition level histogram must exist and read 0:4, 1:0.
    let def_level_histograms = column_index
        .definition_level_histograms
        .expect("definition_level_histograms should be present");
    assert_eq!(def_level_histograms, &[4, 0]);
}

#[test]
fn test_column_offset_index_metadata() {
// write data
Expand Down

0 comments on commit 1336973

Please sign in to comment.