Skip to content

Commit

Permalink
support reading pruned parquet
Browse files Browse the repository at this point in the history
  • Loading branch information
Ye Yuan committed Aug 17, 2024
1 parent 27789d7 commit b767af5
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions parquet_derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ pub fn parquet_record_reader(input: proc_macro::TokenStream) -> proc_macro::Toke
let field_names: Vec<_> = fields.iter().map(|f| f.ident.clone()).collect();
let reader_snippets: Vec<proc_macro2::TokenStream> =
field_infos.iter().map(|x| x.reader_snippet()).collect();
let i: Vec<_> = (0..reader_snippets.len()).collect();

let derived_for = input.ident;
let generics = input.generics;
Expand All @@ -206,6 +205,13 @@ pub fn parquet_record_reader(input: proc_macro::TokenStream) -> proc_macro::Toke

let mut row_group_reader = row_group_reader;

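// Build a map from each column's name to its index in the row group's schema,
// so that struct fields can be matched to columns by name rather than by position.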
let mut name_to_index = std::collections::HashMap::new();
for (idx, col) in row_group_reader.metadata().schema_descr().columns().iter().enumerate() {
// println!("col {} name {:?}", idx, col.name());
name_to_index.insert(col.name().to_string(), idx);
}

for _ in 0..num_records {
self.push(#derived_for {
#(
Expand All @@ -218,7 +224,9 @@ pub fn parquet_record_reader(input: proc_macro::TokenStream) -> proc_macro::Toke

#(
{
if let Ok(mut column_reader) = row_group_reader.get_column_reader(#i) {
let idx = name_to_index.get(stringify!(#field_names)).unwrap_or_else(
|| panic!("column name '{}' is not found in parquet file!", stringify!(#field_names)));
if let Ok(mut column_reader) = row_group_reader.get_column_reader(idx.clone()) {
#reader_snippets
} else {
return Err(::parquet::errors::ParquetError::General("Failed to get next column".into()))
Expand Down

0 comments on commit b767af5

Please sign in to comment.