From e116c3cf7e365244c728e75fe3ee612d3d96e673 Mon Sep 17 00:00:00 2001
From: mwish
Date: Wed, 7 Aug 2024 15:30:05 +0800
Subject: [PATCH] Parquet: print row group sorting columns in metadata output

ParquetFilePrinter::DebugPrint and ParquetFilePrinter::JSONPrint now list
each row group's sorting columns (column_idx, descending, nulls_first)
whenever sorting_columns() returns a non-empty list.

In the JSON output every entry is written as a bare object (no quote in
front of the opening brace) so that the "SortColumns" array is valid JSON.

---
 cpp/src/parquet/printer.cc | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/cpp/src/parquet/printer.cc b/cpp/src/parquet/printer.cc
index 33df5925a1cf1..d250e4bb6411f 100644
--- a/cpp/src/parquet/printer.cc
+++ b/cpp/src/parquet/printer.cc
@@ -128,6 +128,16 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list selecte
     stream << "--- Total Bytes: " << group_metadata->total_byte_size() << " ---\n";
     stream << "--- Total Compressed Bytes: " << group_metadata->total_compressed_size()
            << " ---\n";
+    // Only print the section when the row group reports sorting columns.
+    const auto sorting_columns = group_metadata->sorting_columns();
+    if (!sorting_columns.empty()) {
+      stream << "--- Sort Columns:\n";
+      for (const auto& column : sorting_columns) {
+        stream << "column_idx: " << column.column_idx
+               << ", descending: " << column.descending
+               << ", nulls_first: " << column.nulls_first << "\n";
+      }
+    }
     stream << "--- Rows: " << group_metadata->num_rows() << " ---\n";
 
     // Print column metadata
@@ -267,6 +277,21 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list selected
     stream << " \"TotalBytes\": \"" << group_metadata->total_byte_size() << "\", ";
     stream << " \"TotalCompressedBytes\": \"" << group_metadata->total_compressed_size()
            << "\", ";
+    // Each entry is a bare JSON object; the key is skipped when the list is empty.
+    const auto row_group_sorting_columns = group_metadata->sorting_columns();
+    if (!row_group_sorting_columns.empty()) {
+      stream << " \"SortColumns\": [";
+      for (size_t i = 0; i < row_group_sorting_columns.size(); i++) {
+        stream << "{\"column_idx\":" << row_group_sorting_columns[i].column_idx
+               << ", \"descending\":" << row_group_sorting_columns[i].descending
+               << ", \"nulls_first\": " << row_group_sorting_columns[i].nulls_first
+               << "}";
+        if (i + 1 != row_group_sorting_columns.size()) {
+          stream << ", ";
+        }
+      }
+      stream << "], ";
+    }
     stream << " \"Rows\": \"" << group_metadata->num_rows() << "\",\n";
 
     // Print column metadata