diff --git a/cpp/src/parquet/printer.cc b/cpp/src/parquet/printer.cc index 3ce3e1da4bb09..730e1e17ab23d 100644 --- a/cpp/src/parquet/printer.cc +++ b/cpp/src/parquet/printer.cc @@ -142,6 +142,15 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list selecte stream << "--- Total Bytes: " << group_metadata->total_byte_size() << " ---\n"; stream << "--- Total Compressed Bytes: " << group_metadata->total_compressed_size() << " ---\n"; + /* Print the sorting columns recorded for this row group, one per line; the whole section is skipped when the list is empty. */ auto sorting_columns = group_metadata->sorting_columns(); + if (!sorting_columns.empty()) { + stream << "--- Sort Columns:\n"; + /* NOTE(review): each element is copied into the by-value loop variable; presumably a small struct, so cheap -- confirm. */ for (auto column : sorting_columns) { + stream << "column_idx: " << column.column_idx + << ", descending: " << column.descending + << ", nulls_first: " << column.nulls_first << "\n"; + } + } stream << "--- Rows: " << group_metadata->num_rows() << " ---\n"; // Print column metadata @@ -285,6 +294,21 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list selected stream << " \"TotalBytes\": \"" << group_metadata->total_byte_size() << "\", "; stream << " \"TotalCompressedBytes\": \"" << group_metadata->total_compressed_size() << "\", "; + /* Emit the optional SortColumns JSON array; the key is omitted entirely when the row group reports no sorting columns. */ auto row_group_sorting_columns = group_metadata->sorting_columns(); + if (!row_group_sorting_columns.empty()) { + stream << " \"SortColumns\": [\n"; + for (size_t i = 0; i < row_group_sorting_columns.size(); i++) { + /* NOTE(review): descending and nulls_first are streamed as-is; presumably bools that render as 0 or 1 unless boolalpha is set on the stream -- confirm. */ stream << " {\"column_idx\": " << row_group_sorting_columns[i].column_idx + << ", \"descending\": " << row_group_sorting_columns[i].descending + << ", \"nulls_first\": " << row_group_sorting_columns[i].nulls_first + << "}"; + /* Separate elements with a comma, but emit none after the last one. */ if (i + 1 != row_group_sorting_columns.size()) { + stream << ","; + } + stream << '\n'; + } + /* Close the array; the trailing comma separates it from the Rows member written next. */ stream << " ], "; + } stream << " \"Rows\": \"" << group_metadata->num_rows() << "\",\n"; // Print column metadata diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index 688c875b9ec0f..62a971799c2db 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -1180,6 
+1180,16 @@ TEST_F(TestJSONWithLocalFile, JSONOutputFLBA) { EXPECT_THAT(json_content, testing::HasSubstr(json_contains)); } +TEST_F(TestJSONWithLocalFile, JSONOutputSortColumns) { /* Expects the content returned by ReadFromLocalFile for sort_columns.parquet to contain a SortColumns array with two entries, whose flag fields appear as 1 or 0. */ + std::string json_content = ReadFromLocalFile("sort_columns.parquet"); + + std::string json_contains = R"###("SortColumns": [ + {"column_idx": 0, "descending": 1, "nulls_first": 1}, + {"column_idx": 1, "descending": 0, "nulls_first": 0} + ])###"; + EXPECT_THAT(json_content, testing::HasSubstr(json_contains)); +} + // GH-44101: Test that JSON output is valid JSON TEST_F(TestJSONWithLocalFile, ValidJsonOutput) { auto check_json_valid = [](std::string_view json_string) -> ::arrow::Status { @@ -1195,8 +1205,11 @@ TEST_F(TestJSONWithLocalFile, ValidJsonOutput) { }; std::vector check_file_lists = { "data_index_bloom_encoding_with_length.parquet", - "data_index_bloom_encoding_stats.parquet", "alltypes_tiny_pages_plain.parquet", - "concatenated_gzip_members.parquet", "nulls.snappy.parquet"}; + "data_index_bloom_encoding_stats.parquet", + "alltypes_tiny_pages_plain.parquet", + "concatenated_gzip_members.parquet", + "nulls.snappy.parquet", + /* Also run the JSON validity check on the file used by the JSONOutputSortColumns test. */ "sort_columns.parquet"}; for (const auto& file : check_file_lists) { std::string json_content = ReadFromLocalFile(file); ASSERT_OK(check_json_valid(json_content))