Skip to content

Commit

Permalink
ORC-1489: Assign a writer id to CUDF
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

This PR assigns a writer ID to CUDF.

### Why are the changes needed?

This helps identify which writer produced a specific ORC file, and it also lets readers apply writer-specific handling when reading files created by different writers.

### How was this patch tested?

Added unit tests.

Closes #1594 from guiyanakuang/ORC-1489.

Lead-authored-by: zhangyiqun <zhangyiqun@huya.com>
Co-authored-by: Yiqun Zhang <guiyanakuang@gmail.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
(cherry picked from commit 5d163d2)
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
  • Loading branch information
2 people authored and dongjoon-hyun committed Nov 5, 2023
1 parent b3016d7 commit 488184b
Show file tree
Hide file tree
Showing 9 changed files with 23 additions and 1 deletion.
1 change: 1 addition & 0 deletions c++/include/orc/Common.hh
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ namespace orc {
PRESTO_WRITER = 2,
SCRITCHLEY_GO = 3,
TRINO_WRITER = 4,
CUDF_WRITER = 5,
UNKNOWN_WRITER = INT32_MAX
};

Expand Down
2 changes: 2 additions & 0 deletions c++/src/Common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ namespace orc {
return "Scritchley Go";
case TRINO_WRITER:
return "Trino";
case CUDF_WRITER:
return "CUDF";
default: {
std::ostringstream buffer;
buffer << "Unknown(" << id << ")";
Expand Down
2 changes: 1 addition & 1 deletion c++/src/Reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ namespace orc {
WriterId ReaderImpl::getWriterId() const {
if (footer->has_writer()) {
uint32_t id = footer->writer();
if (id > WriterId::TRINO_WRITER) {
if (id > WriterId::CUDF_WRITER) {
return WriterId::UNKNOWN_WRITER;
} else {
return static_cast<WriterId>(id);
Expand Down
4 changes: 4 additions & 0 deletions java/core/src/java/org/apache/orc/OrcFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ public enum WriterImplementation {
PRESTO(2), // Presto writer
SCRITCHLEY_GO(3), // Go writer from https://github.com/scritchley/orc
TRINO(4), // Trino writer
CUDF(5), // CUDF writer
UNKNOWN(Integer.MAX_VALUE);

private final int id;
Expand Down Expand Up @@ -189,6 +190,9 @@ public enum WriterVersion {
// Trino Writer
TRINO_ORIGINAL(WriterImplementation.TRINO, 6),

// CUDF Writer
CUDF_ORIGINAL(WriterImplementation.CUDF, 6),

// Don't use any magic numbers here except for the below:
FUTURE(WriterImplementation.UNKNOWN, Integer.MAX_VALUE); // a version from a future writer

Expand Down
3 changes: 3 additions & 0 deletions java/core/src/java/org/apache/orc/OrcUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,9 @@ public static String getSoftwareVersion(int writer,
case 4:
base = "Trino";
break;
case 5:
base = "CUDF";
break;
default:
base = String.format("Unknown(%d)", writer);
break;
Expand Down
6 changes: 6 additions & 0 deletions java/core/src/test/org/apache/orc/TestVectorOrcFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -3595,6 +3595,8 @@ public void testWriterVersion(Version fileFormat) throws Exception {
OrcFile.WriterImplementation.from(2));
assertEquals(OrcFile.WriterImplementation.TRINO,
OrcFile.WriterImplementation.from(4));
assertEquals(OrcFile.WriterImplementation.CUDF,
OrcFile.WriterImplementation.from(5));
assertEquals(OrcFile.WriterImplementation.UNKNOWN,
OrcFile.WriterImplementation.from(99));

Expand All @@ -3613,6 +3615,8 @@ public void testWriterVersion(Version fileFormat) throws Exception {
OrcFile.WriterVersion.from(OrcFile.WriterImplementation.PRESTO, 6));
assertEquals(OrcFile.WriterVersion.TRINO_ORIGINAL,
OrcFile.WriterVersion.from(OrcFile.WriterImplementation.TRINO, 6));
assertEquals(OrcFile.WriterVersion.CUDF_ORIGINAL,
OrcFile.WriterVersion.from(OrcFile.WriterImplementation.CUDF, 6));
assertEquals(OrcFile.WriterVersion.FUTURE,
OrcFile.WriterVersion.from(OrcFile.WriterImplementation.UNKNOWN, 0));

Expand All @@ -3631,6 +3635,8 @@ public void testWriterVersion(Version fileFormat) throws Exception {
OrcFile.WriterVersion.PRESTO_ORIGINAL));
assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
OrcFile.WriterVersion.TRINO_ORIGINAL));
assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
OrcFile.WriterVersion.CUDF_ORIGINAL));
}

@ParameterizedTest
Expand Down
4 changes: 4 additions & 0 deletions proto/orc_proto.proto
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@ message Footer {
// 2 = Presto
// 3 = Scritchley Go from https://github.com/scritchley/orc
// 4 = Trino
// 5 = CUDF
optional uint32 writer = 9;

// information about the encryption in this file
Expand Down Expand Up @@ -432,6 +433,9 @@ message PostScript {
// Version of the Trino writer:
// 6 = original
//
// Version of the CUDF writer:
// 6 = original
//
optional uint32 writerVersion = 6;

// the number of bytes in the encrypted stripe statistics
Expand Down
1 change: 1 addition & 0 deletions site/specification/ORCv1.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ message Footer {
// 2 = Presto
// 3 = Scritchley Go from https://github.com/scritchley/orc
// 4 = Trino
// 5 = CUDF
optional uint32 writer = 9;
// information about the encryption in this file
optional Encryption encryption = 10;
Expand Down
1 change: 1 addition & 0 deletions site/specification/ORCv2.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ message Footer {
// 2 = Presto
// 3 = Scritchley Go from https://github.com/scritchley/orc
// 4 = Trino
// 5 = CUDF
optional uint32 writer = 9;
// information about the encryption in this file
optional Encryption encryption = 10;
Expand Down

0 comments on commit 488184b

Please sign in to comment.