Skip to content

Commit

Permalink
ORC-1482: Adaptation to read ORC files created by CUDF
Browse files Browse the repository at this point in the history
This PR adapts the reader to handle ORC files created by CUDF, which may be missing statistics for their DOUBLE/FLOAT columns.

Official ORC readers can't read CUDF-created ORC files properly.

Added a unit test.

Closes #1598 from guiyanakuang/ORC-1482-to-1.8.

Authored-by: Yiqun Zhang <guiyanakuang@gmail.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
  • Loading branch information
guiyanakuang authored and dongjoon-hyun committed Nov 5, 2023
1 parent 03df130 commit b3016d7
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 2 deletions.
12 changes: 10 additions & 2 deletions java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -650,8 +650,8 @@ static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto,
" include ORC-517. Writer version: {}",
predicate.getColumnName(), writerVersion);
return TruthValue.YES_NO_NULL;
} else if (category == TypeDescription.Category.DOUBLE
|| category == TypeDescription.Category.FLOAT) {
} else if ((category == TypeDescription.Category.DOUBLE ||
category == TypeDescription.Category.FLOAT) && cs instanceof DoubleColumnStatistics) {
DoubleColumnStatistics dstas = (DoubleColumnStatistics) cs;
if (Double.isNaN(dstas.getSum())) {
LOG.debug("Not using predication pushdown on {} because stats contain NaN values",
Expand Down Expand Up @@ -1654,4 +1654,12 @@ public CompressionCodec getCompressionCodec() {
/**
 * Returns the value held in this reader's {@code maxDiskRangeChunkLimit} field.
 * @return the current maxDiskRangeChunkLimit.
 */
public int getMaxDiskRangeChunkLimit() {
  return this.maxDiskRangeChunkLimit;
}

/**
 * Exposes this record reader's SargApplier; intended for use by tests only.
 * @return the sargApp held by this record reader.
 */
SargApplier getSargApp() {
  return this.sargApp;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.apache.orc.CompressionCodec;
import org.apache.orc.CompressionKind;
import org.apache.orc.DataReader;
import org.apache.orc.DoubleColumnStatistics;
import org.apache.orc.OrcConf;
import org.apache.orc.OrcFile;
import org.apache.orc.OrcProto;
Expand Down
Binary file not shown.

0 comments on commit b3016d7

Please sign in to comment.