ORC-1570: Add supportVectoredIO API to HadoopShimsCurrent and use it
### What changes were proposed in this pull request?

This PR aims to improve `HadoopShimsCurrent` by adding `supportVectoredIO`.

### Why are the changes needed?

Hadoop Vectored IO has been available since Apache Hadoop 3.3.5 via HADOOP-18103.
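
For context, here is a minimal, hypothetical sketch of the vectored-read API that HADOOP-18103 adds to `PositionedReadable`/`FSDataInputStream`; it is not part of this commit, and the file path, offsets, and class name are illustrative only. It assumes Hadoop 3.3.5+ on the classpath.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileRange;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;

/** Hypothetical demo of Hadoop's vectored read API (HADOOP-18103). */
public class VectoredReadSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    // Two illustrative byte ranges; a real ORC reader would derive ranges from stripe metadata.
    List<FileRange> ranges = Arrays.asList(
        FileRange.createFileRange(0, 4096),
        FileRange.createFileRange(1 << 20, 8192));
    try (FSDataInputStream in = fs.open(new Path("/tmp/example.orc"))) {
      // One call submits all ranges; the file system may coalesce or parallelize them.
      in.readVectored(ranges, ByteBuffer::allocate);
      for (FileRange r : ranges) {
        // Each range completes independently via a CompletableFuture<ByteBuffer>.
        ByteBuffer data = r.getData().get();
        System.out.println("read " + data.remaining() + " bytes at offset " + r.getOffset());
      }
    }
  }
}
```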

### How was this patch tested?

Pass the CIs.

Closes #1725 from dongjoon-hyun/ORC-1570.

Authored-by: Dongjoon Hyun <dongjoon@apache.org>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
dongjoon-hyun committed Jan 4, 2024
1 parent e8374e3 commit 68f4a8b
Showing 2 changed files with 12 additions and 1 deletion.
@@ -48,6 +48,7 @@
  */
 public class RecordReaderUtils {
   private static final HadoopShims SHIMS = HadoopShimsFactory.get();
+  private static final boolean supportVectoredIO = SHIMS.supportVectoredIO();
   private static final Logger LOG = LoggerFactory.getLogger(RecordReaderUtils.class);
 
   private static class DefaultDataReader implements DataReader {
@@ -107,7 +108,7 @@ public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws I
     public BufferChunkList readFileData(BufferChunkList range,
                                         boolean doForceDirect
                                        ) throws IOException {
-      if (zcr == null) {
+      if (supportVectoredIO && zcr == null) {
         RecordReaderUtils.readDiskRangesVectored(file, range, doForceDirect);
       } else {
         RecordReaderUtils.readDiskRanges(file, zcr, range, doForceDirect,
10 changes: 10 additions & 0 deletions java/shims/src/java/org/apache/orc/impl/HadoopShims.java
@@ -20,6 +20,7 @@
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.util.VersionInfo;
 import org.apache.orc.EncryptionAlgorithm;
 
 import java.io.Closeable;
@@ -131,6 +132,15 @@ ByteBuffer readBuffer(int maxLength,
    */
   boolean endVariableLengthBlock(OutputStream output) throws IOException;
 
+  default boolean supportVectoredIO() {
+    // HADOOP-18103 is available since Apache Hadoop 3.3.5+
+    String[] versionParts = VersionInfo.getVersion().split("[.]");
+    int major = Integer.parseInt(versionParts[0]);
+    int minor = Integer.parseInt(versionParts[1]);
+    int patch = Integer.parseInt(versionParts[2]);
+    return major == 3 && (minor > 3 || (minor == 3 && patch > 4));
+  }
+
   /**
    * The known KeyProviders for column encryption.
    * These are identical to OrcProto.KeyProviderKind.
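
To make the version gate above concrete, here is a hypothetical standalone check (not part of the commit) that applies the same parsing rule to a few sample Hadoop version strings:

```java
/** Hypothetical check mirroring the supportVectoredIO() version gate. */
public class VersionGateCheck {
  // Hadoop 3.x with minor > 3, or 3.3.x with patch > 4; anything else is rejected.
  static boolean supportsVectoredIO(String version) {
    String[] parts = version.split("[.]");
    int major = Integer.parseInt(parts[0]);
    int minor = Integer.parseInt(parts[1]);
    int patch = Integer.parseInt(parts[2]);
    return major == 3 && (minor > 3 || (minor == 3 && patch > 4));
  }

  public static void main(String[] args) {
    System.out.println(supportsVectoredIO("3.3.4"));  // false
    System.out.println(supportsVectoredIO("3.3.5"));  // true
    System.out.println(supportsVectoredIO("3.3.6"));  // true
    System.out.println(supportsVectoredIO("3.4.0"));  // true
  }
}
```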
