diff --git a/java/core/src/java/org/apache/orc/impl/IOUtils.java b/java/core/src/java/org/apache/orc/impl/IOUtils.java index ff13bdaec3..e2554f0083 100644 --- a/java/core/src/java/org/apache/orc/impl/IOUtils.java +++ b/java/core/src/java/org/apache/orc/impl/IOUtils.java @@ -30,6 +30,11 @@ public final class IOUtils { public static final int DEFAULT_BUFFER_SIZE = 8192; + /** + * The maximum size of array to allocate, value being the same as {@link java.util.Hashtable} + */ + public static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8; + /** * Returns a new byte array of size {@link #DEFAULT_BUFFER_SIZE}. * diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java index ede844276d..141cc7aa12 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java @@ -760,18 +760,18 @@ void readRanges(FSDataInputStream file, boolean allocateDirect, double extraByte } static ChunkReader create(BufferChunk from, BufferChunk to) { - long f = Integer.MAX_VALUE; - long e = Integer.MIN_VALUE; + long f = Long.MAX_VALUE; + long e = Long.MIN_VALUE; - long cf = Integer.MAX_VALUE; - long ef = Integer.MIN_VALUE; - int reqBytes = 0; + long cf = Long.MAX_VALUE; + long ef = Long.MIN_VALUE; + long reqBytes = 0L; BufferChunk current = from; while (current != to.next) { f = Math.min(f, current.getOffset()); e = Math.max(e, current.getEnd()); - if (ef == Integer.MIN_VALUE || current.getOffset() <= ef) { + if (ef == Long.MIN_VALUE || current.getOffset() <= ef) { cf = Math.min(cf, current.getOffset()); ef = Math.max(ef, current.getEnd()); } else { @@ -782,7 +782,14 @@ static ChunkReader create(BufferChunk from, BufferChunk to) { current = (BufferChunk) current.next; } reqBytes += ef - cf; - return new ChunkReader(from, to, (int) (e - f), reqBytes); + if (reqBytes > IOUtils.MAX_ARRAY_SIZE) { + throw new IllegalArgumentException("invalid reqBytes value " + reqBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE); + } + long readBytes = e - f; + if (readBytes > IOUtils.MAX_ARRAY_SIZE) { + throw new IllegalArgumentException("invalid readBytes value " + readBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE); + } + return new ChunkReader(from, to, (int) readBytes, (int) reqBytes); } static ChunkReader create(BufferChunk from, int minSeekSize) { diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java index 08890b37d5..617fc740e6 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java @@ -21,10 +21,13 @@ import org.junit.jupiter.api.Test; import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; import java.util.Objects; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertThrowsExactly; import static org.junit.jupiter.api.Assertions.assertTrue; class TestRecordReaderUtils { @@ -140,6 +143,71 @@ public void testExtraBytesReadWithinThreshold() { assertEquals(chunkReader.getReadBytes(), chunkReader.getFrom().getData().array().length); } + @Test + public void testChunkReaderCreateOffsetExceedsMaxInt() { + List mockData = Arrays.asList( + new long[]{15032282586L, 15032298848L}, + new long[]{15032298848L, 15032299844L}, + new long[]{15032299844L, 15032377804L}, + new long[]{15058260587L, 15058261632L}, + new long[]{15058261632L, 15058288409L}, + new long[]{15058288409L, 15058288862L}, + new long[]{15058339730L, 15058340775L}, + new long[]{15058340775L, 15058342439L}, + new long[]{15058449794L, 15058449982L}, + new long[]{15058449982L, 15058451700L}, + new long[]{15058451700L, 15058451749L}, + new long[]{15058484358L, 15058484422L}, + new long[]{15058484422L, 15058484862L}, + new long[]{15058484862L, 15058484878L} + ); + TestOrcLargeStripe.RangeBuilder rangeBuilder = new TestOrcLargeStripe.RangeBuilder(); + mockData.forEach(e -> rangeBuilder.range(e[0], (int) (e[1] - e[0]))); + BufferChunkList rangeList = rangeBuilder.build(); + + RecordReaderUtils.ChunkReader chunkReader = RecordReaderUtils.ChunkReader.create(rangeList.get(), 134217728); + long readBytes = mockData.get(mockData.size() - 1)[1] - mockData.get(0)[0]; + long reqBytes = mockData.stream().mapToLong(e -> (int) (e[1] - e[0])).sum(); + assertEquals(chunkReader.getReadBytes(), readBytes); + assertEquals(chunkReader.getReqBytes(), reqBytes); + } + + @Test + public void testChunkReaderCreateReqBytesAndReadBytesValidation() { + BufferChunkList rangeList = new TestOrcLargeStripe.RangeBuilder() + .range(0, IOUtils.MAX_ARRAY_SIZE) + .range(1L + IOUtils.MAX_ARRAY_SIZE, IOUtils.MAX_ARRAY_SIZE + 1) + .range(2L * IOUtils.MAX_ARRAY_SIZE, IOUtils.MAX_ARRAY_SIZE - 4) + .range(3L * IOUtils.MAX_ARRAY_SIZE, 2) + .build(); + + // reqBytes,readBytes boundary value + RecordReaderUtils.ChunkReader chunkReader = RecordReaderUtils.ChunkReader.create(rangeList.get(0), 0); + assertEquals(chunkReader.getReadBytes(), IOUtils.MAX_ARRAY_SIZE); + assertEquals(chunkReader.getReqBytes(), IOUtils.MAX_ARRAY_SIZE); + + // reqBytes > IOUtils.MAX_ARRAY_SIZE validation + assertThrowsExactly(IllegalArgumentException.class, + () -> RecordReaderUtils.ChunkReader.create(rangeList.get(1), 0), + () -> String.format("invalid reqBytes value %d,out of bounds %d", + rangeList.get(1).getLength(), IOUtils.MAX_ARRAY_SIZE) + ); + + // readBytes > IOUtils.MAX_ARRAY_SIZE validation + assertThrowsExactly(IllegalArgumentException.class, + () -> RecordReaderUtils.ChunkReader.create(rangeList.get(2), 100), + () -> String.format("invalid readBytes value %d,out of bounds %d", + rangeList.get(3).getEnd() - rangeList.get(2).getOffset(), IOUtils.MAX_ARRAY_SIZE) + ); + } + + private static byte[] byteBufferToArray(ByteBuffer buf) { + byte[] resultArray = new byte[buf.remaining()]; + ByteBuffer buffer = buf.slice(); + buffer.get(resultArray); + return resultArray; + } + private ByteBuffer makeByteBuffer(int length, long offset) { byte[] readBytes = new byte[length]; for (int i = 0; i < readBytes.length; i++) {