Skip to content

Commit

Permalink
ORC-1528: Fix readBytes potential overflow in RecordReaderUtils.Chunk…
Browse files Browse the repository at this point in the history
…Reader#create

- I have adjusted the calculation of `reqBytes` and `readBytes` ranges in the `org.apache.orc.impl.RecordReaderUtils.ChunkReader#create` method from `Integer` to `Long`, and added validation for `Integer` overflow

- Adds more test cases

This PR aims to fix [ORC-1528](https://issues.apache.org/jira/browse/ORC-1528)

org.apache.orc.impl.TestRecordReaderUtils#testBufferChunkOffsetExceedsMaxInt

Closes #1662 from yebukong/main.

Lead-authored-by: yebukong <yebukong@qq.com>
Co-authored-by: yebukong <yebukong@live.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
(cherry picked from commit e554765)
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
  • Loading branch information
2 people authored and dongjoon-hyun committed Nov 27, 2023
1 parent c3604ae commit 3600346
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 7 deletions.
5 changes: 5 additions & 0 deletions java/core/src/java/org/apache/orc/impl/IOUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ public final class IOUtils {

public static final int DEFAULT_BUFFER_SIZE = 8192;

/**
* The maximum size of array to allocate, value being the same as {@link java.util.Hashtable}
*/
public static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;

/**
* Returns a new byte array of size {@link #DEFAULT_BUFFER_SIZE}.
*
Expand Down
21 changes: 14 additions & 7 deletions java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -760,18 +760,18 @@ void readRanges(FSDataInputStream file, boolean allocateDirect, double extraByte
}

static ChunkReader create(BufferChunk from, BufferChunk to) {
long f = Integer.MAX_VALUE;
long e = Integer.MIN_VALUE;
long f = Long.MAX_VALUE;
long e = Long.MIN_VALUE;

long cf = Integer.MAX_VALUE;
long ef = Integer.MIN_VALUE;
int reqBytes = 0;
long cf = Long.MAX_VALUE;
long ef = Long.MIN_VALUE;
long reqBytes = 0L;

BufferChunk current = from;
while (current != to.next) {
f = Math.min(f, current.getOffset());
e = Math.max(e, current.getEnd());
if (ef == Integer.MIN_VALUE || current.getOffset() <= ef) {
if (ef == Long.MIN_VALUE || current.getOffset() <= ef) {
cf = Math.min(cf, current.getOffset());
ef = Math.max(ef, current.getEnd());
} else {
Expand All @@ -782,7 +782,14 @@ static ChunkReader create(BufferChunk from, BufferChunk to) {
current = (BufferChunk) current.next;
}
reqBytes += ef - cf;
return new ChunkReader(from, to, (int) (e - f), reqBytes);
if (reqBytes > IOUtils.MAX_ARRAY_SIZE) {
throw new IllegalArgumentException("invalid reqBytes value " + reqBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE);
}
long readBytes = e - f;
if (readBytes > IOUtils.MAX_ARRAY_SIZE) {
throw new IllegalArgumentException("invalid readBytes value " + readBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE);
}
return new ChunkReader(from, to, (int) readBytes, (int) reqBytes);
}

static ChunkReader create(BufferChunk from, int minSeekSize) {
Expand Down
68 changes: 68 additions & 0 deletions java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,14 @@
import org.junit.jupiter.api.Test;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.assertThrowsExactly;

class TestRecordReaderUtils {

Expand Down Expand Up @@ -140,6 +143,71 @@ public void testExtraBytesReadWithinThreshold() {
assertEquals(chunkReader.getReadBytes(), chunkReader.getFrom().getData().array().length);
}

@Test
public void testChunkReaderCreateOffsetExceedsMaxInt() {
List<long[]> mockData = Arrays.asList(
new long[]{15032282586L, 15032298848L},
new long[]{15032298848L, 15032299844L},
new long[]{15032299844L, 15032377804L},
new long[]{15058260587L, 15058261632L},
new long[]{15058261632L, 15058288409L},
new long[]{15058288409L, 15058288862L},
new long[]{15058339730L, 15058340775L},
new long[]{15058340775L, 15058342439L},
new long[]{15058449794L, 15058449982L},
new long[]{15058449982L, 15058451700L},
new long[]{15058451700L, 15058451749L},
new long[]{15058484358L, 15058484422L},
new long[]{15058484422L, 15058484862L},
new long[]{15058484862L, 15058484878L}
);
TestOrcLargeStripe.RangeBuilder rangeBuilder = new TestOrcLargeStripe.RangeBuilder();
mockData.forEach(e -> rangeBuilder.range(e[0], (int) (e[1] - e[0])));
BufferChunkList rangeList = rangeBuilder.build();

RecordReaderUtils.ChunkReader chunkReader = RecordReaderUtils.ChunkReader.create(rangeList.get(), 134217728);
long readBytes = mockData.get(mockData.size() - 1)[1] - mockData.get(0)[0];
long reqBytes = mockData.stream().mapToLong(e -> (int) (e[1] - e[0])).sum();
assertEquals(chunkReader.getReadBytes(), readBytes);
assertEquals(chunkReader.getReqBytes(), reqBytes);
}

@Test
public void testChunkReaderCreateReqBytesAndReadBytesValidation() {
BufferChunkList rangeList = new TestOrcLargeStripe.RangeBuilder()
.range(0, IOUtils.MAX_ARRAY_SIZE)
.range(1L + IOUtils.MAX_ARRAY_SIZE, IOUtils.MAX_ARRAY_SIZE + 1)
.range(2L * IOUtils.MAX_ARRAY_SIZE, IOUtils.MAX_ARRAY_SIZE - 4)
.range(3L * IOUtils.MAX_ARRAY_SIZE, 2)
.build();

// reqBytes,readBytes boundary value
RecordReaderUtils.ChunkReader chunkReader = RecordReaderUtils.ChunkReader.create(rangeList.get(0), 0);
assertEquals(chunkReader.getReadBytes(), IOUtils.MAX_ARRAY_SIZE);
assertEquals(chunkReader.getReqBytes(), IOUtils.MAX_ARRAY_SIZE);

// reqBytes > IOUtils.MAX_ARRAY_SIZE validation
assertThrowsExactly(IllegalArgumentException.class,
() -> RecordReaderUtils.ChunkReader.create(rangeList.get(1), 0),
() -> String.format("invalid reqBytes value %d,out of bounds %d",
rangeList.get(1).getLength(), IOUtils.MAX_ARRAY_SIZE)
);

// readBytes > IOUtils.MAX_ARRAY_SIZE validation
assertThrowsExactly(IllegalArgumentException.class,
() -> RecordReaderUtils.ChunkReader.create(rangeList.get(2), 100),
() -> String.format("invalid readBytes value %d,out of bounds %d",
rangeList.get(3).getEnd() - rangeList.get(2).getOffset(), IOUtils.MAX_ARRAY_SIZE)
);
}

private static byte[] byteBufferToArray(ByteBuffer buf) {
byte[] resultArray = new byte[buf.remaining()];
ByteBuffer buffer = buf.slice();
buffer.get(resultArray);
return resultArray;
}

private ByteBuffer makeByteBuffer(int length, long offset) {
byte[] readBytes = new byte[length];
for (int i = 0; i < readBytes.length; i++) {
Expand Down

0 comments on commit 3600346

Please sign in to comment.