Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Should not read past end for CBOR string values #518

Merged
merged 1 commit into from
Oct 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2281,18 +2281,14 @@ protected void _finishToken() throws IOException
}
return;
}
// 29-Jan-2021, tatu: as per [dataformats-binary#238] must keep in mind that
// the longest individual unit is 4 bytes (surrogate pair) so we
// actually need len+3 bytes to avoid bounds checks
// 18-Jan-2024, tatu: For malicious input / Fuzzers, need to worry about overflow
// like Integer.MAX_VALUE
final int needed = Math.max(len, len + 3);
final int available = _inputEnd - _inputPtr;

if ((available >= needed)
if ((available >= len)
// if not, could we read? NOTE: we do not require it, just attempt to read
|| ((_inputBuffer.length >= needed)
&& _tryToLoadToHaveAtLeast(needed))) {
|| ((_inputBuffer.length >= len)
&& _tryToLoadToHaveAtLeast(len))) {
_finishShortText(len);
return;
}
Expand Down Expand Up @@ -2326,22 +2322,18 @@ protected String _finishTextToken(int ch) throws IOException
_finishChunkedText();
return _textBuffer.contentsAsString();
}
// 29-Jan-2021, tatu: as per [dataformats-binary#238] must keep in mind that
// the longest individual unit is 4 bytes (surrogate pair) so we
// actually need len+3 bytes to avoid bounds checks

// 19-Mar-2021, tatu: [dataformats-binary#259] shows the case where length
// we get is Integer.MAX_VALUE, leading to overflow. Could change values
// to longs but simpler to truncate "needed" (will never pass following test
// due to inputBuffer never being even close to that big).

final int needed = Math.max(len + 3, len);
final int available = _inputEnd - _inputPtr;

if ((available >= needed)
if ((available >= len)
// if not, could we read? NOTE: we do not require it, just attempt to read
|| ((_inputBuffer.length >= needed)
&& _tryToLoadToHaveAtLeast(needed))) {
|| ((_inputBuffer.length >= len)
&& _tryToLoadToHaveAtLeast(len))) {
return _finishShortText(len);
}
// If not enough space, need handling similar to chunked
Expand Down Expand Up @@ -2369,7 +2361,7 @@ private final String _finishShortText(int len) throws IOException
final byte[] inputBuf = _inputBuffer;

// Let's actually do a tight loop for ASCII first:
final int end = inPtr + len;
final int end = _inputPtr;

int i;
while ((i = inputBuf[inPtr]) >= 0) {
Expand All @@ -2386,44 +2378,50 @@ private final String _finishShortText(int len) throws IOException
final int[] codes = UTF8_UNIT_CODES;
do {
i = inputBuf[inPtr++] & 0xFF;
switch (codes[i]) {
case 0:
break;
case 1:
{
final int c2 = inputBuf[inPtr++];
if ((c2 & 0xC0) != 0x080) {
_reportInvalidOther(c2 & 0xFF, inPtr);
}
i = ((i & 0x1F) << 6) | (c2 & 0x3F);
int code = codes[i];
if (code != 0) {
// 05-Jul-2021, tatu: As per [dataformats-binary#289] need to
// be careful wrt end-of-buffer truncated codepoints
if ((inPtr + code) > end) {
final int firstCharOffset = len - (end - inPtr) - 1;
_reportTruncatedUTF8InString(len, firstCharOffset, i, code);
}
break;
case 2:
{
final int c2 = inputBuf[inPtr++];
if ((c2 & 0xC0) != 0x080) {
_reportInvalidOther(c2 & 0xFF, inPtr);

switch (code) {
case 1: {
final int c2 = inputBuf[inPtr++];
if ((c2 & 0xC0) != 0x080) {
_reportInvalidOther(c2 & 0xFF, inPtr);
}
i = ((i & 0x1F) << 6) | (c2 & 0x3F);
}
final int c3 = inputBuf[inPtr++];
if ((c3 & 0xC0) != 0x080) {
_reportInvalidOther(c3 & 0xFF, inPtr);
break;
case 2: {
final int c2 = inputBuf[inPtr++];
if ((c2 & 0xC0) != 0x080) {
_reportInvalidOther(c2 & 0xFF, inPtr);
}
final int c3 = inputBuf[inPtr++];
if ((c3 & 0xC0) != 0x080) {
_reportInvalidOther(c3 & 0xFF, inPtr);
}
i = ((i & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
}
i = ((i & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
break;
case 3:
// 30-Jan-2021, tatu: TODO - validate these too?
i = ((i & 0x07) << 18)
| ((inputBuf[inPtr++] & 0x3F) << 12)
| ((inputBuf[inPtr++] & 0x3F) << 6)
| (inputBuf[inPtr++] & 0x3F);
// note: this is the codepoint value; need to split, too
i -= 0x10000;
outBuf[outPtr++] = (char) (0xD800 | (i >> 10));
i = 0xDC00 | (i & 0x3FF);
break;
default: // invalid
_reportInvalidInitial(i);
}
break;
case 3:
// 30-Jan-2021, tatu: TODO - validate these too?
i = ((i & 0x07) << 18)
| ((inputBuf[inPtr++] & 0x3F) << 12)
| ((inputBuf[inPtr++] & 0x3F) << 6)
| (inputBuf[inPtr++] & 0x3F);
// note: this is the codepoint value; need to split, too
i -= 0x10000;
outBuf[outPtr++] = (char) (0xD800 | (i >> 10));
i = 0xDC00 | (i & 0x3FF);
break;
default: // invalid
_reportInvalidInitial(i);
}
outBuf[outPtr++] = (char) i;
} while (inPtr < end);
Expand Down Expand Up @@ -3850,18 +3848,16 @@ protected void _reportIncompleteBinaryRead(int expLen, int actLen) throws IOExce
expLen, actLen), _currToken);
}

// @since 2.13
/*
// @since 2.19
private String _reportTruncatedUTF8InString(int strLenBytes, int truncatedCharOffset,
int firstUTFByteValue, int bytesExpected)
throws IOException
{
throw _constructError(String.format(
"Truncated UTF-8 character in Chunked Unicode String value (%d bytes): "
"Truncated UTF-8 character in Unicode String value (%d bytes): "
+"byte 0x%02X at offset #%d indicated %d more bytes needed",
strLenBytes, firstUTFByteValue, truncatedCharOffset, bytesExpected));
}
*/

// @since 2.13
private String _reportTruncatedUTF8InName(int strLenBytes, int truncatedCharOffset,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public void testInvalidTextValueWithBrokenUTF8() throws Exception
p.getText();
fail("Should not pass");
} catch (StreamReadException e) {
verifyException(e, "Malformed UTF-8 character at the end of a");
verifyException(e, "Truncated UTF-8 character in Unicode String value (256 bytes)");
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import com.fasterxml.jackson.dataformat.cbor.CBORParser;
import com.fasterxml.jackson.dataformat.cbor.CBORTestBase;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class ParseInvalidUTF8String236Test extends CBORTestBase
{
// [dataformats-binary#236]: Original version; broken UTF-8 all around.
Expand All @@ -24,8 +27,8 @@ public void testShortString236Original() throws Exception
}

// Variant where the length would be valid, but the last byte is partial UTF-8
// code point
public void testShortString236EndsWithPartialUTF8() throws Exception
// code point and no more bytes are available due to end-of-stream
public void testShortString236EndsWithPartialUTF8AtEndOfStream() throws Exception
{
final byte[] input = {0x63, 0x41, 0x2d, (byte) 0xda};
try (CBORParser p = cborParser(input)) {
Expand All @@ -34,7 +37,23 @@ public void testShortString236EndsWithPartialUTF8() throws Exception
String str = p.getText();
fail("Should have failed, did not, String = '"+str+"'");
} catch (StreamReadException e) {
verifyException(e, "Malformed UTF-8 character at the end of");
verifyException(e, "Truncated UTF-8 character in Unicode String value (3 bytes)");
}
}
}

// Variant where the length would be valid, but the last byte is partial UTF-8
// code point and the subsequent byte would be a valid continuation byte, but belongs to next data item
public void testShortString236EndsWithPartialUTF8() throws Exception
{
final byte[] input = {0x62, 0x33, (byte) 0xdb, (byte) 0xa0};
try (CBORParser p = cborParser(input)) {
assertToken(JsonToken.VALUE_STRING, p.nextToken());
try {
String str = p.getText();
fail("Should have failed, did not, String = '"+str+"'");
} catch (StreamReadException e) {
verifyException(e, "Truncated UTF-8 character in Unicode String value (2 bytes)");
}
}
}
Expand Down