Skip to content

Commit

Permalink
[GR-43906] Avoid hitting Java array length limit during UTF-8 conversion in PolyglotContextImpl#printResult.
Browse files Browse the repository at this point in the history

PullRequest: graal/15698
  • Loading branch information
djoooooe committed Oct 4, 2023
2 parents 1ae3532 + b9ed90d commit e4e0203
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ public final void writeDouble(double value) {
*/
public final void writeUTF(String string) throws IllegalArgumentException {
int len = string.length();
int utfLen = 0;
long utfLen = 0;
int c;
int count = 0;

Expand All @@ -220,12 +220,12 @@ public final void writeUTF(String string) throws IllegalArgumentException {
int headerSize;
if (utfLen > MAX_SHORT_LENGTH) {
headerSize = Integer.BYTES;
ensureBufferSize(headerSize, utfLen);
ensureBufferSize(headerSize, (int) utfLen);
tempDecodingBuffer[count++] = (byte) ((LARGE_STRING_TAG | (utfLen >>> 24)) & 0xff);
tempDecodingBuffer[count++] = (byte) ((utfLen >>> 16) & 0xFF);
} else {
headerSize = Short.BYTES;
ensureBufferSize(headerSize, utfLen);
ensureBufferSize(headerSize, (int) utfLen);
}
tempDecodingBuffer[count++] = (byte) ((utfLen >>> 8) & 0xFF);
tempDecodingBuffer[count++] = (byte) (utfLen & 0xFF);
Expand All @@ -252,7 +252,7 @@ public final void writeUTF(String string) throws IllegalArgumentException {
tempDecodingBuffer[count++] = (byte) (0x80 | (c & 0x3F));
}
}
write(tempDecodingBuffer, 0, headerSize + utfLen);
write(tempDecodingBuffer, 0, (int) (headerSize + utfLen));
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
*
* Subject to the condition set forth below, permission is hereby granted to any
* person obtaining a copy of this software, associated documentation and/or
* data (collectively the "Software"), free of charge and under any and all
* copyright rights in the Software, and any and all patent rights owned or
* freely licensable by each licensor hereunder covering either (i) the
* unmodified Software as contributed to or provided by such licensor, or (ii)
* the Larger Works (as defined below), to deal in both
*
* (a) the Software, and
*
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
* one is included with the Software each a "Larger Work" to which the Software
* is contributed by such licensors),
*
* without restriction, including without limitation the rights to copy, create
* derivative works of, display, perform, and distribute the Software and make,
* use, sell, offer for sale, import, export, have made, and have sold the
* Software and the Larger Work(s), and to sublicense the foregoing rights on
* either these or other terms.
*
* This license is subject to the following condition:
*
* The above copyright notice and either this complete permission notice or at a
* minimum a reference to the UPL must be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.oracle.truffle.api.test.polyglot;

import static com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import static com.oracle.truffle.api.TruffleLanguage.Registration;
import static com.oracle.truffle.api.test.common.AbstractExecutableTestLanguage.evalTestLanguage;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.Source;
import org.junit.Assert;
import org.junit.Test;

import com.oracle.truffle.api.nodes.RootNode;
import com.oracle.truffle.api.test.common.AbstractExecutableTestLanguage;
import com.oracle.truffle.api.test.common.TestUtils;

/**
 * Regression test for GR-43906: evaluating an interactive source whose result is a large string
 * must write the complete UTF-8 encoding of that string, followed by the line separator, to the
 * context's output stream.
 */
public class GR43906Test {
    /** The single character the test language repeats (U+200B, multi-byte in UTF-8). */
    private static final String CHAR = "\u200b";
    /** UTF-8 encoding of one {@link #CHAR}; used for byte-by-byte verification. */
    private static final byte[] UTF_8 = CHAR.getBytes(StandardCharsets.UTF_8);
    /** Number of characters verified at once by the bulk comparison. */
    public static final int CHUNK_SIZE = 10_000;
    /** UTF-8 encoding of {@link #CHUNK_SIZE} repetitions of {@link #CHAR}. */
    private static final byte[] UTF_8_CHUNK = CHAR.repeat(CHUNK_SIZE).getBytes(StandardCharsets.UTF_8);
    /** Number of characters in the string returned by the test language. */
    private static final int COUNT = 1_000_000;

    /** Test language whose execution result is {@link #CHAR} repeated {@link #COUNT} times. */
    @Registration
    static class LargeStringTestLanguage extends AbstractExecutableTestLanguage {

        @TruffleBoundary
        @Override
        protected Object execute(RootNode node, Env env, Object[] contextArguments, Object[] frameArguments) throws Exception {
            return CHAR.repeat(COUNT);
        }
    }

    /**
     * Output stream that stores nothing and instead verifies every byte it receives: first
     * {@link #COUNT} encoded repetitions of {@link #CHAR}, then the platform line separator.
     */
    private static class DummyOutputStream extends OutputStream {
        /**
         * Position inside the unit currently being verified: a byte offset into {@link #UTF_8}
         * while characters are expected, afterwards a char offset into the line separator.
         */
        private int i = 0;
        /** Number of complete characters verified so far. */
        private int length = 0;

        /**
         * Verifies a single byte against the expected output.
         *
         * @param b the byte to verify; only its low eight bits are significant
         */
        @Override
        public void write(int b) {
            // OutputStream#write(int) defines only the low eight bits of its argument, so narrow
            // before comparing. This accepts both the unsigned (0..255) and the sign-extended
            // representation of the same byte.
            byte actual = (byte) b;
            if (length >= COUNT) {
                // all characters were seen; whatever follows has to be the line separator
                Assert.assertEquals(System.getProperty("line.separator").charAt(i++), actual);
            } else {
                Assert.assertEquals(UTF_8[i++], actual);
                if (i == UTF_8.length) {
                    // one full character verified
                    i = 0;
                    length++;
                }
            }
        }

        /**
         * Verifies {@code len} bytes of {@code b} starting at {@code off}. Whole chunks are
         * compared in bulk; everything else is delegated to {@link #write(int)}.
         */
        @Override
        public void write(byte[] b, int off, int len) {
            int j = 0;
            // finish a character that a previous call left partially verified
            while (j < len && i != 0) {
                write(b[off + j++]);
            }
            // fast path: compare CHUNK_SIZE characters at a time
            while (j + UTF_8_CHUNK.length < len) {
                Assert.assertTrue(Arrays.equals(b, j + off, j + off + UTF_8_CHUNK.length, UTF_8_CHUNK, 0, UTF_8_CHUNK.length));
                length += CHUNK_SIZE;
                j += UTF_8_CHUNK.length;
            }
            // remaining bytes (a trailing partial chunk and/or the line separator)
            while (j < len) {
                write(b[off + j++]);
            }
        }
    }

    /**
     * Evaluates the test language with an interactive source and checks that exactly
     * {@link #COUNT} characters reached the context's output stream. The content itself is
     * verified by {@link DummyOutputStream} while it is being written.
     */
    @Test
    public void testPrintLargeString() throws IOException {
        DummyOutputStream out = new DummyOutputStream();
        try (Context context = Context.newBuilder().out(out).build()) {
            evalTestLanguage(context, LargeStringTestLanguage.class, Source.newBuilder(TestUtils.getDefaultLanguageId(LargeStringTestLanguage.class), "", "").interactive(true).build());
        }
        Assert.assertEquals(COUNT, out.length);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -1722,7 +1722,18 @@ static void printResult(PolyglotLanguageContext languageContext, Object result)
}
try {
OutputStream out = languageContext.context.config.out;
out.write(stringResult.getBytes(StandardCharsets.UTF_8));
int lastEndPos = 0;
// avoid hitting the java array length limit during conversion to UTF-8 by printing in
// chunks
while (lastEndPos < stringResult.length()) {
int endPos = (int) Math.min(stringResult.length(), ((long) lastEndPos) + (Integer.MAX_VALUE / 4));
if (endPos < stringResult.length() && Character.isHighSurrogate(stringResult.charAt(endPos - 1)) && Character.isLowSurrogate(stringResult.charAt(endPos))) {
// don't split in the middle of surrogate pairs
endPos++;
}
out.write(stringResult.substring(lastEndPos, endPos).getBytes(StandardCharsets.UTF_8));
lastEndPos = endPos;
}
out.write(System.getProperty("line.separator").getBytes(StandardCharsets.UTF_8));
} catch (IOException ioex) {
// out stream has problems.
Expand Down

0 comments on commit e4e0203

Please sign in to comment.