Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Swar 20250120 #3305

Merged
merged 12 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions benchmark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,6 @@
</properties>

<dependencies>
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2-codegen</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2-extension</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.alibaba.fastjson2.benchmark;

import com.alibaba.fastjson2.JSONException;
import com.alibaba.fastjson2.benchmark.eishay.EishayParseBinaryArrayMapping;
import com.alibaba.fastjson2.util.JDKUtils;
import org.apache.commons.io.IOUtils;
Expand All @@ -11,68 +12,110 @@
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.io.InputStream;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.util.concurrent.TimeUnit;

import static com.alibaba.fastjson2.util.JDKUtils.ARRAY_BYTE_BASE_OFFSET;
import static com.alibaba.fastjson2.util.JDKUtils.UNSAFE;

public class BytesAsciiCheck {
static byte[] bytes;
static char[] chars;
static String str;
static final MethodHandle INDEX_OF_CHAR;

static {
MethodHandle indexOfChar = null;
try {
try {
Class<?> cStringLatin1 = Class.forName("java.lang.StringLatin1");
MethodHandles.Lookup lookup = JDKUtils.trustedLookup(cStringLatin1);
indexOfChar = lookup.findStatic(
cStringLatin1,
"indexOfChar",
MethodType.methodType(int.class, byte[].class, int.class, int.class, int.class));
} catch (Throwable ignored) {
// ignore
}
} catch (Exception e) {
e.printStackTrace();
}

INDEX_OF_CHAR = indexOfChar;
try {
InputStream is = EishayParseBinaryArrayMapping.class.getClassLoader().getResourceAsStream("data/eishay.json");
String str = IOUtils.toString(is, "UTF-8");
str = IOUtils.toString(is, "UTF-8");
bytes = str.getBytes();
chars = str.toCharArray();
} catch (Exception e) {
e.printStackTrace();
}
}

@Benchmark
// @Benchmark
/**
 * Benchmark body: calls {@code JDKUtils.METHOD_HANDLE_HAS_NEGATIVE} over the whole shared
 * {@code bytes} fixture (offset 0, length {@code bytes.length}) and hands the result to the
 * blackhole.
 *
 * @param bh JMH blackhole that receives the result
 * @throws Throwable whatever the invoked handle throws
 */
public void handler(Blackhole bh) throws Throwable {
bh.consume(
// NOTE(review): presumably a java.lang.invoke.MethodHandle, in which case the argument and
// result types at this call site form the invocation signature - confirm before reshaping it.
JDKUtils.METHOD_HANDLE_HAS_NEGATIVE.invoke(bytes, 0, bytes.length)
);
}

@Benchmark
// @Benchmark
/**
 * Benchmark body: evaluates {@code JDKUtils.PREDICATE_IS_ASCII} against the shared
 * {@code bytes} fixture and passes the outcome to the blackhole.
 *
 * @param bh JMH blackhole that receives the result
 */
public void lambda(Blackhole bh) throws Throwable {
    final boolean outcome = JDKUtils.PREDICATE_IS_ASCII.test(bytes);
    bh.consume(outcome);
}

@Benchmark
// @Benchmark
/**
 * Benchmark body: runs the plain-loop {@code hasNegatives} scan over the entire shared
 * {@code bytes} fixture and passes the outcome to the blackhole.
 *
 * @param bh JMH blackhole that receives the result
 */
public void direct(Blackhole bh) throws Throwable {
    final int length = bytes.length;
    final boolean negativeFound = hasNegatives(bytes, 0, length);
    bh.consume(negativeFound);
}

// @Benchmark
/**
 * Benchmark body: runs {@code IOUtils.isASCII} over the entire shared {@code bytes} fixture
 * and passes the outcome to the blackhole.
 *
 * @param bh JMH blackhole that receives the result
 */
public void isASCII(Blackhole bh) throws Throwable {
    final int length = bytes.length;
    final boolean ascii = com.alibaba.fastjson2.util.IOUtils.isASCII(bytes, 0, length);
    bh.consume(ascii);
}

@Benchmark
public void direct8(Blackhole bh) throws Throwable {
bh.consume(hasNegatives_8(bytes, 0, bytes.length));
/**
 * Benchmark body: runs {@code IOUtils.isLatin1} over the entire shared {@code chars} fixture
 * and passes the outcome to the blackhole.
 *
 * @param bh JMH blackhole that receives the result
 */
public void isLatin1(Blackhole bh) throws Throwable {
    final int length = chars.length;
    final boolean latin1 = com.alibaba.fastjson2.util.IOUtils.isLatin1(chars, 0, length);
    bh.consume(latin1);
}

public static boolean hasNegatives(byte[] ba, int off, int len) {
for (int i = off; i < off + len; i++) {
if (ba[i] < 0) {
return true;
}
}
return false;
/**
 * Benchmark body: evaluates {@code JDKUtils.PREDICATE_IS_ASCII} against the shared
 * {@code bytes} fixture and passes the outcome to the blackhole.
 *
 * @param bh JMH blackhole that receives the result
 */
@Benchmark
public void isASCIIJDK(Blackhole bh) throws Throwable {
    final boolean outcome = com.alibaba.fastjson2.util.JDKUtils.PREDICATE_IS_ASCII.test(bytes);
    bh.consume(outcome);
}

public static boolean hasNegatives_8(byte[] bytes, int off, int len) {
int i = off;
while (i + 8 <= off + len) {
if ((UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + i) & 0x8080808080808080L) != 0) {
return true;
}
i += 8;
/**
 * Benchmark body: runs {@code IOUtils.indexOfSlash} over the entire shared {@code bytes}
 * fixture and passes the returned index to the blackhole.
 *
 * @param bh JMH blackhole that receives the result
 */
@Benchmark
public void indexOfSlash(Blackhole bh) throws Throwable {
    final int limit = bytes.length;
    final int position = com.alibaba.fastjson2.util.IOUtils.indexOfSlash(bytes, 0, limit);
    bh.consume(position);
}

/**
 * Benchmark body: runs {@code IOUtils.indexOfSlashV} over the entire shared {@code bytes}
 * fixture and passes the returned index to the blackhole.
 *
 * @param bh JMH blackhole that receives the result
 */
@Benchmark
public void indexOfSlashV(Blackhole bh) throws Throwable {
    final int limit = bytes.length;
    final int position = com.alibaba.fastjson2.util.IOUtils.indexOfSlashV(bytes, 0, limit);
    bh.consume(position);
}

/**
 * Benchmark body: searches the entire shared {@code bytes} fixture for a single-quote
 * character through the private {@code indexOfChar} helper and passes the returned index to
 * the blackhole.
 *
 * @param bh JMH blackhole that receives the result
 */
@Benchmark
public void indexOfChar(Blackhole bh) throws Throwable {
    final int limit = bytes.length;
    final int position = indexOfChar(bytes, '\'', 0, limit);
    bh.consume(position);
}

/**
 * Benchmark body: searches the shared {@code str} fixture for a backslash with
 * {@link String#indexOf(int)} and passes the returned index to the blackhole.
 *
 * @param bh JMH blackhole that receives the result
 */
@Benchmark
public void indexOfString(Blackhole bh) throws Throwable {
    final int position = str.indexOf('\\');
    bh.consume(position);
}

/**
 * Calls the {@code INDEX_OF_CHAR} method handle, which the static initializer resolves to
 * {@code java.lang.StringLatin1.indexOfChar(byte[], int, int, int)}.
 *
 * @param bytes array to search
 * @param ch value to look for
 * @param fromIndex third argument forwarded to the handle
 * @param toIndex fourth argument forwarded to the handle
 * @return the int returned by the handle
 * @throws JSONException if the handle could not be resolved or the invocation fails
 */
private static int indexOfChar(byte[] bytes, int ch, int fromIndex, int toIndex) {
    // The static initializer ignores lookup failures, so the handle may legitimately be null.
    if (INDEX_OF_CHAR == null) {
        throw new JSONException("java.lang.StringLatin1.indexOfChar is not available");
    }
    try {
        // invokeExact requires the call-site types to match (byte[], int, int, int) -> int exactly.
        return (int) INDEX_OF_CHAR.invokeExact(bytes, ch, fromIndex, toIndex);
    } catch (Throwable e) {
        // Keep the original failure as the cause instead of discarding it behind an empty message.
        throw new JSONException("indexOfChar invoke error", e);
    }
}

for (; i < off + len; i++) {
if (bytes[i] < 0) {
public static boolean hasNegatives(byte[] ba, int off, int len) {
for (int i = off; i < off + len; i++) {
if (ba[i] < 0) {
return true;
}
}
Expand All @@ -85,6 +128,7 @@ public static void main(String[] args) throws Exception {
.mode(Mode.Throughput)
.timeUnit(TimeUnit.MILLISECONDS)
.warmupIterations(3)
.threads(1)
.forks(1)
.build();
new Runner(options).run();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package com.alibaba.fastjson2.benchmark.wast;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.util.concurrent.TimeUnit;

/**
 * JMH micro-benchmark for {@code com.alibaba.fastjson2.util.IOUtils.digit4}.
 *
 * <p>The fixture is the text "12345678" repeated 1000 times, kept as a byte array, a char
 * array and a String.
 */
public class IOUtilsBench {
    static byte[] bytes;
    static char[] chars;
    static String str;

    static {
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < 1000; i++) {
            buf.append(12345678);
        }
        str = buf.toString();
        // Name the charset explicitly so the fixture bytes never depend on the platform default.
        bytes = str.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        chars = str.toCharArray();
    }

    /**
     * Calls {@code IOUtils.digit4} at every 8th offset in {@code [0, 1000)} of the byte fixture
     * and passes each result to the blackhole.
     *
     * @param bh JMH blackhole that receives the results
     */
    @Benchmark
    public void digit4(Blackhole bh) throws Throwable {
        for (int i = 0; i < 1000; i += 8) {
            // Use the loop index as the offset; a constant 0 would re-measure one position only.
            bh.consume(com.alibaba.fastjson2.util.IOUtils.digit4(bytes, i));
        }
    }

    /**
     * Runs this benchmark class through the JMH runner: throughput mode, millisecond time unit,
     * 3 warmup iterations, 1 thread, 1 fork.
     */
    public static void main(String[] args) throws Exception {
        Options options = new OptionsBuilder()
                .include(IOUtilsBench.class.getName())
                .mode(Mode.Throughput)
                .timeUnit(TimeUnit.MILLISECONDS)
                .warmupIterations(3)
                .threads(1)
                .forks(1)
                .build();
        new Runner(options).run();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ public static void lambda() throws Throwable {
}
}

/**
 * Runs five timing rounds; each round calls {@code benchmark.isASCII(BH)} {@code LOOP_COUNT}
 * times and prints the elapsed wall-clock milliseconds.
 */
public static void isASCII() throws Throwable {
    int round = 0;
    while (round < 5) {
        final long begin = System.currentTimeMillis();
        int calls = 0;
        while (calls < LOOP_COUNT) {
            benchmark.isASCII(BH);
            ++calls;
        }
        final long elapsed = System.currentTimeMillis() - begin;
        System.out.println("BytesAsciiCheck-isASCII : " + elapsed);

        // zulu17.40.19 : 118
        round++;
    }
}

public static void direct() throws Throwable {
for (int j = 0; j < 5; j++) {
long start = System.currentTimeMillis();
Expand All @@ -47,14 +60,14 @@ public static void direct() throws Throwable {
}
}

public static void direct8() throws Throwable {
public static void isLatin1() throws Throwable {
for (int j = 0; j < 5; j++) {
long start = System.currentTimeMillis();
for (int i = 0; i < LOOP_COUNT; ++i) {
benchmark.direct8(BH);
benchmark.isLatin1(BH);
}
long millis = System.currentTimeMillis() - start;
System.out.println("BytesAsciiCheck-direct8 : " + millis);
System.out.println("BytesAsciiCheck-isASCII_chars : " + millis);

// zulu17.40.19 : 478
}
Expand All @@ -75,6 +88,7 @@ public static void main(String[] args) throws Throwable {
// handler();
// lambda();
// direct();
// direct8();
isLatin1();
// isASCII();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ private void writeString0(char[] chars, int coff, int strlen) {

off = this.off;
} else {
ascii = isASCII(chars, coff, strlen);
ascii = isLatin1(chars, coff, strlen);
}

int minCapacity = (ascii ? strlen : strlen * 3) + off + 6;
Expand Down
76 changes: 59 additions & 17 deletions core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ public class IOUtils {
DIGITS_K_64[i] = c0 + v;
}
ZERO_DOT_LATIN1 = UNSAFE.getShort(new byte[] {'0', '.'}, ARRAY_BYTE_BASE_OFFSET);
ZERO_DOT_UTF16 = UNSAFE.getInt(new char[] {'0', '.'}, ARRAY_BYTE_BASE_OFFSET);
ZERO_DOT_UTF16 = UNSAFE.getInt(new char[] {'0', '.'}, ARRAY_CHAR_BASE_OFFSET);
}

public static void writeDigitPair(byte[] buf, int charPos, int value) {
Expand Down Expand Up @@ -1622,20 +1622,45 @@ public static int digit1(byte[] bytes, int off) {
}

/**
 * Searches {@code value} for {@code quote}, delegating to {@code INDEX_OF_CHAR_LATIN1} when
 * that handle is set and to {@code indexOfQuote0} otherwise.
 *
 * @param value array to search
 * @param quote value to look for
 * @param fromIndex forwarded unchanged to the selected implementation
 * @param max forwarded unchanged to the selected implementation
 * @return the int returned by the selected implementation
 * @throws JSONException if invoking the handle fails; the original failure is kept as the cause
 */
public static int indexOfQuote(byte[] value, int quote, int fromIndex, int max) {
    if (INDEX_OF_CHAR_LATIN1 == null) {
        return indexOfQuote0(value, quote, fromIndex, max);
    }
    try {
        // invokeExact requires the call-site types to match (byte[], int, int, int) -> int exactly.
        return (int) INDEX_OF_CHAR_LATIN1.invokeExact(value, quote, fromIndex, max);
    } catch (Throwable e) {
        // Same message as before, but chain the cause so the original stack trace is not lost.
        throw new JSONException(e.getMessage(), e);
    }
}
static int indexOfQuote0(byte[] value, int quote, int fromIndex, int max) {
int i = fromIndex;
long address = ARRAY_BYTE_BASE_OFFSET + fromIndex;
int upperBound = fromIndex + ((max - fromIndex) & ~7);
long vectorQuote = quote == '\'' ? 0x2727_2727_2727_2727L : 0x2222_2222_2222_2222L;
while (i < upperBound && notContains(getLongLE(value, i), vectorQuote)) {
while (i < upperBound && notContains(UNSAFE.getLong(value, address), vectorQuote)) {
i += 8;
address += 8;
}
return indexOfChar0(value, quote, i, max);
}

/**
 * Searches {@code value} for a backslash, delegating to {@code INDEX_OF_CHAR_LATIN1} when that
 * handle is set and to {@code indexOfSlashV} otherwise.
 *
 * @param value array to search
 * @param fromIndex forwarded unchanged to the selected implementation
 * @param max forwarded unchanged to the selected implementation
 * @return the int returned by the selected implementation
 * @throws JSONException if invoking the handle fails; the original failure is kept as the cause
 */
public static int indexOfSlash(byte[] value, int fromIndex, int max) {
    if (INDEX_OF_CHAR_LATIN1 == null) {
        return indexOfSlashV(value, fromIndex, max);
    }
    try {
        // The explicit (int) cast keeps the call-site signature at (byte[], int, int, int) -> int.
        return (int) INDEX_OF_CHAR_LATIN1.invokeExact(value, (int) '\\', fromIndex, max);
    } catch (Throwable e) {
        // Same message as before, but chain the cause so the original stack trace is not lost.
        throw new JSONException(e.getMessage(), e);
    }
}

public static int indexOfSlashV(byte[] value, int fromIndex, int max) {
int i = fromIndex;
long address = ARRAY_BYTE_BASE_OFFSET + fromIndex;
int upperBound = fromIndex + ((max - fromIndex) & ~7);
while (i < upperBound && notContains(getLongLE(value, i), 0x5C5C5C5C5C5C5C5CL)) {
while (i < upperBound && notContains(UNSAFE.getLong(value, address), 0x5C5C5C5C5C5C5C5CL)) {
i += 8;
address += 8;
}
return indexOfChar0(value, '\\', i, max);
}
Expand Down Expand Up @@ -1710,7 +1735,7 @@ public static int getIntUnaligned(byte[] bytes, int offset) {
}

public static int getIntUnaligned(char[] bytes, int offset) {
return UNSAFE.getInt(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1));
return UNSAFE.getInt(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1));
}

public static long getLongBE(byte[] bytes, int offset) {
Expand All @@ -1723,7 +1748,7 @@ public static long getLongUnaligned(byte[] bytes, int offset) {
}

public static long getLongUnaligned(char[] bytes, int offset) {
return UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1));
return UNSAFE.getLong(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1));
}

public static long getLongLE(byte[] bytes, int offset) {
Expand All @@ -1733,7 +1758,7 @@ public static long getLongLE(byte[] bytes, int offset) {

public static long getLongLE(char[] bytes, int offset) {
return convEndian(false,
UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1)));
UNSAFE.getLong(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1)));
}

public static short hex2(int i) {
Expand Down Expand Up @@ -1833,19 +1858,36 @@ static short convEndian(boolean big, short n) {
return big == BIG_ENDIAN ? n : Short.reverseBytes(n);
}

public static boolean isASCII(char[] chars, int coff, int strlen) {
int i = coff;
for (int upperBound = coff + (strlen & ~3); i < upperBound; i += 4) {
if ((getLongLE(chars, i) & 0xFF00FF00FF00FF00L) != 0) {
return false;
}
/**
 * Reports whether every char in {@code chars[off .. off + len)} has a zero high byte,
 * i.e. is at most {@code 0xFF}.
 *
 * @param chars array to inspect
 * @param off index of the first char to inspect
 * @param len number of chars to inspect
 * @return {@code true} if no inspected char exceeds {@code 0xFF}
 */
public static boolean isLatin1(char[] chars, int off, int len) {
    int upperBound = off + (len & ~7);
    int end = off + len;
    // Unsafe offsets are in bytes and a char is two bytes wide, so the start index is scaled by 2.
    long address = ARRAY_CHAR_BASE_OFFSET + ((long) off << 1);
    long value = 0;
    // Bulk phase: OR eight chars (two 8-byte words) per iteration into the accumulator.
    while (off < upperBound) {
        value |= UNSAFE.getLong(chars, address) | UNSAFE.getLong(chars, address + 8);
        address += 16;
        off += 8;
    }
    // Tail phase: fewer than eight chars remain; fold them in one at a time.
    while (off < end) {
        value |= chars[off++];
    }
    // A native-order load keeps every char in its own 16-bit lane, so the high byte of each lane
    // can be masked directly; swapping bytes first would test the low bytes on big-endian hosts.
    return (value & 0xFF00FF00FF00FF00L) == 0;
}

for (; i < strlen; ++i) {
if (chars[i] > 0x00FF) {
return false;
}
public static boolean isASCII(byte[] bytes, int off, int len) {
int upperBound = off + (len & ~7);
int end = off + len;
long address = ARRAY_BYTE_BASE_OFFSET + off;
long value = 0;
while (off < upperBound) {
value |= UNSAFE.getLong(bytes, address);
address += 8;
off += 8;
}
return true;
while (off < end) {
value |= bytes[off++];
}
return (value & 0x8080808080808080L) == 0;
}
}
Loading
Loading