Skip to content

Commit

Permalink
Swar 20250120 (#3305)
Browse files Browse the repository at this point in the history
* isASCII

* swar

* isASCIIChar

* codestyle

* fix build error

* fix build error

* add IOUtilsBench

* optimize isASCII

* remove unused code

* add benchmark

* bug fix
  • Loading branch information
wenshao authored Jan 22, 2025
1 parent 402d21d commit 4e2476c
Show file tree
Hide file tree
Showing 10 changed files with 231 additions and 85 deletions.
5 changes: 0 additions & 5 deletions benchmark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,6 @@
</properties>

<dependencies>
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2-codegen</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2-extension</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.alibaba.fastjson2.benchmark;

import com.alibaba.fastjson2.JSONException;
import com.alibaba.fastjson2.benchmark.eishay.EishayParseBinaryArrayMapping;
import com.alibaba.fastjson2.util.JDKUtils;
import org.apache.commons.io.IOUtils;
Expand All @@ -11,68 +12,110 @@
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.io.InputStream;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.util.concurrent.TimeUnit;

import static com.alibaba.fastjson2.util.JDKUtils.ARRAY_BYTE_BASE_OFFSET;
import static com.alibaba.fastjson2.util.JDKUtils.UNSAFE;

public class BytesAsciiCheck {
static byte[] bytes;
static char[] chars;
static String str;
static final MethodHandle INDEX_OF_CHAR;

static {
MethodHandle indexOfChar = null;
try {
try {
Class<?> cStringLatin1 = Class.forName("java.lang.StringLatin1");
MethodHandles.Lookup lookup = JDKUtils.trustedLookup(cStringLatin1);
indexOfChar = lookup.findStatic(
cStringLatin1,
"indexOfChar",
MethodType.methodType(int.class, byte[].class, int.class, int.class, int.class));
} catch (Throwable ignored) {
// ignore
}
} catch (Exception e) {
e.printStackTrace();
}

INDEX_OF_CHAR = indexOfChar;
try {
InputStream is = EishayParseBinaryArrayMapping.class.getClassLoader().getResourceAsStream("data/eishay.json");
String str = IOUtils.toString(is, "UTF-8");
str = IOUtils.toString(is, "UTF-8");
bytes = str.getBytes();
chars = str.toCharArray();
} catch (Exception e) {
e.printStackTrace();
}
}

@Benchmark
// @Benchmark
public void handler(Blackhole bh) throws Throwable {
bh.consume(
JDKUtils.METHOD_HANDLE_HAS_NEGATIVE.invoke(bytes, 0, bytes.length)
);
}

@Benchmark
// @Benchmark
public void lambda(Blackhole bh) throws Throwable {
bh.consume(
JDKUtils.PREDICATE_IS_ASCII.test(bytes)
);
}

@Benchmark
// @Benchmark
public void direct(Blackhole bh) throws Throwable {
bh.consume(hasNegatives(bytes, 0, bytes.length));
}

// @Benchmark
/**
 * Measures {@code IOUtils.isASCII} over the whole shared {@code bytes} fixture.
 *
 * @param bh JMH blackhole that consumes the result so the call is not eliminated
 */
public void isASCII(Blackhole bh) throws Throwable {
    boolean ascii = com.alibaba.fastjson2.util.IOUtils.isASCII(bytes, 0, bytes.length);
    bh.consume(ascii);
}

@Benchmark
public void direct8(Blackhole bh) throws Throwable {
bh.consume(hasNegatives_8(bytes, 0, bytes.length));
public void isLatin1(Blackhole bh) throws Throwable {
bh.consume(com.alibaba.fastjson2.util.IOUtils.isLatin1(chars, 0, chars.length));
}

public static boolean hasNegatives(byte[] ba, int off, int len) {
for (int i = off; i < off + len; i++) {
if (ba[i] < 0) {
return true;
}
}
return false;
@Benchmark
public void isASCIIJDK(Blackhole bh) throws Throwable {
bh.consume(com.alibaba.fastjson2.util.JDKUtils.PREDICATE_IS_ASCII.test(bytes));
}

public static boolean hasNegatives_8(byte[] bytes, int off, int len) {
int i = off;
while (i + 8 <= off + len) {
if ((UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + i) & 0x8080808080808080L) != 0) {
return true;
}
i += 8;
/**
 * Measures {@code IOUtils.indexOfSlash} over the whole shared {@code bytes} fixture.
 *
 * @param bh JMH blackhole that consumes the returned index
 */
@Benchmark
public void indexOfSlash(Blackhole bh) throws Throwable {
    int index = com.alibaba.fastjson2.util.IOUtils.indexOfSlash(bytes, 0, bytes.length);
    bh.consume(index);
}

/**
 * Measures {@code IOUtils.indexOfSlashV} over the whole shared {@code bytes} fixture.
 *
 * @param bh JMH blackhole that consumes the returned index
 */
@Benchmark
public void indexOfSlashV(Blackhole bh) throws Throwable {
    int index = com.alibaba.fastjson2.util.IOUtils.indexOfSlashV(bytes, 0, bytes.length);
    bh.consume(index);
}

/**
 * Measures the reflective {@code INDEX_OF_CHAR} handle (via the private
 * {@code indexOfChar} helper) over the whole shared {@code bytes} fixture.
 * Note that this searches for a single quote, whereas the sibling
 * {@code indexOfSlash*} benchmarks search for a backslash.
 *
 * @param bh JMH blackhole that consumes the returned index
 */
@Benchmark
public void indexOfChar(Blackhole bh) throws Throwable {
    int index = indexOfChar(bytes, '\'', 0, bytes.length);
    bh.consume(index);
}

/**
 * Baseline: measures {@link String#indexOf(int)} searching the shared
 * {@code str} fixture for a backslash.
 *
 * @param bh JMH blackhole that consumes the returned index
 */
@Benchmark
public void indexOfString(Blackhole bh) throws Throwable {
    int index = str.indexOf('\\');
    bh.consume(index);
}

/**
 * Invokes the {@code INDEX_OF_CHAR} method handle, which the static initializer
 * resolves to {@code java.lang.StringLatin1.indexOfChar(byte[], int, int, int)}.
 *
 * @param bytes     the byte array to search
 * @param ch        the character value to look for
 * @param fromIndex third argument forwarded to the handle
 * @param toIndex   fourth argument forwarded to the handle
 * @return the value returned by the handle
 * @throws JSONException if the handle could not be resolved on this JVM, or if
 *                       the invocation fails (the original failure is attached as the cause)
 */
private static int indexOfChar(byte[] bytes, int ch, int fromIndex, int toIndex) {
    // The static initializer swallows lookup failures and leaves the handle null;
    // report that explicitly instead of surfacing an anonymous NullPointerException.
    if (INDEX_OF_CHAR == null) {
        throw new JSONException("java.lang.StringLatin1.indexOfChar is not available");
    }
    try {
        return (int) INDEX_OF_CHAR.invokeExact(bytes, ch, fromIndex, toIndex);
    } catch (Throwable e) {
        // Keep the cause so a failing benchmark run can actually be diagnosed.
        throw new JSONException("invoke StringLatin1.indexOfChar error", e);
    }
}

for (; i < off + len; i++) {
if (bytes[i] < 0) {
public static boolean hasNegatives(byte[] ba, int off, int len) {
for (int i = off; i < off + len; i++) {
if (ba[i] < 0) {
return true;
}
}
Expand All @@ -85,6 +128,7 @@ public static void main(String[] args) throws Exception {
.mode(Mode.Throughput)
.timeUnit(TimeUnit.MILLISECONDS)
.warmupIterations(3)
.threads(1)
.forks(1)
.build();
new Runner(options).run();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package com.alibaba.fastjson2.benchmark.wast;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.util.concurrent.TimeUnit;

/**
 * JMH micro-benchmark for {@code com.alibaba.fastjson2.util.IOUtils.digit4}.
 *
 * <p>The fixture is the text {@code "12345678"} repeated 1000 times (8000 characters),
 * exposed as a byte array, a char array and a String.
 */
public class IOUtilsBench {
    static byte[] bytes;
    static char[] chars;
    static String str;

    static {
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < 1000; i++) {
            buf.append(12345678);
        }
        str = buf.toString();
        // Explicit charset: the fixture must not depend on the platform default encoding.
        bytes = str.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        chars = str.toCharArray();
    }

    /**
     * Calls {@code digit4} at offsets 0, 8, 16, ... below 1000 and feeds every result
     * to the blackhole.
     *
     * <p>The offset is the loop index. Passing a constant offset would make the call
     * loop-invariant, so the same bytes would be read on every iteration and the JIT
     * could hoist the work out of the loop.
     *
     * @param bh JMH blackhole that consumes each result
     */
    @Benchmark
    public void digit4(Blackhole bh) throws Throwable {
        for (int i = 0; i < 1000; i += 8) {
            bh.consume(com.alibaba.fastjson2.util.IOUtils.digit4(bytes, i));
        }
    }

    /**
     * Runs this benchmark class in throughput mode (ops/ms) with 3 warm-up iterations,
     * one thread and one fork.
     */
    public static void main(String[] args) throws Exception {
        Options options = new OptionsBuilder()
                .include(IOUtilsBench.class.getName())
                .mode(Mode.Throughput)
                .timeUnit(TimeUnit.MILLISECONDS)
                .warmupIterations(3)
                .threads(1)
                .forks(1)
                .build();
        new Runner(options).run();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ public static void lambda() throws Throwable {
}
}

/**
 * Runs five timed rounds; each round invokes {@code benchmark.isASCII(BH)}
 * {@code LOOP_COUNT} times and prints the elapsed wall-clock milliseconds.
 */
public static void isASCII() throws Throwable {
    int round = 0;
    while (round < 5) {
        final long begin = System.currentTimeMillis();
        for (int n = 0; n < LOOP_COUNT; n++) {
            benchmark.isASCII(BH);
        }
        final long elapsed = System.currentTimeMillis() - begin;
        System.out.println("BytesAsciiCheck-isASCII : " + elapsed);

        // zulu17.40.19 : 118
        round++;
    }
}

public static void direct() throws Throwable {
for (int j = 0; j < 5; j++) {
long start = System.currentTimeMillis();
Expand All @@ -47,14 +60,14 @@ public static void direct() throws Throwable {
}
}

public static void direct8() throws Throwable {
public static void isLatin1() throws Throwable {
for (int j = 0; j < 5; j++) {
long start = System.currentTimeMillis();
for (int i = 0; i < LOOP_COUNT; ++i) {
benchmark.direct8(BH);
benchmark.isLatin1(BH);
}
long millis = System.currentTimeMillis() - start;
System.out.println("BytesAsciiCheck-direct8 : " + millis);
System.out.println("BytesAsciiCheck-isASCII_chars : " + millis);

// zulu17.40.19 : 478
}
Expand All @@ -75,6 +88,7 @@ public static void main(String[] args) throws Throwable {
// handler();
// lambda();
// direct();
// direct8();
isLatin1();
// isASCII();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ private void writeString0(char[] chars, int coff, int strlen) {

off = this.off;
} else {
ascii = isASCII(chars, coff, strlen);
ascii = isLatin1(chars, coff, strlen);
}

int minCapacity = (ascii ? strlen : strlen * 3) + off + 6;
Expand Down
76 changes: 59 additions & 17 deletions core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ public class IOUtils {
DIGITS_K_64[i] = c0 + v;
}
ZERO_DOT_LATIN1 = UNSAFE.getShort(new byte[] {'0', '.'}, ARRAY_BYTE_BASE_OFFSET);
ZERO_DOT_UTF16 = UNSAFE.getInt(new char[] {'0', '.'}, ARRAY_BYTE_BASE_OFFSET);
ZERO_DOT_UTF16 = UNSAFE.getInt(new char[] {'0', '.'}, ARRAY_CHAR_BASE_OFFSET);
}

public static void writeDigitPair(byte[] buf, int charPos, int value) {
Expand Down Expand Up @@ -1622,20 +1622,45 @@ public static int digit1(byte[] bytes, int off) {
}

/**
 * Searches {@code value} for {@code quote}.
 *
 * <p>When the {@code INDEX_OF_CHAR_LATIN1} method handle is available it is invoked
 * with the same four arguments; otherwise the search is delegated to
 * {@code indexOfQuote0}.
 *
 * @param value     the byte array to search
 * @param quote     the quote character to look for
 * @param fromIndex index at which the search starts
 * @param max       upper bound forwarded to the underlying search
 * @return the value returned by the underlying search
 * @throws JSONException if the method-handle invocation fails; the message is that
 *                       of the original failure, which is attached as the cause
 */
public static int indexOfQuote(byte[] value, int quote, int fromIndex, int max) {
    if (INDEX_OF_CHAR_LATIN1 == null) {
        return indexOfQuote0(value, quote, fromIndex, max);
    }
    try {
        return (int) INDEX_OF_CHAR_LATIN1.invokeExact(value, quote, fromIndex, max);
    } catch (Throwable e) {
        // Chain the cause: the message alone (often null) loses the failure type and stack trace.
        throw new JSONException(e.getMessage(), e);
    }
}
static int indexOfQuote0(byte[] value, int quote, int fromIndex, int max) {
int i = fromIndex;
long address = ARRAY_BYTE_BASE_OFFSET + fromIndex;
int upperBound = fromIndex + ((max - fromIndex) & ~7);
long vectorQuote = quote == '\'' ? 0x2727_2727_2727_2727L : 0x2222_2222_2222_2222L;
while (i < upperBound && notContains(getLongLE(value, i), vectorQuote)) {
while (i < upperBound && notContains(UNSAFE.getLong(value, address), vectorQuote)) {
i += 8;
address += 8;
}
return indexOfChar0(value, quote, i, max);
}

/**
 * Searches {@code value} for a backslash ({@code '\\'}).
 *
 * <p>When the {@code INDEX_OF_CHAR_LATIN1} method handle is available it is invoked
 * with the backslash as the character argument; otherwise the search is delegated
 * to {@link #indexOfSlashV(byte[], int, int)}.
 *
 * @param value     the byte array to search
 * @param fromIndex index at which the search starts
 * @param max       upper bound forwarded to the underlying search
 * @return the value returned by the underlying search
 * @throws JSONException if the method-handle invocation fails; the message is that
 *                       of the original failure, which is attached as the cause
 */
public static int indexOfSlash(byte[] value, int fromIndex, int max) {
    if (INDEX_OF_CHAR_LATIN1 == null) {
        return indexOfSlashV(value, fromIndex, max);
    }
    try {
        return (int) INDEX_OF_CHAR_LATIN1.invokeExact(value, (int) '\\', fromIndex, max);
    } catch (Throwable e) {
        // Chain the cause: the message alone (often null) loses the failure type and stack trace.
        throw new JSONException(e.getMessage(), e);
    }
}

public static int indexOfSlashV(byte[] value, int fromIndex, int max) {
int i = fromIndex;
long address = ARRAY_BYTE_BASE_OFFSET + fromIndex;
int upperBound = fromIndex + ((max - fromIndex) & ~7);
while (i < upperBound && notContains(getLongLE(value, i), 0x5C5C5C5C5C5C5C5CL)) {
while (i < upperBound && notContains(UNSAFE.getLong(value, address), 0x5C5C5C5C5C5C5C5CL)) {
i += 8;
address += 8;
}
return indexOfChar0(value, '\\', i, max);
}
Expand Down Expand Up @@ -1710,7 +1735,7 @@ public static int getIntUnaligned(byte[] bytes, int offset) {
}

public static int getIntUnaligned(char[] bytes, int offset) {
return UNSAFE.getInt(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1));
return UNSAFE.getInt(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1));
}

public static long getLongBE(byte[] bytes, int offset) {
Expand All @@ -1723,7 +1748,7 @@ public static long getLongUnaligned(byte[] bytes, int offset) {
}

public static long getLongUnaligned(char[] bytes, int offset) {
return UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1));
return UNSAFE.getLong(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1));
}

public static long getLongLE(byte[] bytes, int offset) {
Expand All @@ -1733,7 +1758,7 @@ public static long getLongLE(byte[] bytes, int offset) {

public static long getLongLE(char[] bytes, int offset) {
return convEndian(false,
UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1)));
UNSAFE.getLong(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1)));
}

public static short hex2(int i) {
Expand Down Expand Up @@ -1833,19 +1858,36 @@ static short convEndian(boolean big, short n) {
return big == BIG_ENDIAN ? n : Short.reverseBytes(n);
}

public static boolean isASCII(char[] chars, int coff, int strlen) {
int i = coff;
for (int upperBound = coff + (strlen & ~3); i < upperBound; i += 4) {
if ((getLongLE(chars, i) & 0xFF00FF00FF00FF00L) != 0) {
return false;
}
/**
 * Tests whether every char in {@code chars[off, off + len)} is at most {@code 0xFF}.
 *
 * <p>The bulk of the range is processed eight chars (two 64-bit loads) at a time.
 * All loaded words are OR-ed into one accumulator and the high byte of each 16-bit
 * lane is inspected once at the end, so the loop has no data-dependent branch.
 *
 * @param chars the array to inspect
 * @param off   index of the first char to inspect
 * @param len   number of chars to inspect
 * @return {@code true} if no inspected char has a non-zero high byte
 */
public static boolean isLatin1(char[] chars, int off, int len) {
    int upperBound = off + (len & ~7);
    int end = off + len;
    // Unsafe addresses are byte offsets and a char occupies two bytes, so the
    // starting index must be scaled; an unscaled index reads the wrong chars
    // whenever off != 0.
    long address = ARRAY_CHAR_BASE_OFFSET + ((long) off << 1);
    long value = 0;
    while (off < upperBound) {
        value |= UNSAFE.getLong(chars, address) | UNSAFE.getLong(chars, address + 8);
        address += 16;
        off += 8;
    }
    // Tail of fewer than eight chars: plain array reads, zero-extended into the low lane.
    while (off < end) {
        value |= chars[off++];
    }
    // Each 16-bit lane holds a char in native byte order, so the high-byte mask
    // applies to the native value directly, without any endian conversion.
    return (value & 0xFF00FF00FF00FF00L) == 0;
}

for (; i < strlen; ++i) {
if (chars[i] > 0x00FF) {
return false;
}
public static boolean isASCII(byte[] bytes, int off, int len) {
int upperBound = off + (len & ~7);
int end = off + len;
long address = ARRAY_BYTE_BASE_OFFSET + off;
long value = 0;
while (off < upperBound) {
value |= UNSAFE.getLong(bytes, address);
address += 8;
off += 8;
}
return true;
while (off < end) {
value |= bytes[off++];
}
return (value & 0x8080808080808080L) == 0;
}
}
Loading

0 comments on commit 4e2476c

Please sign in to comment.