Skip to content

Commit

Permalink
perf: optimize Set<Integer> which are used for checking if oid should…
Browse files Browse the repository at this point in the history
… be transferred with binary or text

It looks like BitSet is faster than HashSet, so let's use it.

Unfortunately, BitSet would consume memory in case the Oid value is high,
and Oids can be up to 32bit, so we limit the max OID stored in a BitSet with
8192*8, so we effectively limit the extra per-connection memory by
16KiB (8KiB for send and 8KiB for receive)
  • Loading branch information
vlsi committed Jul 25, 2024
1 parent 7a65cf3 commit 5e3190d
Show file tree
Hide file tree
Showing 5 changed files with 385 additions and 5 deletions.
2 changes: 2 additions & 0 deletions benchmarks/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ dependencies {
// Make jmhCompileClasspath resolvable
jmhImplementation(project(":postgresql"))
jmhImplementation(testFixtures(project(":postgresql")))
jmhImplementation("org.roaringbitmap:RoaringBitmap:1.0.6")
jmhImplementation("it.unimi.dsi:fastutil:8.5.13")
jmhRuntimeOnly("com.ongres.scram:client:2.1")
jmhImplementation("org.openjdk.jmh:jmh-core:1.37")
jmhImplementation("org.openjdk.jmh:jmh-generator-annprocess:1.37")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* Copyright (c) 2024, PostgreSQL Global Development Group
* See the LICENSE file in the project root for more information.
*/

package org.postgresql.benchmark.util;

import org.postgresql.core.Oid;
import org.postgresql.util.internal.IntSet;

import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.roaringbitmap.RoaringBitmap;

import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.TimeUnit;

/**
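* Tests the performance of {@code contains} checks for several implementations of a set of
* integer OIDs. According to the results below, {@link BitSet}-based lookups ({@code bitSet} and
* {@code intSet}) are the fastest, so it looks like {@link IntSet} is the way to go.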
* Here's a result for Apple M1 Max, Java 17.0.10, ARM64.
* 23 is present in the set, 73 and 123456 are both absent.
* <pre>
* Benchmark (oid) Mode Cnt Score Error Units
* IntSetBenchmark.bitSet_contains 23 avgt 15 0,984 ± 0,009 ns/op
* IntSetBenchmark.bitSet_contains 73 avgt 15 0,979 ± 0,005 ns/op
* IntSetBenchmark.bitSet_contains 123456 avgt 15 0,688 ± 0,015 ns/op
* IntSetBenchmark.hashSet_contains 23 avgt 15 2,026 ± 0,013 ns/op
* IntSetBenchmark.hashSet_contains 73 avgt 15 1,985 ± 0,004 ns/op
* IntSetBenchmark.hashSet_contains 123456 avgt 15 1,968 ± 0,032 ns/op
* IntSetBenchmark.intOpenSet_contains 23 avgt 15 1,015 ± 0,011 ns/op
* IntSetBenchmark.intOpenSet_contains 73 avgt 15 5,720 ± 0,596 ns/op
* IntSetBenchmark.intOpenSet_contains 123456 avgt 15 8,430 ± 0,007 ns/op
* IntSetBenchmark.intSet_contains 23 avgt 15 1,101 ± 0,009 ns/op
* IntSetBenchmark.intSet_contains 73 avgt 15 1,117 ± 0,005 ns/op
* IntSetBenchmark.intSet_contains 123456 avgt 15 0,693 ± 0,010 ns/op
* IntSetBenchmark.roaring_contains 23 avgt 15 5,012 ± 0,044 ns/op
* IntSetBenchmark.roaring_contains 73 avgt 15 5,561 ± 0,077 ns/op
* IntSetBenchmark.roaring_contains 123456 avgt 15 1,163 ± 0,012 ns/op
* </pre>
*/
@Fork(value = 3, jvmArgsPrepend = "-Xmx128m")
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@State(Scope.Thread)
@Threads(1)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public class IntSetBenchmark {
  /**
   * OIDs every candidate set is populated with before the measurement starts.
   * See {@code PgConnection#getSupportedBinaryOids()}.
   */
  private static final Collection<? extends Integer> SUPPORTED_BINARY_OIDS = Arrays.asList(
      Oid.BYTEA,
      Oid.INT2,
      Oid.INT4,
      Oid.INT8,
      Oid.FLOAT4,
      Oid.FLOAT8,
      Oid.NUMERIC,
      Oid.TIME,
      Oid.DATE,
      Oid.TIMETZ,
      Oid.TIMESTAMP,
      Oid.TIMESTAMPTZ,
      Oid.BYTEA_ARRAY,
      Oid.INT2_ARRAY,
      Oid.INT4_ARRAY,
      Oid.INT8_ARRAY,
      Oid.OID_ARRAY,
      Oid.FLOAT4_ARRAY,
      Oid.FLOAT8_ARRAY,
      Oid.VARCHAR_ARRAY,
      Oid.TEXT_ARRAY,
      Oid.POINT,
      Oid.BOX,
      Oid.UUID);

  // Candidate implementations; all of them hold the same values once setup() completes.
  IntSet intSet = new IntSet();
  BitSet bitSet = new BitSet();
  Set<Integer> hashSet = new HashSet<>();
  RoaringBitmap roaringBitmap = new RoaringBitmap();
  IntOpenHashSet intOpenHashSet = new IntOpenHashSet();

  /**
   * The value looked up by every benchmark method.
   */
  @Param({"23", "73", "123456"})
  int oid;

  /**
   * Fills every candidate set with {@link #SUPPORTED_BINARY_OIDS}.
   */
  @Setup
  public void setup() {
    for (Integer boxedOid : SUPPORTED_BINARY_OIDS) {
      // Unbox once so the primitive-based structures receive a plain int
      int supportedOid = boxedOid;
      intSet.add(supportedOid);
      hashSet.add(boxedOid);
      bitSet.set(supportedOid);
      roaringBitmap.add(supportedOid);
      intOpenHashSet.add(supportedOid);
    }
  }

  /**
   * Measures the lookup in {@link IntSet}.
   *
   * @return lookup result, returned so the call is not eliminated as dead code
   */
  @Benchmark
  public boolean intSet_contains() {
    return intSet.contains(oid);
  }

  /**
   * Measures the lookup in a plain {@link BitSet}.
   *
   * @return lookup result, returned so the call is not eliminated as dead code
   */
  @Benchmark
  public boolean bitSet_contains() {
    return bitSet.get(oid);
  }

  /**
   * Measures the lookup in {@code HashSet<Integer>}; the argument is boxed on every call.
   *
   * @return lookup result, returned so the call is not eliminated as dead code
   */
  @Benchmark
  public boolean hashSet_contains() {
    return hashSet.contains(oid);
  }

  /**
   * Measures the lookup in {@link RoaringBitmap}.
   *
   * @return lookup result, returned so the call is not eliminated as dead code
   */
  @Benchmark
  public boolean roaring_contains() {
    return roaringBitmap.contains(oid);
  }

  /**
   * Measures the lookup in fastutil's {@link IntOpenHashSet}.
   *
   * @return lookup result, returned so the call is not eliminated as dead code
   */
  @Benchmark
  public boolean intOpenSet_contains() {
    return intOpenHashSet.contains(oid);
  }

  /**
   * Runs the benchmarks of this class via the JMH runner.
   *
   * @param args command line arguments, not used
   * @throws RunnerException if JMH fails to execute the benchmarks
   */
  public static void main(String[] args) throws RunnerException {
    Options options = new OptionsBuilder()
        .include(IntSetBenchmark.class.getSimpleName())
        //.addProfiler(GCProfiler.class)
        .detectJvmArgs()
        .build();

    Runner runner = new Runner(options);
    runner.run();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.postgresql.util.PSQLState;
import org.postgresql.util.PSQLWarning;
import org.postgresql.util.ServerErrorMessage;
import org.postgresql.util.internal.IntSet;
import org.postgresql.util.internal.SourceStreamIOException;

import org.checkerframework.checker.nullness.qual.Nullable;
Expand All @@ -66,7 +67,6 @@
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
Expand Down Expand Up @@ -123,12 +123,12 @@ public class QueryExecutorImpl extends QueryExecutorBase {
/**
* Bit set that has a bit set for each oid which should be received using binary format.
*/
private final Set<Integer> useBinaryReceiveForOids = new HashSet<>();
private final IntSet useBinaryReceiveForOids = new IntSet();

/**
* Bit set that has a bit set for each oid which should be sent using binary format.
*/
private final Set<Integer> useBinarySendForOids = new HashSet<>();
private final IntSet useBinarySendForOids = new IntSet();

/**
* This is a fake query object so processResults can distinguish "ReadyForQuery" messages
Expand Down Expand Up @@ -2990,7 +2990,7 @@ public void removeBinaryReceiveOid(int oid) {
public Set<? extends Integer> getBinaryReceiveOids() {
// copy the values to prevent ConcurrentModificationException when reader accesses the elements
synchronized (useBinaryReceiveForOids) {
return new HashSet<>(useBinaryReceiveForOids);
return useBinaryReceiveForOids.toMutableSet();
}
}

Expand Down Expand Up @@ -3028,7 +3028,7 @@ public void removeBinarySendOid(int oid) {
public Set<? extends Integer> getBinarySendOids() {
// copy the values to prevent ConcurrentModificationException when reader accesses the elements
synchronized (useBinarySendForOids) {
return new HashSet<>(useBinarySendForOids);
return useBinarySendForOids.toMutableSet();
}
}

Expand Down
126 changes: 126 additions & 0 deletions pgjdbc/src/main/java/org/postgresql/util/internal/IntSet.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/*
* Copyright (c) 2024, PostgreSQL Global Development Group
* See the LICENSE file in the project root for more information.
*/

package org.postgresql.util.internal;

import org.postgresql.core.Oid;

import org.checkerframework.checker.nullness.qual.Nullable;

import java.util.BitSet;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

/**
* Read-optimized {@code Set} for storing {@link Oid} values.
* Note: the set does not support nullable values.
*/
public final class IntSet {
/**
* Maximal Oid that will bs stored in {@link BitSet}.
* If Oid exceeds this value, then it will be stored in {@code Set<Int>} only.
* In theory, Oids can be up to 32bit, so we want to limit per-connection memory utilization.
* Allow {@code BitSet} to consume up to 8KiB (one for send and one for receive).
*/
private static final int MAX_OID_TO_STORE_IN_BITSET = 8192 * 8;

/**
* Contains values outside [0..MAX_OID_TO_STORE_IN_BITSET] range.
* This field is null if bitSet contains all the values.
*/
private @Nullable Set<Integer> set;

/**
* Contains values in range of [0..MAX_OID_TO_STORE_IN_BITSET].
*/
private final BitSet bitSet = new BitSet();

/**
* Clears the contents of the set.
*/
public void clear() {
set = null;
bitSet.clear();
}

/**
* Adds all the values to the set.
* @param values set of values to add
*/
public void addAll(Collection<? extends Integer> values) {
for (Integer value : values) {
add(value);
}
}

/**
* Adds a single value to the set.
*
* @param value value to add
* @return true if the set did not already contain the specified value
*/
public boolean add(int value) {
if (value >= 0 && value <= MAX_OID_TO_STORE_IN_BITSET) {
boolean contains = bitSet.get(value);
if (!contains) {
bitSet.set(value);
return true;
}
return false;
}
Set<Integer> set = this.set;
if (set == null) {
this.set = set = new HashSet<>();
}
return set.add(value);
}

/**
* Removes a value from the set.
* @param value value to remove
* @return true if the element was
*/
public boolean remove(int value) {
if (value >= 0 && value <= MAX_OID_TO_STORE_IN_BITSET) {
boolean contains = bitSet.get(value);
if (contains) {
bitSet.clear(value);
return true;
}
return false;
}
Set<Integer> set = this.set;
return set != null && set.remove(value);
}

/**
* Checks if a given value belongs to the set.
* @param value value to check
* @return true if the value belons to the set
*/
public boolean contains(int value) {
if (value >= 0 && value <= MAX_OID_TO_STORE_IN_BITSET) {
return bitSet.get(value);
}
Set<Integer> set = this.set;
return set != null && set.contains(value);
}

/**
* Returns a mutable snapshot of the values stored in the current set.
* @return a mutable snapshot of the values stored in the current set
*/
public Set<Integer> toMutableSet() {
Set<Integer> set = this.set;
Set<Integer> result = new HashSet<>(
(int) ((bitSet.cardinality() + (set != null ? set.size() : 0)) / 0.75f));
if (set != null) {
result.addAll(set);
}
bitSet.stream().forEach(result::add);
return result;
}
}
Loading

0 comments on commit 5e3190d

Please sign in to comment.