Skip to content

Commit

Permalink
perf: Remove unused naturalJoin key states. (#5770)
Browse files Browse the repository at this point in the history
The generated hash tables for naturalJoin (and other operations) never
removed a bucket's hash table entry when it became empty. For cases
where a naturalJoin cycles through buckets, the hash table could be
filled with empty values consuming memory without reason.

This changes the generated open addressed hash table for both-sides
incremental naturalJoin to mark entries deleted with a tombstone when
both the left and right side are empty. The occupancy of the hash table
for use in load factor computation includes the number of entries in the
hash table and tombstones. The rehash operation may copy entries into a
new table that is the same size as the original table instead of always
doubling in size, allowing tombstone entries to be reaped.

Closes #5769

---------

Co-authored-by: Ryan Caudy <rcaudy@gmail.com>
  • Loading branch information
cpwright and rcaudy authored Jul 21, 2024
1 parent ce45558 commit ec36842
Show file tree
Hide file tree
Showing 435 changed files with 6,348 additions and 34,136 deletions.
14 changes: 14 additions & 0 deletions Base/src/main/java/io/deephaven/base/MathUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,20 @@ public static int roundUpPowerOf2(int x) {
return Math.max(Integer.highestOneBit(x - 1) << 1, 1);
}

/**
 * Rounds up to the next power of 2 for {@code x}; if {@code x} is already a power of 2, {@code x} will be returned.
 * Values outside the range {@code 1 <= x <= 2^62} will return {@code 1}: non-positive values have no next power of
 * 2, and for values above {@code 2^62} the next power of 2 ({@code 2^63}) is not representable as a positive
 * {@code long}.
 *
 * <p>
 * Equivalent to {@code Math.max(Long.highestOneBit(x - 1) << 1, 1)}.
 *
 * @param x the value
 * @return the next power of 2 for {@code x}, or {@code 1} if {@code x} is outside {@code 1 <= x <= 2^62}
 */
public static long roundUpPowerOf2(long x) {
    // For x > 2^62 the shift overflows to Long.MIN_VALUE, and for x <= 0 it yields 0 or Long.MIN_VALUE;
    // taking the max against 1 maps every one of those cases to 1.
    return Math.max(Long.highestOneBit(x - 1) << 1, 1L);
}

/**
* Rounds up to the next power of 2 for {@code size <= MAX_POWER_OF_2}, otherwise returns
* {@link ArrayUtil#MAX_ARRAY_SIZE}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,26 @@ private SegmentedSortedArray makeSsaFromRowSet(int slot, Function<RowSet, Segmen
return ssa;
}

protected void newAlternate() {
/**
* After creating the new alternate key states, advise the derived classes, so they can cast them to the typed
* versions of the column source and adjust the derived class pointers.
*/
protected abstract void adviseNewAlternate();

private void setupNewAlternate(int oldTableSize) {
Assert.eqZero(rehashPointer, "rehashPointer");

for (int ii = 0; ii < mainKeySources.length; ++ii) {
alternateKeySources[ii] = mainKeySources[ii];
mainKeySources[ii] = InMemoryColumnSource.getImmutableMemoryColumnSource(tableSize,
alternateKeySources[ii].getType(), alternateKeySources[ii].getComponentType());
mainKeySources[ii].ensureCapacity(tableSize);
}
alternateTableSize = oldTableSize;
if (numEntries > 0) {
rehashPointer = alternateTableSize;
}

alternateRightRowSetSource = rightRowSetSource;
rightRowSetSource = new ImmutableObjectArraySource<>(Object.class, null);
rightRowSetSource.ensureCapacity(tableSize);
Expand Down Expand Up @@ -951,20 +970,8 @@ public boolean doRehash(boolean fullRehash, MutableInt rehashCredits, int nextCh
return false;
}

Assert.eqZero(rehashPointer, "rehashPointer");

for (int ii = 0; ii < mainKeySources.length; ++ii) {
alternateKeySources[ii] = mainKeySources[ii];
mainKeySources[ii] = InMemoryColumnSource.getImmutableMemoryColumnSource(tableSize,
alternateKeySources[ii].getType(), alternateKeySources[ii].getComponentType());
mainKeySources[ii].ensureCapacity(tableSize);
}
alternateTableSize = oldTableSize;
if (numEntries > 0) {
rehashPointer = alternateTableSize;
}

newAlternate();
setupNewAlternate(oldTableSize);
adviseNewAlternate();

return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ protected void buildFromLeftSide(RowSequence rowSequence, Chunk[] sourceKeyChunk
int tableLocation = firstTableLocation;
MAIN_SEARCH: while (true) {
byte rowState = stateSource.getUnsafe(tableLocation);
if (rowState == ENTRY_EMPTY_STATE) {
if (isStateEmpty(rowState)) {
final int firstAlternateTableLocation = hashToTableLocationAlternate(hash);
int alternateTableLocation = firstAlternateTableLocation;
while (alternateTableLocation < rehashPointer) {
rowState = alternateStateSource.getUnsafe(alternateTableLocation);
if (rowState == ENTRY_EMPTY_STATE) {
if (isStateEmpty(rowState)) {
break;
} else if (eq(alternateKeySource0.getUnsafe(alternateTableLocation), k0)) {
final long cookie = getCookieAlternate(alternateTableLocation);
Expand Down Expand Up @@ -120,12 +120,12 @@ protected void buildFromRightSide(RowSequence rowSequence, Chunk[] sourceKeyChun
int tableLocation = firstTableLocation;
MAIN_SEARCH: while (true) {
byte rowState = stateSource.getUnsafe(tableLocation);
if (rowState == ENTRY_EMPTY_STATE) {
if (isStateEmpty(rowState)) {
final int firstAlternateTableLocation = hashToTableLocationAlternate(hash);
int alternateTableLocation = firstAlternateTableLocation;
while (alternateTableLocation < rehashPointer) {
rowState = alternateStateSource.getUnsafe(alternateTableLocation);
if (rowState == ENTRY_EMPTY_STATE) {
if (isStateEmpty(rowState)) {
break;
} else if (eq(alternateKeySource0.getUnsafe(alternateTableLocation), k0)) {
final long cookie = getCookieAlternate(alternateTableLocation);
Expand Down Expand Up @@ -181,7 +181,7 @@ protected void probeRightSide(RowSequence rowSequence, Chunk[] sourceKeyChunks,
boolean found = false;
int tableLocation = firstTableLocation;
byte rowState;
while ((rowState = stateSource.getUnsafe(tableLocation)) != ENTRY_EMPTY_STATE) {
while (!isStateEmpty(rowState = stateSource.getUnsafe(tableLocation))) {
if (eq(mainKeySource0.getUnsafe(tableLocation), k0)) {
if (sequentialBuilders != null) {
final long cookie = getCookieMain(tableLocation);
Expand All @@ -200,7 +200,7 @@ protected void probeRightSide(RowSequence rowSequence, Chunk[] sourceKeyChunks,
final int firstAlternateTableLocation = hashToTableLocationAlternate(hash);
if (firstAlternateTableLocation < rehashPointer) {
int alternateTableLocation = firstAlternateTableLocation;
while ((rowState = alternateStateSource.getUnsafe(alternateTableLocation)) != ENTRY_EMPTY_STATE) {
while (!isStateEmpty(rowState = alternateStateSource.getUnsafe(alternateTableLocation))) {
if (eq(alternateKeySource0.getUnsafe(alternateTableLocation), k0)) {
if (sequentialBuilders != null) {
final long cookie = getCookieAlternate(alternateTableLocation);
Expand All @@ -224,15 +224,19 @@ private static int hash(byte k0) {
return hash;
}

/**
 * Reports whether {@code state} equals {@code ENTRY_EMPTY_STATE}, the state value this hasher's probe and rehash
 * loops treat as "no entry at this table location".
 *
 * @param state the state byte read for a table location
 * @return {@code true} if {@code state} is {@code ENTRY_EMPTY_STATE}
 */
private static boolean isStateEmpty(byte state) {
return state == ENTRY_EMPTY_STATE;
}

private boolean migrateOneLocation(int locationToMigrate) {
final byte currentStateValue = alternateStateSource.getUnsafe(locationToMigrate);
if (currentStateValue == ENTRY_EMPTY_STATE) {
if (isStateEmpty(currentStateValue)) {
return false;
}
final byte k0 = alternateKeySource0.getUnsafe(locationToMigrate);
final int hash = hash(k0);
int destinationTableLocation = hashToTableLocation(hash);
while (stateSource.getUnsafe(destinationTableLocation) != ENTRY_EMPTY_STATE) {
while (!isStateEmpty(stateSource.getUnsafe(destinationTableLocation))) {
destinationTableLocation = nextTableLocation(destinationTableLocation);
}
mainKeySource0.set(destinationTableLocation, k0);
Expand All @@ -259,8 +263,7 @@ protected int rehashInternalPartial(int entriesToRehash) {
}

@Override
protected void newAlternate() {
super.newAlternate();
protected void adviseNewAlternate() {
this.mainKeySource0 = (ImmutableByteArraySource)super.mainKeySources[0];
this.alternateKeySource0 = (ImmutableByteArraySource)super.alternateKeySources[0];
}
Expand All @@ -274,7 +277,7 @@ protected void clearAlternate() {
@Override
protected void migrateFront() {
int location = 0;
while (migrateOneLocation(location++));
while (migrateOneLocation(location++) && location < alternateTableSize);
}

@Override
Expand All @@ -298,15 +301,15 @@ protected void rehashInternalFull(final int oldSize) {
mainCookieSource.setArray(destModifiedCookie);
for (int sourceBucket = 0; sourceBucket < oldSize; ++sourceBucket) {
final byte currentStateValue = originalStateArray[sourceBucket];
if (currentStateValue == ENTRY_EMPTY_STATE) {
if (isStateEmpty(currentStateValue)) {
continue;
}
final byte k0 = originalKeyArray0[sourceBucket];
final int hash = hash(k0);
final int firstDestinationTableLocation = hashToTableLocation(hash);
int destinationTableLocation = firstDestinationTableLocation;
while (true) {
if (destState[destinationTableLocation] == ENTRY_EMPTY_STATE) {
if (isStateEmpty(destState[destinationTableLocation])) {
destKeyArray0[destinationTableLocation] = k0;
destState[destinationTableLocation] = originalStateArray[sourceBucket];
destLeftSource[destinationTableLocation] = oldLeftSource[sourceBucket];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ protected void buildFromLeftSide(RowSequence rowSequence, Chunk[] sourceKeyChunk
int tableLocation = firstTableLocation;
MAIN_SEARCH: while (true) {
byte rowState = stateSource.getUnsafe(tableLocation);
if (rowState == ENTRY_EMPTY_STATE) {
if (isStateEmpty(rowState)) {
final int firstAlternateTableLocation = hashToTableLocationAlternate(hash);
int alternateTableLocation = firstAlternateTableLocation;
while (alternateTableLocation < rehashPointer) {
rowState = alternateStateSource.getUnsafe(alternateTableLocation);
if (rowState == ENTRY_EMPTY_STATE) {
if (isStateEmpty(rowState)) {
break;
} else if (eq(alternateKeySource0.getUnsafe(alternateTableLocation), k0)) {
final long cookie = getCookieAlternate(alternateTableLocation);
Expand Down Expand Up @@ -120,12 +120,12 @@ protected void buildFromRightSide(RowSequence rowSequence, Chunk[] sourceKeyChun
int tableLocation = firstTableLocation;
MAIN_SEARCH: while (true) {
byte rowState = stateSource.getUnsafe(tableLocation);
if (rowState == ENTRY_EMPTY_STATE) {
if (isStateEmpty(rowState)) {
final int firstAlternateTableLocation = hashToTableLocationAlternate(hash);
int alternateTableLocation = firstAlternateTableLocation;
while (alternateTableLocation < rehashPointer) {
rowState = alternateStateSource.getUnsafe(alternateTableLocation);
if (rowState == ENTRY_EMPTY_STATE) {
if (isStateEmpty(rowState)) {
break;
} else if (eq(alternateKeySource0.getUnsafe(alternateTableLocation), k0)) {
final long cookie = getCookieAlternate(alternateTableLocation);
Expand Down Expand Up @@ -181,7 +181,7 @@ protected void probeRightSide(RowSequence rowSequence, Chunk[] sourceKeyChunks,
boolean found = false;
int tableLocation = firstTableLocation;
byte rowState;
while ((rowState = stateSource.getUnsafe(tableLocation)) != ENTRY_EMPTY_STATE) {
while (!isStateEmpty(rowState = stateSource.getUnsafe(tableLocation))) {
if (eq(mainKeySource0.getUnsafe(tableLocation), k0)) {
if (sequentialBuilders != null) {
final long cookie = getCookieMain(tableLocation);
Expand All @@ -200,7 +200,7 @@ protected void probeRightSide(RowSequence rowSequence, Chunk[] sourceKeyChunks,
final int firstAlternateTableLocation = hashToTableLocationAlternate(hash);
if (firstAlternateTableLocation < rehashPointer) {
int alternateTableLocation = firstAlternateTableLocation;
while ((rowState = alternateStateSource.getUnsafe(alternateTableLocation)) != ENTRY_EMPTY_STATE) {
while (!isStateEmpty(rowState = alternateStateSource.getUnsafe(alternateTableLocation))) {
if (eq(alternateKeySource0.getUnsafe(alternateTableLocation), k0)) {
if (sequentialBuilders != null) {
final long cookie = getCookieAlternate(alternateTableLocation);
Expand All @@ -224,15 +224,19 @@ private static int hash(char k0) {
return hash;
}

/**
 * Reports whether {@code state} equals {@code ENTRY_EMPTY_STATE}, the state value this hasher's probe and rehash
 * loops treat as "no entry at this table location".
 *
 * @param state the state byte read for a table location
 * @return {@code true} if {@code state} is {@code ENTRY_EMPTY_STATE}
 */
private static boolean isStateEmpty(byte state) {
return state == ENTRY_EMPTY_STATE;
}

private boolean migrateOneLocation(int locationToMigrate) {
final byte currentStateValue = alternateStateSource.getUnsafe(locationToMigrate);
if (currentStateValue == ENTRY_EMPTY_STATE) {
if (isStateEmpty(currentStateValue)) {
return false;
}
final char k0 = alternateKeySource0.getUnsafe(locationToMigrate);
final int hash = hash(k0);
int destinationTableLocation = hashToTableLocation(hash);
while (stateSource.getUnsafe(destinationTableLocation) != ENTRY_EMPTY_STATE) {
while (!isStateEmpty(stateSource.getUnsafe(destinationTableLocation))) {
destinationTableLocation = nextTableLocation(destinationTableLocation);
}
mainKeySource0.set(destinationTableLocation, k0);
Expand All @@ -259,8 +263,7 @@ protected int rehashInternalPartial(int entriesToRehash) {
}

@Override
protected void newAlternate() {
super.newAlternate();
protected void adviseNewAlternate() {
this.mainKeySource0 = (ImmutableCharArraySource)super.mainKeySources[0];
this.alternateKeySource0 = (ImmutableCharArraySource)super.alternateKeySources[0];
}
Expand All @@ -274,7 +277,7 @@ protected void clearAlternate() {
@Override
protected void migrateFront() {
int location = 0;
while (migrateOneLocation(location++));
while (migrateOneLocation(location++) && location < alternateTableSize);
}

@Override
Expand All @@ -298,15 +301,15 @@ protected void rehashInternalFull(final int oldSize) {
mainCookieSource.setArray(destModifiedCookie);
for (int sourceBucket = 0; sourceBucket < oldSize; ++sourceBucket) {
final byte currentStateValue = originalStateArray[sourceBucket];
if (currentStateValue == ENTRY_EMPTY_STATE) {
if (isStateEmpty(currentStateValue)) {
continue;
}
final char k0 = originalKeyArray0[sourceBucket];
final int hash = hash(k0);
final int firstDestinationTableLocation = hashToTableLocation(hash);
int destinationTableLocation = firstDestinationTableLocation;
while (true) {
if (destState[destinationTableLocation] == ENTRY_EMPTY_STATE) {
if (isStateEmpty(destState[destinationTableLocation])) {
destKeyArray0[destinationTableLocation] = k0;
destState[destinationTableLocation] = originalStateArray[sourceBucket];
destLeftSource[destinationTableLocation] = oldLeftSource[sourceBucket];
Expand Down
Loading

0 comments on commit ec36842

Please sign in to comment.