Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: seek row for sorted columns #5238

Merged
merged 21 commits into from
Apr 11, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,13 @@
import io.deephaven.engine.rowset.RowSetFactory;
import io.deephaven.engine.table.ColumnSource;
import io.deephaven.engine.table.Table;
import io.deephaven.engine.table.impl.SortedColumnsAttribute;
import io.deephaven.engine.table.impl.SortingOrder;
import io.deephaven.internal.log.LoggerFactory;
import io.deephaven.io.logger.Logger;
import io.deephaven.time.DateTimeUtils;

import java.util.*;
import java.util.Random;
wusteven815 marked this conversation as resolved.
Show resolved Hide resolved

public class SeekRow implements Function<Table, Long> {
Expand Down Expand Up @@ -46,37 +49,38 @@ public SeekRow(long startingRow, String columnName, Object seekValue, boolean in
}

@Override
@ConcurrentMethod
wusteven815 marked this conversation as resolved.
Show resolved Hide resolved
wusteven815 marked this conversation as resolved.
Show resolved Hide resolved
public Long apply(Table table) {
final int sortDirection = guessSorted(table);
final boolean isSorted = !contains && sortDirection != 0;

final Optional<SortingOrder> order = SortedColumnsAttribute.getOrderForColumn(table, columnName);
final RowSet index = table.getRowSet();
wusteven815 marked this conversation as resolved.
Show resolved Hide resolved
long row;
if (isSorted) {
final Comparable value =
(Comparable) table.getColumnSource(columnName).get((int) index.get((int) startingRow));
final int compareTo =
sortDirection * nullSafeCompare(value, (Comparable) seekValue) * (isBackward ? -1 : 1);
final int start = isBackward ? (int) startingRow + 1 : (int) startingRow;
if (compareTo == 0) {
return startingRow;
} else if (compareTo < 0) {
// value is less than seek value
log.info().append("Value is before: ").append(nullSafeToString(value)).append(" < ")
.append(nullSafeToString(seekValue)).endl();
row = maybeBinarySearch(table, index, sortDirection, start, (int) index.size() - 1);

if (!order.isEmpty()) {
wusteven815 marked this conversation as resolved.
Show resolved Hide resolved
if (isBackward) {
// check prev row
if (startingRow != 0) {
final Comparable prevValue =
(Comparable) table.getColumnSource(columnName).get((int) index.get((int) startingRow - 1));
wusteven815 marked this conversation as resolved.
Show resolved Hide resolved
if (nullSafeCompare(prevValue, (Comparable) seekValue) == 0) {
return startingRow - 1;
}
}
// no values before, loop back around and find the last value
wusteven815 marked this conversation as resolved.
Show resolved Hide resolved
return findEdgeOccurence(table, index, startingRow, index.size() - 1, false,
order.get() == SortingOrder.Ascending);
} else {
log.info().append("Value is after: ").append(nullSafeToString(value)).append(" > ")
.append(nullSafeToString(seekValue)).endl();
row = maybeBinarySearch(table, index, sortDirection, 0, start);
}
if (row >= 0) {
return row;
// check next row
if (startingRow != index.size() - 1) {
final Comparable nextValue =
(Comparable) table.getColumnSource(columnName).get((int) index.get((int) startingRow + 1));
if (nullSafeCompare(nextValue, (Comparable) seekValue) == 0) {
return startingRow + 1;
}
}
// no values after, loop back around and find the first value
return findEdgeOccurence(table, index, 0, startingRow, true, order.get() == SortingOrder.Ascending);
}
// we aren't really sorted
}

long row;
if (isBackward) {
row = findRow(table, index, 0, (int) startingRow);
if (row >= 0) {
Expand Down Expand Up @@ -136,64 +140,44 @@ public Long apply(Table table) {
}
}

private long maybeBinarySearch(Table table, RowSet index, int sortDirection, int start, int end) {
log.info().append("Doing binary search ").append(start).append(", ").append(end).endl();

final ColumnSource columnSource = table.getColumnSource(columnName);

int minBound = start;
int maxBound = end;

Comparable minValue = (Comparable) columnSource.get(index.get(minBound));
Comparable maxValue = (Comparable) columnSource.get(index.get(maxBound));

final Comparable comparableSeek = (Comparable) this.seekValue;

log.info().append("Seek Value ").append(nullSafeToString(comparableSeek)).endl();

if (nullSafeCompare(minValue, comparableSeek) * sortDirection >= 0) {
log.info().append("Less than min ").append(nullSafeToString(comparableSeek)).append(" < ")
.append(nullSafeToString(minValue)).endl();
return minBound;
} else if (nullSafeCompare(maxValue, comparableSeek) * sortDirection <= 0) {
log.info().append("Greater than max: ").append(nullSafeToString(comparableSeek)).append(" < ")
.append(nullSafeToString(maxValue)).endl();
return maxBound;
}


do {
log.info().append("Bounds (").append(minBound).append(", ").append(maxBound).append(")").endl();
if (minBound == maxBound || minBound == maxBound - 1) {
return minBound;
}

if (nullSafeCompare(minValue, maxValue) * sortDirection > 0) {
log.info().append("Not Sorted (").append(minValue.toString()).append(", ").append(maxValue.toString())
.append(")").endl();
// not really sorted
return -1;
}

final int check = (minBound + maxBound) / 2;
final Comparable checkValue = (Comparable) columnSource.get(index.get(check));
// Search up by default, reverse the result to search down
final int compareResult =
nullSafeCompare(checkValue, comparableSeek) * sortDirection * (isBackward ? -1 : 1);
/**
* Finds the first/last occurence of the target value by using binary search
wusteven815 marked this conversation as resolved.
Show resolved Hide resolved
*
* @param table the table to check for sorted-ness
* @param start the starting index to search
* @param end the ending index to search
* @param findFirst whether to find the first or last occurence (false for last)
wusteven815 marked this conversation as resolved.
Show resolved Hide resolved
* @param isAscending whether the table is sorted in ascending order (false for descending)
* @return the index of the first/last occurence of the target value, -1 if not found
*/
private long findEdgeOccurence(Table table, RowSet index, long start, long end, boolean findFirst,
wusteven815 marked this conversation as resolved.
Show resolved Hide resolved
boolean isAscending) {
long result = -1;

log.info().append("Check[").append(check).append("] ").append(checkValue.toString()).append(" -> ")
.append(compareResult).endl();
while (start <= end) {
long mid = start + (end - start) / 2;
Comparable midValue =
(Comparable) table.getColumnSource(columnName).get((int) index.get((int) mid));
int compareResult = nullSafeCompare(midValue, (Comparable) seekValue);

if (compareResult == 0) {
return check;
} else if (compareResult < 0) {
minBound = check;
minValue = checkValue;
result = mid;
if (findFirst) {
end = mid - 1;
} else {
start = mid + 1;
}
} else if ((compareResult < 0 && isAscending) || (compareResult > 0 && !isAscending)) {
// mid less than target and list is ascending
// mid more than target and list is descending
// search right half
start = mid + 1;
} else {
maxBound = check;
maxValue = checkValue;
// other way around, search left half
end = mid - 1;
}
} while (true);
}
return result;
}

int nullSafeCompare(Comparable c1, Comparable c2) {
Expand Down Expand Up @@ -286,71 +270,4 @@ private long findRow(Table table, RowSet index, int start, int end) {

return -1L;
}

/**
* Take a guess as to whether the table is sorted, such that we should do a binary search instead
*
* @param table the table to check for sorted-ness
* @return 0 if the table is not sorted; 1 if might be ascending sorted, -1 if it might be descending sorted.
*/
int guessSorted(Table table) {
final ColumnSource columnSource = table.getColumnSource(columnName);
if (!Comparable.class.isAssignableFrom(columnSource.getType())) {
return 0;
}

RowSet index = table.getRowSet();
if (index.size() > 10000) {
Random random = new Random();
TLongSet set = new TLongHashSet();
long sampleSize = Math.min(index.size() / 4, 10000L);
while (sampleSize > 0) {
final long row = (long) (random.nextDouble() * index.size() - 1);
if (set.add(row)) {
sampleSize--;
}
}
RowSetBuilderRandom builder = RowSetFactory.builderRandom();
set.forEach(row -> {
builder.addKey(table.getRowSet().get(row));
return true;
});
index = builder.build();
}

boolean isAscending = true;
boolean isDescending = true;
boolean first = true;

Object previous = null;
for (RowSet.Iterator it = index.iterator(); it.hasNext();) {
long key = it.nextLong();
Object current = columnSource.get(key);
if (current == previous) {
continue;
}

int compareTo = first ? 0 : nullSafeCompare((Comparable) previous, (Comparable) current);
first = false;

if (compareTo > 0) {
isAscending = false;
} else if (compareTo < 0) {
isDescending = false;
}

if (!isAscending && !isDescending) {
break;
}

previous = current;
}

if (isAscending)
return 1;
else if (isDescending)
return -1;
else
return 0;
}
}
Loading