Skip to content

Commit

Permalink
fix: correct floating point ChunkHasher (deephaven#5778)
Browse files Browse the repository at this point in the history
This adds documentation and a new hashCode method to the
`io.deephaven.util.compare` package that is consistent with Deephaven
equality. Specifically, this ensures that the floating point values
-0.0 and 0.0 hash to the same value.

Testing was added around aggregation keys, join keys, aggregations,
min/max formulas, and sort results. Of note is that unique and distinct
aggregations currently rely on
`io.deephaven.chunk.WritableChunk#sort()`, which treats -0.0 < 0.0. This
affects the encounter order returned by these aggregations, and could be
seen as inconsistent with our stated goals of treating -0.0 == 0.0. This
does not affect the consistency of the sort operation. It is hard to be
confident that the testing coverage around this issue is fully complete;
that said, this does make us much more consistent in the tested
operations wrt Deephaven equality.

This is a prerequisite for deephaven#5605

Fixes deephaven#3768
  • Loading branch information
devinrsmith committed Jul 18, 2024
1 parent 90b9283 commit 2ea2363
Show file tree
Hide file tree
Showing 33 changed files with 1,956 additions and 168 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
//
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
package io.deephaven.util.compare;

public class BooleanComparisons {

/**
* Compares two booleans with {@code false} before {@code true}.
*
* @param lhs the first value
* @param rhs the second value
* @return the value {@code 0} if {@code lhs} is equal to {@code rhs}; a value less than {@code 0} if {@code lhs} is
* less than {@code rhs}; and a value greater than {@code 0} if {@code lhs} is greater than {@code rhs}
*/
public static int compare(boolean lhs, boolean rhs) {
return Boolean.compare(lhs, rhs);
}

/**
* Compare two booleans for equality consistent with {@link #compare(boolean, boolean)}; that is
* {@code compare(lhs, rhs) == 0 ⇒ eq(lhs, rhs)} and {@code compare(lhs, rhs) != 0 ⇒ !eq(lhs, rhs)}.
*
* <p>
* Logically equivalent to {@code compare(lhs, rhs) == 0}.
*
* @param lhs the first value
* @param rhs the second value
* @return {@code true} if the values are equal, {@code false} otherwise
*/
public static boolean eq(boolean lhs, boolean rhs) {
return lhs == rhs;
}

/**
* Returns a hash code for a {@code boolean} value consistent with {@link #eq(boolean, boolean)}; that is,
* {@code eq(x, y) ⇒ hashCode(x) == hashCode(y)}.
*
* @param x the value to hash
* @return a hash code value for a {@code boolean} value
*/
public static int hashCode(boolean x) {
return Boolean.hashCode(x);
}

/**
* Logically equivalent to {@code compare(lhs, rhs) > 0}.
*
* @param lhs the first value
* @param rhs the second value
* @return {@code true} iff {@code lhs} is greater than {@code rhs}
*/
public static boolean gt(boolean lhs, boolean rhs) {
return compare(lhs, rhs) > 0;
}

/**
* Logically equivalent to {@code compare(lhs, rhs) < 0}.
*
* @param lhs the first value
* @param rhs the second value
* @return {@code true} iff {@code lhs} is less than {@code rhs}
*/
public static boolean lt(boolean lhs, boolean rhs) {
return compare(lhs, rhs) < 0;
}

/**
* Logically equivalent to {@code compare(lhs, rhs) >= 0}.
*
* @param lhs the first value
* @param rhs the second value
* @return {@code true} iff {@code lhs} is greater than or equal to {@code rhs}
*/
public static boolean geq(boolean lhs, boolean rhs) {
return compare(lhs, rhs) >= 0;
}

/**
* Logically equivalent to {@code compare(lhs, rhs) <= 0}.
*
* @param lhs the first value
* @param rhs the second value
* @return {@code true} iff {@code lhs} is less than or equal to {@code rhs}
*/
public static boolean leq(boolean lhs, boolean rhs) {
return compare(lhs, rhs) <= 0;
}
}
73 changes: 69 additions & 4 deletions Util/src/main/java/io/deephaven/util/compare/ByteComparisons.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,94 @@
//
package io.deephaven.util.compare;

import io.deephaven.util.QueryConstants;

public class ByteComparisons {

/**
* Compares two bytes according to the following rules:
*
* <ul>
* <li>{@link QueryConstants#NULL_BYTE} is less than all other {@code byte} values</li>
* <li>Otherwise, normal {@code byte} comparison logic is used</li>
* </ul>
*
* @param lhs the first value
* @param rhs the second value
* @return the value {@code 0} if {@code lhs} is equal to {@code rhs}; a value less than {@code 0} if {@code lhs} is
* less than {@code rhs}; and a value greater than {@code 0} if {@code lhs} is greater than {@code rhs}
*/
public static int compare(byte lhs, byte rhs) {
return Byte.compare(lhs, rhs);
}

/**
* Compare two bytes for equality consistent with {@link #compare(byte, byte)}; that is
* {@code compare(lhs, rhs) == 0 ⇒ eq(lhs, rhs)} and {@code compare(lhs, rhs) != 0 ⇒ !eq(lhs, rhs)}.
*
* <p>
* Logically equivalent to {@code compare(lhs, rhs) == 0}.
*
* @param lhs the first value
* @param rhs the second value
* @return {@code true} if the values are equal, {@code false} otherwise
*/
public static boolean eq(byte lhs, byte rhs) {
return lhs == rhs;
}

/**
* Returns a hash code for a {@code byte} value consistent with {@link #eq(byte, byte)}; that is,
* {@code eq(x, y) ⇒ hashCode(x) == hashCode(y)}.
*
* @param x the value to hash
* @return a hash code value for a {@code byte} value
*/
public static int hashCode(byte x) {
return Byte.hashCode(x);
}

/**
* Logically equivalent to {@code compare(lhs, rhs) > 0}.
*
* @param lhs the first value
* @param rhs the second value
* @return {@code true} iff {@code lhs} is greater than {@code rhs}
*/
public static boolean gt(byte lhs, byte rhs) {
return compare(lhs, rhs) > 0;
return lhs > rhs;
}

/**
* Logically equivalent to {@code compare(lhs, rhs) < 0}.
*
* @param lhs the first value
* @param rhs the second value
* @return {@code true} iff {@code lhs} is less than {@code rhs}
*/
public static boolean lt(byte lhs, byte rhs) {
return compare(lhs, rhs) < 0;
return lhs < rhs;
}

/**
* Logically equivalent to {@code compare(lhs, rhs) >= 0}.
*
* @param lhs the first value
* @param rhs the second value
* @return {@code true} iff {@code lhs} is greater than or equal to {@code rhs}
*/
public static boolean geq(byte lhs, byte rhs) {
return compare(lhs, rhs) >= 0;
return lhs >= rhs;
}

/**
* Logically equivalent to {@code compare(lhs, rhs) <= 0}.
*
* @param lhs the first value
* @param rhs the second value
* @return {@code true} iff {@code lhs} is less than or equal to {@code rhs}
*/
public static boolean leq(byte lhs, byte rhs) {
return compare(lhs, rhs) <= 0;
return lhs <= rhs;
}
}
79 changes: 75 additions & 4 deletions Util/src/main/java/io/deephaven/util/compare/CharComparisons.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,23 @@

public class CharComparisons {

/**
* Compares two chars according to the following rules:
*
* <ul>
* <li>{@link QueryConstants#NULL_CHAR} is less than all other {@code char} values</li>
* <li>Otherwise, normal {@code char} comparison logic is used</li>
* </ul>
*
* <p>
* Note: this differs from the Java language numerical comparison operators {@code <, <=, >=, >} and
* {@link Character#compare(char, char)}.
*
* @param lhs the first value
* @param rhs the second value
* @return the value {@code 0} if {@code lhs} is equal to {@code rhs}; a value less than {@code 0} if {@code lhs} is
* less than {@code rhs}; and a value greater than {@code 0} if {@code lhs} is greater than {@code rhs}
*/
public static int compare(char lhs, char rhs) {
if (lhs == rhs) {
return 0;
Expand All @@ -20,23 +37,77 @@ public static int compare(char lhs, char rhs) {
return Character.compare(lhs, rhs);
}

/**
 * Tests two chars for equality in a way that agrees with {@link #compare(char, char)}: the result is {@code true}
 * exactly when {@code compare(lhs, rhs) == 0}.
 *
 * @param lhs the left-hand operand
 * @param rhs the right-hand operand
 * @return {@code true} when the operands are equal, {@code false} otherwise
 */
public static boolean eq(char lhs, char rhs) {
    final boolean sameValue = lhs == rhs;
    return sameValue;
}

/**
 * Computes a hash code for a {@code char} that agrees with {@link #eq(char, char)}: whenever {@code eq(x, y)}
 * holds, {@code hashCode(x) == hashCode(y)}.
 *
 * @param x the value to hash
 * @return the hash code of {@code x}
 */
public static int hashCode(char x) {
    final int code = Character.hashCode(x);
    return code;
}

/**
 * Logically equivalent to {@code compare(lhs, rhs) > 0}.
 *
 * @param lhs the first value
 * @param rhs the second value
 * @return {@code true} iff {@code lhs} is greater than {@code rhs}
 */
public static boolean gt(char lhs, char rhs) {
    // Defined as the negation of leq rather than through compare(...).
    return !leq(lhs, rhs);
}

/**
 * Logically equivalent to {@code compare(lhs, rhs) < 0}.
 *
 * @param lhs the first value
 * @param rhs the second value
 * @return {@code true} iff {@code lhs} is less than {@code rhs}
 */
public static boolean lt(char lhs, char rhs) {
    // lhs is smaller when it is numerically lower or is the null sentinel; nothing is smaller than a null rhs.
    return (lhs < rhs || lhs == QueryConstants.NULL_CHAR) && rhs != QueryConstants.NULL_CHAR;
}

/**
 * Logically equivalent to {@code compare(lhs, rhs) >= 0}.
 *
 * @param lhs the first value
 * @param rhs the second value
 * @return {@code true} iff {@code lhs} is greater than or equal to {@code rhs}
 */
public static boolean geq(char lhs, char rhs) {
    // Defined as the negation of lt rather than through compare(...).
    return !lt(lhs, rhs);
}

/**
 * Logically equivalent to {@code compare(lhs, rhs) <= 0}.
 *
 * @param lhs the first value
 * @param rhs the second value
 * @return {@code true} iff {@code lhs} is less than or equal to {@code rhs}
 */
public static boolean leq(char lhs, char rhs) {
    // A null lhs is always <= rhs; otherwise use numeric order, except that a non-null lhs is never <= a null rhs.
    return (lhs <= rhs && rhs != QueryConstants.NULL_CHAR) || lhs == QueryConstants.NULL_CHAR;
}
}
Loading

0 comments on commit 2ea2363

Please sign in to comment.