Skip to content

Commit

Permalink
Implement Unordered Comparison Function
Browse files Browse the repository at this point in the history
  • Loading branch information
athmaja-n committed Sep 2, 2024
1 parent 6e52cbd commit 6059b1c
Show file tree
Hide file tree
Showing 9 changed files with 71 additions and 65 deletions.
5 changes: 1 addition & 4 deletions velox/exec/fuzzer/AggregationFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ namespace facebook::velox::exec::test {
/// Runs the aggregation fuzzer.
/// @param signatureMap Map of all aggregate function signatures.
/// @param seed Random seed - Pass the same seed for reproducibility.
/// @param orderDependentFunctions Map of functions that depend on order of
/// input.
/// @param orderableGroupKeys Whether group keys must be orderable or be just
/// comparable.
/// @param planPath Path to persisted plan information. If this is
Expand All @@ -36,8 +34,7 @@ namespace facebook::velox::exec::test {
void aggregateFuzzer(
AggregateFunctionSignatureMap signatureMap,
size_t seed,
const std::unordered_map<std::string, std::shared_ptr<ResultVerifier>>&
orderDependentFunctions,
const std::unordered_map<std::string, std::shared_ptr<ResultVerifier>>& customVerificationFunctions,
const std::unordered_map<std::string, std::shared_ptr<InputGenerator>>&
customInputGenerators,
VectorFuzzer::Options::TimestampPrecision timestampPrecision,
Expand Down
12 changes: 11 additions & 1 deletion velox/exec/fuzzer/AggregationFuzzerBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,9 +523,19 @@ void AggregationFuzzerBase::compare(
return;
}

MaterializedRowMultiset expectedRows(
materialize(expected.result).begin(), materialize(expected.result).end());
MaterializedRowMultiset actualRows(
materialize(actual.result).begin(), materialize(actual.result).end());

if (!customVerification) {
VELOX_CHECK(
assertEqualResults({expected.result}, {actual.result}),
assertUnorderedEqualResults(
expectedRows,
expected.result->type(),
actualRows,
actual.result->type(),
"Logically equivalent plans produced different results"),
"Logically equivalent plans produced different results");
return;
}
Expand Down
2 changes: 0 additions & 2 deletions velox/exec/fuzzer/AggregationFuzzerOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ struct AggregationFuzzerOptions {
std::unordered_map<std::string, std::shared_ptr<InputGenerator>>
customInputGenerators;

std::unordered_set<std::string> orderDependentFunctions;

/// Timestamp precision to use when generating inputs of type TIMESTAMP.
VectorFuzzer::Options::TimestampPrecision timestampPrecision{
VectorFuzzer::Options::TimestampPrecision::kMilliSeconds};
Expand Down
27 changes: 14 additions & 13 deletions velox/exec/fuzzer/WindowFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,6 @@ void WindowFuzzer::go() {
if (customVerification) {
customVerifier = customVerificationFunctions_.at(signature.name);
}
const bool requireSortedInput =
orderDependentFunctions_.count(signature.name) != 0;

std::vector<TypePtr> argTypes = signature.args;
std::vector<std::string> argNames = makeNames(argTypes.size());
Expand All @@ -252,12 +250,6 @@ void WindowFuzzer::go() {
generateFrameClause(argNames, argTypes, isRowsFrame);
const auto input = generateInputDataWithRowNumber(
argNames, argTypes, partitionKeys, signature);
// If the function is order-dependent or uses "rows" frame, sort all input
// rows by row_number additionally.
if (requireSortedInput || isRowsFrame) {
sortingKeysAndOrders.emplace_back("row_number", core::kAscNullsLast);
++stats_.numSortedInputs;
}

logVectors(input);

Expand Down Expand Up @@ -429,11 +421,22 @@ bool WindowFuzzer::verifyWindow(
++stats_.numVerified;
stats_.verifiedFunctionNames.insert(
retrieveWindowFunctionName(plan)[0]);
// Convert expected results from vector to multiset
MaterializedRowMultiset expectedMultiset(
expectedResult.value().begin(), expectedResult.value().end());

// Convert actual results from vector to multiset
MaterializedRowMultiset actualMultiset(
materialize({resultOrError.result}).begin(),
materialize({resultOrError.result}).end());

// Now call the assert function with the correct types
VELOX_CHECK(
assertEqualResults(
expectedResult.value(),
assertUnorderedEqualResults(
expectedMultiset,
plan->outputType(),
{resultOrError.result}),
actualMultiset,
plan->outputType()),
"Velox and reference DB results don't match");
LOG(INFO) << "Verified results against reference DB";
}
Expand Down Expand Up @@ -484,7 +487,6 @@ void windowFuzzer(
customVerificationFunctions,
const std::unordered_map<std::string, std::shared_ptr<InputGenerator>>&
customInputGenerators,
const std::unordered_set<std::string>& orderDependentFunctions,
VectorFuzzer::Options::TimestampPrecision timestampPrecision,
const std::unordered_map<std::string, std::string>& queryConfigs,
const std::unordered_map<std::string, std::string>& hiveConfigs,
Expand All @@ -497,7 +499,6 @@ void windowFuzzer(
seed,
customVerificationFunctions,
customInputGenerators,
orderDependentFunctions,
timestampPrecision,
queryConfigs,
hiveConfigs,
Expand Down
30 changes: 13 additions & 17 deletions velox/exec/fuzzer/WindowFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,19 @@ namespace facebook::velox::exec::test {
class WindowFuzzer : public AggregationFuzzerBase {
public:
WindowFuzzer(
AggregateFunctionSignatureMap aggregationSignatureMap,
WindowFunctionMap windowSignatureMap,
size_t seed,
const std::unordered_map<std::string, std::shared_ptr<ResultVerifier>>&
customVerificationFunctions,
const std::unordered_map<std::string, std::shared_ptr<InputGenerator>>&
customInputGenerators,
const std::unordered_set<std::string>& orderDependentFunctions,
VectorFuzzer::Options::TimestampPrecision timestampPrecision,
const std::unordered_map<std::string, std::string>& queryConfigs,
const std::unordered_map<std::string, std::string>& hiveConfigs,
bool orderableGroupKeys,
std::unique_ptr<ReferenceQueryRunner> referenceQueryRunner)
: AggregationFuzzerBase{seed, customVerificationFunctions, customInputGenerators, timestampPrecision, queryConfigs, hiveConfigs, orderableGroupKeys, std::move(referenceQueryRunner)},
orderDependentFunctions_{orderDependentFunctions} {
AggregateFunctionSignatureMap aggregationSignatureMap,
WindowFunctionMap windowSignatureMap,
size_t seed,
const std::unordered_map<std::string, std::shared_ptr<ResultVerifier>>&
customVerificationFunctions,
const std::unordered_map<std::string, std::shared_ptr<InputGenerator>>&
customInputGenerators,
VectorFuzzer::Options::TimestampPrecision timestampPrecision,
const std::unordered_map<std::string, std::string>& queryConfigs,
const std::unordered_map<std::string, std::string>& hiveConfigs,
bool orderableGroupKeys,
std::unique_ptr<ReferenceQueryRunner> referenceQueryRunner)
: AggregationFuzzerBase{seed, customVerificationFunctions, customInputGenerators, timestampPrecision, queryConfigs, hiveConfigs, orderableGroupKeys, std::move(referenceQueryRunner)} {
VELOX_CHECK(
!aggregationSignatureMap.empty() || !windowSignatureMap.empty(),
"No function signatures available.");
Expand Down Expand Up @@ -122,7 +120,6 @@ class WindowFuzzer : public AggregationFuzzerBase {
const std::shared_ptr<ResultVerifier>& customVerifier,
const velox::fuzzer::ResultOrError& expected);

const std::unordered_set<std::string> orderDependentFunctions_;

struct Stats : public AggregationFuzzerBase::Stats {
std::unordered_set<std::string> verifiedFunctionNames;
Expand All @@ -149,7 +146,6 @@ void windowFuzzer(
customVerificationFunctions,
const std::unordered_map<std::string, std::shared_ptr<InputGenerator>>&
customInputGenerators,
const std::unordered_set<std::string>& orderDependentFunctions,
VectorFuzzer::Options::TimestampPrecision timestampPrecision,
const std::unordered_map<std::string, std::string>& queryConfigs,
const std::unordered_map<std::string, std::string>& hiveConfigs,
Expand Down
1 change: 0 additions & 1 deletion velox/exec/fuzzer/WindowFuzzerRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ class WindowFuzzerRunner {
seed,
options.customVerificationFunctions,
options.customInputGenerators,
options.orderDependentFunctions,
options.timestampPrecision,
options.queryConfigs,
options.hiveConfigs,
Expand Down
25 changes: 25 additions & 0 deletions velox/exec/tests/utils/QueryAssertions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1068,6 +1068,31 @@ void assertEqualTypeAndNumRows(
EXPECT_EQ(expectedNumRows, actualNumRows);
}

bool assertUnorderedEqualResults(
const MaterializedRowMultiset& expectedRows,
const TypePtr& expectedType,
const MaterializedRowMultiset& actualRows,
const TypePtr& actualType,
const std::string& message) {
// Compare type kinds to ensure compatibility.
if (!equalTypeKinds(*expectedRows.begin(), *actualRows.begin())) {
ADD_FAILURE() << "Types of expected and actual results do not match: "
<< toTypeString(*expectedRows.begin()) << " vs. "
<< toTypeString(*actualRows.begin());
return false;
}

// If no floating-point columns are involved, perform a direct comparison.
if (!compareMaterializedRows(expectedRows, actualRows)) {
ADD_FAILURE() << generateUserFriendlyDiff(
expectedRows, actualRows, expectedType)
<< message;
return false;
}

return true;
}

/// Returns the number of floating-point columns and a list of columns indices
/// with floating-point columns placed at the end.
std::tuple<uint32_t, std::vector<velox::column_index_t>>
Expand Down
7 changes: 7 additions & 0 deletions velox/exec/tests/utils/QueryAssertions.h
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,13 @@ void assertEqualTypeAndNumRows(
vector_size_t expectedNumRows,
const std::vector<RowVectorPtr>& actual);

bool assertUnorderedEqualResults(
const MaterializedRowMultiset& expectedRows,
const TypePtr& expectedType,
const MaterializedRowMultiset& actualRows,
const TypePtr& actualType,
const std::string& message = "");

void printResults(const RowVectorPtr& result, std::ostream& out);

/// Aggregates operator stats by operator type. If a task has more than one plan
Expand Down
27 changes: 0 additions & 27 deletions velox/functions/prestosql/fuzzer/WindowFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,32 +140,6 @@ int main(int argc, char** argv) {
{"sum_data_size_for_stats", nullptr},
};

static const std::unordered_set<std::string> orderDependentFunctions = {
// Window functions.
"first_value",
"last_value",
"nth_value",
"ntile",
"lag",
"lead",
"row_number",
"cume_dist",
"rank",
"dense_rank",
"percent_rank",
// Aggregation functions.
"any_value",
"arbitrary",
"array_agg",
"set_agg",
"set_union",
"map_agg",
"map_union",
"map_union_sum",
"max_by",
"min_by",
"multimap_agg",
};

using Runner = facebook::velox::exec::test::WindowFuzzerRunner;
using Options = facebook::velox::exec::test::AggregationFuzzerOptions;
Expand All @@ -177,7 +151,6 @@ int main(int argc, char** argv) {
options.customVerificationFunctions = customVerificationFunctions;
options.customInputGenerators =
facebook::velox::exec::test::getCustomInputGenerators();
options.orderDependentFunctions = orderDependentFunctions;
options.timestampPrecision =
facebook::velox::VectorFuzzer::Options::TimestampPrecision::kMilliSeconds;
std::shared_ptr<facebook::velox::memory::MemoryPool> rootPool{
Expand Down

0 comments on commit 6059b1c

Please sign in to comment.