Skip to content

Commit

Permalink
misc(fuzzer): Support custom result verifier with compare() API in wi…
Browse files Browse the repository at this point in the history
…ndow fuzzer (#12148)

Summary:
Pull Request resolved: #12148

The custom result verifiers of some functions provide a compare() API that can be used to
compare the expected and actual results after a certain transformation. This is needed for
functions like `avg(interval) -> interval` because the result of this function can have a small
precision error that should be tolerated. However, the window fuzzer used to support only
the verify() API of custom result verifiers. This diff makes the window fuzzer use the
compare() API as well when it is available.

Reviewed By: kgpai

Differential Revision: D68507422

fbshipit-source-id: cae34c2936335e5639e5ec7af00fa80c36c39bd6
  • Loading branch information
kagamiori authored and facebook-github-bot committed Jan 23, 2025
1 parent a1ef29d commit 419de77
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 71 deletions.
25 changes: 2 additions & 23 deletions velox/exec/fuzzer/AggregationFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1052,27 +1052,6 @@ void AggregationFuzzer::Stats::print(size_t numIterations) const {
AggregationFuzzerBase::Stats::print(numIterations);
}

namespace {
// Concatenates the rows of every RowVector in 'results', in order, into one
// RowVector created through BaseVector::create with 'pool'. The merged vector
// takes its type from results[0], which is read unconditionally, so 'results'
// must contain at least one element.
RowVectorPtr mergeRowVectors(
    const std::vector<RowVectorPtr>& results,
    velox::memory::MemoryPool* pool) {
  // First pass: find out how many rows the merged vector has to hold.
  int numRows = 0;
  for (const auto& batch : results) {
    numRows += batch->size();
  }

  auto merged =
      BaseVector::create<RowVector>(results[0]->type(), numRows, pool);

  // Second pass: append each batch right after the previously copied rows.
  int nextRow = 0;
  for (const auto& batch : results) {
    const auto batchSize = batch->size();
    merged->copy(batch.get(), nextRow, 0, batchSize);
    nextRow += batchSize;
  }
  return merged;
}

} // namespace

bool AggregationFuzzer::compareEquivalentPlanResults(
const std::vector<PlanWithSplits>& plans,
bool customVerification,
Expand Down Expand Up @@ -1123,8 +1102,8 @@ bool AggregationFuzzer::compareEquivalentPlanResults(

if (referenceResult.first) {
velox::fuzzer::ResultOrError expected;
expected.result =
mergeRowVectors(referenceResult.first.value(), pool_.get());
expected.result = fuzzer::mergeRowVectors(
referenceResult.first.value(), pool_.get());

compare(
resultOrError, customVerification, {customVerifier}, expected);
Expand Down
157 changes: 109 additions & 48 deletions velox/exec/fuzzer/WindowFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,34 @@ void initializeVerifier(
frame,
"w0");
}

// Computes the reference-DB result for 'plan' using 'referenceQueryRunner'.
//
// Template parameters:
//   T               Return type; must match the return type of the helper
//                   selected by 'resultAsVector'.
//   resultAsVector  When true, the result comes from
//                   computeReferenceResultsAsVector(); otherwise from
//                   computeReferenceResults().
//
// If 'referenceQueryRunner' is a PrestoQueryRunner, 'prestoFrameClause' is
// appended to the runner context's windowFrames_ entry for 'windowNodeId'
// before the reference computation, and windowFrames_ is cleared afterwards.
// The clearing happens in a scope guard, so the registry is also reset when
// the reference computation throws and no stale frame clause leaks into later
// calls.
template <typename T, bool resultAsVector>
T getReferenceResult(
    const core::PlanNodePtr& plan,
    core::PlanNodeId windowNodeId,
    const std::string& prestoFrameClause,
    ReferenceQueryRunner* referenceQueryRunner) {
  // Clears the Presto window-frame registry when it goes out of scope. Does
  // nothing when the reference runner is not a PrestoQueryRunner.
  struct WindowFramesGuard {
    PrestoQueryRunner* runner;

    ~WindowFramesGuard() {
      if (runner != nullptr) {
        runner->queryRunnerContext()->windowFrames_.clear();
      }
    }
  };

  auto* prestoQueryRunner =
      dynamic_cast<PrestoQueryRunner*>(referenceQueryRunner);
  // Armed before the registration below so that a failure while registering
  // the frame clause is cleaned up as well.
  const WindowFramesGuard guard{prestoQueryRunner};
  if (prestoQueryRunner != nullptr) {
    prestoQueryRunner->queryRunnerContext()
        ->windowFrames_[windowNodeId]
        .push_back(prestoFrameClause);
  }

  if constexpr (resultAsVector) {
    return computeReferenceResultsAsVector(plan, referenceQueryRunner);
  } else {
    return computeReferenceResults(plan, referenceQueryRunner);
  }
}
} // namespace

bool WindowFuzzer::verifyWindow(
Expand All @@ -689,12 +717,6 @@ bool WindowFuzzer::verifyWindow(
const std::shared_ptr<ResultVerifier>& customVerifier,
bool enableWindowVerification,
const std::string& prestoFrameClause) {
SCOPE_EXIT {
if (customVerifier) {
customVerifier->reset();
}
};

core::PlanNodeId windowNodeId;
auto frame = getFrame(partitionKeys, sortingKeysAndOrders, frameClause);
auto plan = PlanBuilder()
Expand All @@ -707,58 +729,97 @@ bool WindowFuzzer::verifyWindow(
persistReproInfo({{plan, {}}}, reproPersistPath_);
}

bool customVerifierInitialized = false;
if (customVerifier) {
try {
initializeVerifier(
plan,
customVerifier,
input,
partitionKeys,
sortingKeysAndOrders,
frame);
customVerifierInitialized = true;
} catch (...) {
LOG(WARNING) << "Custom verifier initialization failed";
}
}

SCOPE_EXIT {
if (customVerifier) {
customVerifier->reset();
}
};

velox::fuzzer::ResultOrError resultOrError;
try {
resultOrError = execute(plan);
if (resultOrError.exceptionPtr) {
++stats_.numFailed;
}

if (!customVerification) {
if (resultOrError.result && enableWindowVerification) {
auto prestoQueryRunner =
dynamic_cast<PrestoQueryRunner*>(referenceQueryRunner_.get());
bool isPrestoQueryRunner = (prestoQueryRunner != nullptr);
if (isPrestoQueryRunner) {
prestoQueryRunner->queryRunnerContext()
->windowFrames_[windowNodeId]
.push_back(prestoFrameClause);
}
auto referenceResult =
computeReferenceResults(plan, referenceQueryRunner_.get());
if (isPrestoQueryRunner) {
prestoQueryRunner->queryRunnerContext()->windowFrames_.clear();
if (resultOrError.result) {
if (!customVerification) {
if (enableWindowVerification) {
auto referenceResult = getReferenceResult<
std::pair<
std::optional<MaterializedRowMultiset>,
ReferenceQueryErrorCode>,
false>(
plan,
windowNodeId,
prestoFrameClause,
referenceQueryRunner_.get());
stats_.updateReferenceQueryStats(referenceResult.second);
if (auto expectedResult = referenceResult.first) {
++stats_.numVerified;
stats_.verifiedFunctionNames.insert(
retrieveWindowFunctionName(plan)[0]);
VELOX_CHECK(
assertEqualResults(
expectedResult.value(),
plan->outputType(),
{resultOrError.result}),
"Velox and reference DB results don't match");
LOG(INFO) << "Verified results against reference DB";
}
}
stats_.updateReferenceQueryStats(referenceResult.second);
if (auto expectedResult = referenceResult.first) {
++stats_.numVerified;
stats_.verifiedFunctionNames.insert(
retrieveWindowFunctionName(plan)[0]);
VELOX_CHECK(
assertEqualResults(
expectedResult.value(),
plan->outputType(),
{resultOrError.result}),
"Velox and reference DB results don't match");
LOG(INFO) << "Verified results against reference DB";
} else if (referenceQueryRunner_->supportsVeloxVectorResults()) {
if (enableWindowVerification) {
auto referenceResult = getReferenceResult<
std::pair<
std::optional<std::vector<RowVectorPtr>>,
ReferenceQueryErrorCode>,
true>(
plan,
windowNodeId,
prestoFrameClause,
referenceQueryRunner_.get());
stats_.updateReferenceQueryStats(referenceResult.second);
if (auto expectedResult = referenceResult.first) {
++stats_.numVerified;
stats_.verifiedFunctionNames.insert(
retrieveWindowFunctionName(plan)[0]);
velox::fuzzer::ResultOrError expected;
expected.result = fuzzer::mergeRowVectors(
referenceResult.first.value(), pool_.get());

if (customVerifier) {
VELOX_CHECK(customVerifierInitialized);
}
compare(
resultOrError, customVerification, {customVerifier}, expected);
LOG(INFO) << "Verified results against reference DB";
}
}
}
} else {
LOG(INFO) << "Verification through custom verifier";
++stats_.numVerificationSkipped;

if (customVerifier && resultOrError.result) {
VELOX_CHECK(
customVerifier->supportsVerify(),
"Window fuzzer only uses custom verify() methods.");
initializeVerifier(
plan,
customVerifier,
input,
partitionKeys,
sortingKeysAndOrders,
frame);
} else if (customVerifier && customVerifier->supportsVerify()) {
LOG(INFO) << "Verification through custom verifier";
++stats_.numVerificationSkipped;

VELOX_CHECK(customVerifierInitialized);
customVerifier->verify(resultOrError.result);
} else {
LOG(WARNING) << "No Verification Performed";
}
}

Expand Down
17 changes: 17 additions & 0 deletions velox/expression/fuzzer/FuzzerToolkit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,23 @@ void compareVectors(
LOG(INFO) << "Two vectors match.";
}

// Concatenates the rows of every RowVector in 'results', in order, into one
// RowVector created through BaseVector::create with 'pool'. The merged vector
// takes its type from results[0], which is read unconditionally, so 'results'
// must contain at least one element.
RowVectorPtr mergeRowVectors(
    const std::vector<RowVectorPtr>& results,
    velox::memory::MemoryPool* pool) {
  // First pass: find out how many rows the merged vector has to hold.
  int numRows = 0;
  for (const auto& batch : results) {
    numRows += batch->size();
  }

  auto merged =
      BaseVector::create<RowVector>(results[0]->type(), numRows, pool);

  // Second pass: append each batch right after the previously copied rows.
  int nextRow = 0;
  for (const auto& batch : results) {
    const auto batchSize = batch->size();
    merged->copy(batch.get(), nextRow, 0, batchSize);
    nextRow += batchSize;
  }
  return merged;
}

void InputRowMetadata::saveToFile(const char* filePath) const {
std::ofstream outputFile(filePath, std::ofstream::binary);
saveStdVector(columnsToWrapInLazy, outputFile);
Expand Down
5 changes: 5 additions & 0 deletions velox/expression/fuzzer/FuzzerToolkit.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ void compareVectors(
const std::string& rightName = "right",
const std::optional<SelectivityVector>& rows = std::nullopt);

// Merges a vector of RowVectors into one RowVector by copying the rows of each
// input, in order, into a newly created RowVector. The output takes its type
// from results[0], so 'results' must not be empty. 'pool' is the memory pool
// passed to BaseVector::create for the merged vector.
RowVectorPtr mergeRowVectors(
const std::vector<RowVectorPtr>& results,
velox::memory::MemoryPool* pool);

struct InputRowMetadata {
// Column indices to wrap in LazyVector (in a strictly increasing order)
std::vector<int> columnsToWrapInLazy;
Expand Down
14 changes: 14 additions & 0 deletions velox/functions/prestosql/fuzzer/AverageResultVerifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,20 @@ class AverageResultVerifier : public ResultVerifier {
}
}

// Prepares the projections used when comparing results of a window function
// whose result type is interval-day-time. In that case projections_ is set to
// the column names of input[0]'s row type followed by an expression that
// converts the 'windowName' column to milliseconds cast as double. For any
// other result type projections_ is left untouched. The partition keys,
// sorting keys and frame are not used. input[0] is read whenever the result
// type is interval-day-time, so 'input' must not be empty in that case.
void initializeWindow(
    const std::vector<RowVectorPtr>& input,
    const std::vector<std::string>& /*partitionByKeys*/,
    const std::vector<SortingKeyAndOrder>& /*sortingKeysAndOrders*/,
    const core::WindowNode::Function& function,
    const std::string& /*frame*/,
    const std::string& windowName) override {
  const bool returnsInterval =
      function.functionCall->type()->isIntervalDayTime();
  if (!returnsInterval) {
    return;
  }

  // Keep every input column as is and append the converted window column.
  const auto& inputType = input[0]->type();
  projections_ = asRowType(inputType)->names();
  projections_.push_back(
      fmt::format("cast(to_milliseconds({}) as double)", windowName));
}

bool compare(const RowVectorPtr& result, const RowVectorPtr& altResult)
override {
if (projections_.empty()) {
Expand Down
3 changes: 3 additions & 0 deletions velox/functions/prestosql/fuzzer/WindowFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "velox/functions/prestosql/fuzzer/ApproxDistinctResultVerifier.h"
#include "velox/functions/prestosql/fuzzer/ApproxPercentileInputGenerator.h"
#include "velox/functions/prestosql/fuzzer/ApproxPercentileResultVerifier.h"
#include "velox/functions/prestosql/fuzzer/AverageResultVerifier.h"
#include "velox/functions/prestosql/fuzzer/ClassificationAggregationInputGenerator.h"
#include "velox/functions/prestosql/fuzzer/MinMaxInputGenerator.h"
#include "velox/functions/prestosql/fuzzer/WindowOffsetInputGenerator.h"
Expand Down Expand Up @@ -130,6 +131,7 @@ int main(int argc, char** argv) {
// TODO: allow custom result verifiers.
using facebook::velox::exec::test::ApproxDistinctResultVerifier;
using facebook::velox::exec::test::ApproxPercentileResultVerifier;
using facebook::velox::exec::test::AverageResultVerifier;

static const std::unordered_map<
std::string,
Expand All @@ -149,6 +151,7 @@ int main(int argc, char** argv) {
// https://github.com/facebookincubator/velox/issues/6330
{"max_data_size_for_stats", nullptr},
{"sum_data_size_for_stats", nullptr},
{"avg", std::make_shared<AverageResultVerifier>()},
};

static const std::unordered_set<std::string> orderDependentFunctions = {
Expand Down

0 comments on commit 419de77

Please sign in to comment.