From ce17738276469e7da3e89468fbdac171775afdc5 Mon Sep 17 00:00:00 2001 From: howsohazard <143410553+howsohazard@users.noreply.github.com> Date: Thu, 5 Dec 2024 22:26:13 -0500 Subject: [PATCH] 22379: Fixes to performance bug involving union and intersect opcodes (#320) --- .../EvaluableNodeTreeManipulation.cpp | 76 ++++++++++--------- .../EvaluableNodeTreeManipulation.h | 47 ++++++------ 2 files changed, 64 insertions(+), 59 deletions(-) diff --git a/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp index 5b42b4b79..d63d4a89f 100644 --- a/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp +++ b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp @@ -388,7 +388,8 @@ EvaluableNode *EvaluableNodeTreeManipulation::CreateGeneralizedNode(NodesMergeMe return enm->AllocNode(n2); } - auto [node, commonality] = EvaluableNodeTreeManipulation::CommonalityBetweenNodeTypesAndValues(n1, n2); + auto [node, commonality] = EvaluableNodeTreeManipulation::CommonalityBetweenNodeTypesAndValues(n1, n2, + mm->RequireExactMatches()); //if both are nullptr, nothing more to do if(node == nullptr) @@ -694,20 +695,20 @@ EvaluableNodeType EvaluableNodeTreeManipulation::GetRandomEvaluableNodeType(Rand } MergeMetricResults EvaluableNodeTreeManipulation::NumberOfSharedNodes(EvaluableNode *tree1, EvaluableNode *tree2, - MergeMetricResultsCache &memoized, EvaluableNode::ReferenceSetType *checked) + MergeMetricResultsParams &mmrp) { if(tree1 == nullptr && tree2 == nullptr) return MergeMetricResults(1.0, tree1, tree2, false, true); //if the pair of nodes has already been computed, then just return the result - auto found = memoized.find(std::make_pair(tree1, tree2)); - if(found != end(memoized)) + auto found = mmrp.memoizedNodeMergePairs.find(std::make_pair(tree1, tree2)); + if(found != end(mmrp.memoizedNodeMergePairs)) return found->second; - if(checked != nullptr) + if(mmrp.checked != nullptr) { //if either is already checked, then neither adds shared nodes - if(checked->find(tree1) != end(*checked) || checked->find(tree2) != end(*checked)) + if(mmrp.checked->find(tree1) != end(*(mmrp.checked)) || mmrp.checked->find(tree2) != end(*(mmrp.checked))) return MergeMetricResults(0.0, tree1, tree2, false, true); } @@ -715,12 +716,12 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare if(tree1 == tree2) { MergeMetricResults results(static_cast(EvaluableNode::GetDeepSize(tree1)), tree1, tree2, true, true); - memoized.emplace(std::make_pair(tree1, tree2), results); + mmrp.memoizedNodeMergePairs.emplace(std::make_pair(tree1, tree2), results); return results; } //check current top nodes - auto commonality = CommonalityBetweenNodes(tree1, tree2); + auto commonality = CommonalityBetweenNodes(tree1, tree2, mmrp.requireExactMatches); //see if can exit early, before inserting the nodes into the checked list and then removing them size_t tree1_ordered_nodes_size = 0; @@ -741,15 +742,15 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare if(tree1_ordered_nodes_size == 0 && tree2_ordered_nodes_size == 0 && tree1_mapped_nodes_size == 0 && tree2_mapped_nodes_size == 0) { - memoized.emplace(std::make_pair(tree1, tree2), commonality); + mmrp.memoizedNodeMergePairs.emplace(std::make_pair(tree1, tree2), commonality); return commonality; } - if(checked != nullptr) + if(mmrp.checked != nullptr) { //remember that it has already checked when traversing tree, and then remove from checked at the end of the function - checked->insert(tree1); - checked->insert(tree2); + mmrp.checked->insert(tree1); + mmrp.checked->insert(tree2); } if(tree1_ordered_nodes_size > 0 && tree2_ordered_nodes_size > 0) @@ -776,7 +777,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare MergeMetricResults best_match_value(0.0, tree1, tree2, false, false); for(size_t match_index = 0; match_index < a2.size(); match_index++) { - auto match_value = NumberOfSharedNodes(a1_current, a2[match_index], memoized, checked); + auto match_value = NumberOfSharedNodes(a1_current, a2[match_index], mmrp); if(!best_match_found || match_value > best_match_value) { best_match_found = true; @@ -814,17 +815,17 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare { auto smallest_list_size = std::min(size1, size2); if(smallest_list_size >= 1) - commonality += NumberOfSharedNodes(ocn1[0], ocn2[0], memoized, checked); + commonality += NumberOfSharedNodes(ocn1[0], ocn2[0], mmrp); starting_index = 1; } FlatMatrix> sequence_commonality; ComputeSequenceCommonalityMatrix(sequence_commonality, ocn1, ocn2, - [&memoized, checked] + [&mmrp] (EvaluableNode *a, EvaluableNode *b) { - return EvaluableNodeTreeManipulation::NumberOfSharedNodes(a, b, memoized, checked); + return EvaluableNodeTreeManipulation::NumberOfSharedNodes(a, b, mmrp); }, starting_index); commonality += sequence_commonality.At(size1, size2); @@ -842,7 +843,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare auto smallest_list_size = std::min(a1.size(), a2.size()); if(smallest_list_size >= 1) { - commonality += NumberOfSharedNodes(a1[0], a2[0], memoized, checked); + commonality += NumberOfSharedNodes(a1[0], a2[0], mmrp); a1.erase(begin(a1)); a2.erase(begin(a2)); @@ -860,7 +861,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare for(size_t match_index = 0; match_index < a2.size(); match_index += 2) { - auto match_key = NumberOfSharedNodes(a1[0], a2[match_index], memoized, checked); + auto match_key = NumberOfSharedNodes(a1[0], a2[match_index], mmrp); // key match dominates value match if(!best_match_found || match_key > best_match_key) @@ -871,7 +872,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare //count the value node commonality as long as it exists and is nontrivial if(match_key.IsNontrivialMatch() && a1.size() > 1 && a2.size() > match_index + 1) - best_match_value = NumberOfSharedNodes(a1[1], a2[match_index + 1], memoized, checked); + best_match_value = NumberOfSharedNodes(a1[1], a2[match_index + 1], mmrp); else best_match_value = MergeMetricResults(0.0, nullptr, nullptr, false, false); } @@ -906,7 +907,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare //use size of smallest list auto smallest_list_size = std::min(ocn1.size(), ocn2.size()); for(size_t i = 0; i < smallest_list_size; i++) - commonality += NumberOfSharedNodes(ocn1[i], ocn2[i], memoized, checked); + commonality += NumberOfSharedNodes(ocn1[i], ocn2[i], mmrp); break; } @@ -924,7 +925,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare if(other_node == end(tree_2_mcn)) continue; - commonality += NumberOfSharedNodes(node, other_node->second, memoized, checked); + commonality += NumberOfSharedNodes(node, other_node->second, mmrp); } } @@ -935,7 +936,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare { for(auto node : tree1->GetOrderedChildNodesReference()) { - auto sub_match = NumberOfSharedNodes(node, tree2, memoized, checked); + auto sub_match = NumberOfSharedNodes(node, tree2, mmrp); //mark as nonexact match because had to traverse downward, // but preserve whether was an exact match for early stopping @@ -944,7 +945,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare if(sub_match > commonality) { commonality = sub_match; - memoized.emplace(std::make_pair(node, tree2), commonality); + mmrp.memoizedNodeMergePairs.emplace(std::make_pair(node, tree2), commonality); if(exact_match) break; } @@ -954,7 +955,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare { for(auto &[node_id, node] : tree1->GetMappedChildNodesReference()) { - auto sub_match = NumberOfSharedNodes(node, tree2, memoized, checked); + auto sub_match = NumberOfSharedNodes(node, tree2, mmrp); //mark as nonexact match because had to traverse downward, // but preserve whether was an exact match for early stopping @@ -963,7 +964,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare if(sub_match > commonality) { commonality = sub_match; - memoized.emplace(std::make_pair(node, tree2), commonality); + mmrp.memoizedNodeMergePairs.emplace(std::make_pair(node, tree2), commonality); if(exact_match) break; } @@ -978,7 +979,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare { for(auto node : tree2->GetOrderedChildNodesReference()) { - auto sub_match = NumberOfSharedNodes(tree1, node, memoized, checked); + auto sub_match = NumberOfSharedNodes(tree1, node, mmrp); //mark as nonexact match because had to traverse downward, // but preserve whether was an exact match for early stopping @@ -987,7 +988,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare if(sub_match > commonality) { commonality = sub_match; - memoized.emplace(std::make_pair(tree1, node), commonality); + mmrp.memoizedNodeMergePairs.emplace(std::make_pair(tree1, node), commonality); if(exact_match) break; } @@ -997,7 +998,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare { for(auto &[node_id, node] : tree2->GetMappedChildNodesReference()) { - auto sub_match = NumberOfSharedNodes(tree1, node, memoized, checked); + auto sub_match = NumberOfSharedNodes(tree1, node, mmrp); //mark as nonexact match because had to traverse downward, // but preserve whether was an exact match for early stopping @@ -1006,7 +1007,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare if(sub_match > commonality) { commonality = sub_match; - memoized.emplace(std::make_pair(tree1, node), commonality); + mmrp.memoizedNodeMergePairs.emplace(std::make_pair(tree1, node), commonality); if(exact_match) break; } @@ -1014,14 +1015,14 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare } } - if(checked != nullptr) + if(mmrp.checked != nullptr) { //remove from the checked list so don't block other traversals - checked->erase(tree1); - checked->erase(tree2); + mmrp.checked->erase(tree1); + mmrp.checked->erase(tree2); } - memoized.emplace(std::make_pair(tree1, tree2), commonality); + mmrp.memoizedNodeMergePairs.emplace(std::make_pair(tree1, tree2), commonality); return commonality; } @@ -1207,14 +1208,15 @@ bool EvaluableNodeTreeManipulation::CollectLabelIndexesFromTreeAndMakeLabelNorma return collected_all_label_values; } -MergeMetricResults EvaluableNodeTreeManipulation::CommonalityBetweenNodes(EvaluableNode *n1, EvaluableNode *n2) +MergeMetricResults EvaluableNodeTreeManipulation::CommonalityBetweenNodes( + EvaluableNode *n1, EvaluableNode *n2, bool require_exact_matches) { if(n1 == nullptr && n2 == nullptr) return MergeMetricResults(1.0, n1, n2, false, true); auto [num_common_labels, num_unique_labels] = EvaluableNode::GetNodeCommonAndUniqueLabelCounts(n1, n2); - auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(n1, n2); + auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(n1, n2, require_exact_matches); bool must_match = (num_unique_labels == 0 && num_common_labels > 0); bool exact_match = (num_unique_labels == 0 && commonality == 1.0); @@ -1222,7 +1224,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::CommonalityBe } std::pair EvaluableNodeTreeManipulation::CommonalityBetweenNodeTypesAndValues( - EvaluableNode *n1, EvaluableNode *n2, bool require_exact_node_match) + EvaluableNode *n1, EvaluableNode *n2, bool require_exact_matches) { bool n1_null = EvaluableNode::IsNull(n1); bool n2_null = EvaluableNode::IsNull(n2); @@ -1239,7 +1241,7 @@ std::pair EvaluableNodeTreeManipulation::CommonalityBet auto n2_type = n2->GetType(); //can have much faster and lighter computations if only checking for exact matches - if(require_exact_node_match) + if(require_exact_matches) { if(n1_type != n2_type) return std::make_pair(n1, 0.0); diff --git a/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h index 976e72637..c5c6cca5f 100644 --- a/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h +++ b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h @@ -45,7 +45,12 @@ constexpr bool operator==(const std::pair &a, const std::pair, MergeMetricResults> MergeMetricResultsCache; +struct MergeMetricResultsParams +{ + EvaluableNode::ReferenceSetType *checked; + FastHashMap, MergeMetricResults> memoizedNodeMergePairs; + bool requireExactMatches; +}; //returns a commonality measure of difference between numbers a and b in [0,1] //if the numbers are equal, returns 1, returns closer to 0 the less similar they are @@ -122,15 +127,7 @@ class EvaluableNodeTreeManipulation virtual MergeMetricResults MergeMetric(EvaluableNode *a, EvaluableNode *b) { - if((a != nullptr && a->GetNeedCycleCheck()) || (b != nullptr && b->GetNeedCycleCheck())) - { - EvaluableNode::ReferenceSetType checked; - return NumberOfSharedNodes(a, b, memoizedMergeMetricResults, &checked); - } - else //don't need to check for cycles - { - return NumberOfSharedNodes(a, b, memoizedMergeMetricResults, nullptr); - } + return NumberOfSharedNodes(a, b, requireExactMatches); } virtual EvaluableNode *MergeValues(EvaluableNode *a, EvaluableNode *b, bool must_merge = false) @@ -168,7 +165,6 @@ class EvaluableNodeTreeManipulation bool keepAllOfBoth; bool requireExactMatches; EvaluableNode::ReferenceAssocType references; - MergeMetricResultsCache memoizedMergeMetricResults; }; //functionality to mix nodes @@ -422,9 +418,10 @@ class EvaluableNodeTreeManipulation } //computes the edit distance between the two trees - static double EditDistance(EvaluableNode *tree1, EvaluableNode *tree2) + // If require_exact_matches is true, then it will only compare nodes that match exactly + static double EditDistance(EvaluableNode *tree1, EvaluableNode *tree2, bool require_exact_matches = false) { - auto shared_nodes = NumberOfSharedNodes(tree1, tree2); + auto shared_nodes = NumberOfSharedNodes(tree1, tree2, require_exact_matches); size_t tree_1_size = EvaluableNode::GetDeepSize(tree1); size_t tree_2_size = EvaluableNode::GetDeepSize(tree2); @@ -433,25 +430,29 @@ class EvaluableNodeTreeManipulation } //Computes the total number of nodes in both trees that are equal - inline static MergeMetricResults NumberOfSharedNodes(EvaluableNode *tree1, EvaluableNode *tree2) + // If require_exact_matches is true, then it will only compare nodes that match exactly + inline static MergeMetricResults NumberOfSharedNodes( + EvaluableNode *tree1, EvaluableNode *tree2, bool require_exact_matches = false) { - MergeMetricResultsCache memoized; + MergeMetricResultsParams mmrp; + mmrp.requireExactMatches = require_exact_matches; if((tree1 != nullptr && tree1->GetNeedCycleCheck()) || (tree2 != nullptr && tree2->GetNeedCycleCheck())) { EvaluableNode::ReferenceSetType checked; - return NumberOfSharedNodes(tree1, tree2, memoized, &checked); + mmrp.checked = &checked; + return NumberOfSharedNodes(tree1, tree2, mmrp); } else //don't need to check for cycles { - return NumberOfSharedNodes(tree1, tree2, memoized, nullptr); + mmrp.checked = nullptr; + return NumberOfSharedNodes(tree1, tree2, mmrp); } } //Returns the total number of nodes in both trees that are equal, ignoring those in the checked set // Assists the public function NumberOfSharedNodes - //if checked is nullptr, then it will not keep track of the nodes, which can be done if neither needs cycle checks static MergeMetricResults NumberOfSharedNodes(EvaluableNode *tree1, EvaluableNode *tree2, - MergeMetricResultsCache &memoized, EvaluableNode::ReferenceSetType *checked); + MergeMetricResultsParams &mmrp); //Recursively traverses tree, storing any nodes with labels into an index map, and returning the map, // as well as a flag indicating true if it was able to just retrieve the labels, or false @@ -543,16 +544,18 @@ class EvaluableNodeTreeManipulation EvaluableNode *replacement, EvaluableNode::ReferenceSetType &checked); //Evaluates commonality metric between the two nodes passed in, including labels. 1.0 if identical, 0.0 if completely different, and some value between if similar - static MergeMetricResults CommonalityBetweenNodes(EvaluableNode *n1, EvaluableNode *n2); + // If require_exact_matches is true, then it will only return 1.0 or 0.0 + static MergeMetricResults CommonalityBetweenNodes( + EvaluableNode *n1, EvaluableNode *n2, bool require_exact_matches = false); //Evaluates the functional commonality between the types and immediate values of n1 and n2 (excluding labels, comments, etc.) // Returns a pair: the first value is the more general of the two nodes and the second is a commonality value // The more general of the two nodes will be the one whose type is more general // The commonality metric will return 1.0 if identical, 0.0 if completely different, and some value between if similar - // If require_exact_node_match is true, then it will only return 1.0 or 0.0 + // If require_exact_matches is true, then it will only return 1.0 or 0.0 //The EvaluableNode * returned should not be modified, nor should it be included in any data outside the scope of the caller static std::pair CommonalityBetweenNodeTypesAndValues( - EvaluableNode *n1, EvaluableNode *n2, bool require_exact_node_match = false); + EvaluableNode *n1, EvaluableNode *n2, bool require_exact_matches = false); //Mutates the current node n, changing its type or value, based on the mutation_rate // strings contains a list of strings to likely choose from if mutating to a string value