From e17f52b8baf84757d2e312725fe2f62368fcee53 Mon Sep 17 00:00:00 2001 From: howsohazard <143410553+howsohazard@users.noreply.github.com> Date: Mon, 21 Oct 2024 21:34:39 -0400 Subject: [PATCH] 21953: Fixes bugs in merging code, especially regarding label matches (#295) --- src/Amalgam/Merger.h | 6 +- src/Amalgam/entity/EntityManipulation.cpp | 4 - src/Amalgam/evaluablenode/EvaluableNode.cpp | 40 +++---- src/Amalgam/evaluablenode/EvaluableNode.h | 4 +- .../EvaluableNodeTreeManipulation.cpp | 21 +--- src/Amalgam/out.txt | 111 ++++++++---------- 6 files changed, 80 insertions(+), 106 deletions(-) diff --git a/src/Amalgam/Merger.h b/src/Amalgam/Merger.h index 5ac25318..f3c7eacc 100644 --- a/src/Amalgam/Merger.h +++ b/src/Amalgam/Merger.h @@ -44,15 +44,13 @@ class MergeMetricResultsBase // if require_nontrivial_match, then it requires at least one node or atomic value to be equal constexpr bool IsBetterMatchThan(const MergeMetricResultsBase &mmr) { - if(mmr.mustMatch) - return false; - if(mustMatch) + if(mustMatch && !mmr.mustMatch) return true; //if same amount of commonality, prefer exact matches if(commonality == mmr.commonality) { - if(mmr.exactMatch && !exactMatch) + if(!exactMatch && mmr.exactMatch) return false; if(exactMatch && !mmr.exactMatch) return true; diff --git a/src/Amalgam/entity/EntityManipulation.cpp b/src/Amalgam/entity/EntityManipulation.cpp index df93451f..03815de5 100644 --- a/src/Amalgam/entity/EntityManipulation.cpp +++ b/src/Amalgam/entity/EntityManipulation.cpp @@ -468,10 +468,6 @@ MergeMetricResults EntityManipulation::NumberOfSharedNodes(Entity *ent best_match_found = true; best_match_value = match_value; best_match_key = e2c_id; - - //don't need to check any more - if(match_value.mustMatch) - break; } } diff --git a/src/Amalgam/evaluablenode/EvaluableNode.cpp b/src/Amalgam/evaluablenode/EvaluableNode.cpp index 5a6dc6e7..8c5b22a3 100644 --- a/src/Amalgam/evaluablenode/EvaluableNode.cpp +++ b/src/Amalgam/evaluablenode/EvaluableNode.cpp @@ -24,39 +24,38 @@ FastHashSet EvaluableNode::debugWatch; Concurrency::SingleMutex EvaluableNode::debugWatchMutex; #endif -void EvaluableNode::GetNodeCommonAndUniqueLabelCounts(EvaluableNode *n1, EvaluableNode *n2, size_t &num_common_labels, size_t &num_unique_labels) +std::pair EvaluableNode::GetNodeCommonAndUniqueLabelCounts(EvaluableNode *n1, EvaluableNode *n2) { - num_common_labels = 0; - num_unique_labels = 0; - size_t num_n1_labels = 0; - size_t num_n2_labels = 0; + if(n1 == nullptr) + { + if(n2 == nullptr) + return std::make_pair(0, 0); - if(n1 != nullptr) - num_n1_labels = n1->GetNumLabels(); + return std::make_pair(0, n2->GetNumLabels()); + } - if(n2 != nullptr) - num_n2_labels = n2->GetNumLabels(); + if(n2 == nullptr) + return std::make_pair(0, n1->GetNumLabels()); - //if no labels in either, then done - if(num_n1_labels == 0 && num_n2_labels == 0) - return; + size_t num_n1_labels = n1->GetNumLabels(); + size_t num_n2_labels = n2->GetNumLabels(); - //if labels in one (but not both, because would have exited), then count total and done + //if no labels in one, just return the nonzero count as the total unique if(num_n1_labels == 0 || num_n2_labels == 0) - { - num_unique_labels = std::max(num_n1_labels, num_n2_labels); - return; - } + return std::make_pair(0, std::max(num_n1_labels, num_n2_labels)); //if only have one label in each, compare immediately for speed if(num_n1_labels == 1 && num_n2_labels == 1) { + //if the same, only one common label, if unique, then two unique if(n1->GetLabel(0) == n2->GetLabel(0)) - num_common_labels = 1; - return; + return std::make_pair(1, 0); + else + return std::make_pair(0, 2); } //compare + size_t num_common_labels = 0; for(auto s_id : n1->GetLabelsStringIds()) { auto n2_label_sids = n2->GetLabelsStringIds(); @@ -64,7 +63,8 @@ void EvaluableNode::GetNodeCommonAndUniqueLabelCounts(EvaluableNode *n1, Evaluab num_common_labels++; } - num_unique_labels = num_n1_labels + num_n2_labels - num_common_labels; //don't double-count the common labels + //don't count the common labels in the uncommon + return std::make_pair(num_common_labels, num_n1_labels + num_n2_labels - 2 * num_common_labels); } bool EvaluableNode::AreShallowEqual(EvaluableNode *a, EvaluableNode *b) diff --git a/src/Amalgam/evaluablenode/EvaluableNode.h b/src/Amalgam/evaluablenode/EvaluableNode.h index b42bb3f6..501ab502 100644 --- a/src/Amalgam/evaluablenode/EvaluableNode.h +++ b/src/Amalgam/evaluablenode/EvaluableNode.h @@ -202,8 +202,8 @@ class EvaluableNode } //Evaluates the fraction of the labels of nodes that are the same, 1.0 if no labels on either - //num_common_labels and num_unique_labels are set to the appropriate number in common and number of labels that are unique when the two sets are merged - static void GetNodeCommonAndUniqueLabelCounts(EvaluableNode *n1, EvaluableNode *n2, size_t &num_common_labels, size_t &num_unique_labels); + //returns the number of followed by the number of unique labels if the two sets were merged + static std::pair GetNodeCommonAndUniqueLabelCounts(EvaluableNode *n1, EvaluableNode *n2); //Returns true if the immediate data structure of a is equal to b static bool AreShallowEqual(EvaluableNode *a, EvaluableNode *b); diff --git a/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp index 5c50fbb9..12e2e30f 100644 --- a/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp +++ b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp @@ -71,9 +71,7 @@ inline StringInternPool::StringID MixStringValues(StringInternPool::StringID a, bool EvaluableNodeTreeManipulation::NodesMergeMethod::AreMergeable(EvaluableNode *a, EvaluableNode *b) { - size_t num_common_labels; - size_t num_unique_labels; - EvaluableNode::GetNodeCommonAndUniqueLabelCounts(a, b, num_common_labels, num_unique_labels); + auto [num_common_labels, num_unique_labels] = EvaluableNode::GetNodeCommonAndUniqueLabelCounts(a, b); auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(a, b, true); @@ -121,9 +119,7 @@ EvaluableNode *EvaluableNodeTreeManipulation::NodesMixMethod::MergeValues(Evalua bool EvaluableNodeTreeManipulation::NodesMixMethod::AreMergeable(EvaluableNode *a, EvaluableNode *b) { - size_t num_common_labels; - size_t num_unique_labels; - EvaluableNode::GetNodeCommonAndUniqueLabelCounts(a, b, num_common_labels, num_unique_labels); + auto [num_common_labels, num_unique_labels] = EvaluableNode::GetNodeCommonAndUniqueLabelCounts(a, b); auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(a, b); @@ -1180,18 +1176,13 @@ MergeMetricResults EvaluableNodeTreeManipulation::CommonalityBe if(n1 == nullptr || n2 == nullptr) return MergeMetricResults(0.0, n1, n2, false, false); - size_t num_common_labels; - size_t num_unique_labels; - EvaluableNode::GetNodeCommonAndUniqueLabelCounts(n1, n2, num_common_labels, num_unique_labels); + auto [num_common_labels, num_unique_labels] = EvaluableNode::GetNodeCommonAndUniqueLabelCounts(n1, n2); auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(n1, n2); - //if no labels, as is usually the case, then just address normal commonality - // and if the nodes are exactly equal - if(num_unique_labels == 0) - return MergeMetricResults(commonality, n1, n2, false, commonality == 1.0); - - return MergeMetricResults(commonality + num_common_labels, n1, n2, num_common_labels == num_unique_labels, commonality == 1.0); + bool must_match = (num_unique_labels == 0 && num_common_labels > 0); + bool exact_match = (num_unique_labels == 0 && commonality == 1.0); + return MergeMetricResults(commonality + num_common_labels, n1, n2, must_match, exact_match); } std::pair EvaluableNodeTreeManipulation::CommonalityBetweenNodeTypesAndValues( diff --git a/src/Amalgam/out.txt b/src/Amalgam/out.txt index c5283c79..e290ea15 100644 --- a/src/Amalgam/out.txt +++ b/src/Amalgam/out.txt @@ -231,11 +231,11 @@ hello world: 12 and 2 (print "hello") [(null) (null) .infinity -.infinity] -{b 2 c ["alpha" "beta" "gamma"] a 1} +{a 1 b 2 c ["alpha" "beta" "gamma"]} { + a 1 b 2 c ["alpha" "beta" "gamma"] - a 1 } (apply "6") @@ -635,7 +635,7 @@ abcdef 0.14384103622589045 --first-- 4 -2 +1 1 0 a @@ -654,15 +654,15 @@ a a 1 b 2 c 3 - d 4 e 5 + f 6 } -{a 1 e 5} +{b 2 c 3} { a 1 + b 2 c 3 - d 4 - e 5 + f 6 } { a 1 @@ -686,7 +686,7 @@ abcdef --last-- this -2 +1 1 0 c @@ -705,15 +705,15 @@ c a 1 b 2 c 3 - d 4 e 5 + f 6 } -{a 1 e 5} +{b 2 c 3} { a 1 + b 2 c 3 - d 4 - e 5 + f 6 } { a 1 @@ -1064,7 +1064,7 @@ abcdef [1 3] [9 5] --indices-- -["b" "c" "a" "4"] +["a" "4" "b" "c"] [ 0 1 @@ -1076,7 +1076,7 @@ abcdef 7 ] --values-- -[2 3 1 "d"] +[1 "d" 2 3] [ "a" 1 @@ -1097,7 +1097,7 @@ abcdef 4 "d" ] -["d" 2 3 1] +[1 "d" 2 3] [ 1 2 @@ -1327,7 +1327,7 @@ current_index: 2 rmfile "del /s /q " rwww 1 slash "\\" - start_time 1729534711.806223 + start_time 1729551682.854369 www 1 x 12 zz 10 @@ -1373,7 +1373,7 @@ current_index: 2 rmfile "del /s /q " rwww 1 slash "\\" - start_time 1729534711.806223 + start_time 1729551682.854369 www 1 x 12 zz 10 @@ -1418,7 +1418,7 @@ current_index: 2 rmfile "del /s /q " rwww 1 slash "\\" - start_time 1729534711.806223 + start_time 1729551682.854369 www 1 x 12 zz 10 @@ -1551,7 +1551,7 @@ true --weighted_rand-- b -["b" "b" "b" "b"] +["a" "a" "a" "a"] b ["a" "b" @(get (target 2) 1) @(get (target 2) 1)] @@ -1562,11 +1562,11 @@ infinity test c or d: ["c" "c" "c" "c"] infinity test c or d: ["c" @(get (target 2) 0) @(get (target 2) 0) @(get (target 2) 0)] -{a 27 b 55 c 18} +{a 29 b 44 c 27} {a 25 b 50 c 25} -["4" "3" "1"] +["1" "7" "2"] --get_rand_seed-- ¶‡¨± ÎA)p1͉ÿ @@ -1612,8 +1612,8 @@ infinity test c or d: ["c" @(get (target 2) 0) @(get (target 2) 0) @(get (target string --set_type-- (- 3 4) -["b" 3 "a" 4] -["b" 3 "a" 4] +["a" 4 "b" 3] +["a" 4 "b" 3] {a 4 b 3} 8.7 (parallel @@ -1662,17 +1662,17 @@ string {a 3 b 4} {c "c"} ] -21: [{"b":4,"a":3},{"c":"c","d":null}] +21: [{"a":3,"b":4},{"d":null,"c":"c"}] 22: [{"a":3,"b":4},{"c":"c","d":null}] -23: b: 2 -c: 3 -d: 4 +23: d: 4 e: - a - b - - .inf a: 1 +b: 2 +c: 3 24: a: 1 b: 2 @@ -1685,7 +1685,7 @@ e: - .inf 25: {a 1} -current date-time in epoch: 2024-10-21-14.18.32.1879240 +current date-time in epoch: 2024-10-21-19.01.23.5965500 2020-06-07 00:22:59 1391230800 1391230800 @@ -2234,16 +2234,6 @@ decrypted: hello {_ (null)} (replace _ - ["g"] - (lambda - [ - (get - (current_value 1) - 0 - ) - 4 - ] - ) [] (lambda { @@ -2254,6 +2244,16 @@ decrypted: hello ) } ) + ["g"] + (lambda + [ + (get + (current_value 1) + 0 + ) + 4 + ] + ) ) ) (declare @@ -2824,18 +2824,16 @@ flatten restore with parallel 19.264241099357605 --intersect_entities-- (associate "b" 4) -MergeEntityChild1 -(associate "x" 3 "y" 4) MergeEntityChild2 (associate "p" 3 "q" 4) +MergeEntityChild1 +(associate "x" 3 "y" 4) _2710920158 (associate "E" 3 "F" 4) _1797215995 (associate "e" 3 "f" 4) --union_entities-- (associate "b" 4 "a" 3 "c" 3) -MergeEntityChild1 -(associate "x" 3 "y" 4 "z" 5) MergeEntityChild2 (associate "p" @@ -2849,6 +2847,8 @@ MergeEntityChild2 "w" 7 ) +MergeEntityChild1 +(associate "x" 3 "y" 4 "z" 5) _2710920158 (associate "E" @@ -3504,21 +3504,10 @@ difference between DiffContainer and DiffEntity2: ) --mix_entities-- (associate "b" 4 "a" 3) -MergeEntityChild1 -(associate "x" 3 "y" 4) MergeEntityChild2 -(associate - "p" - 3 - "q" - 4 - "u" - 5 - "v" - 6 - "w" - 7 -) +(associate "p" 3 "q" 4) +MergeEntityChild1 +(associate "x" 3 "y" 4 "z" 5) _2710920158 (associate "E" 3 "F" 4 "H" 6) _1797215995 @@ -3601,7 +3590,7 @@ deep sets --set_entity_root_permission-- RootTest -1729534712.534056 +1729551683.747663 (true) RootTest @@ -4443,12 +4432,12 @@ cyclic test expected: 155, 200, 190 ... deg values of 0 8 and 12: 155: 0.1 (null ##deg 0 ) -200: 0.05555555555555555 (null - ##deg 8 -) 190: 0.045454545454545456 (null ##deg 12 ) +200: 0.05555555555555555 (null + ##deg 8 +) --contains_label-- (true) @@ -4949,4 +4938,4 @@ rmdir /s /q amlg_code\persistent_tree_test_root del /s /q amlg_code\persist_module_test\psm.mdam del /s /q amlg_code\persist_module_test.mdam --total execution time-- -1.7309911251068115 +2.0564751625061035