From 73e82d298c0be7cbcddea68cdff4d46a030d0662 Mon Sep 17 00:00:00 2001 From: Balamurali Gopalswami <167726375+b-gopalswami@users.noreply.github.com> Date: Wed, 20 Nov 2024 07:52:56 -0500 Subject: [PATCH] Fix flake in detecting reorg by nodes (#15289) * Fix flake in detecting reorg by nodes * represent as f+1 nodes --- integration-tests/ccip-tests/smoke/ccip_test.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/integration-tests/ccip-tests/smoke/ccip_test.go b/integration-tests/ccip-tests/smoke/ccip_test.go index a729bd5daec..0dab46c5a25 100644 --- a/integration-tests/ccip-tests/smoke/ccip_test.go +++ b/integration-tests/ccip-tests/smoke/ccip_test.go @@ -886,7 +886,7 @@ func TestSmokeCCIPReorgBelowFinality(t *testing.T) { // Test creates above finality reorg at destination and // expects ccip transactions in-flight and the one initiated after reorg -// doesn't go through and verifies every node is able to detect reorg. +// doesn't go through and verifies f+1 nodes is able to detect reorg. // Note: LogPollInterval interval is set as 1s to detect the reorg immediately func TestSmokeCCIPReorgAboveFinalityAtDestination(t *testing.T) { t.Parallel() @@ -897,7 +897,7 @@ func TestSmokeCCIPReorgAboveFinalityAtDestination(t *testing.T) { // Test creates above finality reorg at destination and // expects ccip transactions in-flight doesn't go through, the transaction initiated after reorg -// shouldn't even get initiated and verifies every node is able to detect reorg. +// shouldn't even get initiated and verifies f+1 nodes is able to detect reorg. // Note: LogPollInterval interval is set as 1s to detect the reorg immediately func TestSmokeCCIPReorgAboveFinalityAtSource(t *testing.T) { t.Parallel() @@ -936,11 +936,13 @@ func performAboveFinalityReorgAndValidate(t *testing.T, network string) { logPollerName = fmt.Sprintf("EVM.%d.LogPoller", lane.SourceChain.GetChainID()) rs.RunReorg(rs.SrcClient, rs.Cfg.SrcFinalityDepth+rs.Cfg.FinalityDelta, network, 2*time.Second) } - clNodes := setUpOutput.Env.CLNodes - // assert every node is detecting the reorg (LogPollInterval is set as 1s for faster detection) + // DON is 3F+1, finding f+1 from the given number of nodes in the environment + fPlus1Nodes := int(math.Ceil(float64(len(setUpOutput.Env.CLNodes)-1)/3)) + 1 + // assert at least f+1 nodes is detecting the reorg (LogPollInterval is set as 1s for faster detection) + // additional context: Commit requires 2f+1 observations, so f+1 nodes need to detect it in order to force the entire DON to stop processing messages. nodesDetectedViolation := make(map[string]bool) assert.Eventually(t, func() bool { - for _, node := range clNodes { + for _, node := range setUpOutput.Env.CLNodes { if _, ok := nodesDetectedViolation[node.ChainlinkClient.URL()]; ok { continue } @@ -953,8 +955,8 @@ func performAboveFinalityReorgAndValidate(t *testing.T, network string) { } } } - return len(nodesDetectedViolation) == len(clNodes) - }, 3*time.Minute, 20*time.Second, "Reorg above finality depth is not detected by every node") + return len(nodesDetectedViolation) >= fPlus1Nodes + }, 3*time.Minute, 20*time.Second, "Reorg above finality depth is not detected by f+1 nodes") log.Debug().Interface("Nodes", nodesDetectedViolation).Msg("Violation detection details") // send another request and verify it fails err = lane.SendRequests(1, gasLimit)