From 93312200166ef8cd5691107902642ba47ff711ef Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:20:29 -0600 Subject: [PATCH 1/4] improve bootstrapper resilience --- .github/workflows/systest.yml | 2 +- cmd/bootstrapper/server.go | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/.github/workflows/systest.yml b/.github/workflows/systest.yml index dfd15b59e5a..86d38aeacca 100644 --- a/.github/workflows/systest.yml +++ b/.github/workflows/systest.yml @@ -5,7 +5,7 @@ on: inputs: test_name: description: "Test to run" - default: "." + default: "TestCheckpoint" log_level: description: "Log level" default: "debug" diff --git a/cmd/bootstrapper/server.go b/cmd/bootstrapper/server.go index 5f9986ffb23..1f2834a0a1e 100644 --- a/cmd/bootstrapper/server.go +++ b/cmd/bootstrapper/server.go @@ -134,8 +134,37 @@ func (s *Server) Start(ctx context.Context, errCh chan error, params *NetworkPar // start generating fallback data s.eg.Go( func() error { - s.genDataLoop(ctx, errCh, last, params.updateActiveSetTime, s.GenFallbackActiveSet) - return nil + var ( + errs = 0 + maxErrs = 10 + timer *time.Timer + backoff = 10 * time.Second + ) + for epoch := last; ; epoch++ { + wait := time.Until(params.updateActiveSetTime(epoch)) + select { + case <-timer.C: + if err := s.GenFallbackActiveSet(ctx, epoch); err != nil { + errs++ + timer.Reset(backoff) + continue + } + errs = 0 + if !timer.Stop() { + <-timer.C + } + case <-time.After(wait): + if err := s.GenFallbackActiveSet(ctx, epoch); err != nil { + timer = time.NewTimer(backoff) + if errs >= maxErrs { + errCh <- err + return err + } + } + case <-ctx.Done(): + return ctx.Err() + } + } }) s.eg.Go( func() error { From 630ec7e83bd0b59990419a12d418961f500ecb67 Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:06:53 -0600 Subject: [PATCH 2/4] chore: proxy read from channel not directly from timer --- cmd/bootstrapper/server.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmd/bootstrapper/server.go b/cmd/bootstrapper/server.go index 1f2834a0a1e..af4159b69c5 100644 --- a/cmd/bootstrapper/server.go +++ b/cmd/bootstrapper/server.go @@ -138,12 +138,13 @@ func (s *Server) Start(ctx context.Context, errCh chan error, params *NetworkPar errs = 0 maxErrs = 10 timer *time.Timer + timeC <-chan time.Time backoff = 10 * time.Second ) for epoch := last; ; epoch++ { wait := time.Until(params.updateActiveSetTime(epoch)) select { - case <-timer.C: + case <-timeC: if err := s.GenFallbackActiveSet(ctx, epoch); err != nil { errs++ timer.Reset(backoff) @@ -153,9 +154,11 @@ func (s *Server) Start(ctx context.Context, errCh chan error, params *NetworkPar if !timer.Stop() { <-timer.C } + timeC = nil case <-time.After(wait): if err := s.GenFallbackActiveSet(ctx, epoch); err != nil { timer = time.NewTimer(backoff) + timeC = timer.C if errs >= maxErrs { errCh <- err return err From 53f144a02141a28bcf2f545875c9892bdcd75787 Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:26:01 -0600 Subject: [PATCH 3/4] chore: skip other tests cuz makefile flag dont work --- systest/tests/distributed_post_verification_test.go | 1 + systest/tests/equivocation_test.go | 1 + systest/tests/fallback_test.go | 1 + systest/tests/nodes_test.go | 1 + systest/tests/partition_test.go | 2 ++ systest/tests/poets_test.go | 3 +++ systest/tests/smeshing_test.go | 1 + systest/tests/timeskew_test.go | 1 + 8 files changed, 11 insertions(+) diff --git a/systest/tests/distributed_post_verification_test.go b/systest/tests/distributed_post_verification_test.go index 7b78ce6d5c5..c33552afc24 100644 --- a/systest/tests/distributed_post_verification_test.go +++ b/systest/tests/distributed_post_verification_test.go @@ -39,6 +39,7 @@ import ( ) func TestPostMalfeasanceProof(t *testing.T) { + t.Skip() t.Parallel() testDir := t.TempDir() diff --git a/systest/tests/equivocation_test.go b/systest/tests/equivocation_test.go index 262f1ffe0a8..c85471b9a54 100644 --- a/systest/tests/equivocation_test.go +++ b/systest/tests/equivocation_test.go @@ -17,6 +17,7 @@ import ( ) func TestEquivocation(t *testing.T) { + t.Skip() t.Parallel() const bootnodes = 2 cctx := testcontext.New(t) diff --git a/systest/tests/fallback_test.go b/systest/tests/fallback_test.go index d1c265665c5..a923c2568df 100644 --- a/systest/tests/fallback_test.go +++ b/systest/tests/fallback_test.go @@ -19,6 +19,7 @@ import ( ) func TestFallback(t *testing.T) { + t.Skip() t.Parallel() tctx := testcontext.New(t) diff --git a/systest/tests/nodes_test.go b/systest/tests/nodes_test.go index b68667dd4ce..98b75615924 100644 --- a/systest/tests/nodes_test.go +++ b/systest/tests/nodes_test.go @@ -22,6 +22,7 @@ func init() { } func TestAddNodes(t *testing.T) { + t.Skip() t.Parallel() const ( diff --git a/systest/tests/partition_test.go b/systest/tests/partition_test.go index 89431bfdfe0..628c644218f 100644 --- a/systest/tests/partition_test.go +++ b/systest/tests/partition_test.go @@ -176,6 +176,7 @@ func testPartition(t *testing.T, tctx *testcontext.Context, cl *cluster.Cluster, } func TestPartition_30_70(t *testing.T) { + t.Skip() t.Parallel() tctx := testcontext.New(t) @@ -190,6 +191,7 @@ func TestPartition_30_70(t *testing.T) { } func TestPartition_50_50(t *testing.T) { + t.Skip() t.Parallel() tctx := testcontext.New(t) diff --git a/systest/tests/poets_test.go b/systest/tests/poets_test.go index 92d10d42c4c..a17266176a0 100644 --- a/systest/tests/poets_test.go +++ b/systest/tests/poets_test.go @@ -26,6 +26,7 @@ var layersToCheck = parameters.Int( ) func TestPoetsFailures(t *testing.T) { + t.Skip() t.Parallel() tctx := testcontext.New(t) tctx.Log.Debug("TestPoetsFailures start") @@ -123,6 +124,7 @@ func testPoetDies(t *testing.T, tctx *testcontext.Context, cl *cluster.Cluster) } func TestNodesUsingDifferentPoets(t *testing.T) { + t.Skip() t.Parallel() tctx := testcontext.New(t) if tctx.PoetSize < 2 { @@ -213,6 +215,7 @@ func TestNodesUsingDifferentPoets(t *testing.T) { // TODO: When PoW support is removed, convert this test to verify only the cert path. // https://github.com/spacemeshos/go-spacemesh/issues/5212 func TestRegisteringInPoetWithPowAndCert(t *testing.T) { + t.Skip() t.Parallel() tctx := testcontext.New(t) tctx.PoetSize = 2 diff --git a/systest/tests/smeshing_test.go b/systest/tests/smeshing_test.go index 3bc8ad8ede1..7de82671b29 100644 --- a/systest/tests/smeshing_test.go +++ b/systest/tests/smeshing_test.go @@ -29,6 +29,7 @@ import ( ) func TestSmeshing(t *testing.T) { + t.Skip() // TODO(mafa): add new test with multi-smeshing nodes t.Parallel() diff --git a/systest/tests/timeskew_test.go b/systest/tests/timeskew_test.go index 5c5c0069f0c..fc1172dd838 100644 --- a/systest/tests/timeskew_test.go +++ b/systest/tests/timeskew_test.go @@ -14,6 +14,7 @@ import ( ) func TestShortTimeskew(t *testing.T) { + t.Skip() t.Parallel() tctx := testcontext.New(t) From 026c8475c56d7e76e495c8553091dd144b713951 Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Tue, 30 Jul 2024 11:22:26 -0600 Subject: [PATCH 4/4] chore: run a bunch in parallel --- systest/tests/checkpoint_test.go | 15 +++++++++++++++ systest/tests/nodes_test.go | 1 + 2 files changed, 16 insertions(+) diff --git a/systest/tests/checkpoint_test.go b/systest/tests/checkpoint_test.go index bd74e489c59..871396d71c7 100644 --- a/systest/tests/checkpoint_test.go +++ b/systest/tests/checkpoint_test.go @@ -32,6 +32,21 @@ func reuseCluster(tctx *testcontext.Context, restoreLayer uint32) (*cluster.Clus ) } +func TestCheckpoint1(t *testing.T) { + // t.Parallel() + TestCheckpoint(t) +} + +func TestCheckpoint2(t *testing.T) { + // t.Parallel() + TestCheckpoint(t) +} + +func TestCheckpoint3(t *testing.T) { + // t.Parallel() + TestCheckpoint(t) +} + func TestCheckpoint(t *testing.T) { // TODO(mafa): add new test with multi-smeshing nodes t.Parallel() diff --git a/systest/tests/nodes_test.go b/systest/tests/nodes_test.go index 98b75615924..ab8e2c2ab20 100644 --- a/systest/tests/nodes_test.go +++ b/systest/tests/nodes_test.go @@ -126,6 +126,7 @@ func TestAddNodes(t *testing.T) { } func TestFailedNodes(t *testing.T) { + t.Skip() t.Parallel() tctx := testcontext.New(t)