[Merged by Bors] - test: improve bootstrapper and checkpoint test #6193

Closed
wants to merge 16 commits
98 changes: 64 additions & 34 deletions cmd/bootstrapper/server.go
@@ -132,21 +132,72 @@ func (s *Server) Start(ctx context.Context, errCh chan error, params *NetworkPar
}

// start generating fallback data
s.eg.Go(
func() error {
s.genDataLoop(ctx, errCh, last, params.updateActiveSetTime, s.GenFallbackActiveSet)
return nil
})
s.eg.Go(
func() error {
s.genDataLoop(ctx, errCh, last+1, params.updateBeaconTime, s.GenFallbackBeacon)
return nil
})
s.eg.Go(func() error {
for epoch := last; ; epoch++ {
wait := time.Until(params.updateActiveSetTime(epoch))
select {
case <-time.After(wait):
if err := s.genWithRetry(ctx, epoch, 10); err != nil {
errCh <- err
return nil
}
case <-ctx.Done():
return nil
}
}
})
s.eg.Go(func() error {
for epoch := last + 1; ; epoch++ {
wait := time.Until(params.updateBeaconTime(epoch))
select {
case <-time.After(wait):
if err := s.GenFallbackBeacon(epoch); err != nil {
errCh <- err
return err
}
case <-ctx.Done():
return nil
}
}
})

return nil
})
}

func (s *Server) genWithRetry(ctx context.Context, epoch types.EpochID, maxRetries int) error {
err := s.GenFallbackActiveSet(ctx, epoch)
if err == nil {
return nil
}
s.logger.With().Debug("generate fallback active set retry", log.Err(err))

retries := 0
backoff := 10 * time.Second
timer := time.NewTimer(backoff)

for {
select {
case <-timer.C:
if err := s.GenFallbackActiveSet(ctx, epoch); err != nil {
s.logger.With().Debug("generate fallback active set retry", log.Err(err))
retries++
if retries >= maxRetries {
return err
}
timer.Reset(backoff)
continue
}
return nil
case <-ctx.Done():
if !timer.Stop() {
<-timer.C
}
return ctx.Err()
}
}
}

// in systests, we want to be sure the nodes use the fallback data unconditionally.
// use a fixed known value for beacon to be sure that fallback is used during testing.
func epochBeacon(epoch types.EpochID) types.Beacon {
@@ -165,7 +216,7 @@ func (s *Server) GenBootstrap(ctx context.Context, epoch types.EpochID) error {
return err
}

func (s *Server) GenFallbackBeacon(_ context.Context, epoch types.EpochID) error {
func (s *Server) GenFallbackBeacon(epoch types.EpochID) error {
suffix := bootstrap.SuffixBeacon
_, err := s.gen.GenUpdate(epoch, epochBeacon(epoch), nil, suffix)
return err
@@ -193,31 +244,10 @@ func getPartialActiveSet(ctx context.Context, smEndpoint string, targetEpoch typ
return actives[:cutoff], nil
}

func (s *Server) genDataLoop(
ctx context.Context,
errCh chan error,
start types.EpochID,
timeFunc func(types.EpochID) time.Time,
genFunc func(context.Context, types.EpochID) error,
) {
for epoch := start; ; epoch++ {
wait := time.Until(timeFunc(epoch))
select {
case <-time.After(wait):
if err := genFunc(ctx, epoch); err != nil {
errCh <- err
return
}
case <-ctx.Done():
return
}
}
}

func (s *Server) Stop(ctx context.Context) {
s.logger.With().Info("shutting down server")
_ = s.Shutdown(ctx)
_ = s.eg.Wait()
s.Shutdown(ctx)
s.eg.Wait()
}

func (s *Server) handle(w http.ResponseWriter, r *http.Request) {
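Note (illustrative sketch, not part of this diff): the new genWithRetry follows a common fixed-backoff retry pattern — try once, then retry on a fixed interval until success, a retry cap, or context cancellation. A minimal, self-contained version of that pattern is below; the retryWithBackoff name and signature are hypothetical, not from this PR.

// retryWithBackoff is an illustrative, generalized form of the fixed-backoff
// retry loop used by genWithRetry: call fn once, then retry every backoff
// interval until it succeeds, maxRetries failures occur, or ctx is cancelled.
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

func retryWithBackoff(ctx context.Context, maxRetries int, backoff time.Duration, fn func(context.Context) error) error {
	err := fn(ctx)
	if err == nil {
		return nil
	}
	ticker := time.NewTicker(backoff)
	defer ticker.Stop()
	for retries := 0; retries < maxRetries; retries++ {
		select {
		case <-ticker.C:
			if err = fn(ctx); err == nil {
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return err // last error after exhausting retries
}

func main() {
	attempts := 0
	err := retryWithBackoff(context.Background(), 10, 10*time.Millisecond, func(context.Context) error {
		attempts++
		if attempts < 3 {
			return errors.New("not ready yet")
		}
		return nil
	})
	fmt.Println(attempts, err) // 3 <nil>
}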
4 changes: 3 additions & 1 deletion config/presets/fastnet.go
@@ -51,7 +51,9 @@ func fastnet() config.Config {
conf.LayerDuration = 15 * time.Second
conf.Sync.Interval = 5 * time.Second
conf.Sync.GossipDuration = 10 * time.Second
conf.Sync.AtxSync.EpochInfoInterval = 20 * time.Second
conf.Sync.AtxSync.EpochInfoInterval = 1 * time.Second
conf.Sync.AtxSync.EpochInfoPeers = 10
conf.Sync.AtxSync.RequestsLimit = 100
conf.Sync.MalSync.IDRequestInterval = 20 * time.Second
conf.LayersPerEpoch = 4
conf.RegossipAtxInterval = 30 * time.Second
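For context on the preset change (an assumption about intent, not stated in the PR): fastnet's LayerDuration is 15s, so the old 20s EpochInfoInterval spanned more than a full layer, while the new 1s interval re-queries epoch ATX info many times per layer. A quick illustrative timescale check, using only the values visible in this diff:

// Illustrative timescale check only; the constants mirror the fastnet preset
// values shown in the diff (LayerDuration = 15s, EpochInfoInterval 20s -> 1s).
package main

import (
	"fmt"
	"time"
)

func main() {
	layerDuration := 15 * time.Second
	oldEpochInfoInterval := 20 * time.Second
	newEpochInfoInterval := 1 * time.Second

	fmt.Println(oldEpochInfoInterval > layerDuration)      // true: one poll interval outlasted a layer
	fmt.Println(int(layerDuration / newEpochInfoInterval)) // 15 polls per layer with the new setting
}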
8 changes: 4 additions & 4 deletions systest/tests/checkpoint_test.go
@@ -51,8 +51,8 @@ func TestCheckpoint(t *testing.T) {
// the start of the next poet round
snapshotLayer := uint32(15)
restoreLayer := uint32(18)
checkpointEpoch := uint32(4)
lastEpoch := uint32(8)
checkpointEpoch := uint32(8)
lastEpoch := uint32(14)

// bootstrap the checkpoint epoch and the next epoch as the beacon protocol was interrupted in the last epoch
cl, err := reuseCluster(tctx, restoreLayer)
@@ -168,8 +168,8 @@ func TestCheckpoint(t *testing.T) {
}
}

tctx.Log.Infow("waiting for all miners to be smeshing", "last epoch", checkpointEpoch+2)
ensureSmeshing(t, tctx, cl, checkpointEpoch+2)
tctx.Log.Infow("waiting for all miners to be smeshing", "last epoch", checkpointEpoch)
ensureSmeshing(t, tctx, cl, checkpointEpoch)

// increase the cluster size to the original test size
tctx.Log.Info("cluster size changed to ", size)
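For quick orientation on the layer/epoch constants in this test (assuming fastnet's LayersPerEpoch = 4 from the preset change above, with epoch = layer / layersPerEpoch): snapshotLayer 15 falls in epoch 3 and restoreLayer 18 in epoch 4, well before the new checkpointEpoch of 8. A sketch of that arithmetic; the epochOf helper is illustrative only:

// Layer-to-epoch arithmetic for the test constants above.
package main

import "fmt"

func epochOf(layer, layersPerEpoch uint32) uint32 { return layer / layersPerEpoch }

func main() {
	const layersPerEpoch = 4
	fmt.Println(epochOf(15, layersPerEpoch)) // snapshotLayer 15 -> epoch 3
	fmt.Println(epochOf(18, layersPerEpoch)) // restoreLayer 18 -> epoch 4
}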