From 61e4cfd1b7600a2b58d05fe592d6bbb56eb997a9 Mon Sep 17 00:00:00 2001 From: Dmitry Shulyak Date: Thu, 21 Sep 2023 05:58:55 +0000 Subject: [PATCH] sync: parametrize out of sync threshold and set it to 3h for mainnet (#5040) related: https://github.com/spacemeshos/go-spacemesh/issues/5036 In the future we should drop the threshold completely and use only connectivity information to decide whether a node should stop participating in consensus. There should be no risk of interrupting consensus because of any unexpected failures in the sync process. --- config/mainnet.go | 11 ++++++----- syncer/syncer.go | 38 ++++++++++++++++++-------------------- syncer/syncer_test.go | 15 +++++++++------ 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/config/mainnet.go b/config/mainnet.go index 9b28038c6e..b8472f8a76 100644 --- a/config/mainnet.go +++ b/config/mainnet.go @@ -136,11 +136,12 @@ func MainnetConfig() Config { FETCH: fetch.DefaultConfig(), LOGGING: logging, Sync: syncer.Config{ - Interval: time.Minute, - EpochEndFraction: 0.8, - MaxStaleDuration: time.Hour, - Standalone: false, - GossipDuration: 50 * time.Second, + Interval: time.Minute, + EpochEndFraction: 0.8, + MaxStaleDuration: time.Hour, + Standalone: false, + GossipDuration: 50 * time.Second, + OutOfSyncThresholdLayers: 36, // 3h }, Recovery: checkpoint.DefaultConfig(), Cache: datastore.DefaultConfig(), diff --git a/syncer/syncer.go b/syncer/syncer.go index c840e69db9..faab996b59 100644 --- a/syncer/syncer.go +++ b/syncer/syncer.go @@ -21,31 +21,29 @@ import ( // Config is the config params for syncer. 
type Config struct { - Interval time.Duration - EpochEndFraction float64 - HareDelayLayers uint32 - SyncCertDistance uint32 - MaxStaleDuration time.Duration - Standalone bool - GossipDuration time.Duration + Interval time.Duration + EpochEndFraction float64 + HareDelayLayers uint32 + SyncCertDistance uint32 + MaxStaleDuration time.Duration + Standalone bool + GossipDuration time.Duration + OutOfSyncThresholdLayers uint32 `mapstructure:"out-of-sync-threshold"` } // DefaultConfig for the syncer. func DefaultConfig() Config { return Config{ - Interval: 10 * time.Second, - EpochEndFraction: 0.8, - HareDelayLayers: 10, - SyncCertDistance: 10, - MaxStaleDuration: time.Second, - GossipDuration: 15 * time.Second, + Interval: 10 * time.Second, + EpochEndFraction: 0.8, + HareDelayLayers: 10, + SyncCertDistance: 10, + MaxStaleDuration: time.Second, + GossipDuration: 15 * time.Second, + OutOfSyncThresholdLayers: 3, } } -const ( - outOfSyncThreshold uint32 = 3 // see notSynced -) - type syncState uint32 const ( @@ -452,7 +450,7 @@ func (s *Syncer) syncAtx(ctx context.Context) error { return nil } -func isTooFarBehind(ctx context.Context, logger log.Log, current, lastSynced types.LayerID) bool { +func isTooFarBehind(ctx context.Context, logger log.Log, current, lastSynced types.LayerID, outOfSyncThreshold uint32) bool { if current.After(lastSynced) && current.Difference(lastSynced) >= outOfSyncThreshold { logger.WithContext(ctx).With().Info("node is too far behind", log.Stringer("current", current), @@ -472,7 +470,7 @@ func (s *Syncer) setStateBeforeSync(ctx context.Context) { } return } - if isTooFarBehind(ctx, s.logger, current, s.getLastSyncedLayer()) { + if isTooFarBehind(ctx, s.logger, current, s.getLastSyncedLayer(), s.cfg.OutOfSyncThresholdLayers) { s.setSyncState(ctx, notSynced) } } @@ -492,7 +490,7 @@ func (s *Syncer) setStateAfterSync(ctx context.Context, success bool) { // network outage. 
switch currSyncState { case synced: - if !success && isTooFarBehind(ctx, s.logger, current, s.getLastSyncedLayer()) { + if !success && isTooFarBehind(ctx, s.logger, current, s.getLastSyncedLayer(), s.cfg.OutOfSyncThresholdLayers) { s.setSyncState(ctx, notSynced) } case gossipSync: diff --git a/syncer/syncer_test.go b/syncer/syncer_test.go index bd2d36c81a..1cdba9772b 100644 --- a/syncer/syncer_test.go +++ b/syncer/syncer_test.go @@ -28,6 +28,8 @@ import ( const ( layersPerEpoch = 3 never = time.Second * 60 * 24 + + outOfSyncThreshold = 3 ) func TestMain(m *testing.M) { @@ -93,11 +95,12 @@ func newTestSyncer(t *testing.T, interval time.Duration) *testSyncer { require.NoError(t, err) cfg := Config{ - Interval: interval, - GossipDuration: 5 * time.Millisecond, - EpochEndFraction: 0.66, - SyncCertDistance: 4, - HareDelayLayers: 5, + Interval: interval, + GossipDuration: 5 * time.Millisecond, + EpochEndFraction: 0.66, + SyncCertDistance: 4, + HareDelayLayers: 5, + OutOfSyncThresholdLayers: outOfSyncThreshold, } ts.syncer = NewSyncer(ts.cdb, ts.mTicker, ts.mBeacon, ts.msh, nil, nil, ts.mLyrPatrol, ts.mCertHdr, WithConfig(cfg), @@ -535,7 +538,7 @@ func TestNetworkHasNoData(t *testing.T) { require.True(t, ts.syncer.IsSynced(context.Background())) } // the network hasn't received any data - require.Greater(t, ts.syncer.ticker.CurrentLayer()-ts.msh.LatestLayer(), outOfSyncThreshold) + require.Greater(t, int(ts.syncer.ticker.CurrentLayer()-ts.msh.LatestLayer()), outOfSyncThreshold) } // test the case where the node was originally synced, and somehow gets out of sync, but