Skip to content

Commit

Permalink
sync: parametrize out of sync threshold and set it to 3h for mainnet (#…
Browse files Browse the repository at this point in the history
…5040)

related: #5036

In the future we should drop it completely and use only connectivity information to decide whether a node should stop participating
in consensus. There should be no risk of interrupting consensus because of unexpected failures in the sync process.
  • Loading branch information
dshulyak committed Sep 21, 2023
1 parent 74a1ba7 commit 61e4cfd
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 31 deletions.
11 changes: 6 additions & 5 deletions config/mainnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,12 @@ func MainnetConfig() Config {
FETCH: fetch.DefaultConfig(),
LOGGING: logging,
Sync: syncer.Config{
Interval: time.Minute,
EpochEndFraction: 0.8,
MaxStaleDuration: time.Hour,
Standalone: false,
GossipDuration: 50 * time.Second,
Interval: time.Minute,
EpochEndFraction: 0.8,
MaxStaleDuration: time.Hour,
Standalone: false,
GossipDuration: 50 * time.Second,
OutOfSyncThresholdLayers: 36, // 3h
},
Recovery: checkpoint.DefaultConfig(),
Cache: datastore.DefaultConfig(),
Expand Down
38 changes: 18 additions & 20 deletions syncer/syncer.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,31 +21,29 @@ import (

// Config holds the configuration parameters for the syncer.
type Config struct {
Interval time.Duration
EpochEndFraction float64
HareDelayLayers uint32
SyncCertDistance uint32
MaxStaleDuration time.Duration
Standalone bool
GossipDuration time.Duration
Interval time.Duration
EpochEndFraction float64
HareDelayLayers uint32
SyncCertDistance uint32
MaxStaleDuration time.Duration
Standalone bool
GossipDuration time.Duration
// OutOfSyncThresholdLayers is a distance measured in layers: once the
// current layer is at least this many layers ahead of the last synced
// layer, isTooFarBehind reports the node as too far behind.
OutOfSyncThresholdLayers uint32 `mapstructure:"out-of-sync-threshold"`
}

// DefaultConfig returns the default configuration for the syncer.
func DefaultConfig() Config {
return Config{
Interval: 10 * time.Second,
EpochEndFraction: 0.8,
HareDelayLayers: 10,
SyncCertDistance: 10,
MaxStaleDuration: time.Second,
GossipDuration: 15 * time.Second,
Interval: 10 * time.Second,
EpochEndFraction: 0.8,
HareDelayLayers: 10,
SyncCertDistance: 10,
MaxStaleDuration: time.Second,
GossipDuration: 15 * time.Second,
OutOfSyncThresholdLayers: 3,
}
}

const (
outOfSyncThreshold uint32 = 3 // see notSynced
)

type syncState uint32

const (
Expand Down Expand Up @@ -452,7 +450,7 @@ func (s *Syncer) syncAtx(ctx context.Context) error {
return nil
}

func isTooFarBehind(ctx context.Context, logger log.Log, current, lastSynced types.LayerID) bool {
func isTooFarBehind(ctx context.Context, logger log.Log, current, lastSynced types.LayerID, outOfSyncThreshold uint32) bool {
if current.After(lastSynced) && current.Difference(lastSynced) >= outOfSyncThreshold {
logger.WithContext(ctx).With().Info("node is too far behind",
log.Stringer("current", current),
Expand All @@ -472,7 +470,7 @@ func (s *Syncer) setStateBeforeSync(ctx context.Context) {
}
return
}
if isTooFarBehind(ctx, s.logger, current, s.getLastSyncedLayer()) {
if isTooFarBehind(ctx, s.logger, current, s.getLastSyncedLayer(), s.cfg.OutOfSyncThresholdLayers) {
s.setSyncState(ctx, notSynced)
}
}
Expand All @@ -492,7 +490,7 @@ func (s *Syncer) setStateAfterSync(ctx context.Context, success bool) {
// network outage.
switch currSyncState {
case synced:
if !success && isTooFarBehind(ctx, s.logger, current, s.getLastSyncedLayer()) {
if !success && isTooFarBehind(ctx, s.logger, current, s.getLastSyncedLayer(), s.cfg.OutOfSyncThresholdLayers) {
s.setSyncState(ctx, notSynced)
}
case gossipSync:
Expand Down
15 changes: 9 additions & 6 deletions syncer/syncer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ import (
const (
layersPerEpoch = 3
never = time.Second * 60 * 24

outOfSyncThreshold = 3
)

func TestMain(m *testing.M) {
Expand Down Expand Up @@ -93,11 +95,12 @@ func newTestSyncer(t *testing.T, interval time.Duration) *testSyncer {
require.NoError(t, err)

cfg := Config{
Interval: interval,
GossipDuration: 5 * time.Millisecond,
EpochEndFraction: 0.66,
SyncCertDistance: 4,
HareDelayLayers: 5,
Interval: interval,
GossipDuration: 5 * time.Millisecond,
EpochEndFraction: 0.66,
SyncCertDistance: 4,
HareDelayLayers: 5,
OutOfSyncThresholdLayers: outOfSyncThreshold,
}
ts.syncer = NewSyncer(ts.cdb, ts.mTicker, ts.mBeacon, ts.msh, nil, nil, ts.mLyrPatrol, ts.mCertHdr,
WithConfig(cfg),
Expand Down Expand Up @@ -535,7 +538,7 @@ func TestNetworkHasNoData(t *testing.T) {
require.True(t, ts.syncer.IsSynced(context.Background()))
}
// the network hasn't received any data
require.Greater(t, ts.syncer.ticker.CurrentLayer()-ts.msh.LatestLayer(), outOfSyncThreshold)
require.Greater(t, int(ts.syncer.ticker.CurrentLayer()-ts.msh.LatestLayer()), outOfSyncThreshold)
}

// test the case where the node was originally synced, and somehow gets out of sync, but
Expand Down

0 comments on commit 61e4cfd

Please sign in to comment.