Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pausable uptime manager #1372

Merged
merged 40 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
ce36343
add validator state
ceyonur Sep 16, 2024
549d319
add pausable uptime manager
ceyonur Sep 16, 2024
9987248
remove stuttering name
ceyonur Sep 16, 2024
e1ef744
Merge branch 'master' into validator-state
ceyonur Sep 16, 2024
b33ffbe
Merge branch 'validator-state' into pausable-uptime-manager
ceyonur Sep 16, 2024
0f16af2
rename state listener
ceyonur Sep 16, 2024
df3ce63
Merge branch 'validator-state' into pausable-uptime-manager
ceyonur Sep 16, 2024
92f6b7e
Update plugin/evm/validators/state.go
ceyonur Sep 19, 2024
0db2041
use update enum
ceyonur Sep 19, 2024
c5520bc
Update plugin/evm/validators/state.go
ceyonur Sep 19, 2024
dea94af
Update plugin/evm/validators/state.go
ceyonur Sep 19, 2024
c0f6ff4
respond to comments
ceyonur Sep 19, 2024
b7de0f6
Merge branch 'validator-state' of https://github.com/ava-labs/subnet-…
ceyonur Sep 19, 2024
b566103
update avalanchego dep branch
ceyonur Sep 19, 2024
66ab74b
update avalanchego dep branch
ceyonur Sep 19, 2024
ad3a35a
reviews
ceyonur Sep 19, 2024
64fe238
reword errs
ceyonur Sep 19, 2024
33d24d1
Merge branch 'pausable-uptime-manager' of https://github.com/ava-labs…
ceyonur Sep 19, 2024
d7338da
fix test changes
ceyonur Sep 19, 2024
8eab611
Merge branch 'master' into uptime-tracking-base
ceyonur Sep 19, 2024
9ad5528
fix upgrades after deactivating latest in context
ceyonur Sep 19, 2024
4536590
Merge branch 'uptime-tracking-base' into validator-state
ceyonur Sep 19, 2024
fc71949
Merge branch 'validator-state' into pausable-uptime-manager
ceyonur Sep 19, 2024
df6ad02
use branch commit for ava version
ceyonur Sep 20, 2024
cc6ce95
Merge branch 'master' into uptime-tracking-base
ceyonur Sep 20, 2024
bcd4c9c
Merge branch 'uptime-tracking-base' into validator-state
ceyonur Sep 20, 2024
a49dd8d
Merge branch 'validator-state' into pausable-uptime-manager
ceyonur Sep 20, 2024
734b201
Merge branch 'master' into validator-state
ceyonur Oct 28, 2024
3e94861
Merge branch 'validator-state' into pausable-uptime-manager
ceyonur Oct 28, 2024
cab1ddf
reviews
ceyonur Oct 29, 2024
374d885
add listener mock
ceyonur Oct 29, 2024
1fcca58
Merge branch 'master' into validator-state
ceyonur Oct 29, 2024
c41f39c
Merge branch 'validator-state' into pausable-uptime-manager
ceyonur Oct 29, 2024
af39b2c
remove errs from resume and pause
ceyonur Oct 30, 2024
d5d3545
check after stopping
ceyonur Oct 30, 2024
6fffc2b
use expectedTime in tests
ceyonur Oct 31, 2024
733da4c
Merge branch 'master' into pausable-uptime-manager
ceyonur Nov 5, 2024
66fa2aa
reviews
ceyonur Nov 5, 2024
4ca958c
fix requires
ceyonur Nov 5, 2024
366c946
underscore unused params
ceyonur Nov 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions plugin/evm/uptime/pausable_manager.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package uptime

import (
"errors"

"github.com/ava-labs/subnet-evm/plugin/evm/validators"
"github.com/ethereum/go-ethereum/log"

"github.com/ava-labs/avalanchego/ids"
"github.com/ava-labs/avalanchego/snow/uptime"
"github.com/ava-labs/avalanchego/utils/set"
)

var (
	// Compile-time assertion that *pausableManager implements
	// validators.StateCallbackListener.
	_ validators.StateCallbackListener = (*pausableManager)(nil)

	// errPausedDisconnect is returned by Disconnect when the wrapped manager
	// still reports a paused node as connected.
	errPausedDisconnect = errors.New("paused node cannot be disconnected")
)

type PausableManager interface {
ceyonur marked this conversation as resolved.
Show resolved Hide resolved
uptime.Manager
validators.StateCallbackListener
IsPaused(nodeID ids.NodeID) bool
}

// pausableManager embeds an uptime.Manager and keeps two sets of node IDs
// alongside it: the nodes that are paused and the nodes that have connected.
type pausableManager struct {
	uptime.Manager
	// pausedVdrs is the set of nodes that are currently paused.
	pausedVdrs set.Set[ids.NodeID]
	// connectedVdrs is a set of nodes that are connected to the manager.
	// This is used to immediately connect nodes when they are unpaused.
	connectedVdrs set.Set[ids.NodeID]
}

// NewPausableManager wraps the given uptime.Manager in a PausableManager
// whose paused and connected validator sets both start out empty.
func NewPausableManager(manager uptime.Manager) PausableManager {
	pm := &pausableManager{Manager: manager}
	pm.pausedVdrs = make(set.Set[ids.NodeID])
	pm.connectedVdrs = make(set.Set[ids.NodeID])
	return pm
}

// Connect connects the node with the given ID to the uptime.Manager
// If the node is paused, it will not be connected
func (p *pausableManager) Connect(nodeID ids.NodeID) error {
p.connectedVdrs.Add(nodeID)
ceyonur marked this conversation as resolved.
Show resolved Hide resolved
if !p.IsPaused(nodeID) && !p.Manager.IsConnected(nodeID) {
return p.Manager.Connect(nodeID)
}
return nil
}

// Disconnect forgets the node with the given ID as connected and, if the
// wrapped uptime.Manager still reports it as connected, disconnects it there.
// A paused node is expected to have been disconnected from the wrapped
// manager already; if it is still connected, errPausedDisconnect is returned.
func (p *pausableManager) Disconnect(nodeID ids.NodeID) error {
	p.connectedVdrs.Remove(nodeID)

	if !p.Manager.IsConnected(nodeID) {
		// Nothing to do on the wrapped manager.
		return nil
	}
	if p.IsPaused(nodeID) {
		// Invariant violation: a paused node must not be connected to the
		// wrapped manager.
		return errPausedDisconnect
	}
	return p.Manager.Disconnect(nodeID)
}

// StartTracking starts tracking uptime for the nodes with the given IDs
// If a node is paused, it will not be tracked
func (p *pausableManager) StartTracking(nodeIDs []ids.NodeID) error {
activeNodeIDs := make([]ids.NodeID, 0, len(nodeIDs))
for _, nodeID := range nodeIDs {
if !p.IsPaused(nodeID) {
activeNodeIDs = append(activeNodeIDs, nodeID)
}
}
return p.Manager.StartTracking(activeNodeIDs)
ceyonur marked this conversation as resolved.
Show resolved Hide resolved
}

// OnValidatorAdded is called when a validator is added.
// If the node is inactive, it will be paused.
func (p *pausableManager) OnValidatorAdded(vID ids.ID, nodeID ids.NodeID, startTime uint64, isActive bool) {
ceyonur marked this conversation as resolved.
Show resolved Hide resolved
if !isActive {
err := p.pause(nodeID)
if err != nil {
log.Error("failed to handle added validator %s: %s", nodeID, err)
}
}
}

// OnValidatorRemoved is called when a validator is removed.
// If the node is already paused, it will be resumed.
michaelkaplan13 marked this conversation as resolved.
Show resolved Hide resolved
func (p *pausableManager) OnValidatorRemoved(vID ids.ID, nodeID ids.NodeID) {
ceyonur marked this conversation as resolved.
Show resolved Hide resolved
if p.IsPaused(nodeID) {
err := p.resume(nodeID)
if err != nil {
log.Error("failed to handle validator removed %s: %s", nodeID, err)
}
}
}

// OnValidatorStatusUpdated is called when the status of a validator is updated.
// If the node is active, it will be resumed. If the node is inactive, it will be paused.
func (p *pausableManager) OnValidatorStatusUpdated(vID ids.ID, nodeID ids.NodeID, isActive bool) {
ceyonur marked this conversation as resolved.
Show resolved Hide resolved
var err error
if isActive {
err = p.resume(nodeID)
} else {
err = p.pause(nodeID)
}
if err != nil {
log.Error("failed to update status for node %s: %s", nodeID, err)
}
}

// IsPaused reports whether the node with the given ID is in the paused set.
func (p *pausableManager) IsPaused(nodeID ids.NodeID) bool {
	paused := p.pausedVdrs.Contains(nodeID)
	return paused
}

// pause marks the node with the given ID as paused.
// If the wrapped uptime.Manager currently reports the node as connected, the
// node is disconnected from it.
func (p *pausableManager) pause(nodeID ids.NodeID) error {
	p.pausedVdrs.Add(nodeID)

	if !p.Manager.IsConnected(nodeID) {
		return nil
	}
	// The node is connected, so it has to be disconnected from the wrapped
	// manager. This should be fine even if tracking has not started yet,
	// since the inner manager should handle disconnects accordingly.
	return p.Manager.Disconnect(nodeID)
}

// resume resumes uptime tracking for the node with the given ID
// resume can connect the node to the uptime.Manager if it was connected.
func (p *pausableManager) resume(nodeID ids.NodeID) error {
p.pausedVdrs.Remove(nodeID)
if p.connectedVdrs.Contains(nodeID) && !p.Manager.IsConnected(nodeID) {
return p.Manager.Connect(nodeID)
darioush marked this conversation as resolved.
Show resolved Hide resolved
}
return nil
}
241 changes: 241 additions & 0 deletions plugin/evm/uptime/pausable_manager_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package uptime

import (
"testing"
"time"

"github.com/ava-labs/avalanchego/ids"
"github.com/ava-labs/avalanchego/snow/uptime"
"github.com/ava-labs/avalanchego/utils/timer/mockable"
"github.com/stretchr/testify/require"
)

func TestPausableManager(t *testing.T) {
vID := ids.GenerateTestID()
nodeID0 := ids.GenerateTestNodeID()
startTime := time.Now()

tests := []struct {
name string
testFunc func(t *testing.T, up PausableManager, clk *mockable.Clock, s uptime.State)
}{
{
name: "Case 1: Connect, pause, start tracking",
testFunc: func(t *testing.T, up PausableManager, clk *mockable.Clock, s uptime.State) {
require := require.New(t)

// Connect before tracking
require.NoError(up.Connect(nodeID0))
darioush marked this conversation as resolved.
Show resolved Hide resolved
addTime(clk, time.Second)

// Pause before tracking
up.OnValidatorStatusUpdated(vID, nodeID0, false)
require.True(up.IsPaused(nodeID0))

// Elapse Time
addTime(clk, time.Second)

// Start tracking
require.NoError(up.StartTracking([]ids.NodeID{nodeID0}))
currentTime := addTime(clk, time.Second)
// Uptime should not have increased since the node was paused
expectedUptime := 0 * time.Second
checkUptime(t, up, nodeID0, expectedUptime, currentTime)

// Disconnect
require.NoError(up.Disconnect(nodeID0))
// Uptime should not have increased
checkUptime(t, up, nodeID0, expectedUptime, currentTime)
},
},
{
name: "Case 2: Start tracking, connect, pause, re-connect, resume",
testFunc: func(t *testing.T, up PausableManager, clk *mockable.Clock, s uptime.State) {
require := require.New(t)

// Start tracking
require.NoError(up.StartTracking([]ids.NodeID{nodeID0}))

// Connect
addTime(clk, 1*time.Second)
require.NoError(up.Connect(nodeID0))

// Pause
addTime(clk, 1*time.Second)
up.OnValidatorStatusUpdated(vID, nodeID0, false)
require.True(up.IsPaused(nodeID0))

// Elapse time
currentTime := addTime(clk, 2*time.Second)
// Uptime should be 1 second since the node was paused after 1 sec
expectedUptime := 1 * time.Second
checkUptime(t, up, nodeID0, expectedUptime, currentTime)

// Disconnect and check uptime
currentTime = addTime(clk, 3*time.Second)
require.NoError(up.Disconnect(nodeID0))
// Uptime should not have increased since the node was paused
checkUptime(t, up, nodeID0, expectedUptime, currentTime)

// Connect again and check uptime
addTime(clk, 4*time.Second)
require.NoError(up.Connect(nodeID0))
currentTime = addTime(clk, 5*time.Second)
// Uptime should not have increased since the node was paused
checkUptime(t, up, nodeID0, expectedUptime, currentTime)

// Resume and check uptime
currentTime = addTime(clk, 6*time.Second)
up.OnValidatorStatusUpdated(vID, nodeID0, true)
require.False(up.IsPaused(nodeID0))
// Uptime should not have increased since the node was paused
// and we just resumed it
checkUptime(t, up, nodeID0, expectedUptime, currentTime)

// Elapsed time check
currentTime = addTime(clk, 7*time.Second)
// Uptime should increase by 7 seconds above since the node was resumed
expectedUptime += 7 * time.Second
checkUptime(t, up, nodeID0, expectedUptime, currentTime)
},
},
{
name: "Case 3: Pause, start tracking, connect, re-connect, resume",
testFunc: func(t *testing.T, up PausableManager, clk *mockable.Clock, s uptime.State) {
require := require.New(t)

// Pause before tracking
up.OnValidatorStatusUpdated(vID, nodeID0, false)
require.True(up.IsPaused(nodeID0))

// Start tracking
addTime(clk, time.Second)
require.NoError(up.StartTracking([]ids.NodeID{nodeID0}))

// Connect and check uptime
addTime(clk, 1*time.Second)
require.NoError(up.Connect(nodeID0))

currentTime := addTime(clk, 2*time.Second)
// Uptime should not have increased since the node was paused
expectedUptime := 0 * time.Second
checkUptime(t, up, nodeID0, expectedUptime, currentTime)

// Disconnect and check uptime
currentTime = addTime(clk, 3*time.Second)
require.NoError(up.Disconnect(nodeID0))
// Uptime should not have increased since the node was paused
checkUptime(t, up, nodeID0, expectedUptime, currentTime)

// Connect again and resume
addTime(clk, 4*time.Second)
require.NoError(up.Connect(nodeID0))
addTime(clk, 5*time.Second)
up.OnValidatorStatusUpdated(vID, nodeID0, true)
require.False(up.IsPaused(nodeID0))

// Check uptime after resume
currentTime = addTime(clk, 6*time.Second)
// Uptime should have increased by 6 seconds since the node was resumed
expectedUptime += 6 * time.Second
checkUptime(t, up, nodeID0, expectedUptime, currentTime)
},
},
{
name: "Case 4: Start tracking, connect, pause, stop tracking, resume tracking",
testFunc: func(t *testing.T, up PausableManager, clk *mockable.Clock, s uptime.State) {
require := require.New(t)

// Start tracking and connect
require.NoError(up.StartTracking([]ids.NodeID{nodeID0}))
addTime(clk, time.Second)
require.NoError(up.Connect(nodeID0))

// Pause and check uptime
currentTime := addTime(clk, 2*time.Second)
up.OnValidatorStatusUpdated(vID, nodeID0, false)
require.True(up.IsPaused(nodeID0))
// Uptime should be 2 seconds since the node was paused after 2 seconds
expectedUptime := 2 * time.Second

checkUptime(t, up, nodeID0, expectedUptime, currentTime)

// Stop tracking and reinitialize manager
currentTime = addTime(clk, 3*time.Second)
require.NoError(up.StopTracking([]ids.NodeID{nodeID0}))
checkUptime(t, up, nodeID0, expectedUptime, currentTime)
up = NewPausableManager(uptime.NewManager(s, clk))

// Uptime should not have increased since the node was paused
// and we have not started tracking again
checkUptime(t, up, nodeID0, expectedUptime, currentTime)

// Pause and check uptime
up.OnValidatorStatusUpdated(vID, nodeID0, false)
require.True(up.IsPaused(nodeID0))
// Uptime should not have increased since the node was paused
checkUptime(t, up, nodeID0, expectedUptime, currentTime)

// Resume and check uptime
currentTime = addTime(clk, 5*time.Second)
up.OnValidatorStatusUpdated(vID, nodeID0, true)
require.False(up.IsPaused(nodeID0))
// Uptime should have increased by 5 seconds since the node was resumed
expectedUptime += 5 * time.Second
checkUptime(t, up, nodeID0, expectedUptime, currentTime)

// Start tracking and check elapsed time
currentTime = addTime(clk, 6*time.Second)
require.NoError(up.StartTracking([]ids.NodeID{nodeID0}))
// Uptime should have increased by 6 seconds since we started tracking
// and node was resumed (we assume the node was online until we started tracking)
expectedUptime += 6 * time.Second
checkUptime(t, up, nodeID0, expectedUptime, currentTime)
marun marked this conversation as resolved.
Show resolved Hide resolved

// Elapsed time
currentTime = addTime(clk, 7*time.Second)
// Uptime should not have increased since the node was not connected
checkUptime(t, up, nodeID0, expectedUptime, currentTime)

// Connect and final uptime check
require.NoError(up.Connect(nodeID0))
currentTime = addTime(clk, 8*time.Second)
// Uptime should have increased by 8 seconds since the node was connected
expectedUptime += 8 * time.Second
checkUptime(t, up, nodeID0, expectedUptime, currentTime)
},
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
up, clk, s := setupTestEnv(nodeID0, startTime)
test.testFunc(t, up, clk, s)
})
}
}

// setupTestEnv builds a fresh test fixture: a mock clock set to startTime, a
// test uptime state that already contains nodeID with that start time, and a
// PausableManager wrapping a new uptime manager over both.
func setupTestEnv(nodeID ids.NodeID, startTime time.Time) (PausableManager, *mockable.Clock, uptime.State) {
	clock := &mockable.Clock{}
	clock.Set(startTime)

	state := uptime.NewTestState()
	state.AddNode(nodeID, startTime)

	manager := NewPausableManager(uptime.NewManager(state, clock))
	return manager, clock, state
}

// addTime moves the mock clock forward by duration and returns the clock's
// time after the move.
func addTime(clk *mockable.Clock, duration time.Duration) time.Time {
	advanced := clk.Time().Add(duration)
	clk.Set(advanced)
	return clk.Time()
}

// checkUptime asks the manager for nodeID's uptime and requires that the call
// succeeds, that the last-updated time matches expectedLastUpdate at second
// granularity, and that the uptime equals expectedUptime.
func checkUptime(t *testing.T, up PausableManager, nodeID ids.NodeID, expectedUptime time.Duration, expectedLastUpdate time.Time) {
	t.Helper()
	// Named so that it does not shadow the imported uptime package.
	gotUptime, gotLastUpdated, err := up.CalculateUptime(nodeID)
	require.NoError(t, err)
	require.Equal(t, expectedLastUpdate.Unix(), gotLastUpdated.Unix())
	require.Equal(t, expectedUptime, gotUptime)
}
Loading