Skip to content

Commit

Permalink
add warning alert and metrics to fly component (#576)
Browse files Browse the repository at this point in the history
* Add unknown guardian set index alert

* Add Prometheus metrics to check whether observations without a txhash exist

* Add max sequence cache error prometheus metrics
  • Loading branch information
walker-16 committed Jul 25, 2023
1 parent 13819e2 commit 75fffe3
Show file tree
Hide file tree
Showing 10 changed files with 80 additions and 20 deletions.
31 changes: 22 additions & 9 deletions fly/guardiansets/guardianset.go
Original file line number Diff line number Diff line change
@@ -1,31 +1,42 @@
package guardiansets

import (
"context"
"errors"
"fmt"
"time"

"github.com/certusone/wormhole/node/pkg/common"
eth_common "github.com/ethereum/go-ethereum/common"
"github.com/wormhole-foundation/wormhole-explorer/common/client/alert"
"github.com/wormhole-foundation/wormhole-explorer/common/domain"
flyAlert "github.com/wormhole-foundation/wormhole-explorer/fly/internal/alert"
sdk "github.com/wormhole-foundation/wormhole/sdk/vaa"

eth_common "github.com/ethereum/go-ethereum/common"
)

// GuardianSetHistory contains information about all guardian sets for the current network (past and present).
type GuardianSetHistory struct {
guardianSetsByIndex []common.GuardianSet
expirationTimesByIndex []time.Time
alertClient alert.AlertClient
}

// Verify takes a VAA as input and validates its guardian signatures.
func (h *GuardianSetHistory) Verify(vaa *sdk.VAA) error {
func (h *GuardianSetHistory) Verify(ctx context.Context, vaa *sdk.VAA) error {

idx := vaa.GuardianSetIndex

// Make sure the index exists
if idx >= uint32(len(h.guardianSetsByIndex)) {
return fmt.Errorf("Guardian Set Index is out of bounds: got %d, max is %d",
alertContext := alert.AlertContext{
Details: map[string]string{
"vaaID": vaa.MessageID(),
"vaaGuardianSetIndex": fmt.Sprint(vaa.GuardianSetIndex),
"guardianSetIndex": fmt.Sprint(len(h.guardianSetsByIndex)),
},
}
_ = h.alertClient.CreateAndSend(ctx, flyAlert.GuardianSetUnknown, alertContext)
return fmt.Errorf("guardian Set Index is out of bounds: got %d, max is %d",
vaa.GuardianSetIndex,
len(h.guardianSetsByIndex),
)
Expand All @@ -45,16 +56,16 @@ func (h GuardianSetHistory) GetLatest() common.GuardianSet {
}

// GetByEnv returns the guardian set config for the given environment.
func GetByEnv(enviroment string) GuardianSetHistory {
func GetByEnv(enviroment string, alertClient alert.AlertClient) GuardianSetHistory {
switch enviroment {
case domain.P2pTestNet:
return getTestnetGuardianSet()
return getTestnetGuardianSet(alertClient)
default:
return getMainnetGuardianSet()
return getMainnetGuardianSet(alertClient)
}
}

func getTestnetGuardianSet() GuardianSetHistory {
func getTestnetGuardianSet(alertClient alert.AlertClient) GuardianSetHistory {
const tenYears = time.Hour * 24 * 365 * 10
gs0TestValidUntil := time.Now().Add(tenYears)
gstest0 := common.GuardianSet{
Expand All @@ -66,10 +77,11 @@ func getTestnetGuardianSet() GuardianSetHistory {
return GuardianSetHistory{
guardianSetsByIndex: []common.GuardianSet{gstest0},
expirationTimesByIndex: []time.Time{gs0TestValidUntil},
alertClient: alertClient,
}
}

func getMainnetGuardianSet() GuardianSetHistory {
func getMainnetGuardianSet(alertClient alert.AlertClient) GuardianSetHistory {
gs0ValidUntil := time.Unix(1628599904, 0) // Tue Aug 10 2021 12:51:44 GMT+0000
gs0 := common.GuardianSet{
Index: 0,
Expand Down Expand Up @@ -163,5 +175,6 @@ func getMainnetGuardianSet() GuardianSetHistory {
return GuardianSetHistory{
guardianSetsByIndex: []common.GuardianSet{gs0, gs1, gs2, gs3},
expirationTimesByIndex: []time.Time{gs0ValidUntil, gs1ValidUntil, gs2ValidUntil, gs3ValidUntil},
alertClient: alertClient,
}
}
10 changes: 6 additions & 4 deletions fly/guardiansets/guardianset_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package guardiansets

import (
"context"
_ "embed"
"testing"

"github.com/wormhole-foundation/wormhole-explorer/common/client/alert"
sdk "github.com/wormhole-foundation/wormhole/sdk/vaa"
)

Expand All @@ -21,8 +23,8 @@ func TestValidSignatures(t *testing.T) {
}

// assert that the signatures must be valid
h := getMainnetGuardianSet()
err = h.Verify(&vaa)
h := getMainnetGuardianSet(alert.NewDummyClient())
err = h.Verify(context.TODO(), &vaa)
if err != nil {
t.Fatalf("Failed to verify VAA: %v", err)
}
Expand All @@ -45,8 +47,8 @@ func TestInvalidSignatures(t *testing.T) {
}

// assert that the signatures must be invalid
h := getMainnetGuardianSet()
err = h.Verify(&vaa)
h := getMainnetGuardianSet(alert.NewDummyClient())
err = h.Verify(context.TODO(), &vaa)
if err == nil {
t.Fatal("Expected signatures to be invalid")
}
Expand Down
20 changes: 16 additions & 4 deletions fly/internal/alert/alert.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@ const (
ErrorSaveObservation = "ERROR_SAVE_OBSERVATION"
ErrorSaveHeartbeat = "ERROR_SAVE_HEARTBEAT"
ErrorSaveGovernorStatus = "ERROR_SAVE_GOVERNOR_STATUS"
EroorSaveGovernorConfig = "ERROR_SAVE_GOVERNOR_CONFIG"
ErrorSaveGovernorConfig = "ERROR_SAVE_GOVERNOR_CONFIG"

// warning alerts
GuardianSetUnknown = "GUARDIAN_SET_UNKNOWN"
ObservationWithoutTxHash = "OBSERVATION_WITHOUT_TX_HASH"
)

func LoadAlerts(cfg alert.AlertConfig) map[string]alert.Alert {
Expand Down Expand Up @@ -68,15 +72,23 @@ func LoadAlerts(cfg alert.AlertConfig) map[string]alert.Alert {
Entity: "fly",
Priority: alert.CRITICAL,
}
alerts[EroorSaveGovernorConfig] = alert.Alert{
Alias: EroorSaveGovernorConfig,
alerts[ErrorSaveGovernorConfig] = alert.Alert{
Alias: ErrorSaveGovernorConfig,
Message: fmt.Sprintf("[%s] %s", cfg.Environment, "Error saving governor config in governorConfig collection"),
Description: "An error was found persisting the governor config in mongo in the governorConfig collection.",
Actions: []string{},
Tags: []string{cfg.Environment, "fly", "governorConfig", "mongo"},
Entity: "fly",
Priority: alert.CRITICAL,
}

alerts[GuardianSetUnknown] = alert.Alert{
Alias: GuardianSetUnknown,
Message: fmt.Sprintf("[%s] %s", cfg.Environment, "Guardian set unknown"),
Description: "The guardian set from the vaa is unknown.",
Actions: []string{},
Tags: []string{cfg.Environment, "fly", "guardianSet", "vaa"},
Entity: "fly",
Priority: alert.INFORMATIONAL,
}
return alerts
}
6 changes: 6 additions & 0 deletions fly/internal/metrics/dummy.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ func (d *DummyMetrics) IncObservationUnfiltered(chain sdk.ChainID) {}
// IncObservationInserted is a no-op: DummyMetrics does not record the number
// of observations inserted in the database. The chain argument is ignored.
func (d *DummyMetrics) IncObservationInserted(chain sdk.ChainID) {}

// IncObservationWithoutTxHash is a no-op: DummyMetrics does not record the
// number of observations without a tx hash. The chain argument is ignored.
func (d *DummyMetrics) IncObservationWithoutTxHash(chain sdk.ChainID) {}

// IncObservationTotal is a no-op: DummyMetrics does not record the number of
// observations received from the Gossip network.
func (d *DummyMetrics) IncObservationTotal() {}

Expand All @@ -55,3 +58,6 @@ func (d *DummyMetrics) IncGovernorStatusFromGossipNetwork(guardianName string) {

// IncGovernorStatusInserted is a no-op: DummyMetrics does not record the
// number of governor statuses inserted in the database. The guardianName
// argument is ignored.
func (d *DummyMetrics) IncGovernorStatusInserted(guardianName string) {}

// IncMaxSequenceCacheError is a no-op: DummyMetrics does not record errors
// raised when updating the max sequence cache. The chain argument is ignored.
func (d *DummyMetrics) IncMaxSequenceCacheError(chain sdk.ChainID) {}
4 changes: 4 additions & 0 deletions fly/internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ type Metrics interface {
IncObservationFromGossipNetwork(chain sdk.ChainID)
IncObservationUnfiltered(chain sdk.ChainID)
IncObservationInserted(chain sdk.ChainID)
IncObservationWithoutTxHash(chain sdk.ChainID)
IncObservationTotal()

// heartbeat metrics
Expand All @@ -29,4 +30,7 @@ type Metrics interface {
// governor status metrics
IncGovernorStatusFromGossipNetwork(guardianName string)
IncGovernorStatusInserted(guardianName string)

// max sequence cache metrics
IncMaxSequenceCacheError(chain sdk.ChainID)
}
21 changes: 21 additions & 0 deletions fly/internal/metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ type PrometheusMetrics struct {
heartbeatReceivedCount *prometheus.CounterVec
governorConfigReceivedCount *prometheus.CounterVec
governorStatusReceivedCount *prometheus.CounterVec
maxSequenceCacheCount *prometheus.CounterVec
}

// NewPrometheusMetrics returns a new instance of PrometheusMetrics.
Expand Down Expand Up @@ -88,6 +89,15 @@ func NewPrometheusMetrics(environment string) *PrometheusMetrics {
"service": serviceName,
},
}, []string{"guardian_node", "type"})
maxSequenceCacheCount := promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "max_sequence_cache_count_by_chain",
Help: "Total number of errors when updating max sequence cache",
ConstLabels: map[string]string{
"environment": environment,
"service": serviceName,
},
}, []string{"chain"})
return &PrometheusMetrics{
vaaReceivedCount: vaaReceivedCount,
vaaTotal: vaaTotal,
Expand All @@ -96,6 +106,7 @@ func NewPrometheusMetrics(environment string) *PrometheusMetrics {
heartbeatReceivedCount: heartbeatReceivedCount,
governorConfigReceivedCount: governorConfigReceivedCount,
governorStatusReceivedCount: governorStatusReceivedCount,
maxSequenceCacheCount: maxSequenceCacheCount,
}
}

Expand Down Expand Up @@ -139,6 +150,11 @@ func (m *PrometheusMetrics) IncObservationInserted(chain sdk.ChainID) {
m.observationReceivedCount.WithLabelValues(chain.String(), "inserted").Inc()
}

// IncObservationWithoutTxHash adds one to the observation counter for the
// given chain under the "without_txhash" type label.
func (m *PrometheusMetrics) IncObservationWithoutTxHash(chain sdk.ChainID) {
	chainLabel := chain.String()
	counter := m.observationReceivedCount.WithLabelValues(chainLabel, "without_txhash")
	counter.Inc()
}

// IncObservationTotal increases the number of observation received from Gossip network.
func (m *PrometheusMetrics) IncObservationTotal() {
m.observationTotal.Inc()
Expand Down Expand Up @@ -173,3 +189,8 @@ func (m *PrometheusMetrics) IncGovernorStatusFromGossipNetwork(guardianName stri
func (m *PrometheusMetrics) IncGovernorStatusInserted(guardianName string) {
	// Bump the governor status counter for this guardian under the
	// "inserted" type label.
	counter := m.governorStatusReceivedCount.WithLabelValues(guardianName, "inserted")
	counter.Inc()
}

// IncMaxSequenceCacheError adds one to the max sequence cache error counter
// for the given chain.
func (m *PrometheusMetrics) IncMaxSequenceCacheError(chain sdk.ChainID) {
	chainLabel := chain.String()
	counter := m.maxSequenceCacheCount.WithLabelValues(chainLabel)
	counter.Inc()
}
2 changes: 1 addition & 1 deletion fly/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ func main() {

// Bootstrap guardian set, otherwise heartbeats would be skipped
// TODO: fetch this and probably figure out how to update it live
guardianSetHistory := guardiansets.GetByEnv(p2pNetworkConfig.Enviroment)
guardianSetHistory := guardiansets.GetByEnv(p2pNetworkConfig.Enviroment, alertClient)
gsLastet := guardianSetHistory.GetLatest()
gst.Set(&gsLastet)

Expand Down
2 changes: 1 addition & 1 deletion fly/processor/vaa_gossip_consumer.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func NewVAAGossipConsumer(
// Push handles incoming VAAs depending on whether it is a pyth or non pyth.
func (p *vaaGossipConsumer) Push(ctx context.Context, v *vaa.VAA, serializedVaa []byte) error {

if err := p.guardianSetHistory.Verify(v); err != nil {
if err := p.guardianSetHistory.Verify(ctx, v); err != nil {
p.logger.Error("Received invalid vaa", zap.String("id", v.MessageID()))
return err
}
Expand Down
1 change: 1 addition & 0 deletions fly/processor/vaa_queue_consumer.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ func (c *VAAQueueConsumer) Start(ctx context.Context) {

err = c.notifyFunc(ctx, v, msg.Data())
if err != nil {
c.metrics.IncMaxSequenceCacheError(v.EmitterChain)
c.logger.Error("Error notifying vaa",
zap.String("id", v.MessageID()),
zap.Error(err))
Expand Down
3 changes: 2 additions & 1 deletion fly/storage/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ func (s *Repository) UpsertObservation(o *gossipv1.SignedObservation) error {
zap.Uint64("chainId", chainID),
zap.ByteString("txHash", o.GetTxHash()),
zap.Error(err))
s.metrics.IncObservationWithoutTxHash(vaa.ChainID(chainID))
}

vaaTxHash := VaaIdTxHashUpdate{
Expand Down Expand Up @@ -284,7 +285,7 @@ func (s *Repository) UpsertGovernorConfig(govC *gossipv1.SignedChainGovernorConf
},
Error: err2,
}
s.alertClient.CreateAndSend(context.TODO(), flyAlert.EroorSaveGovernorConfig, alertContext)
s.alertClient.CreateAndSend(context.TODO(), flyAlert.ErrorSaveGovernorConfig, alertContext)
}
return err2
}
Expand Down

0 comments on commit 75fffe3

Please sign in to comment.