Skip to content

Commit

Permalink
Add critical opsgenie alert to fly (#463)
Browse files Browse the repository at this point in the history
  • Loading branch information
walker-16 authored Jun 27, 2023
1 parent f7543a1 commit 914580c
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 2 deletions.
58 changes: 56 additions & 2 deletions fly/internal/alert/alert.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,19 @@ import (

// Alert keys used by the fly service. Each key is used both as the index into
// the map built by LoadAlerts and as the Alias of the corresponding alert.
const (
	// ErrorSaveVAA identifies the alert for a failure saving a VAA.
	ErrorSaveVAA = "ERROR_SAVE_VAA"
	// ErrorSavePyth identifies the alert for a failure saving a pyth VAA.
	ErrorSavePyth = "ERROR_SAVE_PYTH"
	// ErrorSaveObservation identifies the alert for a failure saving an observation.
	ErrorSaveObservation = "ERROR_SAVE_OBSERVATION"
	// ErrorSaveHeartbeat identifies the alert for a failure saving a heartbeat.
	ErrorSaveHeartbeat = "ERROR_SAVE_HEARTBEAT"
	// ErrorSaveGovernorStatus identifies the alert for a failure saving a governor status.
	ErrorSaveGovernorStatus = "ERROR_SAVE_GOVERNOR_STATUS"
	// ErrorSaveGovernorConfig identifies the alert for a failure saving a governor config.
	ErrorSaveGovernorConfig = "ERROR_SAVE_GOVERNOR_CONFIG"

	// EroorSaveGovernorConfig is the original, misspelled name of
	// ErrorSaveGovernorConfig. It is kept with the same value so existing
	// references keep compiling.
	//
	// Deprecated: use ErrorSaveGovernorConfig instead.
	EroorSaveGovernorConfig = ErrorSaveGovernorConfig
)

func LoadAlerts(cfg alert.AlertConfig) map[string]alert.Alert {
alerts := make(map[string]alert.Alert)

messagePrefix := alert.GetMessagePrefix(cfg.Enviroment, cfg.P2PNetwork)
// Alert Error saving vaa.
// Alert error saving vaa.
alerts[ErrorSaveVAA] = alert.Alert{
Alias: ErrorSaveVAA,
Message: fmt.Sprintf("%s %s", messagePrefix, "Error saving VAA in vaas collection"),
Expand All @@ -25,5 +30,54 @@ func LoadAlerts(cfg alert.AlertConfig) map[string]alert.Alert {
Entity: "fly",
Priority: alert.CRITICAL,
}
// Alert error saving pyth
alerts[ErrorSavePyth] = alert.Alert{
Alias: ErrorSavePyth,
Message: fmt.Sprintf("%s %s", messagePrefix, "Error saving pyth in vaasPythnet collection"),
Description: "An error was found persisting the pyth in mongo in the vaasPythnet collection.",
Actions: []string{"pyth may have persisted by retry"},
Tags: []string{cfg.Enviroment, cfg.P2PNetwork, "fly", "vaasPythnet", "mongo"},
Entity: "fly",
Priority: alert.INFORMATIONAL,
}
// Alert error saving observation
alerts[ErrorSaveObservation] = alert.Alert{
Alias: ErrorSaveObservation,
Message: fmt.Sprintf("%s %s", messagePrefix, "Error saving observation in observations collection"),
Description: "An error was found persisting the observation in mongo in the observations collection.",
Actions: []string{},
Tags: []string{cfg.Enviroment, cfg.P2PNetwork, "fly", "observations", "mongo"},
Entity: "fly",
Priority: alert.CRITICAL,
}
// Alert error saving heartbeat
alerts[ErrorSaveHeartbeat] = alert.Alert{
Alias: ErrorSaveHeartbeat,
Message: fmt.Sprintf("%s %s", messagePrefix, "Error saving heartbeat in heartbeats collection"),
Description: "An error was found persisting the heartbeat in mongo in the heartbeats collection.",
Actions: []string{},
Tags: []string{cfg.Enviroment, cfg.P2PNetwork, "fly", "heartbeats", "mongo"},
Entity: "fly",
Priority: alert.CRITICAL,
}
alerts[ErrorSaveGovernorStatus] = alert.Alert{
Alias: ErrorSaveGovernorStatus,
Message: fmt.Sprintf("%s %s", messagePrefix, "Error saving governor status in governorStatus collection"),
Description: "An error was found persisting the governor status in mongo in the governorStatus collection.",
Actions: []string{},
Tags: []string{cfg.Enviroment, cfg.P2PNetwork, "fly", "governorStatus", "mongo"},
Entity: "fly",
Priority: alert.CRITICAL,
}
alerts[EroorSaveGovernorConfig] = alert.Alert{
Alias: EroorSaveGovernorConfig,
Message: fmt.Sprintf("%s %s", messagePrefix, "Error saving governor config in governorConfig collection"),
Description: "An error was found persisting the governor config in mongo in the governorConfig collection.",
Actions: []string{},
Tags: []string{cfg.Enviroment, cfg.P2PNetwork, "fly", "governorConfig", "mongo"},
Entity: "fly",
Priority: alert.CRITICAL,
}

return alerts
}
13 changes: 13 additions & 0 deletions fly/storage/documents.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"time"

"github.com/wormhole-foundation/wormhole-explorer/common/domain"
"github.com/wormhole-foundation/wormhole/sdk/vaa"
)

Expand Down Expand Up @@ -51,6 +52,18 @@ type ObservationUpdate struct {
UpdatedAt *time.Time `bson:"updatedAt"`
}

// ToMap returns the observation's identifying fields as a flat
// map[string]string with the keys "messageId", "emitterChain", "emitterAddr",
// "sequence", "txHash" and "guardianAddr".
//
// The "txHash" entry is the result of domain.EncodeTrxHashByChainID applied to
// the observation's chain ID and transaction hash.
func (v *ObservationUpdate) ToMap() map[string]string {
	// The encoder's error is discarded, so "txHash" holds whatever string the
	// encoder returned alongside it.
	// NOTE(review): confirm that best-effort encoding is intended here.
	encodedHash, _ := domain.EncodeTrxHashByChainID(v.ChainID, v.TxHash)

	fields := make(map[string]string, 6)
	fields["messageId"] = v.MessageID
	fields["emitterChain"] = v.ChainID.String()
	fields["emitterAddr"] = v.Emitter
	fields["sequence"] = v.Sequence
	fields["txHash"] = encodedHash
	fields["guardianAddr"] = v.GuardianAddr
	return fields
}

type VaaIdTxHashUpdate struct {
ChainID vaa.ChainID `bson:"emitterChain"`
Emitter string `bson:"emitterAddr"`
Expand Down
42 changes: 42 additions & 0 deletions fly/storage/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,14 @@ func (s *Repository) UpsertVaa(ctx context.Context, v *vaa.VAA, serializedVaa []
var result *mongo.UpdateResult
if vaa.ChainIDPythNet == v.EmitterChain {
result, err = s.collections.vaasPythnet.UpdateByID(ctx, id, update, opts)
if err != nil {
// Send an alert when an error occurs while saving a pyth vaa.
alertContext := alert.AlertContext{
Details: vaaDoc.ToMap(),
Error: err,
}
s.alertClient.CreateAndSend(ctx, flyAlert.ErrorSavePyth, alertContext)
}
} else {
var vaaIdTxHash VaaIdTxHashUpdate
if err := s.collections.vaaIdTxHash.FindOne(ctx, bson.M{"_id": id}).Decode(&vaaIdTxHash); err != nil {
Expand Down Expand Up @@ -155,6 +163,12 @@ func (s *Repository) UpsertObservation(o *gossipv1.SignedObservation) error {
_, err = s.collections.observations.UpdateByID(ctx, id, update, opts)
if err != nil {
s.log.Error("Error inserting observation", zap.Error(err))
// Send an alert when an error occurs while saving the observation.
alertContext := alert.AlertContext{
Details: obs.ToMap(),
Error: err,
}
s.alertClient.CreateAndSend(ctx, flyAlert.ErrorSaveObservation, alertContext)
return err
}

Expand Down Expand Up @@ -229,6 +243,18 @@ func (s *Repository) UpsertHeartbeat(hb *gossipv1.Heartbeat) error {
update := bson.D{{Key: "$set", Value: hb}, {Key: "$set", Value: bson.D{{Key: "updatedAt", Value: now}}}, {Key: "$setOnInsert", Value: bson.D{{Key: "indexedAt", Value: now}}}}
opts := options.Update().SetUpsert(true)
_, err := s.collections.heartbeats.UpdateByID(context.TODO(), id, update, opts)
if err != nil {
s.log.Error("Error inserting heartbeat", zap.Error(err))
// Send an alert when an error occurs while saving the heartbeat.
alertContext := alert.AlertContext{
Details: map[string]string{
"guardianAddr": hb.GuardianAddr,
"nodeName": hb.NodeName,
},
Error: err,
}
s.alertClient.CreateAndSend(context.TODO(), flyAlert.ErrorSaveHeartbeat, alertContext)
}
return err
}

Expand All @@ -251,6 +277,14 @@ func (s *Repository) UpsertGovernorConfig(govC *gossipv1.SignedChainGovernorConf

if err2 != nil {
s.log.Error("Error inserting govr cfg", zap.Error(err2))
// Send an alert when an error occurs while saving the governor config.
alertContext := alert.AlertContext{
Details: map[string]string{
"nodeName": cfg.NodeName,
},
Error: err2,
}
s.alertClient.CreateAndSend(context.TODO(), flyAlert.EroorSaveGovernorConfig, alertContext)
}
return err2
}
Expand All @@ -274,6 +308,14 @@ func (s *Repository) UpsertGovernorStatus(govS *gossipv1.SignedChainGovernorStat

if err2 != nil {
s.log.Error("Error inserting govr status", zap.Error(err2))
// Send an alert when an error occurs while saving the governor status.
alertContext := alert.AlertContext{
Details: map[string]string{
"nodeName": status.NodeName,
},
Error: err2,
}
s.alertClient.CreateAndSend(context.TODO(), flyAlert.ErrorSaveGovernorStatus, alertContext)
}
return err2
}
Expand Down

0 comments on commit 914580c

Please sign in to comment.