Skip to content

Commit

Permalink
Add pipeline prometheus metrics and opsgenie alerts (#471)
Browse files Browse the repository at this point in the history
* Add pipeline prometheus metrics

* Add alerts & metrics env vars for pipeline

* fix pipeline env vars

* Fix environment in fly and pipeline

* Add alert client to pipeline

* Add critical alerts to pipeline component

* fix pipeline deploy

* fix ALERTS_ENABLED env var

* Add prometheus annotation metrics

* Configure Prometheus in Fiber

* fix vaaSendNotifications metrics counter

* Fix NewWatcher
  • Loading branch information
walker-16 committed Jul 3, 2023
1 parent 297b9aa commit b07d569
Show file tree
Hide file tree
Showing 23 changed files with 626 additions and 84 deletions.
4 changes: 2 additions & 2 deletions deploy/fly/fly-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ spec:
path: /api/health
port: 8000
env:
- name: ENVIRONMENT
value: {{ .ENVIRONMENT }}
- name: API_PORT
value: "8000"
- name: MONGODB_URI
Expand Down Expand Up @@ -74,8 +76,6 @@ spec:
key: api-key
- name: ALERT_ENABLED
value: "{{ .ALERT_ENABLED }}"
- name: ENVIRONMENT
value: {{ .ENVIRONMENT }}
- name: METRICS_ENABLED
value: "{{ .METRICS_ENABLED }}"
resources:
Expand Down
4 changes: 3 additions & 1 deletion deploy/pipeline/env/production.env
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ SNS_URL=
SNS_AWS_REGION=
AWS_IAM_ROLE=
PPROF_ENABLED=false
P2P_NETWORK=mainnet
P2P_NETWORK=mainnet
ALERT_ENABLED=false
METRICS_ENABLED=false
4 changes: 3 additions & 1 deletion deploy/pipeline/env/staging.env
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ SNS_URL=
SNS_AWS_REGION=
AWS_IAM_ROLE=
PPROF_ENABLED=true
P2P_NETWORK=mainnet
P2P_NETWORK=mainnet
ALERT_ENABLED=false
METRICS_ENABLED=false
4 changes: 3 additions & 1 deletion deploy/pipeline/env/test.env
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ SNS_URL=
SNS_AWS_REGION=
AWS_IAM_ROLE=
PPROF_ENABLED=true
P2P_NETWORK=testnet
P2P_NETWORK=testnet
ALERT_ENABLED=false
METRICS_ENABLED=false
11 changes: 9 additions & 2 deletions deploy/pipeline/pipeline-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ spec:
metadata:
labels:
app: {{ .NAME }}
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8000"
spec:
restartPolicy: Always
terminationGracePeriodSeconds: 40
Expand All @@ -37,8 +40,8 @@ spec:
path: /api/health
port: 8000
env:
- name: ENV
value: "PRODUCTION"
- name: ENVIRONMENT
value: {{ .ENVIRONMENT }}
- name: PORT
value: "8000"
- name: LOG_LEVEL
Expand All @@ -61,6 +64,10 @@ spec:
value: "{{ .PPROF_ENABLED }}"
- name: P2P_NETWORK
value: {{ .P2P_NETWORK }}
- name: ALERT_ENABLED
value: "{{ .ALERT_ENABLED }}"
- name: METRICS_ENABLED
value: "{{ .METRICS_ENABLED }}"
resources:
limits:
memory: {{ .RESOURCES_LIMITS_MEMORY }}
Expand Down
2 changes: 2 additions & 0 deletions fly/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ require (
)

require (
github.com/algorand/go-algorand-sdk v1.23.0 // indirect
github.com/algorand/go-codec/codec v1.1.8 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.0.2 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.28 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.22 // indirect
Expand Down
5 changes: 5 additions & 0 deletions fly/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,11 @@ github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk5
github.com/alexflint/go-filemutex v0.0.0-20171022225611-72bdc8eae2ae/go.mod h1:CgnQgUtFrFz9mxFNtED3jI5tLDjKlOM+oUF/sTk6ps0=
github.com/alexflint/go-filemutex v1.1.0/go.mod h1:7P4iRhttt/nUvUOrYIhcpMzv2G6CY9UnI16Z+UJqRyk=
github.com/alexkohler/prealloc v1.0.0/go.mod h1:VetnK3dIgFBBKmg0YnD9F9x6Icjd+9cvfHR56wJVlKE=
github.com/algorand/go-algorand-sdk v1.23.0 h1:wlEV6OgDVc/sLeF2y41bwNG/Lr8EoMnN87Ur8N2Gyyo=
github.com/algorand/go-algorand-sdk v1.23.0/go.mod h1:7i2peZBcE48kfoxNZnLA+mklKh812jBKvQ+t4bn0KBQ=
github.com/algorand/go-codec v1.1.8/go.mod h1:XhzVs6VVyWMLu6cApb9/192gBjGRVGm5cX5j203Heg4=
github.com/algorand/go-codec/codec v1.1.8 h1:lsFuhcOH2LiEhpBH3BVUUkdevVmwCRyvb7FCAAPeY6U=
github.com/algorand/go-codec/codec v1.1.8/go.mod h1:tQ3zAJ6ijTps6V+wp8KsGDnPC2uhHVC7ANyrtkIY0bA=
github.com/alingse/asasalint v0.0.11/go.mod h1:nCaoMhw7a9kSJObvQyVzNTPBDbNpdocqrSP7t/cW5+I=
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8=
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM=
Expand Down
28 changes: 19 additions & 9 deletions fly/internal/metrics/prometheus.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package metrics

import (
"fmt"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
sdk "github.com/wormhole-foundation/wormhole/sdk/vaa"
Expand All @@ -18,14 +20,14 @@ type PrometheusMetrics struct {
}

// NewPrometheusMetrics returns a new instance of PrometheusMetrics.
func NewPrometheusMetrics(environment string) *PrometheusMetrics {

func NewPrometheusMetrics(environment string, p2pNetwork string) *PrometheusMetrics {
metricsEnviroment := getMetricsEnviroment(environment, p2pNetwork)
vaaReceivedCount := promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "vaa_count_by_chain",
Help: "Total number of vaa by chain",
ConstLabels: map[string]string{
"environment": environment,
"environment": metricsEnviroment,
"service": serviceName,
},
}, []string{"chain", "type"})
Expand All @@ -35,7 +37,7 @@ func NewPrometheusMetrics(environment string) *PrometheusMetrics {
Name: "vaa_total",
Help: "Total number of vaa from Gossip network",
ConstLabels: map[string]string{
"environment": environment,
"environment": metricsEnviroment,
"service": serviceName,
},
})
Expand All @@ -45,7 +47,7 @@ func NewPrometheusMetrics(environment string) *PrometheusMetrics {
Name: "observation_count_by_chain",
Help: "Total number of observation by chain",
ConstLabels: map[string]string{
"environment": environment,
"environment": metricsEnviroment,
"service": serviceName,
},
}, []string{"chain", "type"})
Expand All @@ -55,7 +57,7 @@ func NewPrometheusMetrics(environment string) *PrometheusMetrics {
Name: "observation_total",
Help: "Total number of observation from Gossip network",
ConstLabels: map[string]string{
"environment": environment,
"environment": metricsEnviroment,
"service": serviceName,
},
})
Expand All @@ -65,7 +67,7 @@ func NewPrometheusMetrics(environment string) *PrometheusMetrics {
Name: "heartbeat_count_by_guardian",
Help: "Total number of heartbeat by guardian",
ConstLabels: map[string]string{
"environment": environment,
"environment": metricsEnviroment,
"service": serviceName,
},
}, []string{"guardian_node", "type"})
Expand All @@ -75,7 +77,7 @@ func NewPrometheusMetrics(environment string) *PrometheusMetrics {
Name: "governor_config_count_by_guardian",
Help: "Total number of governor config by guardian",
ConstLabels: map[string]string{
"environment": environment,
"environment": metricsEnviroment,
"service": serviceName,
},
}, []string{"guardian_node", "type"})
Expand All @@ -85,7 +87,7 @@ func NewPrometheusMetrics(environment string) *PrometheusMetrics {
Name: "governor_status_count_by_guardian",
Help: "Total number of governor status by guardian",
ConstLabels: map[string]string{
"environment": environment,
"environment": metricsEnviroment,
"service": serviceName,
},
}, []string{"guardian_node", "type"})
Expand All @@ -100,6 +102,14 @@ func NewPrometheusMetrics(environment string) *PrometheusMetrics {
}
}

// getMetricsEnviroment builds the environment name used for metrics.
//
// For the exact (case-sensitive) value "production" the p2p network is
// appended after a dash, e.g. "production-mainnet". Every other environment
// name is returned unchanged and the p2p network is ignored.
func getMetricsEnviroment(enviroment, p2pPNetwork string) string {
	switch enviroment {
	case "production":
		return fmt.Sprintf("%s-%s", enviroment, p2pPNetwork)
	default:
		return enviroment
	}
}

// IncVaaFromGossipNetwork increases the number of vaa received by chain from Gossip network.
func (m *PrometheusMetrics) IncVaaFromGossipNetwork(chain sdk.ChainID) {
m.vaaReceivedCount.WithLabelValues(chain.String(), "gossip").Inc()
Expand Down
6 changes: 3 additions & 3 deletions fly/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,12 +197,12 @@ func newAlertClient() (alert.AlertClient, error) {
return alert.NewAlertService(alertConfig, flyAlert.LoadAlerts)
}

func newMetrics(p2pNetwork *config.P2pNetworkConfig) metrics.Metrics {
func newMetrics(enviroment string, p2pNetwork *config.P2pNetworkConfig) metrics.Metrics {
metricsEnabled := config.GetMetricsEnabled()
if !metricsEnabled {
return metrics.NewDummyMetrics()
}
return metrics.NewPrometheusMetrics(p2pNetwork.Enviroment)
return metrics.NewPrometheusMetrics(enviroment, p2pNetwork.Enviroment)
}

func main() {
Expand Down Expand Up @@ -245,7 +245,7 @@ func main() {
logger.Fatal("could not create alert client", zap.Error(err))
}

metrics := newMetrics(p2pNetworkConfig)
metrics := newMetrics(config.GetEnviroment(), p2pNetworkConfig)

// Setup DB
uri := os.Getenv("MONGODB_URI")
Expand Down
46 changes: 40 additions & 6 deletions pipeline/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@ import (
"github.com/aws/aws-sdk-go-v2/aws"
awsconfig "github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/wormhole-foundation/wormhole-explorer/common/client/alert"
"github.com/wormhole-foundation/wormhole-explorer/common/logger"
"github.com/wormhole-foundation/wormhole-explorer/pipeline/config"
"github.com/wormhole-foundation/wormhole-explorer/pipeline/healthcheck"
"github.com/wormhole-foundation/wormhole-explorer/pipeline/http/infrastructure"
pipelineAlert "github.com/wormhole-foundation/wormhole-explorer/pipeline/internal/alert"
"github.com/wormhole-foundation/wormhole-explorer/pipeline/internal/db"
"github.com/wormhole-foundation/wormhole-explorer/pipeline/internal/metrics"
"github.com/wormhole-foundation/wormhole-explorer/pipeline/internal/sns"
"github.com/wormhole-foundation/wormhole-explorer/pipeline/pipeline"
"github.com/wormhole-foundation/wormhole-explorer/pipeline/topic"
Expand Down Expand Up @@ -54,8 +57,17 @@ func main() {
logger.Fatal("failed to connect MongoDB", zap.Error(err))
}

// get alert client.
alertClient, err := newAlertClient(config)
if err != nil {
logger.Fatal("failed to create alert client", zap.Error(err))
}

// get metrics.
metrics := newMetrics(config)

// get publish function.
pushFunc, err := newTopicProducer(rootCtx, config, logger)
pushFunc, err := newTopicProducer(rootCtx, config, alertClient, metrics, logger)
if err != nil {
logger.Fatal("failed to create publish function", zap.Error(err))
}
Expand All @@ -71,12 +83,12 @@ func main() {

// create and start a new tx hash handler.
quit := make(chan bool)
txHashHandler := pipeline.NewTxHashHandler(repository, pushFunc, logger, quit)
txHashHandler := pipeline.NewTxHashHandler(repository, pushFunc, alertClient, metrics, logger, quit)
go txHashHandler.Run(rootCtx)

// create a new publisher.
publisher := pipeline.NewPublisher(pushFunc, repository, config.P2pNetwork, txHashHandler, logger)
watcher := watcher.NewWatcher(rootCtx, db.Database, config.MongoDatabase, publisher.Publish, logger)
publisher := pipeline.NewPublisher(pushFunc, metrics, repository, config.P2pNetwork, txHashHandler, logger)
watcher := watcher.NewWatcher(rootCtx, db.Database, config.MongoDatabase, publisher.Publish, alertClient, metrics, logger)
err = watcher.Start(rootCtx)
if err != nil {
logger.Fatal("failed to watch MongoDB", zap.Error(err))
Expand Down Expand Up @@ -139,7 +151,7 @@ func newAwsConfig(appCtx context.Context, cfg *config.Configuration) (aws.Config
return awsconfig.LoadDefaultConfig(appCtx, awsconfig.WithRegion(region))
}

func newTopicProducer(appCtx context.Context, config *config.Configuration, logger *zap.Logger) (topic.PushFunc, error) {
func newTopicProducer(appCtx context.Context, config *config.Configuration, alertClient alert.AlertClient, metrics metrics.Metrics, logger *zap.Logger) (topic.PushFunc, error) {
awsConfig, err := newAwsConfig(appCtx, config)
if err != nil {
return nil, err
Expand All @@ -150,7 +162,7 @@ func newTopicProducer(appCtx context.Context, config *config.Configuration, logg
return nil, err
}

return topic.NewVAASNS(snsProducer, logger).Publish, nil
return topic.NewVAASNS(snsProducer, alertClient, metrics, logger).Publish, nil
}

func newHealthChecks(ctx context.Context, config *config.Configuration, db *mongo.Database) ([]healthcheck.Check, error) {
Expand All @@ -160,3 +172,25 @@ func newHealthChecks(ctx context.Context, config *config.Configuration, db *mong
}
return []healthcheck.Check{healthcheck.Mongo(db), healthcheck.SNS(awsConfig, config.SNSUrl)}, nil
}

// newMetrics picks the metrics implementation from the configuration.
// When cfg.MetricsEnabled is set it returns the Prometheus-backed
// implementation, built from cfg.Enviroment and cfg.P2pNetwork; otherwise it
// returns the dummy implementation from metrics.NewDummyMetrics.
func newMetrics(cfg *config.Configuration) metrics.Metrics {
	if cfg.MetricsEnabled {
		return metrics.NewPrometheusMetrics(cfg.Enviroment, cfg.P2pNetwork)
	}
	return metrics.NewDummyMetrics()
}

// newAlertClient creates the alert client described by the configuration.
// With cfg.AlertEnabled set, it calls alert.NewAlertService with an
// AlertConfig populated from cfg (environment, p2p network, API key, enabled
// flag) together with pipelineAlert.LoadAlerts, and returns its client and
// error. With alerts disabled it returns alert.NewDummyClient and a nil error.
func newAlertClient(cfg *config.Configuration) (alert.AlertClient, error) {
	if cfg.AlertEnabled {
		return alert.NewAlertService(
			alert.AlertConfig{
				Enviroment: cfg.Enviroment,
				P2PNetwork: cfg.P2pNetwork,
				ApiKey:     cfg.AlertApiKey,
				Enabled:    cfg.AlertEnabled,
			},
			pipelineAlert.LoadAlerts,
		)
	}
	return alert.NewDummyClient(), nil
}
5 changes: 4 additions & 1 deletion pipeline/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (

// Configuration represents the application configuration with the default values.
type Configuration struct {
Env string `env:"ENV,default=development"`
Enviroment string `env:"ENVIRONMENT,default=development"`
LogLevel string `env:"LOG_LEVEL,default=INFO"`
Port string `env:"PORT,default=8000"`
P2pNetwork string `env:"P2P_NETWORK,required"`
Expand All @@ -21,6 +21,9 @@ type Configuration struct {
AwsRegion string `env:"AWS_REGION"`
SNSUrl string `env:"SNS_URL"`
PprofEnabled bool `env:"PPROF_ENABLED,default=false"`
AlertEnabled bool `env:"ALERT_ENABLED,default=false"`
AlertApiKey string `env:"ALERT_API_KEY"`
MetricsEnabled bool `env:"METRICS_ENABLED,default=false"`
}

// New creates a configuration with the values from .env file and environment variables.
Expand Down
Loading

0 comments on commit b07d569

Please sign in to comment.