From 6da3fbd5eb1c62742881d695bf600660b7d88c2b Mon Sep 17 00:00:00 2001 From: Chris Martin Date: Fri, 12 Jul 2024 09:30:28 +0100 Subject: [PATCH] Adds Some Prometheus Metrics To Lookout (#3781) * Better metrics for lookout Signed-off-by: Chris Martin * lint Signed-off-by: Chris Martin * add service monitor Signed-off-by: Chris Martin --------- Signed-off-by: Chris Martin Co-authored-by: Chris Martin --- config/lookoutv2/config.yaml | 1 + .../lookout-v2/templates/servicemonitor.yaml | 20 ++++++++++++++++++ go.mod | 1 + go.sum | 2 ++ internal/lookoutv2/application.go | 11 +++++++++- internal/lookoutv2/configuration/types.go | 3 ++- .../gen/restapi/configure_lookout.go | 17 ++++++++++++++- internal/lookoutv2/metrics/metrics.go | 21 +++++++++++++++++++ 8 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 deployment/lookout-v2/templates/servicemonitor.yaml create mode 100644 internal/lookoutv2/metrics/metrics.go diff --git a/config/lookoutv2/config.yaml b/config/lookoutv2/config.yaml index 066fcf74de6..78432997326 100644 --- a/config/lookoutv2/config.yaml +++ b/config/lookoutv2/config.yaml @@ -1,4 +1,5 @@ apiPort: 10000 +metricsPort: 9003 corsAllowedOrigins: - "http://localhost:3000" - "http://localhost:8089" diff --git a/deployment/lookout-v2/templates/servicemonitor.yaml b/deployment/lookout-v2/templates/servicemonitor.yaml new file mode 100644 index 00000000000..bc26263dfe6 --- /dev/null +++ b/deployment/lookout-v2/templates/servicemonitor.yaml @@ -0,0 +1,20 @@ +{{- if .Values.prometheus.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "lookout_v2.name" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "lookout_v2.name.labels.all" . | nindent 4 -}} + {{- if .Values.prometheus.labels }} + {{- toYaml .Values.prometheus.labels | nindent 4 -}} + {{- end }} +spec: + selector: + matchLabels: + {{- include "lookout_v2.name.labels.identity" . 
| nindent 6 }} + endpoints: + - port: metrics + interval: {{ .Values.prometheus.scrapeInterval }} + scrapeTimeout: {{ .Values.prometheus.scrapeTimeout }} +{{- end }} diff --git a/go.mod b/go.mod index c03c249b2c5..e366284c638 100644 --- a/go.mod +++ b/go.mod @@ -65,6 +65,7 @@ require ( ) require ( + github.com/IBM/pgxpoolprometheus v1.1.1 github.com/Masterminds/semver/v3 v3.2.1 github.com/benbjohnson/immutable v0.4.3 github.com/charmbracelet/glamour v0.7.0 diff --git a/go.sum b/go.sum index ca765fd3121..bef3ddd75a7 100644 --- a/go.sum +++ b/go.sum @@ -35,6 +35,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/DataDog/zstd v1.5.0 h1:+K/VEwIAaPcHiMtQvpLD4lqW7f0Gk3xdYZmI1hD+CXo= github.com/DataDog/zstd v1.5.0/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= +github.com/IBM/pgxpoolprometheus v1.1.1 h1:xkWNUe87TIuBj/ypdSiDgNYktsuM7MoZCT8a+kjhh2s= +github.com/IBM/pgxpoolprometheus v1.1.1/go.mod h1:GFJDkHbidFfB2APbhBTSy2X4PKH3bLWsEMBhmzK1ipo= github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0= github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= github.com/alecthomas/assert/v2 v2.2.1 h1:XivOgYcduV98QCahG8T5XTezV5bylXe+lBxLG2K2ink= diff --git a/internal/lookoutv2/application.go b/internal/lookoutv2/application.go index 47385dc9414..bf7c03a9ecd 100644 --- a/internal/lookoutv2/application.go +++ b/internal/lookoutv2/application.go @@ -3,15 +3,18 @@ package lookoutv2 import ( + "github.com/IBM/pgxpoolprometheus" "github.com/go-openapi/loads" "github.com/go-openapi/runtime/middleware" "github.com/jessevdk/go-flags" + "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" + "github.com/armadaproject/armada/internal/common" "github.com/armadaproject/armada/internal/common/armadacontext" 
"github.com/armadaproject/armada/internal/common/compress" "github.com/armadaproject/armada/internal/common/database" - slices "github.com/armadaproject/armada/internal/common/slices" + "github.com/armadaproject/armada/internal/common/slices" "github.com/armadaproject/armada/internal/lookoutv2/configuration" "github.com/armadaproject/armada/internal/lookoutv2/conversions" "github.com/armadaproject/armada/internal/lookoutv2/gen/restapi" @@ -31,6 +34,9 @@ func Serve(configuration configuration.LookoutV2Config) error { return err } + collector := pgxpoolprometheus.NewCollector(db, map[string]string{}) + prometheus.MustRegister(collector) + getJobsRepo := repository.NewSqlGetJobsRepository(db) groupJobsRepo := repository.NewSqlGroupJobsRepository(db) decompressor := compress.NewThreadSafeZlibDecompressor() @@ -148,6 +154,9 @@ func Serve(configuration configuration.LookoutV2Config) error { }, ) + shutdownMetricServer := common.ServeMetrics(uint16(configuration.MetricsPort)) + defer shutdownMetricServer() + server := restapi.NewServer(api) defer func() { shutdownErr := server.Shutdown() diff --git a/internal/lookoutv2/configuration/types.go b/internal/lookoutv2/configuration/types.go index a35fa4f5d1e..1fb0be21595 100644 --- a/internal/lookoutv2/configuration/types.go +++ b/internal/lookoutv2/configuration/types.go @@ -7,7 +7,8 @@ import ( ) type LookoutV2Config struct { - ApiPort int + ApiPort int + MetricsPort int // If non-nil, net/http/pprof endpoints are exposed on localhost on this port. 
PprofPort *uint16 diff --git a/internal/lookoutv2/gen/restapi/configure_lookout.go b/internal/lookoutv2/gen/restapi/configure_lookout.go index 733949fde9f..aaacd8b22e7 100644 --- a/internal/lookoutv2/gen/restapi/configure_lookout.go +++ b/internal/lookoutv2/gen/restapi/configure_lookout.go @@ -7,6 +7,7 @@ import ( "encoding/json" "net/http" "strings" + "time" "github.com/go-openapi/errors" "github.com/go-openapi/runtime" @@ -16,6 +17,7 @@ import ( "github.com/armadaproject/armada/internal/common/serve" "github.com/armadaproject/armada/internal/lookoutv2/configuration" "github.com/armadaproject/armada/internal/lookoutv2/gen/restapi/operations" + "github.com/armadaproject/armada/internal/lookoutv2/metrics" ) //go:generate swagger generate server --target ../../gen --name Lookout --spec ../../swagger.yaml --principal interface{} --exclude-main @@ -89,7 +91,7 @@ var UIConfig configuration.UIConfig // The middleware configuration happens before anything, this middleware also applies to serving the swagger.json document. // So this is a good place to plug in a panic handling middleware, logging and metrics. 
func setupGlobalMiddleware(apiHandler http.Handler) http.Handler { - return allowCORS(uiHandler(apiHandler), corsAllowedOrigins) + return recordRequestDuration(allowCORS(uiHandler(apiHandler), corsAllowedOrigins)) } func uiHandler(apiHandler http.Handler) http.Handler { @@ -137,6 +139,19 @@ func allowCORS(handler http.Handler, corsAllowedOrigins []string) http.Handler { }) } +func recordRequestDuration(handler http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // TODO: for authenticated users, record the username + const unknownUser = "unknown" + start := time.Now() + handler.ServeHTTP(w, r) + duration := time.Since(start) + if strings.HasPrefix(r.URL.Path, "/api/v1/") { + metrics.RecordRequestDuration(unknownUser, r.URL.Path, float64(duration.Milliseconds())) + } + }) +} + func preflightHandler(w http.ResponseWriter) { headers := []string{"Content-Type", "Accept", "Authorization"} w.Header().Set("Access-Control-Allow-Headers", strings.Join(headers, ",")) diff --git a/internal/lookoutv2/metrics/metrics.go b/internal/lookoutv2/metrics/metrics.go new file mode 100644 index 00000000000..790a4a64fbb --- /dev/null +++ b/internal/lookoutv2/metrics/metrics.go @@ -0,0 +1,21 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var requestDuration = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "lookout_request_duration_ms", + Help: "Request duration in milliseconds", + Buckets: []float64{1, 10, 100, 1000, 10000, 100000, 1000000}, + }, + []string{"user", "endpoint"}, +) + +func RecordRequestDuration(user, endpoint string, duration float64) { + requestDuration. + With(map[string]string{"user": user, "endpoint": endpoint}). + Observe(duration) +}