Skip to content

Commit

Permalink
Exposing Prometheus metrics (#1)
Browse files Browse the repository at this point in the history
* exposing prometheus metrics
  • Loading branch information
bernardoVale authored Oct 28, 2018
1 parent 8cf50e0 commit ab9a300
Show file tree
Hide file tree
Showing 9 changed files with 184 additions and 251 deletions.
4 changes: 4 additions & 0 deletions cmd/downscaler/awaker.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ func awakeWatcher(ctx context.Context, poster storage.Poster, getter kube.GetDep

deploy, err := getter.Get(app.Name(), app.Namespace())
if err != nil {
awakeErr.Inc()
logger.WithError(err).Errorf("Failed to get deployment")
}
logger.Infof("Desired: %d Ready: %d", *deploy.Spec.Replicas, deploy.Status.ReadyReplicas)
Expand All @@ -35,8 +36,11 @@ func awakeWatcher(ctx context.Context, poster storage.Poster, getter kube.GetDep
err := poster.Post(app.Key(), "awake", awakeTTL)
if err != nil {
logger.WithError(err).Panicf("Could not set backend status to awake. Key: %s", app.Key())
awakeErr.Inc()
panic(err)
}
sleepingGauge.Dec()
awakeCounter.Inc()
return
}
case <-ctx.Done():
Expand Down
79 changes: 77 additions & 2 deletions cmd/downscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,14 @@ import (
"strings"
"time"

"net/http"

"github.com/bernardoVale/downscaler/internal/kube"
"github.com/bernardoVale/downscaler/internal/metrics"
"github.com/bernardoVale/downscaler/internal/storage"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"

"github.com/sirupsen/logrus"
"github.com/spf13/viper"
)
Expand All @@ -20,6 +25,61 @@ var (
"/etc/downscaler",
"$HOME/downscaler",
}
// Prometheus metrics exposed by the downscaler. Every metric shares the
// "downscaler" namespace; the client library joins namespace, subsystem and
// name with underscores (e.g. downscaler_reconciliator_count).

// reconciliatorCounter counts apps successfully scaled up by the reconciliator.
reconciliatorCounter = prometheus.NewCounter(prometheus.CounterOpts{
	Name:      "count",
	Subsystem: "reconciliator",
	Namespace: "downscaler",
	Help:      "Counts reconciliator actions",
})
// reconciliatorErrCounter counts failures hit while reconciliating an app.
reconciliatorErrCounter = prometheus.NewCounter(prometheus.CounterOpts{
	Name:      "errors",
	Subsystem: "reconciliator",
	Namespace: "downscaler",
	Help:      "Counts reconciliator errors",
})
// wakingUpCounter counts apps that were scaled up and marked as waking_up.
wakingUpCounter = prometheus.NewCounter(prometheus.CounterOpts{
	Name:      "count",
	Subsystem: "wakingup",
	Namespace: "downscaler",
	Help:      "Counts waking up actions",
})
// wakingUpErr counts failures while waking an app up. Its Help text must
// describe errors, not actions, so the two series can be told apart.
wakingUpErr = prometheus.NewCounter(prometheus.CounterOpts{
	Name:      "errors",
	Subsystem: "wakingup",
	Namespace: "downscaler",
	Help:      "Counts waking up errors",
})
// awakeCounter counts apps whose backend status was set to awake.
awakeCounter = prometheus.NewCounter(prometheus.CounterOpts{
	Name:      "count",
	Subsystem: "awake",
	Namespace: "downscaler",
	Help:      "Counts awake actions",
})
// awakeErr counts failures in the awake watcher.
awakeErr = prometheus.NewCounter(prometheus.CounterOpts{
	Name:      "errors",
	Subsystem: "awake",
	Namespace: "downscaler",
	Help:      "Counts awake errors",
})
// sleepingCounter counts apps that the sleeper put to sleep.
sleepingCounter = prometheus.NewCounter(prometheus.CounterOpts{
	Name:      "count",
	Subsystem: "sleeping",
	Namespace: "downscaler",
	Help:      "Counts sleeper actions",
})
// sleepingErr counts failures while putting an app to sleep.
sleepingErr = prometheus.NewCounter(prometheus.CounterOpts{
	Name:      "errors",
	Subsystem: "sleeping",
	Namespace: "downscaler",
	Help:      "Counts sleeper errors",
})

// sleepingGauge tracks how many apps are currently sleeping. It is seeded
// from the backend by sleepingState, incremented by the sleeper and
// decremented by the awake watcher.
sleepingGauge = prometheus.NewGauge(prometheus.GaugeOpts{
	Name:      "sleeping",
	Subsystem: "apps",
	Namespace: "downscaler",
	Help:      "Apps sleeping",
})
)

func init() {
Expand All @@ -31,6 +91,8 @@ func init() {
viper.SetDefault("metrics.host", "http://localhost:9090")
viper.SetDefault("sleeper.interval", "4h")
viper.SetDefault("sleeper.max.idle", "10h")
viper.SetDefault("debug", false)
viper.SetDefault("metrics.expvars.bind", ":9090")

viper.SetConfigName("downscaler")
viper.SetConfigType("yaml")
Expand All @@ -40,6 +102,16 @@ func init() {
viper.AddConfigPath(p)
}
mustWithMsg(viper.ReadInConfig(), "Could not read config")

prometheus.MustRegister(reconciliatorCounter)
prometheus.MustRegister(reconciliatorErrCounter)
prometheus.MustRegister(sleepingGauge)
prometheus.MustRegister(wakingUpCounter)
prometheus.MustRegister(wakingUpErr)
prometheus.MustRegister(awakeCounter)
prometheus.MustRegister(awakeErr)
prometheus.MustRegister(sleepingCounter)
prometheus.MustRegister(sleepingErr)
}

func main() {
Expand Down Expand Up @@ -80,8 +152,6 @@ func main() {
redis := storage.NewRedisClient(backendHost, backendPassword, "wakeup")
defer redis.Close()

mustWithMsg(redis.MigrateKeys("sleeping", "downscaler"), "could not migrate redis keys")

// obscure code: if metrics.host is a file, use fakeMetrics
var metricsClient metrics.Client
if _, err := os.Stat(prometheusURL); os.IsNotExist(err) {
Expand All @@ -94,6 +164,11 @@ func main() {
kubeClient, err := kube.NewKubernetesClient()
mustWithMsg(err, "Failed to create a Kubernetes client")

go func() {
http.Handle("/metrics", promhttp.Handler())
mustWithMsg(http.ListenAndServe(viper.GetString("metrics.expvars.bind"), nil), "could not expose metrics")
}()

// Reconciliate first
reconciliate(ctx, redis, kubeClient)

Expand Down
13 changes: 13 additions & 0 deletions cmd/downscaler/reconciliator.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,21 @@ import (
"github.com/sirupsen/logrus"
)

// sleepingState refreshes sleepingGauge from the backend.
//
// It asks searcher for the keys matching the pattern "downscaler:*:*" with
// the value "sleeping" and sets the gauge to the number of keys returned.
// If the lookup fails, the error is logged and the gauge is left untouched.
func sleepingState(searcher storage.KeySearcher) {
	sleepingKeys, lookupErr := searcher.KeysByValue("downscaler:*:*", "sleeping")
	if lookupErr == nil {
		sleepingGauge.Set(float64(len(sleepingKeys)))
		return
	}
	logrus.WithError(lookupErr).Error("Could not read downscaler keys")
}

func reconciliate(ctx context.Context, backend storage.PostSearcher, kube kube.GetScaler) {
var wg sync.WaitGroup
logger := logrus.WithField("method", "reconciliator")
logger.Info("Starting reconciliator")

go sleepingState(backend)
keys, err := backend.KeysByValue("downscaler:*:*", "waking_up")

logger.Infof("%d apps to reconciliate", len(keys))
Expand All @@ -28,14 +38,17 @@ func reconciliate(ctx context.Context, backend storage.PostSearcher, kube kube.G
defer wg.Done()
app, err := types.NewApp(key)
if err != nil {
reconciliatorErrCounter.Inc()
logger.WithError(err).Errorf("Could not create an App representation")
return
}
err = kube.Scale(app.Namespace(), app.Name(), types.ScaleUp)
if err != nil {
reconciliatorErrCounter.Inc()
logger.Errorf("Failed to scale deployment. Err: %v", err)
return
}
reconciliatorCounter.Inc()
awakeWatcher(ctx, backend, kube, app)
}(key)
}
Expand Down
13 changes: 10 additions & 3 deletions cmd/downscaler/sleeper.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,24 +44,31 @@ func sleeper(c sleeperConfig, backend storage.PosterRetriever, metrics metrics.C
break
}
}

if status == "waking_up" {
switch status {
case "waking_up":
logger.WithField("app", app).Info("Skipping app with status waking_up")
break
continue
case "sleeping":
logger.WithField("app", app).Info("Skipping app with status sleeping")
continue
}
// should check if app is waking_up before trying to put it to sleep
// Notify backend that sleeper will put a new app to sleep
err = backend.Post(app.Key(), "sleeping", sleepingTTL)
if err != nil {
logger.WithError(err).Error("Could not write sleep signal on backend.")
sleepingErr.Inc()
break
}
err = kube.Scale(app.Namespace(), app.Name(), types.ScaleDown)
if err != nil {
logger.WithError(err).WithField("app", app).Error("Could not put app to sleep")
sleepingErr.Inc()
break
}
logger.WithField("app", app).Info("App is now sleeping :)")
sleepingGauge.Inc()
sleepingCounter.Inc()
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions cmd/downscaler/wakeuper.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,18 @@ func wakeuper(ctx context.Context, posterReceiver storage.PosterReceiver, kube k
err = kube.Scale(app.Namespace(), app.Name(), types.ScaleUp)
if err != nil {
logger.WithError(err).Error("Failed to scale app")
wakingUpErr.Inc()
return
}
// Up to 20 min
err = posterReceiver.Post(app.Key(), "waking_up", wakingUpTTL)
if err != nil {
logger.WithError(err).WithField("app", app).Error("Could not post app new status: waking_up")
wakingUpErr.Inc()
return
}
// Notify awaker watcher if needed otherwise set status to awake
wakingUpCounter.Inc()
awakeWatcher(ctx, posterReceiver, kube, app)
}()
}
Expand Down
6 changes: 6 additions & 0 deletions deployments/helm/downscaler/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ spec:
annotations:
checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
checksum/secrets: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
{{- range $key, $value := .Values.service.annotations }}
{{ $key }}: {{ $value | quote }}
{{- end }}
labels:
app: {{ $name }}
release: {{ .Release.Name }}
Expand All @@ -32,6 +35,9 @@ spec:
- name: {{ .Chart.Name }}
image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: metrics
containerPort: {{ .Values.downscaler.metricsPort }}
envFrom:
- secretRef:
name: {{ $name }}
Expand Down
26 changes: 26 additions & 0 deletions deployments/helm/downscaler/templates/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{{- $name := include "downscaler.fullname" . -}}
# Service exposing the downscaler metrics port.
apiVersion: v1
kind: Service
metadata:
# Annotations are rendered only when .Values.service.annotations is set.
{{- if .Values.service.annotations }}
annotations:
{{- range $key, $value := .Values.service.annotations }}
{{ $key }}: {{ $value | quote }}
{{- end }}
{{- end }}
labels:
app: {{ $name }}
chart: {{ template "downscaler.chart" . }}
release: {{ .Release.Name }}
heritage: {{ .Release.Service }}
name: {{ $name }}-metrics
spec:
ports:
# Service port 9090 forwards to the container port named "metrics".
- name: metrics
port: 9090
protocol: TCP
targetPort: metrics
# Pods are selected by their app and release labels.
selector:
app: {{ $name }}
release: {{ .Release.Name }}
type: "{{ .Values.service.type }}"
43 changes: 43 additions & 0 deletions deployments/helm/downscaler/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,23 @@ backend:
annotations: {}

service:
annotations:
prometheus.io/port: "9090"
prometheus.io/scrape: "true"
prometheus.io/name: "downscaler"
type: ClusterIP
port: 80

downscaler:
metricsPort: 9090
serviceAccountName: downscaler
backend: # Fill if redis.enabled=false
host:
password: ""
metrics: # Fill if prometheus.enabled=false
host:
expvars: # Internal Metrics
bind: :9090
sleeper:
interval: 4h
max:
Expand Down Expand Up @@ -68,11 +75,47 @@ prometheus:
- /etc/config/rules
- /etc/config/alerts
scrape_configs:
- job_name: 'downscaler-endpoints'
scrape_interval: 5s
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_container_name]
separator: ;
regex: downscaler
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2

- source_labels: [__meta_kubernetes_service_name]
regex: prometheus-service
action: drop

- job_name: 'ingress-nginx-endpoints'
scrape_interval: 5s
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_container_name]
separator: ;
regex: nginx-ingress-controller
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
Expand Down
Loading

0 comments on commit ab9a300

Please sign in to comment.