Skip to content

Commit

Permalink
Merge pull request #390 from vshn/addMetrics
Browse files Browse the repository at this point in the history
Adding PrometheusAlertRules for cloud billing
  • Loading branch information
wejdross authored Jul 5, 2024
2 parents 567d136 + 090eb5d commit 4d50c02
Show file tree
Hide file tree
Showing 20 changed files with 523 additions and 14 deletions.
2 changes: 1 addition & 1 deletion component/class/defaults.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ parameters:
collector:
registry: ghcr.io
repository: vshn/billing-collector-cloudservices
tag: v3.2.0
tag: v3.4.0

=_crd_version: ${appcat:images:appcat:tag}

Expand Down
121 changes: 120 additions & 1 deletion component/component/cloud_billing.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ local serviceAccount(name, clusterRole) = {
local deployment(name, args, config) =
kube.Deployment(name) {
metadata+: {
labels+: labels,
labels+: labels { 'automated-billing': 'true' },
namespace: params.namespace,
},
spec+: {
Expand All @@ -127,6 +127,18 @@ local deployment(name, args, config) =
imagePullPolicy: 'IfNotPresent',
image: collectorImage,
args: args,
// add resource limits
// during my local tests it consumes around 50% of requests{} values
resources: {
limits: {
cpu: '250m',
memory: '256Mi',
},
requests: {
cpu: '100m',
memory: '128Mi',
},
},
envFrom: [
{
configMapRef: {
Expand All @@ -139,13 +151,43 @@ local deployment(name, args, config) =
},
},
],
ports: [
{
containerPort: 2112,
protocol: 'TCP',
},
],
},
},
},
},
},
};


// Service `automated-billing`: publishes TCP port 2112 under the port name
// `metrics` and selects pods labelled `automated-billing: 'true'`.
// NOTE(review): the visible hunks of this change never add `service` to the
// rendered output -- confirm it is emitted somewhere, since a ServiceMonitor
// that refers to a port named `metrics` needs this Service to exist.
local service =
  local metricsPort = 2112;
  local billingSelector = { 'automated-billing': 'true' };
  {
    kind: 'Service',
    apiVersion: 'v1',
    metadata: {
      name: 'automated-billing',
      namespace: params.namespace,
      // Common component labels plus the marker label used for selection.
      labels: labels + billingSelector,
    },
    spec: {
      selector: billingSelector,
      ports: [
        {
          name: 'metrics',
          protocol: 'TCP',
          port: metricsPort,
          targetPort: metricsPort,
        },
      ],
    },
  };

local config(name, extraConfig) = kube.ConfigMap(name) {
metadata: {
name: name,
Expand All @@ -161,6 +203,77 @@ local config(name, extraConfig) = kube.ConfigMap(name) {
},
} + extraConfig;

// PrometheusRule `cloudservices-billing` with two critical alerts routed to
// team `schedar`:
//   * HighOdooHTTPFailureRate     -- failed-request counter towards Odoo
//   * HighProviderHTTPFailureRate -- failed-request counter towards providers
//
// Both expressions compare the raw metric with `!= 0` and must hold for one
// minute ('for': '1m'). They do NOT compute a rate, so the annotation texts
// below describe exactly that condition instead of a "rate increase".
// NOTE(review): the `_total` suffix suggests these are counters; if so, the
// alerts keep firing after the first failure until the series disappears or
// is reset. Consider `increase(...[5m]) > 0` once it is confirmed that the
// collector exports the counters with an initial value of 0.
local alertRule = {
  apiVersion: 'monitoring.coreos.com/v1',
  kind: 'PrometheusRule',
  metadata: {
    labels: labels { 'automated-billing': 'true' },
    name: 'cloudservices-billing',
    namespace: params.namespace,
  },
  spec: {
    groups+: [
      {
        // Group name kept as-is even though it also holds the provider alert.
        name: 'odoo_http_failures',
        rules: [
          {
            alert: 'HighOdooHTTPFailureRate',
            expr: |||
              billing_cloud_collector_http_requests_odoo_failed_total != 0
            |||,
            'for': '1m',
            labels: {
              severity: 'critical',
              syn_team: 'schedar',
            },
            annotations: {
              summary: 'Failed Odoo HTTP requests detected',
              description: 'The counter of failed Odoo HTTP requests (`billing_cloud_collector_http_requests_odoo_failed_total`) has been non-zero for at least one minute.',
            },
          },
          {
            alert: 'HighProviderHTTPFailureRate',
            expr: |||
              billing_cloud_collector_http_requests_provider_failed_total != 0
            |||,
            'for': '1m',
            labels: {
              severity: 'critical',
              syn_team: 'schedar',
            },
            annotations: {
              summary: 'Failed provider HTTP requests detected in the automated-billing collector',
              // Previously this text said "Odoo" (copy-paste from the rule above).
              description: 'The counter of failed cloud provider HTTP requests (`billing_cloud_collector_http_requests_provider_failed_total`) has been non-zero for at least one minute.',
            },
          },
        ],
      },
    ],
  },
};


// ServiceMonitor `cloudservices-servicemonitor`: matches objects labelled
// `automated-billing: 'true'` and scrapes their endpoint port named `metrics`.
local serviceMonitor =
  local billingSelector = { 'automated-billing': 'true' };
  {
    kind: 'ServiceMonitor',
    apiVersion: 'monitoring.coreos.com/v1',
    metadata: {
      name: 'cloudservices-servicemonitor',
      namespace: params.namespace,
      // Common component labels plus the marker label.
      labels: labels + billingSelector,
    },
    spec: {
      selector: {
        matchLabels+: billingSelector,
      },
      endpoints: [
        { port: 'metrics' },
      ],
    },
  };


({
local odoo = params.odoo,
assert odoo.oauth != null : 'odoo.oauth must be set.',
Expand Down Expand Up @@ -191,6 +304,8 @@ local config(name, extraConfig) = kube.ConfigMap(name) {
'10_exoscale_dbaas_role_binding': sa.rb,
'10_exoscale_dbaas_configmap': cm,
'10_exoscale_dbaas_exporter': deployment(name, [ 'exoscale', 'dbaas' ], name + '-env'),
'20_exoscale_dbaas_alerts': alertRule,
'30_exoscale_dbaas_servicemonitor': serviceMonitor,
} else {})
+
(if paramsCloud.exoscale.enabled && paramsCloud.exoscale.objectStorage.enabled then {
Expand All @@ -217,6 +332,8 @@ local config(name, extraConfig) = kube.ConfigMap(name) {
'10_exoscale_object_storage_rolebinding': sa.rb,
'10_exoscale_object_storage_configmap': cm,
'20_exoscale_object_storage_exporter': deployment(name, [ 'exoscale', 'objectstorage' ], name + '-env'),
'30_exoscale_object_storage_alerts': alertRule,
'40_exoscale_object_storage_servicemonitor': serviceMonitor,

} else {})
+
Expand Down Expand Up @@ -244,4 +361,6 @@ local config(name, extraConfig) = kube.ConfigMap(name) {
'10_cloudscale_rolebinding': sa.rb,
'10_cloudscale_configmap': cm,
'20_cloudscale_exporter': deployment(name, [ 'cloudscale', 'objectstorage' ], name + '-env'),
'30_cloudscale_alerts': alertRule,
'40_cloudscale_servicemonitor': serviceMonitor,
} else {})
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ metadata:
app.kubernetes.io/component: billing-collector-cloudservices
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: billing-collector-cloudservices
automated-billing: 'true'
name: cloudscale
name: cloudscale
namespace: syn-appcat
Expand All @@ -18,6 +19,7 @@ spec:
app.kubernetes.io/component: billing-collector-cloudservices
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: billing-collector-cloudservices
automated-billing: 'true'
name: cloudscale
strategy:
rollingUpdate:
Expand All @@ -31,6 +33,7 @@ spec:
app.kubernetes.io/component: billing-collector-cloudservices
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: billing-collector-cloudservices
automated-billing: 'true'
name: cloudscale
spec:
containers:
Expand All @@ -43,10 +46,19 @@ spec:
name: cloudscale-env
- secretRef:
name: credentials-cloudscale
image: ghcr.io/vshn/billing-collector-cloudservices:v3.2.0
image: ghcr.io/vshn/billing-collector-cloudservices:v3.4.0
imagePullPolicy: IfNotPresent
name: exporter
ports: []
ports:
- containerPort: 2112
protocol: TCP
resources:
limits:
cpu: 250m
memory: 256Mi
requests:
cpu: 100m
memory: 128Mi
stdin: false
tty: false
volumeMounts: []
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# PrometheusRule with two critical alerts for team schedar. Each alert fires
# when its failed-request metric has been non-zero for one minute; the
# expressions compare the raw value and do not compute a rate.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    app.kubernetes.io/component: billing-collector-cloudservices
    app.kubernetes.io/managed-by: commodore
    app.kubernetes.io/name: billing-collector-cloudservices
    automated-billing: 'true'
  name: cloudservices-billing
  namespace: syn-appcat
spec:
  groups:
    - name: odoo_http_failures
      rules:
        - alert: HighOdooHTTPFailureRate
          annotations:
            description: The counter of failed Odoo HTTP requests (`billing_cloud_collector_http_requests_odoo_failed_total`)
              has been non-zero for at least one minute.
            summary: Failed Odoo HTTP requests detected
          expr: |
            billing_cloud_collector_http_requests_odoo_failed_total != 0
          for: 1m
          labels:
            severity: critical
            syn_team: schedar
        - alert: HighProviderHTTPFailureRate
          annotations:
            # Previously this text said "Odoo" (copied from the rule above).
            description: The counter of failed cloud provider HTTP requests (`billing_cloud_collector_http_requests_provider_failed_total`)
              has been non-zero for at least one minute.
            summary: Failed provider HTTP requests detected in the automated-billing
              collector
          expr: |
            billing_cloud_collector_http_requests_provider_failed_total != 0
          for: 1m
          labels:
            severity: critical
            syn_team: schedar
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# ServiceMonitor in namespace syn-appcat: selects objects labelled
# automated-billing: 'true' and scrapes their port named `metrics`.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: billing-collector-cloudservices
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: billing-collector-cloudservices
automated-billing: 'true'
name: cloudservices-servicemonitor
namespace: syn-appcat
spec:
endpoints:
# Refers to the endpoint by port name, not by port number.
- port: metrics
selector:
matchLabels:
automated-billing: 'true'
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ metadata:
app.kubernetes.io/component: billing-collector-cloudservices
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: billing-collector-cloudservices
automated-billing: 'true'
name: cloudscale
name: cloudscale
namespace: syn-appcat
Expand All @@ -18,6 +19,7 @@ spec:
app.kubernetes.io/component: billing-collector-cloudservices
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: billing-collector-cloudservices
automated-billing: 'true'
name: cloudscale
strategy:
rollingUpdate:
Expand All @@ -31,6 +33,7 @@ spec:
app.kubernetes.io/component: billing-collector-cloudservices
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: billing-collector-cloudservices
automated-billing: 'true'
name: cloudscale
spec:
containers:
Expand All @@ -43,10 +46,19 @@ spec:
name: cloudscale-env
- secretRef:
name: credentials-cloudscale
image: ghcr.io/vshn/billing-collector-cloudservices:v3.2.0
image: ghcr.io/vshn/billing-collector-cloudservices:v3.4.0
imagePullPolicy: IfNotPresent
name: exporter
ports: []
ports:
- containerPort: 2112
protocol: TCP
resources:
limits:
cpu: 250m
memory: 256Mi
requests:
cpu: 100m
memory: 128Mi
stdin: false
tty: false
volumeMounts: []
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# PrometheusRule with two critical alerts for team schedar. Each alert fires
# when its failed-request metric has been non-zero for one minute; the
# expressions compare the raw value and do not compute a rate.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    app.kubernetes.io/component: billing-collector-cloudservices
    app.kubernetes.io/managed-by: commodore
    app.kubernetes.io/name: billing-collector-cloudservices
    automated-billing: 'true'
  name: cloudservices-billing
  namespace: syn-appcat
spec:
  groups:
    - name: odoo_http_failures
      rules:
        - alert: HighOdooHTTPFailureRate
          annotations:
            description: The counter of failed Odoo HTTP requests (`billing_cloud_collector_http_requests_odoo_failed_total`)
              has been non-zero for at least one minute.
            summary: Failed Odoo HTTP requests detected
          expr: |
            billing_cloud_collector_http_requests_odoo_failed_total != 0
          for: 1m
          labels:
            severity: critical
            syn_team: schedar
        - alert: HighProviderHTTPFailureRate
          annotations:
            # Previously this text said "Odoo" (copied from the rule above).
            description: The counter of failed cloud provider HTTP requests (`billing_cloud_collector_http_requests_provider_failed_total`)
              has been non-zero for at least one minute.
            summary: Failed provider HTTP requests detected in the automated-billing
              collector
          expr: |
            billing_cloud_collector_http_requests_provider_failed_total != 0
          for: 1m
          labels:
            severity: critical
            syn_team: schedar
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# ServiceMonitor in namespace syn-appcat: selects objects labelled
# automated-billing: 'true' and scrapes their port named `metrics`.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: billing-collector-cloudservices
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: billing-collector-cloudservices
automated-billing: 'true'
name: cloudservices-servicemonitor
namespace: syn-appcat
spec:
endpoints:
# Refers to the endpoint by port name, not by port number.
- port: metrics
selector:
matchLabels:
automated-billing: 'true'
Loading

0 comments on commit 4d50c02

Please sign in to comment.