From 0f998aff9ef7be532fbee8e6813d2a070425c740 Mon Sep 17 00:00:00 2001 From: Nicolas Bigler Date: Tue, 21 Nov 2023 18:31:49 +0100 Subject: [PATCH 1/3] Add composition for MariaDB service Signed-off-by: Nicolas Bigler --- component/class/appcat.yml | 1 + component/class/defaults.yml | 167 ++++-- component/component/main.jsonnet | 14 +- component/component/provider.jsonnet | 5 + component/component/slos.libsonnet | 112 +--- .../component/vshn_appcat_services.jsonnet | 213 +++++++ component/component/vshn_redis.jsonnet | 20 - .../appcat/appcat/10_function_appcat.yaml | 2 +- .../appcat/apiserver/10_apiserver_envs.yaml | 1 + .../appcat/appcat/10_function_appcat.yaml | 2 +- .../appcat/appcat/10_function_appcat.yaml | 2 +- .../controllers/appcat/30_deployment.yaml | 2 +- .../appcat/appcat/10_function_appcat.yaml | 2 +- ...appcat-sliexporter-controller-manager.yaml | 2 +- .../appcat/appcat/10_function_appcat.yaml | 2 +- .../appcat/appcat/10_function_appcat.yaml | 2 +- .../minio/appcat/appcat/10_provider_helm.yaml | 12 + .../appcat/21_composition_vshn_minio.yaml | 2 +- .../appcat/apiserver/10_apiserver_envs.yaml | 1 + .../controllers/appcat/30_deployment.yaml | 2 +- .../appcat/sla_reporter/01_cronjob.yaml | 2 +- .../sli_exporter/90_slo_vshn_minio_ha.yaml | 2 +- ...appcat-sliexporter-controller-manager.yaml | 2 +- .../appcat/appcat/10_function_appcat.yaml | 2 +- ...appcat-sliexporter-controller-manager.yaml | 2 +- .../appcat/appcat/10_function_appcat.yaml | 2 +- .../vshn/appcat/appcat/10_provider_helm.yaml | 12 + .../appcat/appcat/20_plans_vshn_mariadb.yaml | 15 + .../appcat/appcat/20_rbac_vshn_mariadb.yaml | 36 ++ .../appcat/20_rbac_vshn_mariadb_resize.yaml | 72 +++ ...bac_vshn_redis_metrics_servicemonitor.yaml | 30 - .../appcat/20_role_vshn_mariadb_restore.yaml | 80 +++ .../appcat/appcat/20_xrd_vshn_mariadb.yaml | 527 ++++++++++++++++++ .../appcat/21_composition_vshn_mariadb.yaml | 53 ++ .../appcat/21_composition_vshn_postgres.yaml | 2 +- 
.../21_composition_vshn_postgresrestore.yaml | 2 +- .../appcat/21_composition_vshn_redis.yaml | 2 +- .../appcat/22_prom_rule_sla_mariadb.yaml | 16 + .../appcat/apiserver/10_apiserver_envs.yaml | 1 + .../controllers/appcat/30_deployment.yaml | 2 +- .../appcat/sla_reporter/01_cronjob.yaml | 2 +- .../sli_exporter/90_slo_vshn_mariadb.yaml | 206 +++++++ .../sli_exporter/90_slo_vshn_mariadb_ha.yaml | 206 +++++++ .../90_slo_vshn_postgresql_ha.yaml | 6 +- .../sli_exporter/90_slo_vshn_redis_ha.yaml | 2 +- ...appcat-sliexporter-controller-manager.yaml | 2 +- component/tests/vshn.yml | 5 + .../ROOT/pages/runbooks/vshn-mariadb.adoc | 118 ++++ .../ROOT/pages/runbooks/vshn-minio.adoc | 120 ++++ .../ROOT/pages/runbooks/vshn-redis.adoc | 118 ++++ package/main.yaml | 2 +- 51 files changed, 2010 insertions(+), 205 deletions(-) create mode 100644 component/component/vshn_appcat_services.jsonnet create mode 100644 component/tests/golden/vshn/appcat/appcat/20_plans_vshn_mariadb.yaml create mode 100644 component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_mariadb.yaml create mode 100644 component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_mariadb_resize.yaml delete mode 100644 component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_redis_metrics_servicemonitor.yaml create mode 100644 component/tests/golden/vshn/appcat/appcat/20_role_vshn_mariadb_restore.yaml create mode 100644 component/tests/golden/vshn/appcat/appcat/20_xrd_vshn_mariadb.yaml create mode 100644 component/tests/golden/vshn/appcat/appcat/21_composition_vshn_mariadb.yaml create mode 100644 component/tests/golden/vshn/appcat/appcat/22_prom_rule_sla_mariadb.yaml create mode 100644 component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_mariadb.yaml create mode 100644 component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_mariadb_ha.yaml create mode 100644 docs/modules/ROOT/pages/runbooks/vshn-mariadb.adoc create mode 100644 docs/modules/ROOT/pages/runbooks/vshn-minio.adoc create mode 100644 
docs/modules/ROOT/pages/runbooks/vshn-redis.adoc diff --git a/component/class/appcat.yml b/component/class/appcat.yml index 6f155842e..acc2da865 100644 --- a/component/class/appcat.yml +++ b/component/class/appcat.yml @@ -65,6 +65,7 @@ parameters: - ${_base_directory}/component/vshn_services.jsonnet - ${_base_directory}/component/statefuleset-resize-controller.jsonnet - ${_base_directory}/component/functions.jsonnet + - ${_base_directory}/component/vshn_appcat_services.jsonnet input_type: jsonnet output_path: appcat/ diff --git a/component/class/defaults.yml b/component/class/defaults.yml index 4318afdc0..3893a1b5f 100644 --- a/component/class/defaults.yml +++ b/component/class/defaults.yml @@ -4,6 +4,9 @@ parameters: redis: source: https://charts.bitnami.com/bitnami version: 17.7.1 + mariadb: + source: https://charts.bitnami.com/bitnami + version: 10.1.3 minio: source: https://charts.min.io version: 5.0.13 @@ -39,7 +42,7 @@ parameters: appcat: registry: ghcr.io repository: vshn/appcat - tag: v4.44.1 + tag: v4.44.2 apiserver: registry: ghcr.io repository: vshn/appcat-apiserver @@ -64,7 +67,54 @@ parameters: tenantID: ${cluster:tenant} quotasEnabled: false grpcEndpoint: host.docker.internal:9443 - + defaultRestoreRoleRules: + - apiGroups: + - vshn.appcat.vshn.io + resources: + - "*" + verbs: + - get + - apiGroups: + - "k8up.io" + resources: + - snapshots + verbs: + - get + - apiGroups: + - "" + resources: + - secrets + verbs: + - get + - create + - delete + - apiGroups: + - apps + resources: + - statefulsets/scale + verbs: + - update + - patch + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - apiGroups: + - batch + resources: + - jobs + verbs: + - get + - apiGroups: + - "" + resources: + - events + verbs: + - get + - create + - patch controller: enabled: false namespace: ${appcat:namespace} @@ -101,6 +151,7 @@ parameters: APPCAT_HANDLER_ENABLED: "true" VSHN_POSTGRES_BACKUP_HANDLER_ENABLED: "false" VSHN_REDIS_BACKUP_HANDLER_ENABLED: "false" + 
VSHN_MARIADB_BACKUP_HANDLER_ENABLED: "false" extraArgs: [] extraEnv: {} apiservice: @@ -118,6 +169,15 @@ parameters: slos: enabled: true alertsEnabled: true + uptimeDefaults: + objective: 99.9 + alerting: + page_alert: + # This should reduce non-actionable alerts because of single instance restarts. + # The page alert looks (among other things) at the burn rate over the last 5min. + # If the alert is pending for more than 5m this indicates a real problem. + for: 6m + ticket_alert: {} namespace: appcat-slos namespaceLabels: ${appcat:namespaceLabels} namespaceAnnotations: ${appcat:namespaceAnnotations} @@ -157,36 +217,14 @@ parameters: ticket_labels: severity: "warning" vshn: - postgres: - uptime: - objective: 99.9 - alerting: - page_alert: - # This should reduce non actionable alerts because of single instance restarts. - # The page alert looks (ammong other things) at the burn rate over the last 5min. - # If the alert is pending for more than 5m this indicates a real problem. - for: 6m - ticket_alert: {} - redis: - uptime: - objective: 99.9 - alerting: - page_alert: - # This should reduce non actionable alerts because of single instance restarts. - # The page alert looks (ammong other things) at the burn rate over the last 5min. - # If the alert is pending for more than 5m this indicates a real problem. - for: 6m - ticket_alert: {} - minio: - uptime: - objective: 99.9 - alerting: - page_alert: - # This should reduce non actionable alerts because of single instance restarts. - # The page alert looks (ammong other things) at the burn rate over the last 5min. - # If the alert is pending for more than 5m this indicates a real problem.
- for: 6m - ticket_alert: {} + PostgreSQL: + uptime: ${appcat:slos:uptimeDefaults} + Redis: + uptime: ${appcat:slos:uptimeDefaults} + Minio: + uptime: ${appcat:slos:uptimeDefaults} + MariaDB: + uptime: ${appcat:slos:uptimeDefaults} providers: cloudscale: @@ -254,6 +292,7 @@ parameters: vshn: enabled: false externalDatabaseConnectionsEnabled: "false" + e2eTests: false quotasEnabled: ${appcat:quotasEnabled} secretNamespace: ${crossplane:namespace} emailAlerting: @@ -455,6 +494,70 @@ parameters: memory: "1Gi" disk: 50Gi instances: [] + services: + mariadb: + serviceName: VSHNMariaDB + connectionSecretKeys: + - ca.crt + - MARIADB_HOST + - MARIADB_PORT + - MARIADB_USERNAME + - MARIADB_PASSWORD + - MARIADB_URL + mode: standalone + offered: true + enabled: false + restoreSA: mariadbrestoreserviceaccount + restoreRoleRules: ${appcat:defaultRestoreRoleRules} + hasSts: true + openshiftTemplate: + serviceName: mariadbbyvshn + description: "The open source relational database management system (DBMS) that is a compatible drop-in replacement for the widely used MySQL database technology" + message: 'Your MariaDB by VSHN instance is being provisioned, please see \${SECRET_NAME} for access.' 
+ url: https://vs.hn/vshn-mariadb + tags: "database,sql,mariadb" + icon: "icon-mariadb" + defaultVersion: "11.2" + enableNetworkPolicy: false + secretNamespace: ${appcat:services:vshn:secretNamespace} + helmChartVersion: ${appcat:charts:mariadb:version} + imageRegistry: "" + bucket_region: "lpg" + grpcEndpoint: ${appcat:grpcEndpoint} + proxyFunction: false + defaultPlan: standard-1 + sla: 99.25 + plans: + standard-512m: + size: + enabled: true + cpu: "125m" + memory: "512Mi" + disk: 16Gi + standard-1: + size: + enabled: true + cpu: "250m" + memory: "1Gi" + disk: 16Gi + standard-2: + size: + enabled: true + cpu: "500m" + memory: "2Gi" + disk: 16Gi + standard-4: + size: + enabled: true + cpu: "1" + memory: "4Gi" + disk: 16Gi + standard-8: + size: + enabled: true + cpu: "2" + memory: "8Gi" + disk: 16Gi # Config for exoscale composites exoscale: diff --git a/component/component/main.jsonnet b/component/component/main.jsonnet index 21061f2a5..1b8998235 100644 --- a/component/component/main.jsonnet +++ b/component/component/main.jsonnet @@ -167,10 +167,14 @@ local emailSecret = kube.Secret(params.services.vshn.emailAlerting.secretName) { '10_appcat_maintenance_recording_rule': maintenanceRule, [if params.services.vshn.enabled && params.services.vshn.emailAlerting.enabled then '10_mailgun_secret']: emailSecret, -} + if params.slos.enabled then { - [if params.services.vshn.enabled && params.services.vshn.postgres.enabled then 'sli_exporter/90_slo_vshn_postgresql']: slos.Get('vshn-postgresql'), - [if params.services.vshn.enabled && params.services.vshn.postgres.enabled then 'sli_exporter/90_slo_vshn_postgresql_ha']: slos.Get('vshn-postgresql-ha'), - [if params.services.vshn.enabled && params.services.vshn.redis.enabled then 'sli_exporter/90_slo_vshn_redis']: slos.Get('vshn-redis'), - [if params.services.vshn.enabled && params.services.vshn.redis.enabled then 'sli_exporter/90_slo_vshn_redis_ha']: slos.Get('vshn-redis-ha'), +} + if params.slos.enabled && 
params.services.vshn.enabled then { + [if params.services.vshn.postgres.enabled then 'sli_exporter/90_slo_vshn_postgresql']: slos.Get('vshn-postgresql'), + [if params.services.vshn.postgres.enabled then 'sli_exporter/90_slo_vshn_postgresql_ha']: slos.Get('vshn-postgresql-ha'), + [if params.services.vshn.redis.enabled then 'sli_exporter/90_slo_vshn_redis']: slos.Get('vshn-redis'), + [if params.services.vshn.redis.enabled then 'sli_exporter/90_slo_vshn_redis_ha']: slos.Get('vshn-redis-ha'), + [if params.services.vshn.minio.enabled then 'sli_exporter/90_slo_vshn_minio']: slos.Get('vshn-minio'), + [if params.services.vshn.minio.enabled then 'sli_exporter/90_slo_vshn_minio_ha']: slos.Get('vshn-minio-ha'), + [if params.services.vshn.services.mariadb.enabled then 'sli_exporter/90_slo_vshn_mariadb']: slos.Get('vshn-mariadb'), + [if params.services.vshn.services.mariadb.enabled then 'sli_exporter/90_slo_vshn_mariadb_ha']: slos.Get('vshn-mariadb-ha'), } else {} diff --git a/component/component/provider.jsonnet b/component/component/provider.jsonnet index 14088ed1d..20f87aab9 100644 --- a/component/component/provider.jsonnet +++ b/component/component/provider.jsonnet @@ -285,6 +285,11 @@ local runtimeConfigRef(name) = { resources: [ 'jobs' ], verbs: [ 'get', 'list', 'watch', 'create', 'delete' ], }, + { + apiGroups: [ 'monitoring.coreos.com' ], + resources: [ 'servicemonitors' ], + verbs: [ 'get', 'list', 'watch', 'update', 'patch', 'create', 'delete' ], + }, ], }; local rolebinding = kube.ClusterRoleBinding('crossplane:provider:provider-helm:system:custom') { diff --git a/component/component/slos.libsonnet b/component/component/slos.libsonnet index da6766c7f..623170b52 100644 --- a/component/component/slos.libsonnet +++ b/component/component/slos.libsonnet @@ -28,7 +28,7 @@ local newSLO(name, group, sloParams) = labels: params.slos.alerting.ticket_labels, annotations: { [if std.objectHas(slo.alerting.ticket_alert, 'for') then 'for']: std.get(slo.alerting.ticket_alert, 
'for'), - runbook_url: 'https://hub.syn.tools/appcat/runbooks/%s.html#%s' % [ group, name ], + runbook_url: 'https://hub.syn.tools/appcat/runbooks/%s.html#%s' % [ std.rstripChars(group, '-ha'), name ], }, }, } + com.makeMergeable(sloParams.alerting), @@ -70,117 +70,47 @@ local getEventsHA(serviceName) = { total_query: 'sum(rate(appcat_probes_seconds_count{service="' + serviceName + '", ha="true"}[{{.window}}])) by (service, namespace, name, organization, sla)', }; -{ - slothInput: { - 'vshn-postgresql': [ - newSLO('uptime', 'vshn-postgresql', params.slos.vshn.postgres.uptime) { - description: 'Uptime SLO for PostgreSQL by VSHN', - sli: { - events: getEvents('VSHNPostgreSQL'), - }, - alerting+: { - name: 'SLO_AppCat_VSHNPostgreSQLUptime', - annotations+: { - summary: 'Probes to PostgreSQL by VSHN instance fail', - }, - labels+: { - service: 'VSHNPostgreSQL', - OnCall: '{{ if eq $labels.sla "guaranteed" }}true{{ else }}false{{ end }}', - }, - }, - }, - ], - 'vshn-postgresql-ha': [ - newSLO('uptime', 'vshn-postgresql-ha', params.slos.vshn.postgres.uptime) { - description: 'Uptime SLO for High Available PostgreSQL by VSHN', - sli: { - events: getEventsHA('VSHNPostgreSQL'), - }, - alerting+: { - name: 'SLO_AppCat_HAVSHNPosgtreSQLUptime', - annotations+: { - summary: 'Probes to HA PostgreSQL by VSHN instance fail', - }, - labels+: { - service: 'VSHNPostgreSQL', - OnCall: '{{ if eq $labels.sla "guaranteed" }}true{{ else }}false{{ end }}', - }, - }, - }, - ], - // redis without HA - 'vshn-redis': [ - newSLO('uptime', 'vshn-redis', params.slos.vshn.redis.uptime) { - description: 'Uptime SLO for Redis by VSHN', - sli: { - events: getEvents('VSHNRedis'), - }, - alerting+: { - name: 'SLO_AppCat_VSHNRedisUptime', - annotations+: { - summary: 'Probes to Redis by VSHN instance fail', - }, - labels+: { - service: 'VSHNRedis', - OnCall: '{{ if eq $labels.sla "guaranteed" }}true{{ else }}false{{ end }}', - }, - }, - }, - ], - 'vshn-redis-ha': [ - newSLO('uptime', 'vshn-redis-ha', 
params.slos.vshn.redis.uptime) { - description: 'Uptime SLO for High Available Redis by VSHN', - sli: { - events: getEventsHA('VSHNRedis'), - }, - alerting+: { - name: 'SLO_AppCat_HAVSHNRedisUptime', - annotations+: { - summary: 'Probes to HA Redis by VSHN instance fail', - }, - labels+: { - service: 'VSHNRedis', - OnCall: '{{ if eq $labels.sla "guaranteed" }}true{{ else }}false{{ end }}', - }, - }, - }, - ], - 'vshn-minio': [ - newSLO('uptime', 'vshn-minio', params.slos.vshn.minio.uptime) { - description: 'Uptime SLO for Minio by VSHN', +local generateSlothInput(name, uptime) = + local nameLower = std.asciiLower(name); + { + ['vshn-%s' % nameLower]: [ + newSLO('uptime', 'vshn-' + nameLower, uptime) { + description: 'Uptime SLO for ' + name + ' by VSHN', sli: { - events: getEvents('VSHNMinio'), + events: getEvents('VSHN' + name), }, alerting+: { - name: 'SLO_AppCat_VSHNMinioUptime', + name: 'SLO_AppCat_VSHN' + name + 'Uptime', annotations+: { - summary: 'Probes to Minio by VSHN instance fail', + summary: 'Probes to ' + name + ' by VSHN instance fail', }, labels+: { - service: 'VSHNMinio', + service: 'VSHN' + name, OnCall: '{{ if eq $labels.sla "guaranteed" }}true{{ else }}false{{ end }}', }, }, }, ], - 'vshn-minio-ha': [ - newSLO('uptime', 'vshn-postgresql-ha', params.slos.vshn.minio.uptime) { - description: 'Uptime SLO for High Available Minio by VSHN', + ['vshn-%s-ha' % nameLower]: [ + newSLO('uptime', 'vshn-' + nameLower + '-ha', uptime) { + description: 'Uptime SLO for High Available ' + name + ' by VSHN', sli: { - events: getEventsHA('VSHNMinio'), + events: getEventsHA('VSHN' + name), }, alerting+: { - name: 'SLO_AppCat_HAVSHNMinioUptime', + name: 'SLO_AppCat_HAVSHN' + name + 'Uptime', annotations+: { - summary: 'Probes to HA Minio by VSHN instance fail', + summary: 'Probes to HA ' + name + ' by VSHN instance fail', }, labels+: { - service: 'VSHNMinio', + service: 'VSHN' + name, OnCall: '{{ if eq $labels.sla "guaranteed" }}true{{ else }}false{{ end }}', }, }, 
}, ], - }, + }; +{ + slothInput: std.foldl(function(objOut, name) objOut + generateSlothInput(name, params.slos.vshn[name].uptime), std.objectFields(params.slos.vshn), {}), Get(name): prometheusRule(name), } diff --git a/component/component/vshn_appcat_services.jsonnet b/component/component/vshn_appcat_services.jsonnet new file mode 100644 index 000000000..6f23fa252 --- /dev/null +++ b/component/component/vshn_appcat_services.jsonnet @@ -0,0 +1,213 @@ +local com = import 'lib/commodore.libjsonnet'; +local kap = import 'lib/kapitan.libjsonnet'; +local kube = import 'lib/kube.libjsonnet'; + +local comp = import 'lib/appcat-compositions.libsonnet'; +local crossplane = import 'lib/crossplane.libsonnet'; + +local common = import 'common.libsonnet'; +local prom = import 'prometheus.libsonnet'; +local xrds = import 'xrds.libsonnet'; + +local slos = import 'slos.libsonnet'; + +local inv = kap.inventory(); +local params = inv.parameters.appcat; + +local serviceNameLabelKey = 'appcat.vshn.io/servicename'; +local serviceNamespaceLabelKey = 'appcat.vshn.io/claim-namespace'; + +local getServiceNamePlural(serviceName) = + local serviceNameLower = std.asciiLower(serviceName); + if std.endsWith(serviceName, 's') then + serviceNameLower + else + serviceNameLower + 's'; + +local vshn_appcat_service(name) = + local isOpenshift = std.startsWith(inv.parameters.facts.distribution, 'openshift'); + local securityContext = !isOpenshift; + + local serviceParams = params.services.vshn.services[name]; + local connectionSecretKeys = serviceParams.connectionSecretKeys; + local promRuleSLA = prom.PromRuleSLA(serviceParams.sla, serviceParams.serviceName); + local plans = common.FilterDisabledParams(serviceParams.plans); + local serviceNamePlural = getServiceNamePlural(serviceParams.serviceName); + + + local restoreServiceAccount = kube.ServiceAccount(serviceParams.restoreSA) + { + metadata+: { + namespace: params.services.controlNamespace, + }, + }; + + local restoreRoleName = 
'crossplane:appcat:job:' + name + ':restorejob'; + local restoreRole = kube.ClusterRole(restoreRoleName) { + rules: serviceParams.restoreRoleRules, + }; + + local restoreClusterRoleBinding = kube.ClusterRoleBinding('appcat:job:' + name + ':restorejob') + { + roleRef_: restoreRole, + subjects_: [ restoreServiceAccount ], + }; + + local resizeServiceAccount = kube.ServiceAccount('sa-sts-deleter') + { + metadata+: { + namespace: params.services.controlNamespace, + }, + }; + + local resizeClusterRole = kube.ClusterRole('appcat:job:' + name + ':resizejob') { + rules: [ + { + apiGroups: [ 'helm.crossplane.io' ], + resources: [ 'releases' ], + verbs: [ 'get', 'list', 'watch', 'update', 'patch', 'create', 'delete' ], + }, + { + apiGroups: [ 'apps' ], + resources: [ 'statefulsets' ], + verbs: [ 'delete', 'get', 'watch', 'list', 'update', 'patch' ], + }, + { + apiGroups: [ 'helm.crossplane.io' ], + resources: [ 'releases' ], + verbs: [ 'update', 'get' ], + }, + { + apiGroups: [ '' ], + resources: [ 'pods' ], + verbs: [ 'list', 'get', 'update', 'delete' ], + }, + ], + }; + + local resizeClusterRoleBinding = kube.ClusterRoleBinding('appcat:job:' + name + ':resizejob') + { + roleRef_: resizeClusterRole, + subjects_: [ resizeServiceAccount ], + }; + + local xrd = xrds.XRDFromCRD( + 'x' + serviceNamePlural + '.vshn.appcat.vshn.io', + xrds.LoadCRD('vshn.appcat.vshn.io_' + serviceNamePlural + '.yaml', params.images.appcat.tag), + defaultComposition=std.asciiLower(serviceParams.serviceName) + '.vshn.appcat.vshn.io', + connectionSecretKeys=connectionSecretKeys, + ) + xrds.WithPlanDefaults(plans, serviceParams.defaultPlan); + + local composition = + kube._Object('apiextensions.crossplane.io/v1', 'Composition', std.asciiLower(serviceParams.serviceName) + '.vshn.appcat.vshn.io') + + common.SyncOptions + + common.vshnMetaVshnDBaas(name, serviceParams.mode, std.toString(serviceParams.offered), plans) + + { + spec: { + compositeTypeRef: comp.CompositeRef(xrd), + 
writeConnectionSecretsToNamespace: serviceParams.secretNamespace, + mode: 'Pipeline', + pipeline: + [ + { + step: name + '-func', + functionRef: { + name: 'function-appcat', + }, + input: kube.ConfigMap('xfn-config') + { + metadata: { + labels: { + name: 'xfn-config', + }, + name: 'xfn-config', + }, + data: { + serviceName: name, + imageTag: common.GetAppCatImageTag(), + chartRepository: params.charts[name].source, + chartVersion: params.charts[name].version, + bucketRegion: serviceParams.bucket_region, + maintenanceSA: 'helm-based-service-maintenance', + controlNamespace: params.services.controlNamespace, + plans: std.toString(plans), + restoreSA: serviceParams.restoreSA, + quotasEnabled: std.toString(params.services.vshn.quotasEnabled), + isOpenshift: std.toString(isOpenshift), + } + if serviceParams.proxyFunction then { + proxyEndpoint: serviceParams.grpcEndpoint, + } else {}, + }, + }, + ], + }, + }; + + // OpenShift template configuration + local templateObject = kube._Object('vshn.appcat.vshn.io/v1', serviceParams.serviceName, '${INSTANCE_NAME}') + { + spec: { + parameters: { + service: { + version: '${VERSION}', + }, + size: { + plan: '${PLAN}', + }, + }, + writeConnectionSecretToRef: { + name: '${SECRET_NAME}', + }, + }, + }; + + local osTemplate = + common.OpenShiftTemplate(serviceParams.openshiftTemplate.serviceName, + serviceParams.serviceName, + serviceParams.openshiftTemplate.description, + serviceParams.openshiftTemplate.icon, + serviceParams.openshiftTemplate.tags, + serviceParams.openshiftTemplate.message, + 'VSHN', + serviceParams.openshiftTemplate.url) + { + objects: [ + templateObject, + ], + parameters: [ + { + name: 'PLAN', + value: 'standard-4', + }, + { + name: 'SECRET_NAME', + value: name + '-credentials', + }, + { + name: 'INSTANCE_NAME', + }, + { + name: 'VERSION', + value: std.toString(serviceParams.openshiftTemplate.defaultVersion), + }, + ], + }; + + local plansCM = kube.ConfigMap('vshn' + name + 'plans') + { + metadata+: { + namespace: 
params.namespace, + }, + data: { + plans: std.toString(plans), + }, + }; + + + if params.services.vshn.enabled && serviceParams.enabled then { + ['20_xrd_vshn_%s' % name]: xrd, + ['20_rbac_vshn_%s' % name]: xrds.CompositeClusterRoles(xrd), + ['21_composition_vshn_%s' % name]: composition, + ['20_role_vshn_%s_restore' % name]: [ restoreRole, restoreServiceAccount, restoreClusterRoleBinding ], + [if serviceParams.hasSts then '20_rbac_vshn_%s_resize' % name]: [ resizeClusterRole, resizeServiceAccount, resizeClusterRoleBinding ], + ['20_plans_vshn_%s' % name]: plansCM, + ['22_prom_rule_sla_%s' % name]: promRuleSLA, + [if isOpenshift then '21_openshift_template_%s_vshn' % name]: osTemplate, + [if params.services.vshn.enabled && serviceParams.enabled then 'sli_exporter/90_slo_vshn_%s' % name]: slos.Get('vshn-' + name), + [if params.services.vshn.enabled && serviceParams.enabled then 'sli_exporter/90_slo_vshn_%s_ha' % name]: slos.Get('vshn-' + name + '-ha'), + } else {} +; + +std.foldl(function(objOut, name) objOut + vshn_appcat_service(name), std.objectFields(params.services.vshn.services), {}) diff --git a/component/component/vshn_redis.jsonnet b/component/component/vshn_redis.jsonnet index b670251e7..52d52567b 100644 --- a/component/component/vshn_redis.jsonnet +++ b/component/component/vshn_redis.jsonnet @@ -96,25 +96,6 @@ local restoreRole = kube.ClusterRole(restoreRoleName) { ], }; -local helmMonitoringClusterRole = kube.ClusterRole('allow-helm-monitoring-resources') { - rules: [ - { - apiGroups: [ 'monitoring.coreos.com' ], - resources: [ 'servicemonitors' ], - verbs: [ '*' ], - }, - ], -}; -local helmMonitoringServiceAccount = kube.ServiceAccount('provider-helm') + { - metadata+: { - namespace: 'syn-crossplane', - }, -}; -local helmMonitoringClusterRoleBinding = kube.ClusterRoleBinding('system:serviceaccount:syn-crossplane:provider-helm') + { - roleRef_: helmMonitoringClusterRole, - subjects_: [ helmMonitoringServiceAccount ], -}; - local restoreClusterRoleBinding 
= kube.ClusterRoleBinding('appcat:job:redis:restorejob') + { roleRef_: restoreRole, subjects_: [ restoreServiceAccount ], @@ -756,7 +737,6 @@ if params.services.vshn.enabled && redisParams.enabled then { '20_rbac_vshn_redis': xrds.CompositeClusterRoles(xrd), '20_role_vshn_redisrestore': [ restoreRole, restoreServiceAccount, restoreClusterRoleBinding ], '20_rbac_vshn_redis_resize': [ resizeClusterRole, resizeServiceAccount, resizeClusterRoleBinding ], - '20_rbac_vshn_redis_metrics_servicemonitor': [ helmMonitoringClusterRole, helmMonitoringClusterRoleBinding ], '20_plans_vshn_redis': plansCM, '21_composition_vshn_redis': composition, '22_prom_rule_sla_redis': promRuleRedisSLA, diff --git a/component/tests/golden/apiserver/appcat/appcat/10_function_appcat.yaml b/component/tests/golden/apiserver/appcat/appcat/10_function_appcat.yaml index 8172a4f15..ddce45e12 100644 --- a/component/tests/golden/apiserver/appcat/appcat/10_function_appcat.yaml +++ b/component/tests/golden/apiserver/appcat/appcat/10_function_appcat.yaml @@ -3,6 +3,6 @@ kind: Function metadata: name: function-appcat spec: - package: ghcr.io/vshn/appcat:v4.44.1-func + package: ghcr.io/vshn/appcat:v4.44.2-func runtimeConfigRef: name: function-appcat diff --git a/component/tests/golden/apiserver/appcat/appcat/apiserver/10_apiserver_envs.yaml b/component/tests/golden/apiserver/appcat/appcat/apiserver/10_apiserver_envs.yaml index ef44c40c5..6782a3d10 100644 --- a/component/tests/golden/apiserver/appcat/appcat/apiserver/10_apiserver_envs.yaml +++ b/component/tests/golden/apiserver/appcat/appcat/apiserver/10_apiserver_envs.yaml @@ -1,6 +1,7 @@ apiVersion: v1 data: APPCAT_HANDLER_ENABLED: 'true' + VSHN_MARIADB_BACKUP_HANDLER_ENABLED: 'false' VSHN_POSTGRES_BACKUP_HANDLER_ENABLED: 'false' VSHN_REDIS_BACKUP_HANDLER_ENABLED: 'false' kind: ConfigMap diff --git a/component/tests/golden/cloudscale/appcat/appcat/10_function_appcat.yaml b/component/tests/golden/cloudscale/appcat/appcat/10_function_appcat.yaml index 
8172a4f15..ddce45e12 100644 --- a/component/tests/golden/cloudscale/appcat/appcat/10_function_appcat.yaml +++ b/component/tests/golden/cloudscale/appcat/appcat/10_function_appcat.yaml @@ -3,6 +3,6 @@ kind: Function metadata: name: function-appcat spec: - package: ghcr.io/vshn/appcat:v4.44.1-func + package: ghcr.io/vshn/appcat:v4.44.2-func runtimeConfigRef: name: function-appcat diff --git a/component/tests/golden/controllers/appcat/appcat/10_function_appcat.yaml b/component/tests/golden/controllers/appcat/appcat/10_function_appcat.yaml index 8172a4f15..ddce45e12 100644 --- a/component/tests/golden/controllers/appcat/appcat/10_function_appcat.yaml +++ b/component/tests/golden/controllers/appcat/appcat/10_function_appcat.yaml @@ -3,6 +3,6 @@ kind: Function metadata: name: function-appcat spec: - package: ghcr.io/vshn/appcat:v4.44.1-func + package: ghcr.io/vshn/appcat:v4.44.2-func runtimeConfigRef: name: function-appcat diff --git a/component/tests/golden/controllers/appcat/appcat/controllers/appcat/30_deployment.yaml b/component/tests/golden/controllers/appcat/appcat/controllers/appcat/30_deployment.yaml index f8debf6d0..f2092ce3f 100644 --- a/component/tests/golden/controllers/appcat/appcat/controllers/appcat/30_deployment.yaml +++ b/component/tests/golden/controllers/appcat/appcat/controllers/appcat/30_deployment.yaml @@ -23,7 +23,7 @@ spec: env: - name: PLANS_NAMESPACE value: syn-appcat - image: ghcr.io/vshn/appcat:v4.44.1 + image: ghcr.io/vshn/appcat:v4.44.2 livenessProbe: httpGet: path: /healthz diff --git a/component/tests/golden/defaults/appcat/appcat/10_function_appcat.yaml b/component/tests/golden/defaults/appcat/appcat/10_function_appcat.yaml index 8172a4f15..ddce45e12 100644 --- a/component/tests/golden/defaults/appcat/appcat/10_function_appcat.yaml +++ b/component/tests/golden/defaults/appcat/appcat/10_function_appcat.yaml @@ -3,6 +3,6 @@ kind: Function metadata: name: function-appcat spec: - package: ghcr.io/vshn/appcat:v4.44.1-func + package: 
ghcr.io/vshn/appcat:v4.44.2-func runtimeConfigRef: name: function-appcat diff --git a/component/tests/golden/defaults/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml b/component/tests/golden/defaults/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml index 0f0735e72..56e965b65 100644 --- a/component/tests/golden/defaults/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml +++ b/component/tests/golden/defaults/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml @@ -28,7 +28,7 @@ spec: value: "false" - name: APPCAT_SLI_VSHNREDIS value: "false" - image: ghcr.io/vshn/appcat:v4.44.1 + image: ghcr.io/vshn/appcat:v4.44.2 livenessProbe: httpGet: path: /healthz diff --git a/component/tests/golden/exoscale/appcat/appcat/10_function_appcat.yaml b/component/tests/golden/exoscale/appcat/appcat/10_function_appcat.yaml index 8172a4f15..ddce45e12 100644 --- a/component/tests/golden/exoscale/appcat/appcat/10_function_appcat.yaml +++ b/component/tests/golden/exoscale/appcat/appcat/10_function_appcat.yaml @@ -3,6 +3,6 @@ kind: Function metadata: name: function-appcat spec: - package: ghcr.io/vshn/appcat:v4.44.1-func + package: ghcr.io/vshn/appcat:v4.44.2-func runtimeConfigRef: name: function-appcat diff --git a/component/tests/golden/minio/appcat/appcat/10_function_appcat.yaml b/component/tests/golden/minio/appcat/appcat/10_function_appcat.yaml index 8172a4f15..ddce45e12 100644 --- a/component/tests/golden/minio/appcat/appcat/10_function_appcat.yaml +++ b/component/tests/golden/minio/appcat/appcat/10_function_appcat.yaml @@ -3,6 +3,6 @@ kind: Function metadata: name: function-appcat spec: - package: ghcr.io/vshn/appcat:v4.44.1-func + package: ghcr.io/vshn/appcat:v4.44.2-func runtimeConfigRef: name: function-appcat diff --git a/component/tests/golden/minio/appcat/appcat/10_provider_helm.yaml 
b/component/tests/golden/minio/appcat/appcat/10_provider_helm.yaml index 1d3819d78..c15ea3088 100644 --- a/component/tests/golden/minio/appcat/appcat/10_provider_helm.yaml +++ b/component/tests/golden/minio/appcat/appcat/10_provider_helm.yaml @@ -122,6 +122,18 @@ rules: - watch - create - delete + - apiGroups: + - monitoring.coreos.com + resources: + - servicemonitors + verbs: + - get + - list + - watch + - update + - patch + - create + - delete --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/component/tests/golden/minio/appcat/appcat/21_composition_vshn_minio.yaml b/component/tests/golden/minio/appcat/appcat/21_composition_vshn_minio.yaml index 839c5e165..385d6d7fa 100644 --- a/component/tests/golden/minio/appcat/appcat/21_composition_vshn_minio.yaml +++ b/component/tests/golden/minio/appcat/appcat/21_composition_vshn_minio.yaml @@ -28,7 +28,7 @@ spec: data: controlNamespace: syn-appcat-control defaultPlan: standard-1 - imageTag: v4.44.1 + imageTag: v4.44.2 maintenanceSA: helm-based-service-maintenance minioChartRepository: https://charts.min.io minioChartVersion: 5.0.13 diff --git a/component/tests/golden/minio/appcat/appcat/apiserver/10_apiserver_envs.yaml b/component/tests/golden/minio/appcat/appcat/apiserver/10_apiserver_envs.yaml index 2e308c878..b589e088f 100644 --- a/component/tests/golden/minio/appcat/appcat/apiserver/10_apiserver_envs.yaml +++ b/component/tests/golden/minio/appcat/appcat/apiserver/10_apiserver_envs.yaml @@ -1,6 +1,7 @@ apiVersion: v1 data: APPCAT_HANDLER_ENABLED: 'true' + VSHN_MARIADB_BACKUP_HANDLER_ENABLED: 'false' VSHN_POSTGRES_BACKUP_HANDLER_ENABLED: 'true' VSHN_REDIS_BACKUP_HANDLER_ENABLED: 'true' kind: ConfigMap diff --git a/component/tests/golden/minio/appcat/appcat/controllers/appcat/30_deployment.yaml b/component/tests/golden/minio/appcat/appcat/controllers/appcat/30_deployment.yaml index f8debf6d0..f2092ce3f 100644 --- 
a/component/tests/golden/minio/appcat/appcat/controllers/appcat/30_deployment.yaml +++ b/component/tests/golden/minio/appcat/appcat/controllers/appcat/30_deployment.yaml @@ -23,7 +23,7 @@ spec: env: - name: PLANS_NAMESPACE value: syn-appcat - image: ghcr.io/vshn/appcat:v4.44.1 + image: ghcr.io/vshn/appcat:v4.44.2 livenessProbe: httpGet: path: /healthz diff --git a/component/tests/golden/minio/appcat/appcat/sla_reporter/01_cronjob.yaml b/component/tests/golden/minio/appcat/appcat/sla_reporter/01_cronjob.yaml index b68f5a788..2f7659134 100644 --- a/component/tests/golden/minio/appcat/appcat/sla_reporter/01_cronjob.yaml +++ b/component/tests/golden/minio/appcat/appcat/sla_reporter/01_cronjob.yaml @@ -30,7 +30,7 @@ spec: envFrom: - secretRef: name: appcat-sla-reports-creds - image: ghcr.io/vshn/appcat:v4.44.1 + image: ghcr.io/vshn/appcat:v4.44.2 name: sla-reporter resources: limits: diff --git a/component/tests/golden/minio/appcat/appcat/sli_exporter/90_slo_vshn_minio_ha.yaml b/component/tests/golden/minio/appcat/appcat/sli_exporter/90_slo_vshn_minio_ha.yaml index e17ed372f..6dd7d4cf2 100644 --- a/component/tests/golden/minio/appcat/appcat/sli_exporter/90_slo_vshn_minio_ha.yaml +++ b/component/tests/golden/minio/appcat/appcat/sli_exporter/90_slo_vshn_minio_ha.yaml @@ -178,7 +178,7 @@ spec: syn_team: schedar - alert: SLO_AppCat_HAVSHNMinioUptime annotations: - runbook_url: https://hub.syn.tools/appcat/runbooks/vshn-postgresql-ha.html#uptime + runbook_url: https://hub.syn.tools/appcat/runbooks/vshn-minio.html#uptime summary: Probes to HA Minio by VSHN instance fail title: (ticket) {{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error budget burn rate is too fast. 
diff --git a/component/tests/golden/minio/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml b/component/tests/golden/minio/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml index 0f0735e72..56e965b65 100644 --- a/component/tests/golden/minio/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml +++ b/component/tests/golden/minio/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml @@ -28,7 +28,7 @@ spec: value: "false" - name: APPCAT_SLI_VSHNREDIS value: "false" - image: ghcr.io/vshn/appcat:v4.44.1 + image: ghcr.io/vshn/appcat:v4.44.2 livenessProbe: httpGet: path: /healthz diff --git a/component/tests/golden/openshift/appcat/appcat/10_function_appcat.yaml b/component/tests/golden/openshift/appcat/appcat/10_function_appcat.yaml index 8172a4f15..ddce45e12 100644 --- a/component/tests/golden/openshift/appcat/appcat/10_function_appcat.yaml +++ b/component/tests/golden/openshift/appcat/appcat/10_function_appcat.yaml @@ -3,6 +3,6 @@ kind: Function metadata: name: function-appcat spec: - package: ghcr.io/vshn/appcat:v4.44.1-func + package: ghcr.io/vshn/appcat:v4.44.2-func runtimeConfigRef: name: function-appcat diff --git a/component/tests/golden/openshift/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml b/component/tests/golden/openshift/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml index 0f0735e72..56e965b65 100644 --- a/component/tests/golden/openshift/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml +++ b/component/tests/golden/openshift/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml @@ -28,7 +28,7 @@ spec: value: "false" - name: APPCAT_SLI_VSHNREDIS value: "false" - image: ghcr.io/vshn/appcat:v4.44.1 + image: ghcr.io/vshn/appcat:v4.44.2 livenessProbe: 
httpGet: path: /healthz diff --git a/component/tests/golden/vshn/appcat/appcat/10_function_appcat.yaml b/component/tests/golden/vshn/appcat/appcat/10_function_appcat.yaml index 8172a4f15..ddce45e12 100644 --- a/component/tests/golden/vshn/appcat/appcat/10_function_appcat.yaml +++ b/component/tests/golden/vshn/appcat/appcat/10_function_appcat.yaml @@ -3,6 +3,6 @@ kind: Function metadata: name: function-appcat spec: - package: ghcr.io/vshn/appcat:v4.44.1-func + package: ghcr.io/vshn/appcat:v4.44.2-func runtimeConfigRef: name: function-appcat diff --git a/component/tests/golden/vshn/appcat/appcat/10_provider_helm.yaml b/component/tests/golden/vshn/appcat/appcat/10_provider_helm.yaml index 1d3819d78..c15ea3088 100644 --- a/component/tests/golden/vshn/appcat/appcat/10_provider_helm.yaml +++ b/component/tests/golden/vshn/appcat/appcat/10_provider_helm.yaml @@ -122,6 +122,18 @@ rules: - watch - create - delete + - apiGroups: + - monitoring.coreos.com + resources: + - servicemonitors + verbs: + - get + - list + - watch + - update + - patch + - create + - delete --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/component/tests/golden/vshn/appcat/appcat/20_plans_vshn_mariadb.yaml b/component/tests/golden/vshn/appcat/appcat/20_plans_vshn_mariadb.yaml new file mode 100644 index 000000000..15527cd60 --- /dev/null +++ b/component/tests/golden/vshn/appcat/appcat/20_plans_vshn_mariadb.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +data: + plans: '{"standard-1": {"size": {"cpu": "250m", "disk": "16Gi", "enabled": true, + "memory": "1Gi"}}, "standard-2": {"size": {"cpu": "500m", "disk": "16Gi", "enabled": + true, "memory": "2Gi"}}, "standard-4": {"size": {"cpu": "1", "disk": "16Gi", "enabled": + true, "memory": "4Gi"}}, "standard-512m": {"size": {"cpu": "125m", "disk": "16Gi", + "enabled": true, "memory": "512Mi"}}, "standard-8": {"size": {"cpu": "2", "disk": + "16Gi", "enabled": true, "memory": "8Gi"}}}' +kind: ConfigMap +metadata: + annotations: {} + 
labels: + name: vshnmariadbplans + name: vshnmariadbplans + namespace: syn-appcat diff --git a/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_mariadb.yaml b/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_mariadb.yaml new file mode 100644 index 000000000..cd288225b --- /dev/null +++ b/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_mariadb.yaml @@ -0,0 +1,36 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + annotations: {} + labels: + rbac.authorization.k8s.io/aggregate-to-view: 'true' + name: appcat:composite:xvshnmariadbs.vshn.appcat.vshn.io:claim-view +rules: + - apiGroups: + - vshn.appcat.vshn.io + resources: + - vshnmariadbs + - vshnmariadbs/status + - vshnmariadbs/finalizers + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + annotations: {} + labels: + rbac.authorization.k8s.io/aggregate-to-admin: 'true' + rbac.authorization.k8s.io/aggregate-to-edit: 'true' + name: appcat:composite:xvshnmariadbs.vshn.appcat.vshn.io:claim-edit +rules: + - apiGroups: + - vshn.appcat.vshn.io + resources: + - vshnmariadbs + - vshnmariadbs/status + - vshnmariadbs/finalizers + verbs: + - '*' diff --git a/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_mariadb_resize.yaml b/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_mariadb_resize.yaml new file mode 100644 index 000000000..a70600288 --- /dev/null +++ b/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_mariadb_resize.yaml @@ -0,0 +1,72 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + annotations: {} + labels: + name: appcat-job-mariadb-resizejob + name: appcat:job:mariadb:resizejob +rules: + - apiGroups: + - helm.crossplane.io + resources: + - releases + verbs: + - get + - list + - watch + - update + - patch + - create + - delete + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - delete + - get + - watch + - list + - update + - patch + - apiGroups: + - 
helm.crossplane.io + resources: + - releases + verbs: + - update + - get + - apiGroups: + - '' + resources: + - pods + verbs: + - list + - get + - update + - delete +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: {} + labels: + name: sa-sts-deleter + name: sa-sts-deleter + namespace: syn-appcat-control +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + annotations: {} + labels: + name: appcat-job-mariadb-resizejob + name: appcat:job:mariadb:resizejob +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: appcat:job:mariadb:resizejob +subjects: + - kind: ServiceAccount + name: sa-sts-deleter + namespace: syn-appcat-control diff --git a/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_redis_metrics_servicemonitor.yaml b/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_redis_metrics_servicemonitor.yaml deleted file mode 100644 index f74431b83..000000000 --- a/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_redis_metrics_servicemonitor.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - annotations: {} - labels: - name: allow-helm-monitoring-resources - name: allow-helm-monitoring-resources -rules: - - apiGroups: - - monitoring.coreos.com - resources: - - servicemonitors - verbs: - - '*' ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - annotations: {} - labels: - name: system-serviceaccount-syn-crossplane-provider-helm - name: system:serviceaccount:syn-crossplane:provider-helm -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: allow-helm-monitoring-resources -subjects: - - kind: ServiceAccount - name: provider-helm - namespace: syn-crossplane diff --git a/component/tests/golden/vshn/appcat/appcat/20_role_vshn_mariadb_restore.yaml b/component/tests/golden/vshn/appcat/appcat/20_role_vshn_mariadb_restore.yaml new file mode 100644 index 000000000..fa13213fa --- 
/dev/null +++ b/component/tests/golden/vshn/appcat/appcat/20_role_vshn_mariadb_restore.yaml @@ -0,0 +1,80 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + annotations: {} + labels: + name: crossplane-appcat-job-mariadb-restorejob + name: crossplane:appcat:job:mariadb:restorejob +rules: + - apiGroups: + - vshn.appcat.vshn.io + resources: + - '*' + verbs: + - get + - apiGroups: + - k8up.io + resources: + - snapshots + verbs: + - get + - apiGroups: + - '' + resources: + - secrets + verbs: + - get + - create + - delete + - apiGroups: + - apps + resources: + - statefulsets/scale + verbs: + - update + - patch + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - apiGroups: + - batch + resources: + - jobs + verbs: + - get + - apiGroups: + - '' + resources: + - events + verbs: + - get + - create + - patch +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: {} + labels: + name: mariadbrestoreserviceaccount + name: mariadbrestoreserviceaccount + namespace: syn-appcat-control +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + annotations: {} + labels: + name: appcat-job-mariadb-restorejob + name: appcat:job:mariadb:restorejob +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: crossplane:appcat:job:mariadb:restorejob +subjects: + - kind: ServiceAccount + name: mariadbrestoreserviceaccount + namespace: syn-appcat-control diff --git a/component/tests/golden/vshn/appcat/appcat/20_xrd_vshn_mariadb.yaml b/component/tests/golden/vshn/appcat/appcat/20_xrd_vshn_mariadb.yaml new file mode 100644 index 000000000..a80bccf10 --- /dev/null +++ b/component/tests/golden/vshn/appcat/appcat/20_xrd_vshn_mariadb.yaml @@ -0,0 +1,527 @@ +apiVersion: apiextensions.crossplane.io/v1 +kind: CompositeResourceDefinition +metadata: + annotations: + argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true + argocd.argoproj.io/sync-wave: '10' + labels: + name: 
xvshnmariadbs.vshn.appcat.vshn.io + name: xvshnmariadbs.vshn.appcat.vshn.io +spec: + claimNames: + kind: VSHNMariaDB + plural: vshnmariadbs + connectionSecretKeys: + - ca.crt + - MARIADB_HOST + - MARIADB_PORT + - MARIADB_USERNAME + - MARIADB_PASSWORD + - MARIADB_URL + defaultCompositionRef: + name: vshnmariadb.vshn.appcat.vshn.io + group: vshn.appcat.vshn.io + names: + kind: XVSHNMariaDB + plural: xvshnmariadbs + versions: + - name: v1 + referenceable: true + schema: + openAPIV3Schema: + description: VSHNMariaDB is the API for creating MariaDB instances. + properties: + spec: + description: Spec defines the desired state of a VSHNMariaDB. + properties: + parameters: + default: {} + description: Parameters are the configurable fields of a VSHNMariaDB. + properties: + backup: + default: {} + description: Backup contains settings to control how the instance + should get backed up. + properties: + retention: + description: K8upRetentionPolicy describes the retention + configuration for a K8up backup. + properties: + keepDaily: + default: 6 + type: integer + keepHourly: + type: integer + keepLast: + type: integer + keepMonthly: + type: integer + keepWeekly: + type: integer + keepYearly: + type: integer + type: object + schedule: + pattern: ^(\*|([0-9]|1[0-9]|2[0-9]|3[0-9]|4[0-9]|5[0-9])|\*\/([0-9]|1[0-9]|2[0-9]|3[0-9]|4[0-9]|5[0-9])) + (\*|([0-9]|1[0-9]|2[0-3])|\*\/([0-9]|1[0-9]|2[0-3])) (\*|([1-9]|1[0-9]|2[0-9]|3[0-1])|\*\/([1-9]|1[0-9]|2[0-9]|3[0-1])) + (\*|([1-9]|1[0-2])|\*\/([1-9]|1[0-2])) (\*|([0-6])|\*\/([0-6]))$ + type: string + type: object + maintenance: + description: Maintenance contains settings to control the maintenance + of an instance. + properties: + dayOfWeek: + description: DayOfWeek specifies at which weekday the maintenance + is held place. 
Allowed values are [monday, tuesday, wednesday, + thursday, friday, saturday, sunday] + enum: + - monday + - tuesday + - wednesday + - thursday + - friday + - saturday + - sunday + type: string + timeOfDay: + description: 'TimeOfDay for installing updates in UTC. Format: + "hh:mm:ss".' + pattern: ^([0-1]?[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])$ + type: string + type: object + restore: + description: Restore contains settings to control the restore + of an instance. + properties: + backupName: + description: BackupName is the name of the specific backup + you want to restore. + type: string + claimName: + description: ClaimName specifies the name of the instance + you want to restore from. The claim has to be in the same + namespace as this new instance. + type: string + type: object + scheduling: + description: Scheduling contains settings to control the scheduling + of an instance. + properties: + nodeSelector: + additionalProperties: + type: string + description: "NodeSelector is a selector which must match\ + \ a node\u2019s labels for the pod to be scheduled on\ + \ that node" + type: object + type: object + service: + default: {} + description: Service contains MariaDB DBaaS specific properties + properties: + mariadbSettings: + description: MariadbSettings contains additional MariaDB + settings. + type: string + serviceLevel: + default: besteffort + description: ServiceLevel defines the service level of this + service. Either Best Effort or Guaranteed Availability + is allowed. + enum: + - besteffort + - guaranteed + type: string + version: + default: '11.2' + description: Version contains supported version of MariaDB. + Multiple versions are supported. The latest version "11.2" + is the default version. + enum: + - '10.4' + - '10.5' + - '10.6' + - '10.9' + - '10.10' + - '10.11' + - '11.0' + - '11.1' + - '11.2' + type: string + type: object + size: + default: {} + description: Size contains settings to control the sizing of + a service. 
+ properties: + cpu: + description: CPU defines the amount of Kubernetes CPUs for + an instance. + type: string + disk: + description: Disk defines the amount of disk space for an + instance. + type: string + memory: + description: Memory defines the amount of memory in units + of bytes for an instance. + type: string + plan: + default: standard-1 + description: | + Plan is the name of the resource plan that defines the compute resources. + + The following plans are available: + + standard-1 - CPU: 250m; Memory: 1Gi; Disk: 16Gi + + standard-2 - CPU: 500m; Memory: 2Gi; Disk: 16Gi + + standard-4 - CPU: 1; Memory: 4Gi; Disk: 16Gi + + standard-512m - CPU: 125m; Memory: 512Mi; Disk: 16Gi + + standard-8 - CPU: 2; Memory: 8Gi; Disk: 16Gi + enum: + - standard-1 + - standard-2 + - standard-4 + - standard-512m + - standard-8 + type: string + requests: + description: Requests defines CPU and memory requests for + an instance + properties: + cpu: + description: CPU defines the amount of Kubernetes CPUs + for an instance. + type: string + memory: + description: Memory defines the amount of memory in + units of bytes for an instance. + type: string + type: object + type: object + storageClass: + description: StorageClass configures the storageClass to use + for the PVC used by MariaDB. + type: string + tls: + default: {} + description: TLS contains settings to control tls traffic of + a service. + properties: + authClients: + default: true + description: TLSAuthClients enables client authentication + requirement + type: boolean + enabled: + default: true + description: TLSEnabled enables TLS traffic for the service + type: boolean + type: object + type: object + type: object + status: + description: Status reflects the observed state of a VSHNMariaDB. + properties: + caCertificateConditions: + items: + properties: + lastTransitionTime: + description: LastTransitionTime is the last time the condition + transitioned from one status to another. 
+ format: date-time + type: string + message: + description: Message is a human-readable message indicating + details about the transition. + maxLength: 32768 + type: string + observedGeneration: + description: ObservedGeneration represents the .metadata.generation + that the condition was set based upon. For instance, if + .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration + is 9, the condition is out of date with respect to the current + state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: Reason contains a programmatic identifier indicating + the reason for the condition's last transition. + maxLength: 1024 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: Status of the condition, one of True, False, + Unknown. + enum: + - 'True' + - 'False' + - Unknown + type: string + type: + description: Type of condition. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + type: object + type: array + clientCertificateConditions: + items: + properties: + lastTransitionTime: + description: LastTransitionTime is the last time the condition + transitioned from one status to another. + format: date-time + type: string + message: + description: Message is a human-readable message indicating + details about the transition. + maxLength: 32768 + type: string + observedGeneration: + description: ObservedGeneration represents the .metadata.generation + that the condition was set based upon. For instance, if + .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration + is 9, the condition is out of date with respect to the current + state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: Reason contains a programmatic identifier indicating + the reason for the condition's last transition. 
+ maxLength: 1024 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: Status of the condition, one of True, False, + Unknown. + enum: + - 'True' + - 'False' + - Unknown + type: string + type: + description: Type of condition. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + type: object + type: array + instanceNamespace: + description: InstanceNamespace contains the name of the namespace + where the instance resides + type: string + localCAConditions: + items: + properties: + lastTransitionTime: + description: LastTransitionTime is the last time the condition + transitioned from one status to another. + format: date-time + type: string + message: + description: Message is a human-readable message indicating + details about the transition. + maxLength: 32768 + type: string + observedGeneration: + description: ObservedGeneration represents the .metadata.generation + that the condition was set based upon. For instance, if + .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration + is 9, the condition is out of date with respect to the current + state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: Reason contains a programmatic identifier indicating + the reason for the condition's last transition. + maxLength: 1024 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: Status of the condition, one of True, False, + Unknown. + enum: + - 'True' + - 'False' + - Unknown + type: string + type: + description: Type of condition. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + type: object + type: array + namespaceConditions: + items: + properties: + lastTransitionTime: + description: LastTransitionTime is the last time the condition + transitioned from one status to another. + format: date-time + type: string + message: + description: Message is a human-readable message indicating + details about the transition. + maxLength: 32768 + type: string + observedGeneration: + description: ObservedGeneration represents the .metadata.generation + that the condition was set based upon. For instance, if + .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration + is 9, the condition is out of date with respect to the current + state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: Reason contains a programmatic identifier indicating + the reason for the condition's last transition. + maxLength: 1024 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: Status of the condition, one of True, False, + Unknown. + enum: + - 'True' + - 'False' + - Unknown + type: string + type: + description: Type of condition. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + type: object + type: array + schedules: + description: Schedules keeps track of random generated schedules, + is overwriten by schedules set in the service's spec. + properties: + backup: + description: Backup keeps track of the backup schedule. + type: string + maintenance: + description: Maintenance keeps track of the maintenance schedule. + properties: + dayOfWeek: + description: DayOfWeek specifies at which weekday the maintenance + is held place. 
Allowed values are [monday, tuesday, wednesday, + thursday, friday, saturday, sunday] + enum: + - monday + - tuesday + - wednesday + - thursday + - friday + - saturday + - sunday + type: string + timeOfDay: + description: 'TimeOfDay for installing updates in UTC. Format: + "hh:mm:ss".' + pattern: ^([0-1]?[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])$ + type: string + type: object + type: object + selfSignedIssuerConditions: + items: + properties: + lastTransitionTime: + description: LastTransitionTime is the last time the condition + transitioned from one status to another. + format: date-time + type: string + message: + description: Message is a human-readable message indicating + details about the transition. + maxLength: 32768 + type: string + observedGeneration: + description: ObservedGeneration represents the .metadata.generation + that the condition was set based upon. For instance, if + .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration + is 9, the condition is out of date with respect to the current + state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: Reason contains a programmatic identifier indicating + the reason for the condition's last transition. + maxLength: 1024 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: Status of the condition, one of True, False, + Unknown. + enum: + - 'True' + - 'False' + - Unknown + type: string + type: + description: Type of condition. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + type: object + type: array + serverCertificateConditions: + items: + properties: + lastTransitionTime: + description: LastTransitionTime is the last time the condition + transitioned from one status to another. 
+ format: date-time + type: string + message: + description: Message is a human-readable message indicating + details about the transition. + maxLength: 32768 + type: string + observedGeneration: + description: ObservedGeneration represents the .metadata.generation + that the condition was set based upon. For instance, if + .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration + is 9, the condition is out of date with respect to the current + state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: Reason contains a programmatic identifier indicating + the reason for the condition's last transition. + maxLength: 1024 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: Status of the condition, one of True, False, + Unknown. + enum: + - 'True' + - 'False' + - Unknown + type: string + type: + description: Type of condition. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + type: object + type: array + type: object + required: + - spec + type: object + served: true diff --git a/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_mariadb.yaml b/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_mariadb.yaml new file mode 100644 index 000000000..249b496b3 --- /dev/null +++ b/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_mariadb.yaml @@ -0,0 +1,53 @@ +apiVersion: apiextensions.crossplane.io/v1 +kind: Composition +metadata: + annotations: + argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true + argocd.argoproj.io/sync-wave: '10' + metadata.appcat.vshn.io/description: mariadb instances by VSHN + metadata.appcat.vshn.io/displayname: mariadb by VSHN + metadata.appcat.vshn.io/end-user-docs-url: https://vs.hn/vshn-mariadb + metadata.appcat.vshn.io/flavor: standalone + metadata.appcat.vshn.io/plans: 
'{"standard-1":{"size":{"cpu":"250m","disk":"16Gi","enabled":true,"memory":"1Gi"}},"standard-2":{"size":{"cpu":"500m","disk":"16Gi","enabled":true,"memory":"2Gi"}},"standard-4":{"size":{"cpu":"1","disk":"16Gi","enabled":true,"memory":"4Gi"}},"standard-512m":{"size":{"cpu":"125m","disk":"16Gi","enabled":true,"memory":"512Mi"}},"standard-8":{"size":{"cpu":"2","disk":"16Gi","enabled":true,"memory":"8Gi"}}}' + metadata.appcat.vshn.io/product-description: https://products.docs.vshn.ch/products/appcat/mariadb.html + metadata.appcat.vshn.io/zone: rma1 + labels: + metadata.appcat.vshn.io/offered: 'true' + metadata.appcat.vshn.io/serviceID: vshn-mariadb + name: vshnmariadb.vshn.appcat.vshn.io + name: vshnmariadb.vshn.appcat.vshn.io +spec: + compositeTypeRef: + apiVersion: vshn.appcat.vshn.io/v1 + kind: XVSHNMariaDB + mode: Pipeline + pipeline: + - functionRef: + name: function-appcat + input: + apiVersion: v1 + data: + bucketRegion: lpg + chartRepository: https://charts.bitnami.com/bitnami + chartVersion: 10.1.3 + controlNamespace: syn-appcat-control + imageTag: v4.44.2 + isOpenshift: 'false' + maintenanceSA: helm-based-service-maintenance + plans: '{"standard-1": {"size": {"cpu": "250m", "disk": "16Gi", "enabled": + true, "memory": "1Gi"}}, "standard-2": {"size": {"cpu": "500m", "disk": + "16Gi", "enabled": true, "memory": "2Gi"}}, "standard-4": {"size": {"cpu": + "1", "disk": "16Gi", "enabled": true, "memory": "4Gi"}}, "standard-512m": + {"size": {"cpu": "125m", "disk": "16Gi", "enabled": true, "memory": "512Mi"}}, + "standard-8": {"size": {"cpu": "2", "disk": "16Gi", "enabled": true, "memory": + "8Gi"}}}' + quotasEnabled: 'false' + restoreSA: mariadbrestoreserviceaccount + serviceName: mariadb + kind: ConfigMap + metadata: + labels: + name: xfn-config + name: xfn-config + step: mariadb-func + writeConnectionSecretsToNamespace: syn-crossplane diff --git a/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_postgres.yaml 
b/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_postgres.yaml index 2385da64b..7bfaf29da 100644 --- a/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_postgres.yaml +++ b/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_postgres.yaml @@ -1142,7 +1142,7 @@ spec: emailAlertingSmtpHost: smtp.eu.mailgun.org:465 emailAlertingSmtpUsername: myuser@example.com externalDatabaseConnectionsEnabled: 'true' - imageTag: v4.44.1 + imageTag: v4.44.2 quotasEnabled: 'false' serviceName: postgresql sgNamespace: stackgres diff --git a/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_postgresrestore.yaml b/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_postgresrestore.yaml index 101abc3df..269b8375a 100644 --- a/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_postgresrestore.yaml +++ b/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_postgresrestore.yaml @@ -1244,7 +1244,7 @@ spec: emailAlertingSmtpHost: smtp.eu.mailgun.org:465 emailAlertingSmtpUsername: myuser@example.com externalDatabaseConnectionsEnabled: 'true' - imageTag: v4.44.1 + imageTag: v4.44.2 quotasEnabled: 'false' serviceName: postgresql sgNamespace: stackgres diff --git a/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_redis.yaml b/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_redis.yaml index bf3af8e4c..79fe17262 100644 --- a/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_redis.yaml +++ b/component/tests/golden/vshn/appcat/appcat/21_composition_vshn_redis.yaml @@ -985,7 +985,7 @@ spec: emailAlertingSmtpFromAddress: myuser@example.com emailAlertingSmtpHost: smtp.eu.mailgun.org:465 emailAlertingSmtpUsername: myuser@example.com - imageTag: v4.44.1 + imageTag: v4.44.2 maintenanceSA: helm-based-service-maintenance quotasEnabled: 'false' restoreSA: redisrestoreserviceaccount diff --git a/component/tests/golden/vshn/appcat/appcat/22_prom_rule_sla_mariadb.yaml 
b/component/tests/golden/vshn/appcat/appcat/22_prom_rule_sla_mariadb.yaml new file mode 100644 index 000000000..b64bb8f6b --- /dev/null +++ b/component/tests/golden/vshn/appcat/appcat/22_prom_rule_sla_mariadb.yaml @@ -0,0 +1,16 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + annotations: {} + labels: + name: vshn-vshnmariadb-sla + name: vshn-vshnmariadb-sla + namespace: appcat-slos +spec: + groups: + - name: appcat-vshnmariadb-sla-target + rules: + - expr: vector(99.25) + labels: + service: VSHNMariaDB + record: sla:objective:ratio diff --git a/component/tests/golden/vshn/appcat/appcat/apiserver/10_apiserver_envs.yaml b/component/tests/golden/vshn/appcat/appcat/apiserver/10_apiserver_envs.yaml index 43bb5cb4f..1248e81f3 100644 --- a/component/tests/golden/vshn/appcat/appcat/apiserver/10_apiserver_envs.yaml +++ b/component/tests/golden/vshn/appcat/appcat/apiserver/10_apiserver_envs.yaml @@ -1,6 +1,7 @@ apiVersion: v1 data: APPCAT_HANDLER_ENABLED: 'true' + VSHN_MARIADB_BACKUP_HANDLER_ENABLED: 'false' VSHN_POSTGRES_BACKUP_HANDLER_ENABLED: 'true' VSHN_REDIS_BACKUP_HANDLER_ENABLED: 'true' kind: ConfigMap diff --git a/component/tests/golden/vshn/appcat/appcat/controllers/appcat/30_deployment.yaml b/component/tests/golden/vshn/appcat/appcat/controllers/appcat/30_deployment.yaml index f8debf6d0..f2092ce3f 100644 --- a/component/tests/golden/vshn/appcat/appcat/controllers/appcat/30_deployment.yaml +++ b/component/tests/golden/vshn/appcat/appcat/controllers/appcat/30_deployment.yaml @@ -23,7 +23,7 @@ spec: env: - name: PLANS_NAMESPACE value: syn-appcat - image: ghcr.io/vshn/appcat:v4.44.1 + image: ghcr.io/vshn/appcat:v4.44.2 livenessProbe: httpGet: path: /healthz diff --git a/component/tests/golden/vshn/appcat/appcat/sla_reporter/01_cronjob.yaml b/component/tests/golden/vshn/appcat/appcat/sla_reporter/01_cronjob.yaml index 5acd970f4..15336c74f 100644 --- a/component/tests/golden/vshn/appcat/appcat/sla_reporter/01_cronjob.yaml +++ 
b/component/tests/golden/vshn/appcat/appcat/sla_reporter/01_cronjob.yaml @@ -30,7 +30,7 @@ spec: envFrom: - secretRef: name: appcat-sla-reports-creds - image: ghcr.io/vshn/appcat:v4.44.1 + image: ghcr.io/vshn/appcat:v4.44.2 name: sla-reporter resources: limits: diff --git a/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_mariadb.yaml b/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_mariadb.yaml new file mode 100644 index 000000000..6bcddfdda --- /dev/null +++ b/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_mariadb.yaml @@ -0,0 +1,206 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + annotations: {} + labels: + name: vshn-mariadb + name: vshn-mariadb + namespace: appcat-slos +spec: + groups: + - name: sloth-slo-sli-recordings-appcat-vshn-mariadb-uptime + rules: + - expr: | + ((sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="false"}[5m]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[5m])) by (service, namespace, name, organization, sla) or vector(0)) - scalar(appcat:cluster:maintenance) > 0 or sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[5m])) by (service, namespace, name, organization, sla)) + / + (sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="false"}[5m])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + sloth_window: 5m + record: slo:sli_error:ratio_rate5m + - expr: | + ((sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="false"}[30m]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[30m])) by (service, namespace, name, organization, sla) or vector(0)) - scalar(appcat:cluster:maintenance) > 0 or sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[30m])) by (service, namespace, name, organization, sla)) + / + 
(sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="false"}[30m])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + sloth_window: 30m + record: slo:sli_error:ratio_rate30m + - expr: | + ((sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="false"}[1h]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[1h])) by (service, namespace, name, organization, sla) or vector(0)) - scalar(appcat:cluster:maintenance) > 0 or sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[1h])) by (service, namespace, name, organization, sla)) + / + (sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="false"}[1h])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + sloth_window: 1h + record: slo:sli_error:ratio_rate1h + - expr: | + ((sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="false"}[2h]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[2h])) by (service, namespace, name, organization, sla) or vector(0)) - scalar(appcat:cluster:maintenance) > 0 or sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[2h])) by (service, namespace, name, organization, sla)) + / + (sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="false"}[2h])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + sloth_window: 2h + record: slo:sli_error:ratio_rate2h + - expr: | + ((sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="false"}[6h]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[6h])) by (service, namespace, name, organization, sla) or vector(0)) - scalar(appcat:cluster:maintenance) > 0 or 
sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[6h])) by (service, namespace, name, organization, sla)) + / + (sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="false"}[6h])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + sloth_window: 6h + record: slo:sli_error:ratio_rate6h + - expr: | + ((sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="false"}[1d]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[1d])) by (service, namespace, name, organization, sla) or vector(0)) - scalar(appcat:cluster:maintenance) > 0 or sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[1d])) by (service, namespace, name, organization, sla)) + / + (sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="false"}[1d])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + sloth_window: 1d + record: slo:sli_error:ratio_rate1d + - expr: | + ((sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="false"}[3d]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[3d])) by (service, namespace, name, organization, sla) or vector(0)) - scalar(appcat:cluster:maintenance) > 0 or sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[3d])) by (service, namespace, name, organization, sla)) + / + (sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="false"}[3d])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + sloth_window: 3d + record: slo:sli_error:ratio_rate3d + - expr: | + sum_over_time(slo:sli_error:ratio_rate5m{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"}[30d]) + / ignoring 
(sloth_window) + count_over_time(slo:sli_error:ratio_rate5m{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"}[30d]) + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + sloth_window: 30d + record: slo:sli_error:ratio_rate30d + - name: sloth-slo-meta-recordings-appcat-vshn-mariadb-uptime + rules: + - expr: vector(0.9990000000000001) + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + record: slo:objective:ratio + - expr: vector(1-0.9990000000000001) + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + record: slo:error_budget:ratio + - expr: vector(30) + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + record: slo:time_period:days + - expr: | + slo:sli_error:ratio_rate5m{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"} + / on(sloth_id, sloth_slo, sloth_service) group_left + slo:error_budget:ratio{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"} + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + record: slo:current_burn_rate:ratio + - expr: | + slo:sli_error:ratio_rate30d{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"} + / on(sloth_id, sloth_slo, sloth_service) group_left + slo:error_budget:ratio{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"} + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + record: slo:period_burn_rate:ratio + - expr: 1 - slo:period_burn_rate:ratio{sloth_id="appcat-vshn-mariadb-uptime", + sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"} + labels: + sloth_id: 
appcat-vshn-mariadb-uptime + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + record: slo:period_error_budget_remaining:ratio + - expr: vector(1) + labels: + sloth_id: appcat-vshn-mariadb-uptime + sloth_mode: cli-gen-prom + sloth_objective: '99.9' + sloth_service: appcat-vshn-mariadb + sloth_slo: uptime + sloth_spec: prometheus/v1 + sloth_version: v0.11.0 + record: sloth_slo_info + - name: sloth-slo-alerts-appcat-vshn-mariadb-uptime + rules: + - alert: SLO_AppCat_VSHNMariaDBUptime + annotations: + for: 6m + summary: Probes to MariaDB by VSHN instance fail + title: (page) {{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error + budget burn rate is too fast. + expr: | + ( + max(slo:sli_error:ratio_rate5m{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"} > (14.4 * 0.0009999999999999432)) without (sloth_window) + and + max(slo:sli_error:ratio_rate1h{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"} > (14.4 * 0.0009999999999999432)) without (sloth_window) + ) + or + ( + max(slo:sli_error:ratio_rate30m{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"} > (6 * 0.0009999999999999432)) without (sloth_window) + and + max(slo:sli_error:ratio_rate6h{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"} > (6 * 0.0009999999999999432)) without (sloth_window) + ) + for: 6m + labels: + OnCall: '{{ if eq $labels.sla "guaranteed" }}true{{ else }}false{{ end + }}' + service: VSHNMariaDB + severity: critical + slo: 'true' + sloth_severity: page + syn: 'true' + syn_component: appcat + syn_team: schedar + - alert: SLO_AppCat_VSHNMariaDBUptime + annotations: + runbook_url: https://hub.syn.tools/appcat/runbooks/vshn-mariadb.html#uptime + summary: Probes to MariaDB by VSHN instance fail + title: (ticket) {{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error + budget burn rate is too fast. 
+ expr: | + ( + max(slo:sli_error:ratio_rate2h{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"} > (3 * 0.0009999999999999432)) without (sloth_window) + and + max(slo:sli_error:ratio_rate1d{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"} > (3 * 0.0009999999999999432)) without (sloth_window) + ) + or + ( + max(slo:sli_error:ratio_rate6h{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"} > (1 * 0.0009999999999999432)) without (sloth_window) + and + max(slo:sli_error:ratio_rate3d{sloth_id="appcat-vshn-mariadb-uptime", sloth_service="appcat-vshn-mariadb", sloth_slo="uptime"} > (1 * 0.0009999999999999432)) without (sloth_window) + ) + labels: + OnCall: '{{ if eq $labels.sla "guaranteed" }}true{{ else }}false{{ end + }}' + service: VSHNMariaDB + severity: warning + slo: 'true' + sloth_severity: ticket + syn: 'true' + syn_component: appcat + syn_team: schedar diff --git a/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_mariadb_ha.yaml b/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_mariadb_ha.yaml new file mode 100644 index 000000000..f9dfae467 --- /dev/null +++ b/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_mariadb_ha.yaml @@ -0,0 +1,206 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + annotations: {} + labels: + name: vshn-mariadb-ha + name: vshn-mariadb-ha + namespace: appcat-slos +spec: + groups: + - name: sloth-slo-sli-recordings-appcat-vshn-mariadb-ha-uptime + rules: + - expr: | + (sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="true"}[5m]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[5m])) by (service, namespace, name, organization, sla) or sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[5m])) by (service, namespace, name, organization, sla)) + / + 
(sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="true"}[5m])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + sloth_window: 5m + record: slo:sli_error:ratio_rate5m + - expr: | + (sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="true"}[30m]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[30m])) by (service, namespace, name, organization, sla) or sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[30m])) by (service, namespace, name, organization, sla)) + / + (sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="true"}[30m])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + sloth_window: 30m + record: slo:sli_error:ratio_rate30m + - expr: | + (sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="true"}[1h]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[1h])) by (service, namespace, name, organization, sla) or sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[1h])) by (service, namespace, name, organization, sla)) + / + (sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="true"}[1h])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + sloth_window: 1h + record: slo:sli_error:ratio_rate1h + - expr: | + (sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="true"}[2h]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[2h])) by (service, namespace, name, organization, sla) or sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[2h])) by (service, namespace, name, organization, sla)) + / + 
(sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="true"}[2h])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + sloth_window: 2h + record: slo:sli_error:ratio_rate2h + - expr: | + (sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="true"}[6h]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[6h])) by (service, namespace, name, organization, sla) or sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[6h])) by (service, namespace, name, organization, sla)) + / + (sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="true"}[6h])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + sloth_window: 6h + record: slo:sli_error:ratio_rate6h + - expr: | + (sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="true"}[1d]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[1d])) by (service, namespace, name, organization, sla) or sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[1d])) by (service, namespace, name, organization, sla)) + / + (sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="true"}[1d])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + sloth_window: 1d + record: slo:sli_error:ratio_rate1d + - expr: | + (sum(rate(appcat_probes_seconds_count{reason!="success", service="VSHNMariaDB", ha="true"}[3d]) or 0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[3d])) by (service, namespace, name, organization, sla) or sum(0*rate(appcat_probes_seconds_count{service="VSHNMariaDB"}[3d])) by (service, namespace, name, organization, sla)) + / + 
(sum(rate(appcat_probes_seconds_count{service="VSHNMariaDB", ha="true"}[3d])) by (service, namespace, name, organization, sla)) + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + sloth_window: 3d + record: slo:sli_error:ratio_rate3d + - expr: | + sum_over_time(slo:sli_error:ratio_rate5m{sloth_id="appcat-vshn-mariadb-ha-uptime", sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"}[30d]) + / ignoring (sloth_window) + count_over_time(slo:sli_error:ratio_rate5m{sloth_id="appcat-vshn-mariadb-ha-uptime", sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"}[30d]) + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + sloth_window: 30d + record: slo:sli_error:ratio_rate30d + - name: sloth-slo-meta-recordings-appcat-vshn-mariadb-ha-uptime + rules: + - expr: vector(0.9990000000000001) + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + record: slo:objective:ratio + - expr: vector(1-0.9990000000000001) + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + record: slo:error_budget:ratio + - expr: vector(30) + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + record: slo:time_period:days + - expr: | + slo:sli_error:ratio_rate5m{sloth_id="appcat-vshn-mariadb-ha-uptime", sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"} + / on(sloth_id, sloth_slo, sloth_service) group_left + slo:error_budget:ratio{sloth_id="appcat-vshn-mariadb-ha-uptime", sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"} + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + record: slo:current_burn_rate:ratio + - expr: | + slo:sli_error:ratio_rate30d{sloth_id="appcat-vshn-mariadb-ha-uptime", 
sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"} + / on(sloth_id, sloth_slo, sloth_service) group_left + slo:error_budget:ratio{sloth_id="appcat-vshn-mariadb-ha-uptime", sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"} + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + record: slo:period_burn_rate:ratio + - expr: 1 - slo:period_burn_rate:ratio{sloth_id="appcat-vshn-mariadb-ha-uptime", + sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"} + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + record: slo:period_error_budget_remaining:ratio + - expr: vector(1) + labels: + sloth_id: appcat-vshn-mariadb-ha-uptime + sloth_mode: cli-gen-prom + sloth_objective: '99.9' + sloth_service: appcat-vshn-mariadb-ha + sloth_slo: uptime + sloth_spec: prometheus/v1 + sloth_version: v0.11.0 + record: sloth_slo_info + - name: sloth-slo-alerts-appcat-vshn-mariadb-ha-uptime + rules: + - alert: SLO_AppCat_HAVSHNMariaDBUptime + annotations: + for: 6m + summary: Probes to HA MariaDB by VSHN instance fail + title: (page) {{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error + budget burn rate is too fast. 
+ expr: | + ( + max(slo:sli_error:ratio_rate5m{sloth_id="appcat-vshn-mariadb-ha-uptime", sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"} > (14.4 * 0.0009999999999999432)) without (sloth_window) + and + max(slo:sli_error:ratio_rate1h{sloth_id="appcat-vshn-mariadb-ha-uptime", sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"} > (14.4 * 0.0009999999999999432)) without (sloth_window) + ) + or + ( + max(slo:sli_error:ratio_rate30m{sloth_id="appcat-vshn-mariadb-ha-uptime", sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"} > (6 * 0.0009999999999999432)) without (sloth_window) + and + max(slo:sli_error:ratio_rate6h{sloth_id="appcat-vshn-mariadb-ha-uptime", sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"} > (6 * 0.0009999999999999432)) without (sloth_window) + ) + for: 6m + labels: + OnCall: '{{ if eq $labels.sla "guaranteed" }}true{{ else }}false{{ end + }}' + service: VSHNMariaDB + severity: critical + slo: 'true' + sloth_severity: page + syn: 'true' + syn_component: appcat + syn_team: schedar + - alert: SLO_AppCat_HAVSHNMariaDBUptime + annotations: + runbook_url: https://hub.syn.tools/appcat/runbooks/vshn-mariadb.html#uptime + summary: Probes to HA MariaDB by VSHN instance fail + title: (ticket) {{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error + budget burn rate is too fast. 
+ expr: | + ( + max(slo:sli_error:ratio_rate2h{sloth_id="appcat-vshn-mariadb-ha-uptime", sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"} > (3 * 0.0009999999999999432)) without (sloth_window) + and + max(slo:sli_error:ratio_rate1d{sloth_id="appcat-vshn-mariadb-ha-uptime", sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"} > (3 * 0.0009999999999999432)) without (sloth_window) + ) + or + ( + max(slo:sli_error:ratio_rate6h{sloth_id="appcat-vshn-mariadb-ha-uptime", sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"} > (1 * 0.0009999999999999432)) without (sloth_window) + and + max(slo:sli_error:ratio_rate3d{sloth_id="appcat-vshn-mariadb-ha-uptime", sloth_service="appcat-vshn-mariadb-ha", sloth_slo="uptime"} > (1 * 0.0009999999999999432)) without (sloth_window) + ) + labels: + OnCall: '{{ if eq $labels.sla "guaranteed" }}true{{ else }}false{{ end + }}' + service: VSHNMariaDB + severity: warning + slo: 'true' + sloth_severity: ticket + syn: 'true' + syn_component: appcat + syn_team: schedar diff --git a/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_postgresql_ha.yaml b/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_postgresql_ha.yaml index 03aa89fad..8bd5b5021 100644 --- a/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_postgresql_ha.yaml +++ b/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_postgresql_ha.yaml @@ -147,7 +147,7 @@ spec: record: sloth_slo_info - name: sloth-slo-alerts-appcat-vshn-postgresql-ha-uptime rules: - - alert: SLO_AppCat_HAVSHNPosgtreSQLUptime + - alert: SLO_AppCat_HAVSHNPostgreSQLUptime annotations: for: 6m summary: Probes to HA PostgreSQL by VSHN instance fail @@ -176,9 +176,9 @@ spec: syn: 'true' syn_component: appcat syn_team: schedar - - alert: SLO_AppCat_HAVSHNPosgtreSQLUptime + - alert: SLO_AppCat_HAVSHNPostgreSQLUptime annotations: - runbook_url: https://hub.syn.tools/appcat/runbooks/vshn-postgresql-ha.html#uptime + runbook_url: 
https://hub.syn.tools/appcat/runbooks/vshn-postgresql.html#uptime summary: Probes to HA PostgreSQL by VSHN instance fail title: (ticket) {{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error budget burn rate is too fast. diff --git a/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_redis_ha.yaml b/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_redis_ha.yaml index ef68dad83..a65a862a7 100644 --- a/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_redis_ha.yaml +++ b/component/tests/golden/vshn/appcat/appcat/sli_exporter/90_slo_vshn_redis_ha.yaml @@ -178,7 +178,7 @@ spec: syn_team: schedar - alert: SLO_AppCat_HAVSHNRedisUptime annotations: - runbook_url: https://hub.syn.tools/appcat/runbooks/vshn-redis-ha.html#uptime + runbook_url: https://hub.syn.tools/appcat/runbooks/vshn-redis.html#uptime summary: Probes to HA Redis by VSHN instance fail title: (ticket) {{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error budget burn rate is too fast. 
diff --git a/component/tests/golden/vshn/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml b/component/tests/golden/vshn/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml index 30d1febb5..02a0fe63e 100644 --- a/component/tests/golden/vshn/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml +++ b/component/tests/golden/vshn/appcat/appcat/sli_exporter/apps_v1_deployment_appcat-sliexporter-controller-manager.yaml @@ -28,7 +28,7 @@ spec: value: "true" - name: APPCAT_SLI_VSHNREDIS value: "true" - image: ghcr.io/vshn/appcat:v4.44.1 + image: ghcr.io/vshn/appcat:v4.44.2 livenessProbe: httpGet: path: /healthz diff --git a/component/tests/vshn.yml b/component/tests/vshn.yml index bb111bfd3..0100fe12f 100644 --- a/component/tests/vshn.yml +++ b/component/tests/vshn.yml @@ -55,6 +55,11 @@ parameters: emailAlerting: enabled: true smtpPassword: "whatever" + services: + mariadb: + enabled: true + # grpcEndpoint: host.docker.internal:9443 + # proxyFunction: true postgres: # grpcEndpoint: host.docker.internal:9443 # proxyFunction: true diff --git a/docs/modules/ROOT/pages/runbooks/vshn-mariadb.adoc b/docs/modules/ROOT/pages/runbooks/vshn-mariadb.adoc new file mode 100644 index 000000000..64d29bc8d --- /dev/null +++ b/docs/modules/ROOT/pages/runbooks/vshn-mariadb.adoc @@ -0,0 +1,118 @@ += MariaDB by VSHN + + +[[uptime]] +== Uptime + +[IMPORTANT] +We don't yet have a lot of operational experience with this service. +If you received this alert, please add any insights you gained to improve this runbook. + +=== icon:glasses[] Overview + +The SLI measures the uptime of each MariaDB by VSHN instance. +This SLI is measured by a prober that executes a SQL query every second. + +If this SLI results in an alert, it means that a significant number of SQL queries failed and that we risk missing the SLO. 
+ +There are two types of alerts that fire if we expect to miss the configured objective. + +* A ticket alert means that the error rate is slightly higher than the objective. +If we don't intervene at some point after receiving this alert, we can expect to miss the objective. +However, no immediate, urgent action is necessary. +A ticket alert should have a label `severity: warning`. +* A page alert means that the error rate is significantly higher than the objective. +Immediate action is necessary to not miss the objective. + +=== icon:bug[] Steps for debugging + +Failed probes can have a multitude of reasons, but in general there are two different classes of issues. +Either the instance itself is failing or provisioning or updating the instance failed. + +In any case, you should first figure out where the affected instance runs. +The alert will provide you with three labels: `cluster_id`, `namespace`, and `name`. + +Connect to the Kubernetes cluster with the provided `cluster_id` and get the affected claim. + +[source,shell] +---- +export NAMESPACE={{ namespace }} +export NAME={{ name }} + +export COMPOSITE=$(kubectl -n $NAMESPACE get vshnmariadb $NAME -o jsonpath="{.spec.resourceRef.name}") +kubectl -n $NAMESPACE get vshnmariadb $NAME +---- + +If the claim is not `SYNCED` this might indicate that there is an issue with provisioning. +If it is synced there is most likely an issue with the instance itself, so you can skip to the next subsection. + +==== Debugging Provisioning + +To figure out what went wrong with provisioning it usually helps to take a closer look at the composite. + +[source,shell] +---- +kubectl --as cluster-admin describe xvshnmariadb $COMPOSITE +---- + +If there are sync issues there usually are events that point to the root cause of the issue. + +Further it can help to look at the `Object` resources that are created for this instance.
+ +[source,shell] +---- +kubectl --as cluster-admin get object -l crossplane.io/composite=$COMPOSITE +---- + +If any of them are not synced, describing them should point you in the right direction. + +Finally, it might also be helpful to look at the logs of various crossplane components in namespace `syn-crossplane`. + +==== Debugging MariaDB Instance + +If the instance is synced, but still not running, we'll need to look at the database pods themselves. + +First see if the pods are running. + +[source,shell] +---- +export INSTANCE_NAMESPACE=$(kubectl -n $NAMESPACE get vshnmariadb $NAME -o jsonpath="{.status.instanceNamespace}") +kubectl --as cluster-admin -n $INSTANCE_NAMESPACE get pod +---- + +If they're running, check the logs for any obvious error messages. + +[source,shell] +---- +kubectl --as cluster-admin -n $INSTANCE_NAMESPACE logs sts/${COMPOSITE} +---- + +If you can't see any pods at all, then there might be an issue with the statefulset (e.g. faulty configuration). +Check the corresponding statefulset and events. + +[source,shell] +---- +kubectl --as cluster-admin -n $INSTANCE_NAMESPACE describe sts mariadb +---- + +=== icon:wrench[] Tune + +If this alert isn't actionable, noisy, or was raised too late you may want to tune the SLO. + +You have the option to tune the SLO through the component parameters. +You can modify the objective, disable the page or ticket alert, or completely disable the SLO. + +The example below will set the objective to 99.25% and disable the page alert.
+ +[source,yaml] +---- +appcat: + slos: + vshn: + mariadb: + uptime: + objective: 99.25 + alerting: + page_alert: + enabled: false +---- diff --git a/docs/modules/ROOT/pages/runbooks/vshn-minio.adoc b/docs/modules/ROOT/pages/runbooks/vshn-minio.adoc new file mode 100644 index 000000000..148fc70c6 --- /dev/null +++ b/docs/modules/ROOT/pages/runbooks/vshn-minio.adoc @@ -0,0 +1,120 @@ += MinIO by VSHN + + +[[uptime]] +== Uptime + +[IMPORTANT] +We don't yet have a lot of operational experience with this service. +If you received this alert, please add any insights you gained to improve this runbook. + +=== icon:glasses[] Overview + +The SLI measures the uptime of each MinIO by VSHN instance. +This SLI is measured by a prober that puts a simple file onto a test bucket every second. + +If this SLI results in an alert, it means that a significant number of write operations failed and that we risk missing the SLO. + +There are two types of alerts that fire if we expect to miss the configured objective. + +* A ticket alert means that the error rate is slightly higher than the objective. +If we don't intervene at some point after receiving this alert, we can expect to miss the objective. +However, no immediate, urgent action is necessary. +A ticket alert should have a label `severity: warning`. +* A page alert means that the error rate is significantly higher than the objective. +Immediate action is necessary to not miss the objective. + +=== icon:bug[] Steps for debugging + +Failed probes can have a multitude of reasons, but in general there are two different classes of issues. +Either the instance itself is failing or provisioning or updating the instance failed. + +In any case, you should first figure out where the affected instance runs. +The alert will provide you with three labels: `cluster_id`, `namespace`, and `name`. + +Connect to the Kubernetes cluster with the provided `cluster_id` and get the affected claim.
+ +[source,shell] +---- +export NAMESPACE={{ namespace }} +export NAME={{ name }} + +export COMPOSITE=$(kubectl -n $NAMESPACE get vshnminio $NAME -o jsonpath="{.spec.resourceRef.name}") +kubectl -n $NAMESPACE get vshnminio $NAME +---- + +If the claim is not `SYNCED` this might indicate that there is an issue with provisioning. +If it is synced there is most likely an issue with the instance itself, so you can skip to the next subsection. + +==== Debugging Provisioning + +To figure out what went wrong with provisioning it usually helps to take a closer look at the composite. + +[source,shell] +---- +kubectl --as cluster-admin describe xvshnminio $COMPOSITE +---- + +If there are sync issues there usually are events that point to the root cause of the issue. + +Further it can help to look at the `Object` resources that are created for this instance. + +[source,shell] +---- +kubectl --as cluster-admin get object -l crossplane.io/composite=$COMPOSITE +---- + +If any of them are not synced, describing them should point you in the right direction. + +Finally, it might also be helpful to look at the logs of various crossplane components in namespace `syn-crossplane`. + +==== Debugging MinIO Instance + +If the instance is synced, but still not running, we'll need to look at the minio pods themselves. + +First see if the pods are running. + +[source,shell] +---- +export INSTANCE_NAMESPACE=$(kubectl -n $NAMESPACE get vshnminio $NAME -o jsonpath="{.status.instanceNamespace}") +kubectl --as cluster-admin -n $INSTANCE_NAMESPACE get pod +---- + +If they're running, check the logs for any obvious error messages. + +[source,shell] +---- +kubectl --as cluster-admin -n $INSTANCE_NAMESPACE logs deployment/$COMPOSITE # for standalone setups +kubectl --as cluster-admin -n $INSTANCE_NAMESPACE logs sts/$COMPOSITE # for distributed setups +---- + +If you can't see any pods at all, then there might be an issue with the deployment or statefulset (e.g. faulty configuration).
+Check the corresponding statefulset and events. + +[source,shell] +---- +kubectl --as cluster-admin -n $INSTANCE_NAMESPACE describe deployment $COMPOSITE # for standalone setups +kubectl --as cluster-admin -n $INSTANCE_NAMESPACE describe sts $COMPOSITE # for distributed setups +---- + +=== icon:wrench[] Tune + +If this alert isn't actionable, noisy, or was raised too late you may want to tune the SLO. + +You have the option to tune the SLO through the component parameters. +You can modify the objective, disable the page or ticket alert, or completely disable the SLO. + +The example below will set the objective to 99.25% and disable the page alert. + +[source,yaml] +---- +appcat: + slos: + vshn: + minio: + uptime: + objective: 99.25 + alerting: + page_alert: + enabled: false +---- diff --git a/docs/modules/ROOT/pages/runbooks/vshn-redis.adoc b/docs/modules/ROOT/pages/runbooks/vshn-redis.adoc new file mode 100644 index 000000000..745f21aea --- /dev/null +++ b/docs/modules/ROOT/pages/runbooks/vshn-redis.adoc @@ -0,0 +1,118 @@ += Redis by VSHN + + +[[uptime]] +== Uptime + +[IMPORTANT] +We don't yet have a lot of operational experience with this service. +If you received this alert, please add any insights you gained to improve this runbook. + +=== icon:glasses[] Overview + +The SLI measures the uptime of each Redis by VSHN instance. +This SLI is measured by a prober that executes a `Ping` every second. + +If this SLI results in an alert, it means that a significant number of `pings` failed and that we risk missing the SLO. + +There are two types of alerts that fire if we expect to miss the configured objective. + +* A ticket alert means that the error rate is slightly higher than the objective. +If we don't intervene at some point after receiving this alert, we can expect to miss the objective. +However, no immediate, urgent action is necessary. +A ticket alert should have a label `severity: warning`.
+* A page alert means that the error rate is significantly higher than the objective. +Immediate action is necessary to not miss the objective. + +=== icon:bug[] Steps for debugging + +Failed probes can have a multitude of reasons, but in general there are two different kinds of issue clases. +Either the instance itself is failing or provisioning or updating the instance failed. + +In any case, you should first figure out where the effected instance runs. +The alert will provide you with three labels: `cluster_id`, `namespace`, and `name`. + +Connect to the Kubernetes cluster with the provided `cluster_id` and get the effected claim. + +[source,shell] +---- +export NAMESPACE={{ namespace }} +export NAME={{ name }} + +export COMPOSITE=$(kubectl -n $NAMESPACE get vshnredis $NAME -o jsonpath="{.spec.resourceRef.name}") +kubectl -n $NAMESPACE get vshnredis $NAME +---- + +If the claim is not `SYNCED` this might indicate that there is an issue with provisioning. +If it is synced there is most likely an issue with the instance itself, you can skip to the next subsection. + +==== Debugging Provisioning + +To figure out what went wrong with provisioning it usually helps to take a closer look at the composite. + +[source,shell] +---- +kubectl --as cluster-admin describe xvshnredis $COMPOSITE +---- + +If there are sync issues there usually are events that point to the root cause of the issue. + +Further it can help to look at the `Object` resources that are created for this instance. + +[source,shell] +---- +kubectl --as cluster-admin get object -l crossplane.io/composite=$COMPOSITE +---- + +If any of them are not synced, describing them should point you in the right direction. + +Finally, it might also be helpful to look at the logs of various crossplane components in namespace `syn-crossplane`. + +==== Debugging Redis Instance + +If the instance is synced, but still not running, we'll need to look at the redis pods themselves. + +First see if the pods are running. 
+ +[source,shell] +---- +export INSTANCE_NAMESPACE=$(kubectl -n $NAMESPACE get vshnredis $NAME -o jsonpath="{.status.instanceNamespace}") +kubectl --as cluster-admin -n $INSTANCE_NAMESPACE get pod +---- + +If they're running, check the logs if there are any obvious error messages + +[source,shell] +---- +kubectl --as cluster-admin -n $INSTANCE_NAMESPACE logs sts/redis-master +---- + +If you can't see any pods at all, then there might be an issue with the statefulset (eg. faulty configuration). +Check the corresponding statefulset and events. + +[source,shell] +---- +kubectl --as cluster-admin -n $INSTANCE_NAMESPACE describe sts redis-master +---- + +=== icon:wrench[] Tune + +If this alert isn't actionable, noisy, or was raised too late you may want to tune the SLO. + +You have the option to tune the SLO through the component parameters. +You can modify the objective, disable the page or ticket alert, or completely disable the SLO. + +The example below will set the objective to 99.25% and disable the page alert.
+ +[source,yaml] +---- +appcat: + slos: + vshn: + redis: + uptime: + objective: 99.25 + alerting: + page_alert: + enabled: false +---- diff --git a/package/main.yaml b/package/main.yaml index 5a637c18e..cfcb58dd4 100644 --- a/package/main.yaml +++ b/package/main.yaml @@ -7,7 +7,7 @@ parameters: image: registry: ghcr.io repository: vshn/appcat - tag: v4.44.1 + tag: v4.44.2 components: appcat: url: https://github.com/vshn/component-appcat.git From 96f77e4556948728ba0e59fa0829956e6c54ebed Mon Sep 17 00:00:00 2001 From: Nicolas Bigler Date: Thu, 30 Nov 2023 14:28:52 +0100 Subject: [PATCH 2/3] Refactor sts-resizer Signed-off-by: Nicolas Bigler --- component/class/defaults.yml | 3 +- .../statefuleset-resize-controller.jsonnet | 39 +++++++++- .../component/vshn_appcat_services.jsonnet | 38 ---------- component/component/vshn_redis.jsonnet | 37 ---------- .../appcat/20_rbac_vshn_mariadb_resize.yaml | 72 ------------------- .../sts-resizer/20_rbac_resize_job.yaml} | 28 ++++---- 6 files changed, 54 insertions(+), 163 deletions(-) delete mode 100644 component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_mariadb_resize.yaml rename component/tests/golden/vshn/appcat/appcat/{20_rbac_vshn_redis_resize.yaml => controllers/sts-resizer/20_rbac_resize_job.yaml} (86%) diff --git a/component/class/defaults.yml b/component/class/defaults.yml index 3893a1b5f..602a32cdf 100644 --- a/component/class/defaults.yml +++ b/component/class/defaults.yml @@ -303,6 +303,8 @@ parameters: smtpFromAddress: myuser@example.com secretNamespace: syn-appcat secretName: mailgun-smtp-credentials + stsResizer: + enabled: true postgres: # bucket_region: 'lpg' || 'ch-gva-2' bucket_region: "" @@ -509,7 +511,6 @@ parameters: enabled: false restoreSA: mariadbrestoreserviceaccount restoreRoleRules: ${appcat:defaultRestoreRoleRules} - hasSts: true openshiftTemplate: serviceName: mariadbbyvshn description: "The open source relational database management system (DBMS) that is a compatible drop-in replacement for the 
widely used MySQL database technology" diff --git a/component/component/statefuleset-resize-controller.jsonnet b/component/component/statefuleset-resize-controller.jsonnet index 190664582..b558deafa 100644 --- a/component/component/statefuleset-resize-controller.jsonnet +++ b/component/component/statefuleset-resize-controller.jsonnet @@ -66,10 +66,47 @@ local deployment = loadManifest('config/manager/manager.yaml') + { }, }; +local resizeServiceAccount = kube.ServiceAccount('sa-sts-deleter') + { + metadata+: { + namespace: params.services.controlNamespace, + }, +}; + +local resizeClusterRole = kube.ClusterRole('appcat:job:resizejob') { + rules: [ + { + apiGroups: [ 'helm.crossplane.io' ], + resources: [ 'releases' ], + verbs: [ 'get', 'list', 'watch', 'update', 'patch', 'create', 'delete' ], + }, + { + apiGroups: [ 'apps' ], + resources: [ 'statefulsets' ], + verbs: [ 'delete', 'get', 'watch', 'list', 'update', 'patch' ], + }, + { + apiGroups: [ 'helm.crossplane.io' ], + resources: [ 'releases' ], + verbs: [ 'update', 'get' ], + }, + { + apiGroups: [ '' ], + resources: [ 'pods' ], + verbs: [ 'list', 'get', 'update', 'delete' ], + }, + ], +}; + +local resizeClusterRoleBinding = kube.ClusterRoleBinding('appcat:job:resizejob') + { + roleRef_: resizeClusterRole, + subjects_: [ resizeServiceAccount ], +}; + // Curently we only need this for redis. 
-if params.services.vshn.enabled && params.services.vshn.redis.enabled then { +if params.services.vshn.enabled && (params.services.vshn.redis.enabled || params.services.vshn.services.mariadb.enabled) then { 'controllers/sts-resizer/10_role': role, 'controllers/sts-resizer/10_sa': sa, 'controllers/sts-resizer/10_binding': binding, 'controllers/sts-resizer/10_deployment': deployment, + 'controllers/sts-resizer/20_rbac_resize_job': [ resizeServiceAccount, resizeClusterRole, resizeClusterRoleBinding ], } diff --git a/component/component/vshn_appcat_services.jsonnet b/component/component/vshn_appcat_services.jsonnet index 6f23fa252..dd94ddcd0 100644 --- a/component/component/vshn_appcat_services.jsonnet +++ b/component/component/vshn_appcat_services.jsonnet @@ -26,7 +26,6 @@ local getServiceNamePlural(serviceName) = local vshn_appcat_service(name) = local isOpenshift = std.startsWith(inv.parameters.facts.distribution, 'openshift'); - local securityContext = !isOpenshift; local serviceParams = params.services.vshn.services[name]; local connectionSecretKeys = serviceParams.connectionSecretKeys; @@ -51,42 +50,6 @@ local vshn_appcat_service(name) = subjects_: [ restoreServiceAccount ], }; - local resizeServiceAccount = kube.ServiceAccount('sa-sts-deleter') + { - metadata+: { - namespace: params.services.controlNamespace, - }, - }; - - local resizeClusterRole = kube.ClusterRole('appcat:job:' + name + ':resizejob') { - rules: [ - { - apiGroups: [ 'helm.crossplane.io' ], - resources: [ 'releases' ], - verbs: [ 'get', 'list', 'watch', 'update', 'patch', 'create', 'delete' ], - }, - { - apiGroups: [ 'apps' ], - resources: [ 'statefulsets' ], - verbs: [ 'delete', 'get', 'watch', 'list', 'update', 'patch' ], - }, - { - apiGroups: [ 'helm.crossplane.io' ], - resources: [ 'releases' ], - verbs: [ 'update', 'get' ], - }, - { - apiGroups: [ '' ], - resources: [ 'pods' ], - verbs: [ 'list', 'get', 'update', 'delete' ], - }, - ], - }; - - local resizeClusterRoleBinding = 
kube.ClusterRoleBinding('appcat:job:' + name + ':resizejob') + { - roleRef_: resizeClusterRole, - subjects_: [ resizeServiceAccount ], - }; - local xrd = xrds.XRDFromCRD( 'x' + serviceNamePlural + '.vshn.appcat.vshn.io', xrds.LoadCRD('vshn.appcat.vshn.io_' + serviceNamePlural + '.yaml', params.images.appcat.tag), @@ -201,7 +164,6 @@ local vshn_appcat_service(name) = ['20_rbac_vshn_%s' % name]: xrds.CompositeClusterRoles(xrd), ['21_composition_vshn_%s' % name]: composition, ['20_role_vshn_%s_restore' % name]: [ restoreRole, restoreServiceAccount, restoreClusterRoleBinding ], - [if serviceParams.hasSts then '20_rbac_vshn_%s_resize' % name]: [ resizeClusterRole, resizeServiceAccount, resizeClusterRoleBinding ], ['20_plans_vshn_%s' % name]: plansCM, ['22_prom_rule_sla_%s' % name]: promRuleSLA, [if isOpenshift then '21_openshift_template_%s_vshn' % name]: osTemplate, diff --git a/component/component/vshn_redis.jsonnet b/component/component/vshn_redis.jsonnet index 52d52567b..71a7bc94f 100644 --- a/component/component/vshn_redis.jsonnet +++ b/component/component/vshn_redis.jsonnet @@ -101,42 +101,6 @@ local restoreClusterRoleBinding = kube.ClusterRoleBinding('appcat:job:redis:rest subjects_: [ restoreServiceAccount ], }; -local resizeServiceAccount = kube.ServiceAccount('sa-sts-deleter') + { - metadata+: { - namespace: params.services.controlNamespace, - }, -}; - -local resizeClusterRole = kube.ClusterRole('appcat:job:redis:resizejob') { - rules: [ - { - apiGroups: [ 'helm.crossplane.io' ], - resources: [ 'releases' ], - verbs: [ 'get', 'list', 'watch', 'update', 'patch', 'create', 'delete' ], - }, - { - apiGroups: [ 'apps' ], - resources: [ 'statefulsets' ], - verbs: [ 'delete', 'get', 'watch', 'list', 'update', 'patch' ], - }, - { - apiGroups: [ 'helm.crossplane.io' ], - resources: [ 'releases' ], - verbs: [ 'update', 'get' ], - }, - { - apiGroups: [ '' ], - resources: [ 'pods' ], - verbs: [ 'list', 'get', 'update', 'delete' ], - }, - ], -}; - -local 
resizeClusterRoleBinding = kube.ClusterRoleBinding('appcat:job:redis:resizejob') + { - roleRef_: resizeClusterRole, - subjects_: [ resizeServiceAccount ], -}; - local composition = local namespace = comp.KubeObject('v1', 'Namespace') + { @@ -736,7 +700,6 @@ if params.services.vshn.enabled && redisParams.enabled then { '20_xrd_vshn_redis': xrd, '20_rbac_vshn_redis': xrds.CompositeClusterRoles(xrd), '20_role_vshn_redisrestore': [ restoreRole, restoreServiceAccount, restoreClusterRoleBinding ], - '20_rbac_vshn_redis_resize': [ resizeClusterRole, resizeServiceAccount, resizeClusterRoleBinding ], '20_plans_vshn_redis': plansCM, '21_composition_vshn_redis': composition, '22_prom_rule_sla_redis': promRuleRedisSLA, diff --git a/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_mariadb_resize.yaml b/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_mariadb_resize.yaml deleted file mode 100644 index a70600288..000000000 --- a/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_mariadb_resize.yaml +++ /dev/null @@ -1,72 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - annotations: {} - labels: - name: appcat-job-mariadb-resizejob - name: appcat:job:mariadb:resizejob -rules: - - apiGroups: - - helm.crossplane.io - resources: - - releases - verbs: - - get - - list - - watch - - update - - patch - - create - - delete - - apiGroups: - - apps - resources: - - statefulsets - verbs: - - delete - - get - - watch - - list - - update - - patch - - apiGroups: - - helm.crossplane.io - resources: - - releases - verbs: - - update - - get - - apiGroups: - - '' - resources: - - pods - verbs: - - list - - get - - update - - delete ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - annotations: {} - labels: - name: sa-sts-deleter - name: sa-sts-deleter - namespace: syn-appcat-control ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - annotations: {} - labels: - name: appcat-job-mariadb-resizejob - name: 
appcat:job:mariadb:resizejob -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: appcat:job:mariadb:resizejob -subjects: - - kind: ServiceAccount - name: sa-sts-deleter - namespace: syn-appcat-control diff --git a/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_redis_resize.yaml b/component/tests/golden/vshn/appcat/appcat/controllers/sts-resizer/20_rbac_resize_job.yaml similarity index 86% rename from component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_redis_resize.yaml rename to component/tests/golden/vshn/appcat/appcat/controllers/sts-resizer/20_rbac_resize_job.yaml index 42bc4d6d2..601f8711e 100644 --- a/component/tests/golden/vshn/appcat/appcat/20_rbac_vshn_redis_resize.yaml +++ b/component/tests/golden/vshn/appcat/appcat/controllers/sts-resizer/20_rbac_resize_job.yaml @@ -1,10 +1,19 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: {} + labels: + name: sa-sts-deleter + name: sa-sts-deleter + namespace: syn-appcat-control +--- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: annotations: {} labels: - name: appcat-job-redis-resizejob - name: appcat:job:redis:resizejob + name: appcat-job-resizejob + name: appcat:job:resizejob rules: - apiGroups: - helm.crossplane.io @@ -46,26 +55,17 @@ rules: - update - delete --- -apiVersion: v1 -kind: ServiceAccount -metadata: - annotations: {} - labels: - name: sa-sts-deleter - name: sa-sts-deleter - namespace: syn-appcat-control ---- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: annotations: {} labels: - name: appcat-job-redis-resizejob - name: appcat:job:redis:resizejob + name: appcat-job-resizejob + name: appcat:job:resizejob roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: appcat:job:redis:resizejob + name: appcat:job:resizejob subjects: - kind: ServiceAccount name: sa-sts-deleter From 4416f34583b150729ed4ac098dbd7aca4269cdb1 Mon Sep 17 00:00:00 2001 From: Nicolas Bigler Date: Thu, 30 Nov 2023 15:38:24 
+0100 Subject: [PATCH 3/3] Refactor docs Signed-off-by: Nicolas Bigler --- .../runbooks/vshn-helm-debugging-partial.adoc | 45 +++++++++++++++++++ .../ROOT/pages/runbooks/vshn-mariadb.adoc | 45 +------------------ .../ROOT/pages/runbooks/vshn-minio.adoc | 45 +------------------ .../ROOT/pages/runbooks/vshn-redis.adoc | 45 +------------------ docs/modules/ROOT/partials/nav.adoc | 6 +++ 5 files changed, 57 insertions(+), 129 deletions(-) create mode 100644 docs/modules/ROOT/pages/runbooks/vshn-helm-debugging-partial.adoc diff --git a/docs/modules/ROOT/pages/runbooks/vshn-helm-debugging-partial.adoc b/docs/modules/ROOT/pages/runbooks/vshn-helm-debugging-partial.adoc new file mode 100644 index 000000000..5d0f81c60 --- /dev/null +++ b/docs/modules/ROOT/pages/runbooks/vshn-helm-debugging-partial.adoc @@ -0,0 +1,45 @@ +=== icon:bug[] Steps for debugging + +Failed probes can have a multitude of reasons, but in general there are two different kinds of issue classes. +Either the instance itself is failing or provisioning or updating the instance failed. + +In any case, you should first figure out where the affected instance runs. +The alert will provide you with three labels: `cluster_id`, `namespace`, and `name`. +Connect to the Kubernetes cluster with the provided `cluster_id` and get the affected claim. + +[source,shell,subs="attributes"] +---- +export NAMESPACE={{ namespace }} +export NAME={{ name }} + +export COMPOSITE=$(kubectl -n $NAMESPACE get {service} $NAME -o jsonpath="{.spec.resourceRef.name}") +kubectl -n $NAMESPACE get {service} $NAME +---- + +If the claim is not `SYNCED` this might indicate that there is an issue with provisioning. +If it is synced there is most likely an issue with the instance itself, you can skip to the next subsection. + +==== Debugging Provisioning + +To figure out what went wrong with provisioning it usually helps to take a closer look at the composite.
+ +[source,shell,subs="attributes"] +---- +kubectl --as cluster-admin describe x{service} $COMPOSITE +---- + +If there are sync issues there usually are events that point to the root cause of the issue. + +Furthermore, it can help to look at the `Object` resources that are created for this instance or the `releases.helm.crossplane.io` object associated with the instance. + +[source,shell] +---- +kubectl --as cluster-admin get object -l crossplane.io/composite=$COMPOSITE +kubectl --as cluster-admin get object $OBJECT_NAME +kubectl --as cluster-admin get releases.helm.crossplane.io -l crossplane.io/composite=$COMPOSITE +kubectl --as cluster-admin describe releases.helm.crossplane.io -l crossplane.io/composite=$COMPOSITE +---- + +If any of them are not synced, describing them should point you in the right direction. + +Finally, it might also be helpful to look at the logs of various crossplane components in namespace `syn-crossplane`. diff --git a/docs/modules/ROOT/pages/runbooks/vshn-mariadb.adoc b/docs/modules/ROOT/pages/runbooks/vshn-mariadb.adoc index 64d29bc8d..02c57d80e 100644 --- a/docs/modules/ROOT/pages/runbooks/vshn-mariadb.adoc +++ b/docs/modules/ROOT/pages/runbooks/vshn-mariadb.adoc @@ -24,49 +24,8 @@ A ticket alert should have a label `severity: warning`. * A page alert means that the error rate is significantly higher than the objective. Immediate action is necessary to not miss the objective. -=== icon:bug[] Steps for debugging - -Failed probes can have a multitude of reasons, but in general there are two different kinds of issue clases. -Either the instance itself is failing or provisioning or updating the instance failed. - -In any case, you should first figure out where the effected instance runs. -The alert will provide you with three labels: `cluster_id`, `namespace`, and `name`. - -Connect to the Kubernetes cluster with the provided `cluster_id` and get the effected claim. 
- -[source,shell] ----- -export NAMESPACE={{ namespace }} -export NAME={{ name }} - -export COMPOSITE=$(kubectl -n $NAMESPACE get vshnmariadb $NAME -o jsonpath="{.spec.resourceRef.name}") -kubectl -n $NAMESPACE get vshnmariadb $NAME ----- - -If the claim is not `SYNCED` this might indicate that there is an issue with provisioning. -If it is synced there is most likely an issue with the instance itself, you can skip to the next subsection. - -==== Debugging Provisioning - -To figure out what went wrong with provisioning it usually helps to take a closer look at the composite. - -[source,shell] ----- -kubectl --as cluster-admin describe xvshnmariadb $COMPOSITE ----- - -If there are sync issues there usually are events that point to the root cause of the issue. - -Further it can help to look at the `Object` resources that are created for this instance. - -[source,shell] ----- -kubectl --as cluster-admin get object -l crossplane.io/composite=$COMPOSITE ----- - -If any of them are not synced, describing them should point you in the right direction. - -Finally, it might also be helpful to look at the logs of various crossplane components in namespace `syn-crossplane`. +:service: vshnmariadb +include::vshn-helm-debugging-partial.adoc[] ==== Debugging MariaDB Instance diff --git a/docs/modules/ROOT/pages/runbooks/vshn-minio.adoc b/docs/modules/ROOT/pages/runbooks/vshn-minio.adoc index 148fc70c6..bdfe67b8c 100644 --- a/docs/modules/ROOT/pages/runbooks/vshn-minio.adoc +++ b/docs/modules/ROOT/pages/runbooks/vshn-minio.adoc @@ -24,49 +24,8 @@ A ticket alert should have a label `severity: warning`. * A page alert means that the error rate is significantly higher than the objective. Immediate action is necessary to not miss the objective. -=== icon:bug[] Steps for debugging - -Failed probes can have a multitude of reasons, but in general there are two different kinds of issue clases. -Either the instance itself is failing or provisioning or updating the instance failed. 
- -In any case, you should first figure out where the effected instance runs. -The alert will provide you with three labels: `cluster_id`, `namespace`, and `name`. - -Connect to the Kubernetes cluster with the provided `cluster_id` and get the effected claim. - -[source,shell] ----- -export NAMESPACE={{ namespace }} -export NAME={{ name }} - -export COMPOSITE=$(kubectl -n $NAMESPACE get vshnminio $NAME -o jsonpath="{.spec.resourceRef.name}") -kubectl -n $NAMESPACE get vshnminio $NAME ----- - -If the claim is not `SYNCED` this might indicate that there is an issue with provisioning. -If it is synced there is most likely an issue with the instance itself, you can skip to the next subsection. - -==== Debugging Provisioning - -To figure out what went wrong with provisioning it usually helps to take a closer look at the composite. - -[source,shell] ----- -kubectl --as cluster-admin describe xvshnminio $COMPOSITE ----- - -If there are sync issues there usually are events that point to the root cause of the issue. - -Further it can help to look at the `Object` resources that are created for this instance. - -[source,shell] ----- -kubectl --as cluster-admin get object -l crossplane.io/composite=$COMPOSITE ----- - -If any of them are not synced, describing them should point you in the right direction. - -Finally, it might also be helpful to look at the logs of various crossplane components in namespace `syn-crossplane`. +:service: vshnminio +include::vshn-helm-debugging-partial.adoc[] ==== Debugging MinIO Instance diff --git a/docs/modules/ROOT/pages/runbooks/vshn-redis.adoc b/docs/modules/ROOT/pages/runbooks/vshn-redis.adoc index 745f21aea..b934b3a26 100644 --- a/docs/modules/ROOT/pages/runbooks/vshn-redis.adoc +++ b/docs/modules/ROOT/pages/runbooks/vshn-redis.adoc @@ -24,49 +24,8 @@ A ticket alert should have a label `severity: warning`. * A page alert means that the error rate is significantly higher than the objective. 
Immediate action is necessary to not miss the objective. -=== icon:bug[] Steps for debugging - -Failed probes can have a multitude of reasons, but in general there are two different kinds of issue clases. -Either the instance itself is failing or provisioning or updating the instance failed. - -In any case, you should first figure out where the effected instance runs. -The alert will provide you with three labels: `cluster_id`, `namespace`, and `name`. - -Connect to the Kubernetes cluster with the provided `cluster_id` and get the effected claim. - -[source,shell] ----- -export NAMESPACE={{ namespace }} -export NAME={{ name }} - -export COMPOSITE=$(kubectl -n $NAMESPACE get vshnredis $NAME -o jsonpath="{.spec.resourceRef.name}") -kubectl -n $NAMESPACE get vshnredis $NAME ----- - -If the claim is not `SYNCED` this might indicate that there is an issue with provisioning. -If it is synced there is most likely an issue with the instance itself, you can skip to the next subsection. - -==== Debugging Provisioning - -To figure out what went wrong with provisioning it usually helps to take a closer look at the composite. - -[source,shell] ----- -kubectl --as cluster-admin describe xvshnredis $COMPOSITE ----- - -If there are sync issues there usually are events that point to the root cause of the issue. - -Further it can help to look at the `Object` resources that are created for this instance. - -[source,shell] ----- -kubectl --as cluster-admin get object -l crossplane.io/composite=$COMPOSITE ----- - -If any of them are not synced, describing them should point you in the right direction. - -Finally, it might also be helpful to look at the logs of various crossplane components in namespace `syn-crossplane`. 
+:service: vshnredis +include::vshn-helm-debugging-partial.adoc[] ==== Debugging Redis Instance diff --git a/docs/modules/ROOT/partials/nav.adoc b/docs/modules/ROOT/partials/nav.adoc index bfd915bcf..74f84e9e1 100644 --- a/docs/modules/ROOT/partials/nav.adoc +++ b/docs/modules/ROOT/partials/nav.adoc @@ -34,3 +34,9 @@ .Runbooks * xref:runbooks/vshn-postgresql.adoc[] +* xref:runbooks/vshn-redis.adoc[] +* xref:runbooks/vshn-minio.adoc[] +* xref:runbooks/vshn-mariadb.adoc[] +* xref:runbooks/vshn-generic.adoc[] + +