Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding metrics for Runtime CR state #403

Merged
merged 34 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
42dba06
adding metrics for Runtime CR state
koala7659 Sep 30, 2024
975bc8c
Initialise Runtime reconciler with metrics
koala7659 Sep 30, 2024
75df414
Runtime metrics development
koala7659 Oct 1, 2024
4de26b8
Runtime metrics development - cleanup gauge with state modification
koala7659 Oct 2, 2024
33bd8d6
Merge remote-tracking branch 'origin/main' into runtime-ctrl-metrics
koala7659 Oct 2, 2024
c51681e
fix integration tests
koala7659 Oct 2, 2024
bc098c4
fix linter errors
koala7659 Oct 2, 2024
8b21244
Fixing runtime metric storing bug
koala7659 Oct 2, 2024
f3390d3
moving auditlog configuration to RFCfg struct
koala7659 Oct 3, 2024
965c94c
Runtime controller metrics - Counter of unexpected stops of state mac…
koala7659 Oct 3, 2024
6232507
add machine image default name for converted config
koala7659 Oct 3, 2024
89a1ac3
fix panic in unit tests after adding metrics
koala7659 Oct 4, 2024
3df67c9
adjust unit test to use mocked metric object
koala7659 Oct 8, 2024
955496f
reenable fsm machine stops metrics
koala7659 Oct 8, 2024
27abbd8
update metric logging functions
koala7659 Oct 8, 2024
3c5d545
use defaultReconcileDuration option for unit tests
koala7659 Oct 11, 2024
4567e3b
Merge remote-tracking branch 'origin/main' into runtime-ctrl-metrics
koala7659 Oct 11, 2024
0e2f8bc
temporary disable counter
koala7659 Oct 11, 2024
f5e27d7
reenable saving runtime status for metrics
koala7659 Oct 14, 2024
ca56ed3
reenable saving runtime status for metrics and stop counter
koala7659 Oct 14, 2024
79001b4
adding last message from processing of Runtime CR
koala7659 Oct 14, 2024
b14b5d7
adding last message from processing of Runtime CR to metrics
koala7659 Oct 14, 2024
dff566e
Merge remote-tracking branch 'origin/main' into runtime-ctrl-metrics
koala7659 Oct 16, 2024
8b6a3a7
Fixing compilation errors after merge with main
koala7659 Oct 16, 2024
622d78a
Merge remote-tracking branch 'origin/main' into runtime-ctrl-metrics
koala7659 Oct 16, 2024
bf56348
code review remarks - remove unnecessary argument from updateStatusAn…
koala7659 Oct 16, 2024
46712da
Merge remote-tracking branch 'origin/main' into runtime-ctrl-metrics
koala7659 Oct 16, 2024
b05accc
Merge remote-tracking branch 'origin/main' into runtime-ctrl-metrics
koala7659 Oct 18, 2024
12b9bec
Merge remote-tracking branch 'origin/main' into runtime-ctrl-metrics
koala7659 Oct 18, 2024
1821b2d
apply suggestions from code review
koala7659 Oct 18, 2024
9b76e0c
Merge remote-tracking branch 'origin/main' into runtime-ctrl-metrics
koala7659 Oct 21, 2024
ebbaab9
fix failing unit tests
koala7659 Oct 21, 2024
2b54111
Apply linter fix
koala7659 Oct 21, 2024
fb3aa4d
Merge remote-tracking branch 'origin/main' into runtime-ctrl-metrics
koala7659 Oct 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 25 additions & 24 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,9 @@ func init() {

const defaultMinimalRotationTimeRatio = 0.6
const defaultExpirationTime = 24 * time.Hour
const defaultRuntimeReconcilerEnabled = true
const defaultGardenerRequestTimeout = 60 * time.Second
const defaultControlPlaneRequeueDuration = 10 * time.Second
const defaultGardenerRequeueDuration = 15 * time.Second

func main() {
var metricsAddr string
Expand All @@ -77,7 +78,6 @@ func main() {
var minimalRotationTimeRatio float64
var expirationTime time.Duration
var gardenerRequestTimeout time.Duration
var enableRuntimeReconciler bool
var converterConfigFilepath string
var shootSpecDumpEnabled bool
var auditLogMandatory bool
Expand All @@ -92,7 +92,6 @@ func main() {
flag.Float64Var(&minimalRotationTimeRatio, "minimal-rotation-time", defaultMinimalRotationTimeRatio, "The ratio determines what is the minimal time that needs to pass to rotate certificate.")
flag.DurationVar(&expirationTime, "kubeconfig-expiration-time", defaultExpirationTime, "Dynamic kubeconfig expiration time")
flag.DurationVar(&gardenerRequestTimeout, "gardener-request-timeout", defaultGardenerRequestTimeout, "Timeout duration for requests to Gardener")
flag.BoolVar(&enableRuntimeReconciler, "runtime-reconciler-enabled", defaultRuntimeReconcilerEnabled, "Feature flag for all runtime reconciler functionalities")
flag.StringVar(&converterConfigFilepath, "converter-config-filepath", "/converter-config/converter_config.json", "A file path to the gardener shoot converter configuration.")
flag.BoolVar(&shootSpecDumpEnabled, "shoot-spec-dump-enabled", false, "Feature flag to allow persisting specs of created shoots")
flag.BoolVar(&auditLogMandatory, "audit-log-mandatory", true, "Feature flag to enable strict mode for audit log configuration")
Expand Down Expand Up @@ -165,46 +164,48 @@ func main() {
getReader := func() (io.Reader, error) {
return os.Open(converterConfigFilepath)
}
var converterConfig config.Config
if err = converterConfig.Load(getReader); err != nil {
var config config.Config
if err = config.Load(getReader); err != nil {
setupLog.Error(err, "unable to load converter configuration")
os.Exit(1)
}

validate := validator.New(validator.WithRequiredStructEnabled())
if err = validate.Struct(converterConfig); err != nil {
if err = validate.Struct(config); err != nil {
setupLog.Error(err, "invalid converter configuration")
os.Exit(1)
}

err = validateAuditLogConfiguration(converterConfig.ConverterConfig.AuditLog.TenantConfigPath)
err = validateAuditLogConfiguration(config.ConverterConfig.AuditLog.TenantConfigPath)
if err != nil {
setupLog.Error(err, "invalid Audit Log configuration")
os.Exit(1)
}

cfg := fsm.RCCfg{
Finalizer: infrastructuremanagerv1.Finalizer,
ShootNamesapace: gardenerNamespace,
Config: converterConfig,
AuditLogMandatory: auditLogMandatory,
GardenerRequeueDuration: defaultGardenerRequeueDuration,
ControlPlaneRequeueDuration: defaultControlPlaneRequeueDuration,
Finalizer: infrastructuremanagerv1.Finalizer,
ShootNamesapace: gardenerNamespace,
Config: config,
AuditLogMandatory: auditLogMandatory,
Metrics: metrics,
AuditLogging: auditlogging.NewAuditLogging(config.ConverterConfig.AuditLog.TenantConfigPath, config.ConverterConfig.AuditLog.PolicyConfigMapName, gardenerClient),
}
if shootSpecDumpEnabled {
cfg.PVCPath = "/testdata/kim"
}

if enableRuntimeReconciler {
runtimeReconciler := runtime_controller.NewRuntimeReconciler(
mgr,
gardenerClient,
logger,
cfg,
)

if err = runtimeReconciler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to setup controller with Manager", "controller", "Runtime")
os.Exit(1)
}
runtimeReconciler := runtime_controller.NewRuntimeReconciler(
mgr,
gardenerClient,
logger,
cfg,
)

if err = runtimeReconciler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to setup controller with Manager", "controller", "Runtime")
os.Exit(1)
}

//+kubebuilder:scaffold:builder
Expand All @@ -218,7 +219,7 @@ func main() {
os.Exit(1)
}

setupLog.Info("Starting Manager", "kubeconfigExpirationTime", expirationTime, "kubeconfigRotationPeriod", rotationPeriod, "enableRuntimeReconciler", enableRuntimeReconciler)
setupLog.Info("Starting Manager", "kubeconfigExpirationTime", expirationTime, "kubeconfigRotationPeriod", rotationPeriod)

if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
Expand Down
1 change: 1 addition & 0 deletions config/manager/converter_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ data:
"projectName": "kyma-dev"
},
"machineImage": {
"defaultName": "gardenlinux",
"defaultVersion": "1312.3.0"
}
}
Expand Down
10 changes: 10 additions & 0 deletions internal/auditlogging/auditlogging.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ var ErrMissingMapping = errors.New("missing mapping for selected region in provi
// AuditLogging is the interface through which audit logging is enabled for a
// Shoot. A mock is generated from it by the go:generate directive below.
//
//go:generate mockery --name=AuditLogging
type AuditLogging interface {
// Enable takes the Shoot to process and returns a boolean together with an error.
// NOTE(review): the boolean presumably reports whether the Shoot was modified — confirm against the implementation.
Enable(ctx context.Context, shoot *gardener.Shoot) (bool, error)
// UpdateShootClient hands a new client to the implementation.
UpdateShootClient(client client.Client)
}

//go:generate mockery --name=AuditLogConfigurator
Expand All @@ -34,6 +35,7 @@ type AuditLogConfigurator interface {
GetSeedObj(ctx context.Context, seedKey types.NamespacedName) (gardener.Seed, error)
UpdateShoot(ctx context.Context, shoot *gardener.Shoot) error
GetConfigFromFile() (data map[string]map[string]AuditLogData, err error)
UpdateClient(client client.Client)
}

type AuditLog struct {
Expand Down Expand Up @@ -95,6 +97,14 @@ func (a *auditLogConfig) GetSeedObj(ctx context.Context, seedKey types.Namespace
return seed, nil
}

// UpdateClient replaces the client stored on this configurator with the given one.
func (a *auditLogConfig) UpdateClient(client client.Client) {
a.client = client
}

// UpdateShootClient forwards the given client to UpdateClient.
// NOTE(review): UpdateClient is presumably promoted from a configurator embedded in AuditLog — confirm against the struct definition.
func (al *AuditLog) UpdateShootClient(client client.Client) {
al.UpdateClient(client)
}

func (al *AuditLog) Enable(ctx context.Context, shoot *gardener.Shoot) (bool, error) {
seedName := getSeedName(*shoot)

Expand Down
35 changes: 12 additions & 23 deletions internal/auditlogging/mocks/AuditLogConfigurator.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 10 additions & 6 deletions internal/auditlogging/mocks/AuditLogging.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

68 changes: 61 additions & 7 deletions internal/controller/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,37 @@ const (
RuntimeIDLabel = "kyma-project.io/runtime-id"
ShootNameLabel = "kyma-project.io/shoot-name"
GardenerClusterStateMetricName = "im_gardener_clusters_state"
RuntimeStateMetricName = "im_runtime_state"
RuntimeFSMStopMetricName = "unexpected_stops_total"
provider = "provider"
state = "state"
reason = "reason"
message = "message"
KubeconfigExpirationMetricName = "im_kubeconfig_expiration"
expires = "expires"
lastSyncAnnotation = "operator.kyma-project.io/last-sync"
)

type Metrics struct {
// Metrics lists the metric operations offered by this package; NewMetrics
// returns the implementation. A mock is generated from it by the go:generate
// directive below.
//
//go:generate mockery --name=Metrics
type Metrics interface {
// SetRuntimeStates records the current state of the given Runtime in the runtime state gauge.
SetRuntimeStates(runtime v1.Runtime)
// CleanUpRuntimeGauge deletes the runtime state gauge series that match the given runtime ID.
CleanUpRuntimeGauge(runtimeID string)
// IncRuntimeFSMStopCounter increments the counter of unexpected state machine stops.
IncRuntimeFSMStopCounter()
// SetGardenerClusterStates takes a GardenerCluster.
// NOTE(review): presumably records its state in the gardener cluster state gauge — confirm against the implementation.
SetGardenerClusterStates(cluster v1.GardenerCluster)
// CleanUpGardenerClusterGauge deletes the gardener cluster state gauge series that match the given runtime ID.
CleanUpGardenerClusterGauge(runtimeID string)
// CleanUpKubeconfigExpiration deletes the kubeconfig expiration gauge series that match the given runtime ID.
CleanUpKubeconfigExpiration(runtimeID string)
// SetKubeconfigExpiration takes a kubeconfig Secret plus the rotation period and minimal rotation time ratio.
// NOTE(review): presumably publishes the kubeconfig expiration gauge for that Secret — confirm against the implementation.
SetKubeconfigExpiration(secret corev1.Secret, rotationPeriod time.Duration, minimalRotationTimeRatio float64)
}

// metricsImpl holds the Prometheus collectors behind the Metrics interface.
// Instances are created by NewMetrics, which also registers the collectors.
type metricsImpl struct {
// Gauge vector for GardenerCluster state.
gardenerClustersStateGaugeVec *prometheus.GaugeVec
// Gauge vector for the kubeconfig expiration value (epoch timestamp format).
kubeconfigExpirationGauge *prometheus.GaugeVec
// Gauge vector for the current Status.state of Runtime CRs.
runtimeStateGauge *prometheus.GaugeVec
// Counter of unexpected state machine stop events.
runtimeFSMUnexpectedStopsCnt prometheus.Counter
}

func NewMetrics() Metrics {
m := Metrics{
m := &metricsImpl{
gardenerClustersStateGaugeVec: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: componentName,
Expand All @@ -45,12 +62,49 @@ func NewMetrics() Metrics {
Name: KubeconfigExpirationMetricName,
Help: "Exposes current kubeconfig expiration value in epoch timestamp value format",
}, []string{runtimeIDKeyName, shootNameIDKeyName, expires, rotationDuration, expirationDuration}),
runtimeStateGauge: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: componentName,
Name: RuntimeStateMetricName,
Help: "Exposes current Status.state for Runtime CRs",
}, []string{runtimeIDKeyName, shootNameIDKeyName, provider, state, message}),
runtimeFSMUnexpectedStopsCnt: prometheus.NewCounter(
prometheus.CounterOpts{
Name: RuntimeFSMStopMetricName,
Help: "Exposes the number of unexpected state machine stop events",
}),
}
ctrlMetrics.Registry.MustRegister(m.gardenerClustersStateGaugeVec, m.kubeconfigExpirationGauge)
ctrlMetrics.Registry.MustRegister(m.gardenerClustersStateGaugeVec, m.kubeconfigExpirationGauge, m.runtimeStateGauge, m.runtimeFSMUnexpectedStopsCnt)
return m
}

func (m Metrics) SetGardenerClusterStates(cluster v1.GardenerCluster) {
// SetRuntimeStates publishes the current state of the given Runtime CR as a
// gauge series with value 1. Runtimes that carry no runtime-id label are
// ignored.
//
// Every series previously recorded for the same runtime ID is deleted first,
// so a state change does not leave a stale series behind. The last label value
// is the Message of the final status condition, or "No value" when the Runtime
// has no conditions.
func (m metricsImpl) SetRuntimeStates(runtime v1.Runtime) {
	runtimeID := runtime.GetLabels()[RuntimeIDLabel]
	if runtimeID == "" {
		return
	}

	lastMessage := "No value"
	if conditions := runtime.Status.Conditions; len(conditions) > 0 {
		lastMessage = conditions[len(conditions)-1].Message
	}

	// Drop the old series before writing the new one; label values differ
	// between states, so the old series would otherwise stay exported.
	m.CleanUpRuntimeGauge(runtimeID)

	gauge := m.runtimeStateGauge.WithLabelValues(
		runtimeID,
		runtime.Spec.Shoot.Name,
		runtime.Spec.Shoot.Provider.Type,
		string(runtime.Status.State),
		lastMessage,
	)
	gauge.Set(1)
}

// CleanUpRuntimeGauge deletes every runtime state gauge series whose runtime
// ID label equals runtimeID, regardless of the other label values.
func (m metricsImpl) CleanUpRuntimeGauge(runtimeID string) {
	byRuntimeID := prometheus.Labels{runtimeIDKeyName: runtimeID}
	m.runtimeStateGauge.DeletePartialMatch(byRuntimeID)
}

// IncRuntimeFSMStopCounter adds one to the counter of unexpected state machine stop events.
func (m metricsImpl) IncRuntimeFSMStopCounter() {
m.runtimeFSMUnexpectedStopsCnt.Inc()
}

func (m metricsImpl) SetGardenerClusterStates(cluster v1.GardenerCluster) {
var runtimeID = cluster.GetLabels()[RuntimeIDLabel]
var shootName = cluster.GetLabels()[ShootNameLabel]

Expand All @@ -65,13 +119,13 @@ func (m Metrics) SetGardenerClusterStates(cluster v1.GardenerCluster) {
}
}

func (m Metrics) CleanUpGardenerClusterGauge(runtimeID string) {
// CleanUpGardenerClusterGauge deletes every gardener cluster state gauge
// series whose runtime ID label equals runtimeID, regardless of the other
// label values.
func (m metricsImpl) CleanUpGardenerClusterGauge(runtimeID string) {
	byRuntimeID := prometheus.Labels{runtimeIDKeyName: runtimeID}
	m.gardenerClustersStateGaugeVec.DeletePartialMatch(byRuntimeID)
}

func (m Metrics) CleanUpKubeconfigExpiration(runtimeID string) {
func (m metricsImpl) CleanUpKubeconfigExpiration(runtimeID string) {
m.kubeconfigExpirationGauge.DeletePartialMatch(prometheus.Labels{
runtimeIDKeyName: runtimeID,
})
Expand All @@ -81,7 +135,7 @@ func computeExpirationInSeconds(rotationPeriod time.Duration, minimalRotationTim
return rotationPeriod.Seconds() / minimalRotationTimeRatio
}

func (m Metrics) SetKubeconfigExpiration(secret corev1.Secret, rotationPeriod time.Duration, minimalRotationTimeRatio float64) {
func (m metricsImpl) SetKubeconfigExpiration(secret corev1.Secret, rotationPeriod time.Duration, minimalRotationTimeRatio float64) {
var runtimeID = secret.GetLabels()[RuntimeIDLabel]
var shootName = secret.GetLabels()[ShootNameLabel]

Expand Down
Loading
Loading