diff --git a/cmd/main.go b/cmd/main.go index eb310b71..dc90ba33 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -65,8 +65,9 @@ func init() { const defaultMinimalRotationTimeRatio = 0.6 const defaultExpirationTime = 24 * time.Hour -const defaultRuntimeReconcilerEnabled = true const defaultGardenerRequestTimeout = 60 * time.Second +const defaultControlPlaneRequeueDuration = 10 * time.Second +const defaultGardenerRequeueDuration = 15 * time.Second func main() { var metricsAddr string @@ -77,7 +78,6 @@ func main() { var minimalRotationTimeRatio float64 var expirationTime time.Duration var gardenerRequestTimeout time.Duration - var enableRuntimeReconciler bool var converterConfigFilepath string var shootSpecDumpEnabled bool var auditLogMandatory bool @@ -92,7 +92,6 @@ func main() { flag.Float64Var(&minimalRotationTimeRatio, "minimal-rotation-time", defaultMinimalRotationTimeRatio, "The ratio determines what is the minimal time that needs to pass to rotate certificate.") flag.DurationVar(&expirationTime, "kubeconfig-expiration-time", defaultExpirationTime, "Dynamic kubeconfig expiration time") flag.DurationVar(&gardenerRequestTimeout, "gardener-request-timeout", defaultGardenerRequestTimeout, "Timeout duration for requests to Gardener") - flag.BoolVar(&enableRuntimeReconciler, "runtime-reconciler-enabled", defaultRuntimeReconcilerEnabled, "Feature flag for all runtime reconciler functionalities") flag.StringVar(&converterConfigFilepath, "converter-config-filepath", "/converter-config/converter_config.json", "A file path to the gardener shoot converter configuration.") flag.BoolVar(&shootSpecDumpEnabled, "shoot-spec-dump-enabled", false, "Feature flag to allow persisting specs of created shoots") flag.BoolVar(&auditLogMandatory, "audit-log-mandatory", true, "Feature flag to enable strict mode for audit log configuration") @@ -165,46 +164,48 @@ func main() { getReader := func() (io.Reader, error) { return os.Open(converterConfigFilepath) } - var converterConfig config.Config - if err = converterConfig.Load(getReader); err != nil { + var config config.Config + if err = config.Load(getReader); err != nil { setupLog.Error(err, "unable to load converter configuration") os.Exit(1) } validate := validator.New(validator.WithRequiredStructEnabled()) - if err = validate.Struct(converterConfig); err != nil { + if err = validate.Struct(config); err != nil { setupLog.Error(err, "invalid converter configuration") os.Exit(1) } - err = validateAuditLogConfiguration(converterConfig.ConverterConfig.AuditLog.TenantConfigPath) + err = validateAuditLogConfiguration(config.ConverterConfig.AuditLog.TenantConfigPath) if err != nil { setupLog.Error(err, "invalid Audit Log configuration") os.Exit(1) } cfg := fsm.RCCfg{ - Finalizer: infrastructuremanagerv1.Finalizer, - ShootNamesapace: gardenerNamespace, - Config: converterConfig, - AuditLogMandatory: auditLogMandatory, + GardenerRequeueDuration: defaultGardenerRequeueDuration, + ControlPlaneRequeueDuration: defaultControlPlaneRequeueDuration, + Finalizer: infrastructuremanagerv1.Finalizer, + ShootNamesapace: gardenerNamespace, + Config: config, + AuditLogMandatory: auditLogMandatory, + Metrics: metrics, + AuditLogging: auditlogging.NewAuditLogging(config.ConverterConfig.AuditLog.TenantConfigPath, config.ConverterConfig.AuditLog.PolicyConfigMapName, gardenerClient), } if shootSpecDumpEnabled { cfg.PVCPath = "/testdata/kim" } - if enableRuntimeReconciler { - runtimeReconciler := runtime_controller.NewRuntimeReconciler( - mgr, - gardenerClient, - logger, - cfg, - ) - - if err = runtimeReconciler.SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to setup controller with Manager", "controller", "Runtime") - os.Exit(1) - } + runtimeReconciler := runtime_controller.NewRuntimeReconciler( + mgr, + gardenerClient, + logger, + cfg, + ) + + if err = runtimeReconciler.SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to setup controller with Manager", "controller", "Runtime") + os.Exit(1) } //+kubebuilder:scaffold:builder @@ -218,7 +219,7 @@ func main() { os.Exit(1) } - setupLog.Info("Starting Manager", "kubeconfigExpirationTime", expirationTime, "kubeconfigRotationPeriod", rotationPeriod, "enableRuntimeReconciler", enableRuntimeReconciler) + setupLog.Info("Starting Manager", "kubeconfigExpirationTime", expirationTime, "kubeconfigRotationPeriod", rotationPeriod) if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { setupLog.Error(err, "problem running manager") diff --git a/config/manager/converter_config.yaml b/config/manager/converter_config.yaml index 3a844663..a39576f4 100644 --- a/config/manager/converter_config.yaml +++ b/config/manager/converter_config.yaml @@ -22,6 +22,7 @@ data: "projectName": "kyma-dev" }, "machineImage": { + "defaultName": "gardenlinux", "defaultVersion": "1312.3.0" } } diff --git a/internal/auditlogging/auditlogging.go b/internal/auditlogging/auditlogging.go index feac599d..f8e65326 100644 --- a/internal/auditlogging/auditlogging.go +++ b/internal/auditlogging/auditlogging.go @@ -25,6 +25,7 @@ var ErrMissingMapping = errors.New("missing mapping for selected region in provi //go:generate mockery --name=AuditLogging type AuditLogging interface { Enable(ctx context.Context, shoot *gardener.Shoot) (bool, error) + UpdateShootClient(client client.Client) } //go:generate mockery --name=AuditLogConfigurator @@ -34,6 +35,7 @@ type AuditLogConfigurator interface { GetSeedObj(ctx context.Context, seedKey types.NamespacedName) (gardener.Seed, error) UpdateShoot(ctx context.Context, shoot *gardener.Shoot) error GetConfigFromFile() (data map[string]map[string]AuditLogData, err error) + UpdateClient(client client.Client) } type AuditLog struct { @@ -95,6 +97,14 @@ func (a *auditLogConfig) GetSeedObj(ctx context.Context, seedKey types.Namespace return seed, nil } +func (a *auditLogConfig) UpdateClient(client client.Client) { + a.client = client +} + +func (al *AuditLog) UpdateShootClient(client client.Client) { + al.UpdateClient(client) +} + func (al *AuditLog) Enable(ctx context.Context, shoot *gardener.Shoot) (bool, error) { seedName := getSeedName(*shoot) diff --git a/internal/auditlogging/mocks/AuditLogConfigurator.go b/internal/auditlogging/mocks/AuditLogConfigurator.go index 3977f019..ca9e0fbd 100644 --- a/internal/auditlogging/mocks/AuditLogConfigurator.go +++ b/internal/auditlogging/mocks/AuditLogConfigurator.go @@ -1,14 +1,18 @@ -// Code generated by mockery v2.44.2. DO NOT EDIT. +// Code generated by mockery v2.36.1. DO NOT EDIT. package mocks import ( + auditlogging "github.com/kyma-project/infrastructure-manager/internal/auditlogging" + client "sigs.k8s.io/controller-runtime/pkg/client" + context "context" - v1beta1 "github.com/gardener/gardener/pkg/apis/core/v1beta1" - auditlogging "github.com/kyma-project/infrastructure-manager/internal/auditlogging" mock "github.com/stretchr/testify/mock" + types "k8s.io/apimachinery/pkg/types" + + v1beta1 "github.com/gardener/gardener/pkg/apis/core/v1beta1" ) // AuditLogConfigurator is an autogenerated mock type for the AuditLogConfigurator type @@ -20,10 +24,6 @@ type AuditLogConfigurator struct { func (_m *AuditLogConfigurator) CanEnableAuditLogsForShoot(seedName string) bool { ret := _m.Called(seedName) - if len(ret) == 0 { - panic("no return value specified for CanEnableAuditLogsForShoot") - } - var r0 bool if rf, ok := ret.Get(0).(func(string) bool); ok { r0 = rf(seedName) @@ -38,10 +38,6 @@ func (_m *AuditLogConfigurator) CanEnableAuditLogsForShoot(seedName string) bool func (_m *AuditLogConfigurator) GetConfigFromFile() (map[string]map[string]auditlogging.AuditLogData, error) { ret := _m.Called() - if len(ret) == 0 { - panic("no return value specified for GetConfigFromFile") - } - var r0 map[string]map[string]auditlogging.AuditLogData var r1 error if rf, ok := ret.Get(0).(func() (map[string]map[string]auditlogging.AuditLogData, error)); ok { @@ -68,10 +64,6 @@ func (_m *AuditLogConfigurator) GetConfigFromFile() (map[string]map[string]audit func (_m *AuditLogConfigurator) GetPolicyConfigMapName() string { ret := _m.Called() - if len(ret) == 0 { - panic("no return value specified for GetPolicyConfigMapName") - } - var r0 string if rf, ok := ret.Get(0).(func() string); ok { r0 = rf() @@ -86,10 +78,6 @@ func (_m *AuditLogConfigurator) GetPolicyConfigMapName() string { func (_m *AuditLogConfigurator) GetSeedObj(ctx context.Context, seedKey types.NamespacedName) (v1beta1.Seed, error) { ret := _m.Called(ctx, seedKey) - if len(ret) == 0 { - panic("no return value specified for GetSeedObj") - } - var r0 v1beta1.Seed var r1 error if rf, ok := ret.Get(0).(func(context.Context, types.NamespacedName) (v1beta1.Seed, error)); ok { @@ -110,14 +98,15 @@ func (_m *AuditLogConfigurator) GetSeedObj(ctx context.Context, seedKey types.Na return r0, r1 } +// UpdateClient provides a mock function with given fields: _a0 +func (_m *AuditLogConfigurator) UpdateClient(_a0 client.Client) { + _m.Called(_a0) +} + // UpdateShoot provides a mock function with given fields: ctx, shoot func (_m *AuditLogConfigurator) UpdateShoot(ctx context.Context, shoot *v1beta1.Shoot) error { ret := _m.Called(ctx, shoot) - if len(ret) == 0 { - panic("no return value specified for UpdateShoot") - } - var r0 error if rf, ok := ret.Get(0).(func(context.Context, *v1beta1.Shoot) error); ok { r0 = rf(ctx, shoot) diff --git a/internal/auditlogging/mocks/AuditLogging.go b/internal/auditlogging/mocks/AuditLogging.go index f670fd14..ac51f1f0 100644 --- a/internal/auditlogging/mocks/AuditLogging.go +++ b/internal/auditlogging/mocks/AuditLogging.go @@ -1,12 +1,15 @@ -// Code generated by mockery v2.44.2. DO NOT EDIT. +// Code generated by mockery v2.36.1. DO NOT EDIT. package mocks import ( context "context" - v1beta1 "github.com/gardener/gardener/pkg/apis/core/v1beta1" + client "sigs.k8s.io/controller-runtime/pkg/client" + mock "github.com/stretchr/testify/mock" + + v1beta1 "github.com/gardener/gardener/pkg/apis/core/v1beta1" ) // AuditLogging is an autogenerated mock type for the AuditLogging type @@ -18,10 +21,6 @@ type AuditLogging struct { func (_m *AuditLogging) Enable(ctx context.Context, shoot *v1beta1.Shoot) (bool, error) { ret := _m.Called(ctx, shoot) - if len(ret) == 0 { - panic("no return value specified for Enable") - } - var r0 bool var r1 error if rf, ok := ret.Get(0).(func(context.Context, *v1beta1.Shoot) (bool, error)); ok { @@ -42,6 +41,11 @@ func (_m *AuditLogging) Enable(ctx context.Context, shoot *v1beta1.Shoot) (bool, return r0, r1 } +// UpdateShootClient provides a mock function with given fields: _a0 +func (_m *AuditLogging) UpdateShootClient(_a0 client.Client) { + _m.Called(_a0) +} + // NewAuditLogging creates a new instance of AuditLogging. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. func NewAuditLogging(t interface { diff --git a/internal/controller/metrics/metrics.go b/internal/controller/metrics/metrics.go index 512325cc..e73cdfb7 100644 --- a/internal/controller/metrics/metrics.go +++ b/internal/controller/metrics/metrics.go @@ -19,20 +19,37 @@ const ( RuntimeIDLabel = "kyma-project.io/runtime-id" ShootNameLabel = "kyma-project.io/shoot-name" GardenerClusterStateMetricName = "im_gardener_clusters_state" + RuntimeStateMetricName = "im_runtime_state" + RuntimeFSMStopMetricName = "unexpected_stops_total" + provider = "provider" state = "state" reason = "reason" + message = "message" KubeconfigExpirationMetricName = "im_kubeconfig_expiration" expires = "expires" lastSyncAnnotation = "operator.kyma-project.io/last-sync" ) -type Metrics struct { +//go:generate mockery --name=Metrics +type Metrics interface { + SetRuntimeStates(runtime v1.Runtime) + CleanUpRuntimeGauge(runtimeID string) + IncRuntimeFSMStopCounter() + SetGardenerClusterStates(cluster v1.GardenerCluster) + CleanUpGardenerClusterGauge(runtimeID string) + CleanUpKubeconfigExpiration(runtimeID string) + SetKubeconfigExpiration(secret corev1.Secret, rotationPeriod time.Duration, minimalRotationTimeRatio float64) +} + +type metricsImpl struct { gardenerClustersStateGaugeVec *prometheus.GaugeVec kubeconfigExpirationGauge *prometheus.GaugeVec + runtimeStateGauge *prometheus.GaugeVec + runtimeFSMUnexpectedStopsCnt prometheus.Counter } func NewMetrics() Metrics { - m := Metrics{ + m := &metricsImpl{ gardenerClustersStateGaugeVec: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: componentName, @@ -45,12 +62,49 @@ func NewMetrics() Metrics { Name: KubeconfigExpirationMetricName, Help: "Exposes current kubeconfig expiration value in epoch timestamp value format", }, []string{runtimeIDKeyName, shootNameIDKeyName, expires, rotationDuration, expirationDuration}), + runtimeStateGauge: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Subsystem: componentName, + Name: RuntimeStateMetricName, + Help: "Exposes current Status.state for Runtime CRs", + }, []string{runtimeIDKeyName, shootNameIDKeyName, provider, state, message}), + runtimeFSMUnexpectedStopsCnt: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: RuntimeFSMStopMetricName, + Help: "Exposes the number of unexpected state machine stop events", + }), } - ctrlMetrics.Registry.MustRegister(m.gardenerClustersStateGaugeVec, m.kubeconfigExpirationGauge) + ctrlMetrics.Registry.MustRegister(m.gardenerClustersStateGaugeVec, m.kubeconfigExpirationGauge, m.runtimeStateGauge, m.runtimeFSMUnexpectedStopsCnt) return m } -func (m Metrics) SetGardenerClusterStates(cluster v1.GardenerCluster) { +func (m metricsImpl) SetRuntimeStates(runtime v1.Runtime) { + runtimeID := runtime.GetLabels()[RuntimeIDLabel] + + if runtimeID != "" { + size := len(runtime.Status.Conditions) + + var reason = "No value" + if size > 0 { + reason = runtime.Status.Conditions[size-1].Message + } + + m.CleanUpRuntimeGauge(runtimeID) + m.runtimeStateGauge.WithLabelValues(runtimeID, runtime.Spec.Shoot.Name, runtime.Spec.Shoot.Provider.Type, string(runtime.Status.State), reason).Set(1) + } +} + +func (m metricsImpl) CleanUpRuntimeGauge(runtimeID string) { + m.runtimeStateGauge.DeletePartialMatch(prometheus.Labels{ + runtimeIDKeyName: runtimeID, + }) +} + +func (m metricsImpl) IncRuntimeFSMStopCounter() { + m.runtimeFSMUnexpectedStopsCnt.Inc() +} + +func (m metricsImpl) SetGardenerClusterStates(cluster v1.GardenerCluster) { var runtimeID = cluster.GetLabels()[RuntimeIDLabel] var shootName = cluster.GetLabels()[ShootNameLabel] @@ -65,13 +119,13 @@ func (m Metrics) SetGardenerClusterStates(cluster v1.GardenerCluster) { } } -func (m Metrics) CleanUpGardenerClusterGauge(runtimeID string) { +func (m metricsImpl) CleanUpGardenerClusterGauge(runtimeID string) { m.gardenerClustersStateGaugeVec.DeletePartialMatch(prometheus.Labels{ runtimeIDKeyName: runtimeID, }) } -func (m Metrics) CleanUpKubeconfigExpiration(runtimeID string) { +func (m metricsImpl) CleanUpKubeconfigExpiration(runtimeID string) { m.kubeconfigExpirationGauge.DeletePartialMatch(prometheus.Labels{ runtimeIDKeyName: runtimeID, }) @@ -81,7 +135,7 @@ func computeExpirationInSeconds(rotationPeriod time.Duration, minimalRotationTim return rotationPeriod.Seconds() / minimalRotationTimeRatio } -func (m Metrics) SetKubeconfigExpiration(secret corev1.Secret, rotationPeriod time.Duration, minimalRotationTimeRatio float64) { +func (m metricsImpl) SetKubeconfigExpiration(secret corev1.Secret, rotationPeriod time.Duration, minimalRotationTimeRatio float64) { var runtimeID = secret.GetLabels()[RuntimeIDLabel] var shootName = secret.GetLabels()[ShootNameLabel] diff --git a/internal/controller/metrics/mocks/Metrics.go b/internal/controller/metrics/mocks/Metrics.go new file mode 100644 index 00000000..ca21c996 --- /dev/null +++ b/internal/controller/metrics/mocks/Metrics.go @@ -0,0 +1,67 @@ +// Code generated by mockery v2.36.1. DO NOT EDIT. + +package mocks + +import ( + corev1 "k8s.io/api/core/v1" + + mock "github.com/stretchr/testify/mock" + + time "time" + + v1 "github.com/kyma-project/infrastructure-manager/api/v1" +) + +// Metrics is an autogenerated mock type for the Metrics type +type Metrics struct { + mock.Mock +} + +// CleanUpGardenerClusterGauge provides a mock function with given fields: runtimeID +func (_m *Metrics) CleanUpGardenerClusterGauge(runtimeID string) { + _m.Called(runtimeID) +} + +// CleanUpKubeconfigExpiration provides a mock function with given fields: runtimeID +func (_m *Metrics) CleanUpKubeconfigExpiration(runtimeID string) { + _m.Called(runtimeID) +} + +// CleanUpRuntimeGauge provides a mock function with given fields: runtimeID +func (_m *Metrics) CleanUpRuntimeGauge(runtimeID string) { + _m.Called(runtimeID) +} + +// IncRuntimeFSMStopCounter provides a mock function with given fields: +func (_m *Metrics) IncRuntimeFSMStopCounter() { + _m.Called() +} + +// SetGardenerClusterStates provides a mock function with given fields: cluster +func (_m *Metrics) SetGardenerClusterStates(cluster v1.GardenerCluster) { + _m.Called(cluster) +} + +// SetKubeconfigExpiration provides a mock function with given fields: secret, rotationPeriod, minimalRotationTimeRatio +func (_m *Metrics) SetKubeconfigExpiration(secret corev1.Secret, rotationPeriod time.Duration, minimalRotationTimeRatio float64) { + _m.Called(secret, rotationPeriod, minimalRotationTimeRatio) +} + +// SetRuntimeStates provides a mock function with given fields: runtime +func (_m *Metrics) SetRuntimeStates(runtime v1.Runtime) { + _m.Called(runtime) +} + +// NewMetrics creates a new instance of Metrics. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. +func NewMetrics(t interface { + mock.TestingT + Cleanup(func()) +}) *Metrics { + mock := &Metrics{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/internal/controller/runtime/fsm/runtime_fsm.go b/internal/controller/runtime/fsm/runtime_fsm.go index 0256d8d8..28f4f5ae 100644 --- a/internal/controller/runtime/fsm/runtime_fsm.go +++ b/internal/controller/runtime/fsm/runtime_fsm.go @@ -12,6 +12,7 @@ import ( imv1 "github.com/kyma-project/infrastructure-manager/api/v1" "github.com/kyma-project/infrastructure-manager/internal/auditlogging" "github.com/kyma-project/infrastructure-manager/internal/config" + "github.com/kyma-project/infrastructure-manager/internal/controller/metrics" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -20,20 +21,19 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" ) -const ( - gardenerRequeueDuration = 15 * time.Second - controlPlaneRequeueDuration = 10 * time.Second -) - type stateFn func(context.Context, *fsm, *systemState) (stateFn, *ctrl.Result, error) type writerGetter = func(filePath string) (io.Writer, error) // runtime reconciler specific configuration type RCCfg struct { - Finalizer string - PVCPath string - ShootNamesapace string - AuditLogMandatory bool + GardenerRequeueDuration time.Duration + ControlPlaneRequeueDuration time.Duration + Finalizer string + PVCPath string + ShootNamesapace string + AuditLogMandatory bool + Metrics metrics.Metrics + AuditLogging auditlogging.AuditLogging config.Config } @@ -64,7 +64,6 @@ type fsm struct { log logr.Logger K8s RCCfg - auditlogging.AuditLogging } func (m *fsm) Run(ctx context.Context, v imv1.Runtime) (ctrl.Result, error) { @@ -108,6 +107,5 @@ func NewFsm(log logr.Logger, cfg RCCfg, k8s K8s) Fsm { RCCfg: cfg, log: log, K8s: k8s, - AuditLogging: auditlogging.NewAuditLogging(cfg.Config.ConverterConfig.AuditLog.TenantConfigPath, cfg.Config.ConverterConfig.AuditLog.PolicyConfigMapName, k8s.ShootClient), } } diff --git a/internal/controller/runtime/fsm/runtime_fsm_apply_crb_test.go b/internal/controller/runtime/fsm/runtime_fsm_apply_crb_test.go index d20378ad..1d060e64 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_apply_crb_test.go +++ b/internal/controller/runtime/fsm/runtime_fsm_apply_crb_test.go @@ -7,8 +7,10 @@ import ( gardener_api "github.com/gardener/gardener/pkg/apis/core/v1beta1" imv1 "github.com/kyma-project/infrastructure-manager/api/v1" + "github.com/kyma-project/infrastructure-manager/internal/controller/metrics/mocks" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/stretchr/testify/mock" rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -20,6 +22,14 @@ var _ = Describe(`runtime_fsm_apply_crb`, Label("applyCRB"), func() { var testErr = fmt.Errorf("test error") + withMockedMetrics := func() fakeFSMOpt { + m := &mocks.Metrics{} + m.On("SetRuntimeStates", mock.Anything).Return() + m.On("CleanUpRuntimeGauge", mock.Anything).Return() + m.On("IncRuntimeFSMStopCounter").Return() + return withMetrics(m) + } + DescribeTable("getMissing", func(tc tcGetCRB) { actual := getMissing(tc.crbs, tc.admins) @@ -110,7 +120,7 @@ var _ = Describe(`runtime_fsm_apply_crb`, Label("applyCRB"), func() { return nil } - DescribeTable("sFnAppluClusterRoleBindings", + DescribeTable("sFnApplyClusterRoleBindings", func(tc tcApplySfn) { // initialize test data if required Expect(tc.init()).ShouldNot(HaveOccurred()) @@ -140,6 +150,8 @@ var _ = Describe(`runtime_fsm_apply_crb`, Label("applyCRB"), func() { withFakedK8sClient(testScheme, &testRuntimeWithAdmin), withFn(sFnApplyClusterRoleBindingsStateSetup), withFakeEventRecorder(1), + withMockedMetrics(), + withDefaultReconcileDuration(), ), setup: defaultSetup, }), @@ -156,6 +168,8 @@ var _ = Describe(`runtime_fsm_apply_crb`, Label("applyCRB"), func() { withFakedK8sClient(testScheme, &testRuntime), withFn(sFnApplyClusterRoleBindingsStateSetup), withFakeEventRecorder(1), + withMockedMetrics(), + withDefaultReconcileDuration(), ), setup: defaultSetup, }), @@ -171,6 +185,8 @@ var _ = Describe(`runtime_fsm_apply_crb`, Label("applyCRB"), func() { withFakedK8sClient(testScheme, &testRuntime), withFn(sFnApplyClusterRoleBindingsStateSetup), withFakeEventRecorder(1), + withMockedMetrics(), + withDefaultReconcileDuration(), ), setup: func(f *fsm) error { GetShootClient = func( diff --git a/internal/controller/runtime/fsm/runtime_fsm_common.go b/internal/controller/runtime/fsm/runtime_fsm_common.go index 5ba47706..33e7b781 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_common.go +++ b/internal/controller/runtime/fsm/runtime_fsm_common.go @@ -1,6 +1,7 @@ package fsm import ( + "context" "time" ctrl "sigs.k8s.io/controller-runtime" @@ -39,3 +40,10 @@ func stop() (stateFn, *ctrl.Result, error) { func switchState(fn stateFn) (stateFn, *ctrl.Result, error) { return fn, nil, nil } + +func stopWithMetrics() (stateFn, *ctrl.Result, error) { + return func(_ context.Context, m *fsm, _ *systemState) (stateFn, *ctrl.Result, error) { + m.Metrics.IncRuntimeFSMStopCounter() + return stop() + }, nil, nil +} diff --git a/internal/controller/runtime/fsm/runtime_fsm_configure_auditlog.go b/internal/controller/runtime/fsm/runtime_fsm_configure_auditlog.go index 81ccd276..28024317 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_configure_auditlog.go +++ b/internal/controller/runtime/fsm/runtime_fsm_configure_auditlog.go @@ -25,7 +25,7 @@ func sFnConfigureAuditLog(ctx context.Context, m *fsm, s *systemState) (stateFn, "Waiting for Gardener shoot to be Ready state after configuration of the Audit Logs", ) - return updateStatusAndRequeueAfter(gardenerRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.GardenerRequeueDuration) } if err == nil { diff --git a/internal/controller/runtime/fsm/runtime_fsm_configure_auditlog_test.go b/internal/controller/runtime/fsm/runtime_fsm_configure_auditlog_test.go index cf48b935..bc59aa98 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_configure_auditlog_test.go +++ b/internal/controller/runtime/fsm/runtime_fsm_configure_auditlog_test.go @@ -35,8 +35,9 @@ func TestAuditLogState(t *testing.T) { }, } - fsm := &fsm{AuditLogging: auditLog} + fsm := &fsm{} fsm.RCCfg.AuditLogMandatory = true + fsm.RCCfg.AuditLogging = auditLog auditLog.On("Enable", ctx, shoot).Return(true, nil).Once() @@ -72,8 +73,9 @@ func TestAuditLogState(t *testing.T) { }, } - fsm := &fsm{AuditLogging: auditLog} + fsm := &fsm{} fsm.RCCfg.AuditLogMandatory = true + fsm.RCCfg.AuditLogging = auditLog auditLog.On("Enable", ctx, shoot).Return(false, nil).Once() @@ -109,8 +111,9 @@ func TestAuditLogState(t *testing.T) { }, } - fsm := &fsm{AuditLogging: auditLog} + fsm := &fsm{} fsm.RCCfg.AuditLogMandatory = true + fsm.RCCfg.AuditLogging = auditLog auditLog.On("Enable", ctx, shoot).Return(false, auditlogging.ErrMissingMapping).Once() @@ -146,8 +149,9 @@ func TestAuditLogState(t *testing.T) { }, } - fsm := &fsm{AuditLogging: auditLog} + fsm := &fsm{} fsm.RCCfg.AuditLogMandatory = false + fsm.RCCfg.AuditLogging = auditLog auditLog.On("Enable", ctx, shoot).Return(false, auditlogging.ErrMissingMapping).Once() @@ -183,8 +187,9 @@ func TestAuditLogState(t *testing.T) { }, } - fsm := &fsm{AuditLogging: auditLog} + fsm := &fsm{} fsm.RCCfg.AuditLogMandatory = true + fsm.RCCfg.AuditLogging = auditLog auditLog.On("Enable", ctx, shoot).Return(false, errors.New("some error during configuration")).Once() @@ -220,8 +225,9 @@ func TestAuditLogState(t *testing.T) { }, } - fsm := &fsm{AuditLogging: auditLog} + fsm := &fsm{} fsm.RCCfg.AuditLogMandatory = false + fsm.RCCfg.AuditLogging = auditLog auditLog.On("Enable", ctx, shoot).Return(false, errors.New("some error during configuration")).Once() @@ -257,8 +263,9 @@ func TestAuditLogState(t *testing.T) { }, } - fsm := &fsm{AuditLogging: auditLog} + fsm := &fsm{} fsm.RCCfg.AuditLogMandatory = true + fsm.RCCfg.AuditLogging = auditLog auditLog.On("Enable", ctx, shoot).Return(false, k8serrors.NewConflict(gardener.Resource("shoots"), shoot.Name, errors.New("k8s conflict on update error"))).Once() diff --git a/internal/controller/runtime/fsm/runtime_fsm_create_kubeconfig.go b/internal/controller/runtime/fsm/runtime_fsm_create_kubeconfig.go index 8d83944e..72155b6d 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_create_kubeconfig.go +++ b/internal/controller/runtime/fsm/runtime_fsm_create_kubeconfig.go @@ -33,6 +33,7 @@ func sFnCreateKubeconfig(ctx context.Context, m *fsm, s *systemState) (stateFn, "False", err.Error(), ) + m.Metrics.IncRuntimeFSMStopCounter() return updateStatusAndStop() } @@ -46,18 +47,19 @@ func sFnCreateKubeconfig(ctx context.Context, m *fsm, s *systemState) (stateFn, "False", err.Error(), ) + m.Metrics.IncRuntimeFSMStopCounter() return updateStatusAndStop() } m.log.Info("Gardener Cluster CR created, waiting for readiness", "Name", runtimeID) s.instance.UpdateStatePending(imv1.ConditionTypeRuntimeKubeconfigReady, imv1.ConditionReasonGardenerCRCreated, "Unknown", "Gardener Cluster CR created, waiting for readiness") - return updateStatusAndRequeueAfter(controlPlaneRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.ControlPlaneRequeueDuration) } // wait section if cluster.Status.State != imv1.ReadyState { m.log.Info("GardenerCluster CR is not ready yet, requeue", "Name", runtimeID, "State", cluster.Status.State) - return requeueAfter(controlPlaneRequeueDuration) + return requeueAfter(m.RCCfg.ControlPlaneRequeueDuration) } m.log.Info("GardenerCluster CR is ready", "Name", runtimeID) diff --git a/internal/controller/runtime/fsm/runtime_fsm_create_kubeconfig_test.go b/internal/controller/runtime/fsm/runtime_fsm_create_kubeconfig_test.go index 095524de..53d3b988 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_create_kubeconfig_test.go +++ b/internal/controller/runtime/fsm/runtime_fsm_create_kubeconfig_test.go @@ -3,10 +3,12 @@ package fsm import ( "context" "fmt" + "github.com/stretchr/testify/mock" "time" gardener "github.com/gardener/gardener/pkg/apis/core/v1beta1" imv1 "github.com/kyma-project/infrastructure-manager/api/v1" + "github.com/kyma-project/infrastructure-manager/internal/controller/metrics/mocks" . "github.com/onsi/ginkgo/v2" //nolint:revive . "github.com/onsi/gomega" //nolint:revive "k8s.io/apimachinery/pkg/api/meta" @@ -34,6 +36,14 @@ var _ = Describe("KIM sFnCreateKubeconfig", func() { } } + withMockedMetrics := func() fakeFSMOpt { + m := &mocks.Metrics{} + m.On("SetRuntimeStates", mock.Anything).Return() + m.On("CleanUpRuntimeGauge", mock.Anything).Return() + m.On("IncRuntimeFSMStopCounter").Return() + return withMetrics(m) + } + inputRtWithLabels := makeInputRuntimeWithLabels() inputRtWithLabelsAndCondition := makeInputRuntimeWithLabels() @@ -74,7 +84,7 @@ var _ = Describe("KIM sFnCreateKubeconfig", func() { // and set Runtime state to Pending with condition type ConditionTypeRuntimeKubeconfigReady and reason ConditionReasonGardenerCRCreated "should create GardenCluster CR when it does not existed before", testCtx, - must(newFakeFSM, withTestFinalizer, withTestSchemeAndObjects()), + must(newFakeFSM, withTestFinalizer, withTestSchemeAndObjects(), withMockedMetrics(), withDefaultReconcileDuration()), &systemState{instance: *inputRtWithLabels, shoot: &testShoot}, testOpts{ MatchExpectedErr: BeNil(), @@ -85,7 +95,7 @@ var _ = Describe("KIM sFnCreateKubeconfig", func() { Entry( "should remain in waiting state when GardenCluster CR exists and is not ready yet", testCtx, - must(newFakeFSM, withTestFinalizer, withTestSchemeAndObjects(testGardenerCRStatePending)), + must(newFakeFSM, withTestFinalizer, withTestSchemeAndObjects(testGardenerCRStatePending), withMockedMetrics(), withDefaultReconcileDuration()), &systemState{instance: *inputRtWithLabels, shoot: &testShoot}, testOpts{ MatchExpectedErr: BeNil(), @@ -95,7 +105,7 @@ var _ = Describe("KIM sFnCreateKubeconfig", func() { Entry( "should return sFnProcessShoot when GardenCluster CR exists and is in ready state", testCtx, - must(newFakeFSM, withTestFinalizer, withTestSchemeAndObjects(testGardenerCRStateReady)), + must(newFakeFSM, withTestFinalizer, withTestSchemeAndObjects(testGardenerCRStateReady), withMockedMetrics(), withDefaultReconcileDuration()), &systemState{instance: *inputRtWithLabelsAndCondition, shoot: &testShoot}, testOpts{ MatchExpectedErr: BeNil(), @@ -105,7 +115,7 @@ var _ = Describe("KIM sFnCreateKubeconfig", func() { Entry( "should return sFnUpdateStatus when GardenCluster CR exists and is in ready state and condition is not set", testCtx, - must(newFakeFSM, withTestFinalizer, withTestSchemeAndObjects(testGardenerCRStateReady)), + must(newFakeFSM, withTestFinalizer, withTestSchemeAndObjects(testGardenerCRStateReady), withMockedMetrics(), withDefaultReconcileDuration()), &systemState{instance: *inputRtWithLabels, shoot: &testShoot}, testOpts{ MatchExpectedErr: BeNil(), diff --git a/internal/controller/runtime/fsm/runtime_fsm_create_shoot.go b/internal/controller/runtime/fsm/runtime_fsm_create_shoot.go index 3cacb929..63bddc81 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_create_shoot.go +++ b/internal/controller/runtime/fsm/runtime_fsm_create_shoot.go @@ -14,6 +14,7 @@ func sFnCreateShoot(ctx context.Context, m *fsm, s *systemState) (stateFn, *ctrl newShoot, err := convertShoot(&s.instance, m.Config.ConverterConfig) if err != nil { m.log.Error(err, "Failed to convert Runtime instance to shoot object") + m.Metrics.IncRuntimeFSMStopCounter() return updateStatePendingWithErrorAndStop( &s.instance, imv1.ConditionTypeRuntimeProvisioned, @@ -32,7 +33,7 @@ func sFnCreateShoot(ctx context.Context, m *fsm, s *systemState) (stateFn, *ctrl "False", fmt.Sprintf("Gardener API create error: %v", err), ) - return updateStatusAndRequeueAfter(gardenerRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.GardenerRequeueDuration) } m.log.Info( @@ -55,5 +56,5 @@ func sFnCreateShoot(ctx context.Context, m *fsm, s *systemState) (stateFn, *ctrl return switchState(sFnDumpShootSpec) } - return updateStatusAndRequeueAfter(gardenerRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.GardenerRequeueDuration) } diff --git a/internal/controller/runtime/fsm/runtime_fsm_create_shoot_dry_run.go b/internal/controller/runtime/fsm/runtime_fsm_create_shoot_dry_run.go index 93cdc662..e8fa9feb 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_create_shoot_dry_run.go +++ b/internal/controller/runtime/fsm/runtime_fsm_create_shoot_dry_run.go @@ -13,6 +13,7 @@ func sFnCreateShootDryRun(_ context.Context, m *fsm, s *systemState) (stateFn, * newShoot, err := convertShoot(&s.instance, m.Config.ConverterConfig) if err != nil { m.log.Error(err, "Failed to convert Runtime instance to shoot object [dry-run]") + m.Metrics.IncRuntimeFSMStopCounter() return updateStatePendingWithErrorAndStop( &s.instance, imv1.ConditionTypeRuntimeProvisionedDryRun, diff --git a/internal/controller/runtime/fsm/runtime_fsm_delete_kubeconfig.go b/internal/controller/runtime/fsm/runtime_fsm_delete_kubeconfig.go index 5208322e..fe1cc5fc 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_delete_kubeconfig.go +++ b/internal/controller/runtime/fsm/runtime_fsm_delete_kubeconfig.go @@ -25,6 +25,7 @@ func sFnDeleteKubeconfig(ctx context.Context, m *fsm, s *systemState) (stateFn, if !k8serrors.IsNotFound(err) { m.log.Error(err, "GardenerCluster CR read error", "name", runtimeID) s.instance.UpdateStateDeletion(imv1.RuntimeStateTerminating, imv1.ConditionReasonKubernetesAPIErr, "False", err.Error()) + m.Metrics.IncRuntimeFSMStopCounter() return updateStatusAndStop() } @@ -39,7 +40,7 @@ func sFnDeleteKubeconfig(ctx context.Context, m *fsm, s *systemState) (stateFn, // wait section if !cluster.DeletionTimestamp.IsZero() { m.log.Info("Waiting for GardenerCluster CR to be deleted", "Runtime", runtimeID, "Shoot", s.shoot.Name) - return requeueAfter(controlPlaneRequeueDuration) + return requeueAfter(m.RCCfg.ControlPlaneRequeueDuration) } // action section @@ -64,5 +65,5 @@ func sFnDeleteKubeconfig(ctx context.Context, m *fsm, s *systemState) (stateFn, } // out succeeded section - return updateStatusAndRequeueAfter(controlPlaneRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.ControlPlaneRequeueDuration) } diff --git a/internal/controller/runtime/fsm/runtime_fsm_delete_shoot.go b/internal/controller/runtime/fsm/runtime_fsm_delete_shoot.go index 2b47c7bb..6f19c099 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_delete_shoot.go +++ b/internal/controller/runtime/fsm/runtime_fsm_delete_shoot.go @@ -15,7 +15,7 @@ func sFnDeleteShoot(ctx context.Context, m *fsm, s *systemState) (stateFn, *ctrl // wait section if !s.shoot.GetDeletionTimestamp().IsZero() { m.log.Info("Waiting for shoot to be deleted", "Name", s.shoot.Name, "Namespace", s.shoot.Namespace) - return requeueAfter(gardenerRequeueDuration) + return requeueAfter(m.RCCfg.GardenerRequeueDuration) } // action section @@ -57,7 +57,7 @@ func sFnDeleteShoot(ctx context.Context, m *fsm, s *systemState) (stateFn, *ctrl } // out section - return updateStatusAndRequeueAfter(gardenerRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.GardenerRequeueDuration) } func isGardenerCloudDelConfirmationSet(a map[string]string) bool { diff --git a/internal/controller/runtime/fsm/runtime_fsm_initialise.go b/internal/controller/runtime/fsm/runtime_fsm_initialise.go index 7c2ebc3b..708d0de0 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_initialise.go +++ b/internal/controller/runtime/fsm/runtime_fsm_initialise.go @@ -4,6 +4,7 @@ import ( "context" imv1 "github.com/kyma-project/infrastructure-manager/api/v1" + "github.com/kyma-project/infrastructure-manager/internal/controller/metrics" "k8s.io/apimachinery/pkg/api/meta" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -75,7 +76,7 @@ func sFnInitialize(ctx context.Context, m *fsm, s *systemState) (stateFn, *ctrl. } m.log.Info("noting to reconcile, stopping fsm") - return stop() + return stopWithMetrics() } func addFinalizerAndRequeue(ctx context.Context, m *fsm, s *systemState) (stateFn, *ctrl.Result, error) { @@ -91,11 +92,14 @@ func addFinalizerAndRequeue(ctx context.Context, m *fsm, s *systemState) (stateF func removeFinalizerAndStop(ctx context.Context, m *fsm, s *systemState) (stateFn, *ctrl.Result, error) { m.log.Info("removing finalizer") + runtimeID := s.instance.GetLabels()[metrics.RuntimeIDLabel] controllerutil.RemoveFinalizer(&s.instance, m.Finalizer) - err := m.Update(ctx, &s.instance) if err != nil { return updateStatusAndStopWithError(err) } + + // remove from metrics + m.Metrics.CleanUpRuntimeGauge(runtimeID) return stop() } diff --git a/internal/controller/runtime/fsm/runtime_fsm_initialise_test.go b/internal/controller/runtime/fsm/runtime_fsm_initialise_test.go index 79648dc1..b05cc1e2 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_initialise_test.go +++ b/internal/controller/runtime/fsm/runtime_fsm_initialise_test.go @@ -2,10 +2,12 @@ package fsm import ( "context" + "github.com/stretchr/testify/mock" "time" gardener "github.com/gardener/gardener/pkg/apis/core/v1beta1" imv1 "github.com/kyma-project/infrastructure-manager/api/v1" + "github.com/kyma-project/infrastructure-manager/internal/controller/metrics/mocks" . "github.com/onsi/ginkgo/v2" //nolint:revive . "github.com/onsi/gomega" //nolint:revive "github.com/onsi/gomega/types" @@ -35,6 +37,14 @@ var _ = Describe("KIM sFnInitialise", func() { } } + withMockedMetrics := func() fakeFSMOpt { + m := &mocks.Metrics{} + m.On("SetRuntimeStates", mock.Anything).Return() + m.On("CleanUpRuntimeGauge", mock.Anything).Return() + m.On("IncRuntimeFSMStopCounter").Return() + return withMetrics(m) + } + testRt := imv1.Runtime{ ObjectMeta: metav1.ObjectMeta{ Name: "test-instance", @@ -149,17 +159,17 @@ var _ = Describe("KIM sFnInitialise", func() { Entry( "should return nothing when CR is being deleted without finalizer and shoot is missing", testCtx, - must(newFakeFSM, withTestFinalizer), + must(newFakeFSM, withTestFinalizer, withMockedMetrics(), withDefaultReconcileDuration()), &systemState{instance: testRtWithDeletionTimestamp}, testOpts{ MatchExpectedErr: BeNil(), - MatchNextFnState: BeNil(), + MatchNextFnState: haveName("stopWithMetrics"), }, ), Entry( "should return sFnUpdateStatus when CR is being deleted with finalizer and shoot is missing - Remove finalizer", testCtx, - must(newFakeFSM, withTestFinalizer, withTestSchemeAndObjects(&testRt)), + must(newFakeFSM, withTestFinalizer, withTestSchemeAndObjects(&testRt), withMockedMetrics(), withDefaultReconcileDuration()), &systemState{instance: testRtWithDeletionTimestampAndFinalizer}, testOpts{ MatchExpectedErr: BeNil(), @@ -169,7 +179,7 @@ var _ = Describe("KIM sFnInitialise", func() { Entry( "should return sFnDeleteShoot and no error when CR is being deleted with finalizer and shoot exists", testCtx, - must(newFakeFSM, withTestFinalizer), + must(newFakeFSM, withTestFinalizer, withMockedMetrics(), withDefaultReconcileDuration()), &systemState{instance: testRtWithDeletionTimestampAndFinalizer, shoot: &testShoot}, testOpts{ MatchExpectedErr: BeNil(), @@ -179,7 +189,7 @@ var _ = Describe("KIM sFnInitialise", func() { Entry( "should return sFnUpdateStatus and no error when CR has been created without finalizer - Add finalizer", testCtx, - must(newFakeFSM, withTestFinalizer, withTestSchemeAndObjects(&testRt)), + must(newFakeFSM, withTestFinalizer, withTestSchemeAndObjects(&testRt), withMockedMetrics(), withDefaultReconcileDuration()), &systemState{instance: testRt}, testOpts{ MatchExpectedErr: BeNil(), @@ -190,7 +200,7 @@ var _ = Describe("KIM sFnInitialise", func() { Entry( "should return sFnUpdateStatus and no error when there is no Provisioning Condition - Add condition", testCtx, - must(newFakeFSM, withTestFinalizer), + must(newFakeFSM, withTestFinalizer, withMockedMetrics(), withDefaultReconcileDuration()), &systemState{instance: testRtWithFinalizerNoProvisioningCondition}, testOpts{ MatchExpectedErr: BeNil(), @@ -200,7 +210,7 @@ var _ = Describe("KIM sFnInitialise", func() { Entry( "should return sFnCreateShoot and no error when exists Provisioning Condition and shoot is missing", testCtx, - must(newFakeFSM, withTestFinalizer), + must(newFakeFSM, withTestFinalizer, withMockedMetrics(), withDefaultReconcileDuration()), &systemState{instance: testRtWithFinalizerAndProvisioningCondition}, testOpts{ MatchExpectedErr: BeNil(), @@ -210,7 +220,7 @@ var _ = Describe("KIM sFnInitialise", func() { Entry( "should return sFnCreateShootDryRun and no error when exists Provisioning Condition and shoot is missing", testCtx, - must(newFakeFSM, withTestFinalizer), + must(newFakeFSM, withTestFinalizer, withMockedMetrics(), withDefaultReconcileDuration()), &systemState{instance: testDryRunRtWithFinalizerAndProvisioningCondition}, testOpts{ MatchExpectedErr: BeNil(), @@ -220,17 +230,17 @@ var _ = Describe("KIM sFnInitialise", func() { Entry( "should stop when sFnCreateShootDryRun was already executed", testCtx, - must(newFakeFSM, withTestFinalizer), + must(newFakeFSM, withTestFinalizer, withMockedMetrics(), withDefaultReconcileDuration()), &systemState{instance: testDryRunRtWithFinalizerAndProvisioningReadyCondition}, testOpts{ MatchExpectedErr: BeNil(), - MatchNextFnState: BeNil(), + MatchNextFnState: haveName("stopWithMetrics"), }, ), Entry( "should return sFnSelectShootProcessing and no error when exists Provisioning Condition and shoot exists", testCtx, - must(newFakeFSM, withTestFinalizer), + must(newFakeFSM, withTestFinalizer, withMockedMetrics(), withDefaultReconcileDuration()), &systemState{instance: testRtWithFinalizerAndProvisioningCondition, shoot: &testShoot}, testOpts{ MatchExpectedErr: BeNil(), diff --git a/internal/controller/runtime/fsm/runtime_fsm_patch_shoot.go b/internal/controller/runtime/fsm/runtime_fsm_patch_shoot.go index 6682b5a3..63a92413 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_patch_shoot.go +++ b/internal/controller/runtime/fsm/runtime_fsm_patch_shoot.go @@ -19,6 +19,7 @@ func sFnPatchExistingShoot(ctx context.Context, m *fsm, s *systemState) (stateFn updatedShoot, err := convertShoot(&s.instance, m.Config.ConverterConfig) if err != nil { m.log.Error(err, "Failed to convert Runtime instance to shoot object, exiting with no retry") + m.Metrics.IncRuntimeFSMStopCounter() return updateStatePendingWithErrorAndStop(&s.instance, imv1.ConditionTypeRuntimeProvisioned, imv1.ConditionReasonConversionError, "Runtime conversion error") } @@ -32,10 +33,11 @@ func sFnPatchExistingShoot(ctx context.Context, m *fsm, s *systemState) (stateFn if err != nil { if k8serrors.IsConflict(err) { m.log.Info("Gardener shoot for runtime is outdated, retrying", "Name", s.shoot.Name, "Namespace", s.shoot.Namespace) - return updateStatusAndRequeueAfter(gardenerRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.GardenerRequeueDuration) } m.log.Error(err, "Failed to patch shoot object, exiting with no retry") + m.Metrics.IncRuntimeFSMStopCounter() return updateStatePendingWithErrorAndStop(&s.instance, imv1.ConditionTypeRuntimeProvisioned, imv1.ConditionReasonProcessingErr, "Shoot patch error") } @@ -53,7 +55,7 @@ func sFnPatchExistingShoot(ctx context.Context, m *fsm, s *systemState) (stateFn "Shoot is pending for update", ) - return updateStatusAndRequeueAfter(gardenerRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.GardenerRequeueDuration) } func convertShoot(instance *imv1.Runtime, cfg config.ConverterConfig) (gardener.Shoot, error) { diff --git a/internal/controller/runtime/fsm/runtime_fsm_persist_shoot.go b/internal/controller/runtime/fsm/runtime_fsm_persist_shoot.go index 21b67fdf..5346c959 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_persist_shoot.go +++ b/internal/controller/runtime/fsm/runtime_fsm_persist_shoot.go @@ -60,7 +60,7 @@ func sFnDumpShootSpec(_ context.Context, m *fsm, s *systemState) (stateFn, *ctrl if err := persist(paths["runtime"], runtimeCp, m.writerProvider); err != nil { return updateStatusAndStopWithError(err) } - return updateStatusAndRequeueAfter(gardenerRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.GardenerRequeueDuration) } func createFilesPath(pvcPath, namespace, name string) map[string]string { diff --git a/internal/controller/runtime/fsm/runtime_fsm_persist_shoot_test.go b/internal/controller/runtime/fsm/runtime_fsm_persist_shoot_test.go index 4ceda6ea..be161611 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_persist_shoot_test.go +++ b/internal/controller/runtime/fsm/runtime_fsm_persist_shoot_test.go @@ -3,11 +3,13 @@ package fsm import ( "bytes" "context" + "github.com/stretchr/testify/mock" "io" "time" gardener "github.com/gardener/gardener/pkg/apis/core/v1beta1" "github.com/kyma-project/infrastructure-manager/internal/config" + "github.com/kyma-project/infrastructure-manager/internal/controller/metrics/mocks" "github.com/kyma-project/infrastructure-manager/internal/controller/runtime/fsm/testing" . "github.com/onsi/ginkgo/v2" //nolint:revive . "github.com/onsi/gomega" //nolint:revive @@ -23,6 +25,14 @@ var _ = Describe("KIM sFnPersist", func() { } }() + withMockedMetrics := func() fakeFSMOpt { + m := &mocks.Metrics{} + m.On("SetRuntimeStates", mock.Anything).Return() + m.On("CleanUpRuntimeGauge", mock.Anything).Return() + m.On("IncRuntimeFSMStopCounter").Return() + return withMetrics(m) + } + testCtx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() @@ -30,7 +40,10 @@ var _ = Describe("KIM sFnPersist", func() { expectedRuntime.Spec.Shoot.Provider.Type = "aws" It("should persist shoot data", func() { - next, _, err := sFnDumpShootSpec(testCtx, must(newFakeFSM, withStorageWriter(testWriterGetter), withConverterConfig(config.ConverterConfig{})), &systemState{shoot: &testing.ShootNoDNS, instance: *expectedRuntime}) + next, _, err := sFnDumpShootSpec(testCtx, + must(newFakeFSM, withStorageWriter(testWriterGetter), withConverterConfig(config.ConverterConfig{}), withMockedMetrics(), withDefaultReconcileDuration()), + &systemState{shoot: &testing.ShootNoDNS, instance: *expectedRuntime}, + ) Expect(err).To(BeNil()) Expect(next).To(haveName("sFnUpdateStatus")) diff --git a/internal/controller/runtime/fsm/runtime_fsm_select_shoot_processing.go b/internal/controller/runtime/fsm/runtime_fsm_select_shoot_processing.go index 45843a3c..452ae8d9 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_select_shoot_processing.go +++ b/internal/controller/runtime/fsm/runtime_fsm_select_shoot_processing.go @@ -19,14 +19,14 @@ func sFnSelectShootProcessing(_ context.Context, m *fsm, s *systemState) (stateF if s.shoot.Spec.DNS == nil || s.shoot.Spec.DNS.Domain == nil { msg := fmt.Sprintf("DNS Domain is not set yet for shoot: %s, scheduling for retry", s.shoot.Name) m.log.Info(msg) - return updateStatusAndRequeueAfter(gardenerRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.GardenerRequeueDuration) } lastOperation := s.shoot.Status.LastOperation if lastOperation == nil { msg := fmt.Sprintf("Last operation is nil for shoot: %s, scheduling for retry", s.shoot.Name) m.log.Info(msg) - return updateStatusAndRequeueAfter(gardenerRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.GardenerRequeueDuration) } patchShoot, err := shouldPatchShoot(&s.instance, s.shoot) @@ -50,7 +50,7 @@ func sFnSelectShootProcessing(_ context.Context, m *fsm, s *systemState) (stateF } m.log.Info("Unknown shoot operation type, exiting with no retry") - return stop() + return stopWithMetrics() } func shouldPatchShoot(runtime *imv1.Runtime, shoot *gardener.Shoot) (bool, error) { diff --git a/internal/controller/runtime/fsm/runtime_fsm_take_snapshot.go b/internal/controller/runtime/fsm/runtime_fsm_take_snapshot.go index 49830c8d..e4388e59 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_take_snapshot.go +++ b/internal/controller/runtime/fsm/runtime_fsm_take_snapshot.go @@ -26,7 +26,7 @@ func sFnTakeSnapshot(ctx context.Context, m *fsm, s *systemState) (stateFn, *ctr if err != nil && !apierrors.IsNotFound(err) { m.log.Info("Failed to get Gardener shoot", "error", err) - return updateStatusAndRequeueAfter(gardenerRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.GardenerRequeueDuration) } if err == nil { diff --git a/internal/controller/runtime/fsm/runtime_fsm_update_status.go b/internal/controller/runtime/fsm/runtime_fsm_update_status.go index 7248573c..256a1927 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_update_status.go +++ b/internal/controller/runtime/fsm/runtime_fsm_update_status.go @@ -9,12 +9,17 @@ import ( func sFnUpdateStatus(result *ctrl.Result, err error) stateFn { return func(ctx context.Context, m *fsm, s *systemState) (stateFn, *ctrl.Result, error) { + if err != nil { + m.Metrics.IncRuntimeFSMStopCounter() + } + // make sure there is a change in status if reflect.DeepEqual(s.instance.Status, s.snapshot) { return nil, result, err } updateErr := m.Status().Update(ctx, &s.instance) + if updateErr != nil { m.log.Error(updateErr, "unable to update instance status!") if err == nil { @@ -22,6 +27,8 @@ func sFnUpdateStatus(result *ctrl.Result, err error) stateFn { } return nil, nil, err } + + m.Metrics.SetRuntimeStates(s.instance) next := sFnEmmitEventfunc(nil, result, err) return next, nil, nil } diff --git a/internal/controller/runtime/fsm/runtime_fsm_waiting_for_shoot_reconcile.go b/internal/controller/runtime/fsm/runtime_fsm_waiting_for_shoot_reconcile.go index 2510c239..24091785 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_waiting_for_shoot_reconcile.go +++ b/internal/controller/runtime/fsm/runtime_fsm_waiting_for_shoot_reconcile.go @@ -22,7 +22,7 @@ func sFnWaitForShootReconcile(_ context.Context, m *fsm, s *systemState) (stateF "Unknown", "Shoot update is in progress") - return updateStatusAndRequeueAfter(gardenerRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.GardenerRequeueDuration) case gardener.LastOperationStateFailed: var reason ErrReason @@ -40,6 +40,7 @@ func sFnWaitForShootReconcile(_ context.Context, m *fsm, s *systemState) (stateF "False", string(reason), ) + m.Metrics.IncRuntimeFSMStopCounter() return updateStatusAndStop() case gardener.LastOperationStateSucceeded: @@ -53,5 +54,5 @@ func sFnWaitForShootReconcile(_ context.Context, m *fsm, s *systemState) (stateF } m.log.Info("Update did not processed, exiting with no retry") - return stop() + return stopWithMetrics() } diff --git a/internal/controller/runtime/fsm/runtime_fsm_waiting_shoot_creation.go b/internal/controller/runtime/fsm/runtime_fsm_waiting_shoot_creation.go index f5c4b3de..f9f811cf 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_waiting_shoot_creation.go +++ b/internal/controller/runtime/fsm/runtime_fsm_waiting_shoot_creation.go @@ -40,12 +40,12 @@ func sFnWaitForShootCreation(_ context.Context, m *fsm, s *systemState) (stateFn "Unknown", "Shoot creation in progress") - return updateStatusAndRequeueAfter(gardenerRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.GardenerRequeueDuration) case gardener.LastOperationStateFailed: if gardenerhelper.HasErrorCode(s.shoot.Status.LastErrors, gardener.ErrorInfraRateLimitsExceeded) { m.log.Info(fmt.Sprintf("Error during cluster provisioning: Rate limits exceeded for Shoot %s, scheduling for retry", s.shoot.Name)) - return updateStatusAndRequeueAfter(gardenerRequeueDuration) + return updateStatusAndRequeueAfter(m.RCCfg.GardenerRequeueDuration) } // also handle other retryable errors here @@ -61,6 +61,7 @@ func sFnWaitForShootCreation(_ context.Context, m *fsm, s *systemState) (stateFn "False", "Shoot creation failed") + m.Metrics.IncRuntimeFSMStopCounter() return updateStatusAndStop() case gardener.LastOperationStateSucceeded: @@ -74,7 +75,7 @@ func sFnWaitForShootCreation(_ context.Context, m *fsm, s *systemState) (stateFn default: m.log.Info("Unknown shoot operation state, exiting with no retry") - return stop() + return stopWithMetrics() } } diff --git a/internal/controller/runtime/fsm/utilz_for_test.go b/internal/controller/runtime/fsm/utilz_for_test.go index d159c441..67047bbe 100644 --- a/internal/controller/runtime/fsm/utilz_for_test.go +++ b/internal/controller/runtime/fsm/utilz_for_test.go @@ -3,10 +3,12 @@ package fsm import ( "context" "fmt" + "time" gardener "github.com/gardener/gardener/pkg/apis/core/v1beta1" gardener_api "github.com/gardener/gardener/pkg/apis/core/v1beta1" "github.com/kyma-project/infrastructure-manager/internal/config" + "github.com/kyma-project/infrastructure-manager/internal/controller/metrics" . "github.com/onsi/ginkgo/v2" //nolint:revive . "github.com/onsi/gomega" //nolint:revive metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -20,6 +22,9 @@ import ( type fakeFSMOpt func(*fsm) error +const defaultControlPlaneRequeueDuration = 10 * time.Second +const defaultGardenerRequeueDuration = 15 * time.Second + var ( errFailedToCreateFakeFSM = fmt.Errorf("failed to create fake FSM") @@ -51,6 +56,21 @@ var ( } } + withMetrics = func(m metrics.Metrics) fakeFSMOpt { + return func(fsm *fsm) error { + fsm.Metrics = m + return nil + } + } + + withDefaultReconcileDuration = func() fakeFSMOpt { + return func(fsm *fsm) error { + fsm.ControlPlaneRequeueDuration = defaultControlPlaneRequeueDuration + fsm.GardenerRequeueDuration = defaultGardenerRequeueDuration + return nil + } + } + withFakedK8sClient = func( scheme *runtime.Scheme, objs ...client.Object) fakeFSMOpt { @@ -120,6 +140,9 @@ func (s *stubAuditLogging) Enable(ctx context.Context, shoot *gardener.Shoot) (b return s.isEnabled, s.err } +func (s *stubAuditLogging) UpdateShootClient(client client.Client) { +} + func newSetupStateForTest(sfn stateFn, opts ...func(*systemState) error) stateFn { return func(_ context.Context, _ *fsm, s *systemState) (stateFn, *ctrl.Result, error) { for _, fn := range opts { diff --git a/internal/controller/runtime/runtime_controller.go b/internal/controller/runtime/runtime_controller.go index 82161dee..d0293824 100644 --- a/internal/controller/runtime/runtime_controller.go +++ b/internal/controller/runtime/runtime_controller.go @@ -51,7 +51,6 @@ func (r *RuntimeReconciler) Reconcile(ctx context.Context, request ctrl.Request) r.Log.Info(request.String()) var runtime imv1.Runtime - if err := r.Get(ctx, request.NamespacedName, &runtime); err != nil { return ctrl.Result{ Requeue: false, @@ -68,7 +67,7 @@ func (r *RuntimeReconciler) Reconcile(ctx context.Context, request ctrl.Request) ShootClient: r.ShootClient, EventRecorder: r.EventRecorder, }) - requCounter++ + return stateFSM.Run(ctx, runtime) } @@ -85,6 +84,7 @@ func NewRuntimeReconciler(mgr ctrl.Manager, shootClient client.Client, logger lo func (r *RuntimeReconciler) UpdateShootClient(client client.Client) { r.ShootClient = client + r.Cfg.AuditLogging.UpdateShootClient(client) } // SetupWithManager sets up the controller with the Manager. diff --git a/internal/controller/runtime/runtime_controller_test.go b/internal/controller/runtime/runtime_controller_test.go index d7bf833b..d69df97f 100644 --- a/internal/controller/runtime/runtime_controller_test.go +++ b/internal/controller/runtime/runtime_controller_test.go @@ -122,7 +122,7 @@ var _ = Describe("Runtime Controller", func() { //TODO: condition should be 'TRUE' return true - }, time.Second*300, time.Second*3).Should(BeTrue()) + }, time.Second*600, time.Second*3).Should(BeTrue()) Expect(customTracker.IsSequenceFullyUsed()).To(BeTrue()) diff --git a/internal/controller/runtime/suite_test.go b/internal/controller/runtime/suite_test.go index aaedd822..484fe55e 100644 --- a/internal/controller/runtime/suite_test.go +++ b/internal/controller/runtime/suite_test.go @@ -19,7 +19,10 @@ package runtime import ( "context" "encoding/json" + "github.com/pkg/errors" + "k8s.io/apimachinery/pkg/types" "path/filepath" + "sigs.k8s.io/controller-runtime/pkg/client/interceptor" "testing" "time" @@ -28,11 +31,12 @@ import ( infrastructuremanagerv1 "github.com/kyma-project/infrastructure-manager/api/v1" "github.com/kyma-project/infrastructure-manager/internal/auditlogging" "github.com/kyma-project/infrastructure-manager/internal/config" + "github.com/kyma-project/infrastructure-manager/internal/controller/metrics/mocks" "github.com/kyma-project/infrastructure-manager/internal/controller/runtime/fsm" gardener_shoot "github.com/kyma-project/infrastructure-manager/pkg/gardener/shoot" . "github.com/onsi/ginkgo/v2" //nolint:revive . "github.com/onsi/gomega" //nolint:revive - "github.com/pkg/errors" + "github.com/stretchr/testify/mock" v1 "k8s.io/api/autoscaling/v1" v12 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" @@ -40,7 +44,6 @@ import ( //nolint:revive "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/serializer" - "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" clienttesting "k8s.io/client-go/testing" @@ -48,7 +51,6 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - "sigs.k8s.io/controller-runtime/pkg/client/interceptor" "sigs.k8s.io/controller-runtime/pkg/envtest" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" @@ -112,7 +114,23 @@ var _ = BeforeSuite(func() { customTracker = NewCustomTracker(tracker, []*gardener_api.Shoot{}, []*gardener_api.Seed{}) gardenerTestClient = fake.NewClientBuilder().WithScheme(clientScheme).WithObjectTracker(customTracker).Build() - runtimeReconciler = NewRuntimeReconciler(mgr, gardenerTestClient, logger, fsm.RCCfg{Finalizer: infrastructuremanagerv1.Finalizer, Config: fixConverterConfigForTests()}) + convConfig := fixConverterConfigForTests() + + mm := &mocks.Metrics{} + mm.On("SetRuntimeStates", mock.Anything).Return() + mm.On("IncRuntimeFSMStopCounter").Return() + mm.On("CleanUpRuntimeGauge", mock.Anything).Return() + + fsmCfg := fsm.RCCfg{ + Finalizer: infrastructuremanagerv1.Finalizer, + Config: convConfig, + Metrics: mm, + AuditLogging: auditlogging.NewAuditLogging(convConfig.ConverterConfig.AuditLog.TenantConfigPath, convConfig.ConverterConfig.AuditLog.PolicyConfigMapName, gardenerTestClient), + GardenerRequeueDuration: 3 * time.Second, + ControlPlaneRequeueDuration: 3 * time.Second, + } + + runtimeReconciler = NewRuntimeReconciler(mgr, gardenerTestClient, logger, fsmCfg) Expect(runtimeReconciler).NotTo(BeNil()) err = runtimeReconciler.SetupWithManager(mgr) Expect(err).To(BeNil())