From 3cc605638747ee4a8d97682d1a8b1c219b42bfe7 Mon Sep 17 00:00:00 2001 From: Mario Constanti Date: Thu, 5 Oct 2023 11:51:10 +0200 Subject: [PATCH] feat: add new metrics add info metrics about providers, enterprises, organizations, repositories and pools. Also expose most of the configurable pool information as metric like e.g. max Runners as garm_pool_max_runners Signed-off-by: Mario Constanti --- doc/config_metrics.md | 52 +++++++++- metrics/enterprise.go | 50 ++++++++++ metrics/health.go | 22 +++++ metrics/instance.go | 79 +++++++++++++++ metrics/metrics.go | 206 ++++++++++++++++++++++------------------ metrics/organization.go | 50 ++++++++++ metrics/pool.go | 121 +++++++++++++++++++++++ metrics/provider.go | 36 +++++++ metrics/repository.go | 51 ++++++++++ metrics/util.go | 8 ++ 10 files changed, 579 insertions(+), 96 deletions(-) create mode 100644 metrics/enterprise.go create mode 100644 metrics/health.go create mode 100644 metrics/instance.go create mode 100644 metrics/organization.go create mode 100644 metrics/pool.go create mode 100644 metrics/provider.go create mode 100644 metrics/repository.go create mode 100644 metrics/util.go diff --git a/doc/config_metrics.md b/doc/config_metrics.md index caa50b1b..8eaeb214 100644 --- a/doc/config_metrics.md +++ b/doc/config_metrics.md @@ -2,11 +2,55 @@ This is one of the features in GARM that I really love having. For one thing, it's community contributed and for another, it really adds value to the project. It allows us to create some pretty nice visualizations of what is happening with GARM. -At the moment there are only three meaningful metrics being collected, besides the default ones that the prometheus golang package enables by default. These are: +## Common metrics -* `garm_health` - This is a gauge that is set to 1 if GARM is healthy and 0 if it is not. This is useful for alerting. 
-* `garm_runner_status` - This is a gauge value that gives us details about the runners garm spawns -* `garm_webhooks_received` - This is a counter that increments every time GARM receives a webhook from GitHub. +| Metric name | Type | Labels | Description | +|--------------------------|---------|-------------------------------------------------------------------|------------------------------------------------------------------------------------------------------| +| `garm_health` | Gauge | `controller_id`=<controller id>
`hostname`=<hostname> | This is a gauge that is set to 1 if GARM is healthy and 0 if it is not. This is useful for alerting. | +| `garm_webhooks_received` | Counter | `controller_id`=<controller id>
`hostname`=<hostname> | This is a counter that increments every time GARM receives a webhook from GitHub. | + +## Enterprise metrics + +| Metric name | Type | Labels | Description | +|---------------------------------------|-------|-------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------| +| `garm_enterprise_info` | Gauge | `id`=<enterprise id>
`name`=<enterprise name> | This is a gauge that is set to 1 and exposes enterprise information | +| `garm_enterprise_pool_manager_status` | Gauge | `id`=<enterprise id>
`name`=<enterprise name>
`running`=<true\|false> | This is a gauge that is set to 1 if the enterprise pool manager is running and set to 0 if not | + +## Organization metrics + +| Metric name | Type | Labels | Description | +|-----------------------------------------|-------|-----------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------| +| `garm_organization_info` | Gauge | `id`=<organization id>
`name`=<organization name> | This is a gauge that is set to 1 and exposes organization information | +| `garm_organization_pool_manager_status` | Gauge | `id`=<organization id>
`name`=<organization name>
`running`=<true\|false> | This is a gauge that is set to 1 if the organization pool manager is running and set to 0 if not | + +## Repository metrics + +| Metric name | Type | Labels | Description | +|---------------------------------------|-------|-------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------| +| `garm_repository_info` | Gauge | `id`=<repository id>
`name`=<repository name> | This is a gauge that is set to 1 and exposes repository information | +| `garm_repository_pool_manager_status` | Gauge | `id`=<repository id>
`name`=<repository name>
`running`=<true\|false> | This is a gauge that is set to 1 if the repository pool manager is running and set to 0 if not | + +## Provider metrics + +| Metric name | Type | Labels | Description | +|----------------------|-------|-------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------| +| `garm_provider_info` | Gauge | `description`=<provider description>
`name`=<provider name>
`type`=<internal\|external> | This is a gauge that is set to 1 and expose provider information | + +## Pool metrics + +| Metric name | Type | Labels | Description | +|-------------------------------|-------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------| +| `garm_pool_info` | Gauge | `flavor`=<flavor>
`id`=<pool id>
`image`=<image name>
`os_arch`=<defined OS arch>
`os_type`=<defined OS name>
`pool_owner`=<owner name>
`pool_type`=<repository\|organization\|enterprise>
`prefix`=<prefix>
`provider`=<provider name>
`tags`=<concatenated list of pool tags>
| This is a gauge that is set to 1 and exposes pool information | +| `garm_pool_status` | Gauge | `enabled`=<true\|false>
`id`=<pool id> | This is a gauge that is set to 1 if the pool is enabled and set to 0 if not | +| `garm_pool_bootstrap_timeout` | Gauge | `id`=<pool id> | This is a gauge that is set to the pool bootstrap timeout | +| `garm_pool_max_runners` | Gauge | `id`=<pool id> | This is a gauge that is set to the pool max runners | +| `garm_pool_min_idle_runners` | Gauge | `id`=<pool id> | This is a gauge that is set to the pool min idle runners | + +## Runner metrics + +| Metric name | Type | Labels | Description | +|----------------------|-------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------| +| `garm_runner_status` | Gauge | `controller_id`=<controller id>
`hostname`=<hostname>
`name`=<runner name>
`pool_owner`=<owner name>
`pool_type`=<repository\|organization\|enterprise>
`provider`=<provider name>
`runner_status`=<running\|stopped\|error\|pending_delete\|deleting\|pending_create\|creating\|unknown>
`status`=<idle\|pending\|terminated\|installing\|failed\|active>
| This is a gauge value that gives us details about the runners garm spawns | More metrics will be added in the future. diff --git a/metrics/enterprise.go b/metrics/enterprise.go new file mode 100644 index 00000000..1e788491 --- /dev/null +++ b/metrics/enterprise.go @@ -0,0 +1,50 @@ +package metrics + +import ( + "log" + "strconv" + + "github.com/cloudbase/garm/auth" + "github.com/prometheus/client_golang/prometheus" +) + +// CollectOrganizationMetric collects the metrics for the enterprise objects +func (c *GarmCollector) CollectEnterpriseMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) { + ctx := auth.GetAdminContext() + + enterprises, err := c.runner.ListEnterprises(ctx) + if err != nil { + log.Printf("listing providers: %s", err) + // continue anyway + } + + for _, enterprise := range enterprises { + + enterpriseInfo, err := prometheus.NewConstMetric( + c.enterpriseInfo, + prometheus.GaugeValue, + 1, + enterprise.Name, // label: name + enterprise.ID, // label: id + ) + if err != nil { + log.Printf("cannot collect enterpriseInfo metric: %s", err) + continue + } + ch <- enterpriseInfo + + enterprisePoolManagerStatus, err := prometheus.NewConstMetric( + c.enterprisePoolManagerStatus, + prometheus.GaugeValue, + bool2float64(enterprise.PoolManagerStatus.IsRunning), + enterprise.Name, // label: name + enterprise.ID, // label: id + strconv.FormatBool(enterprise.PoolManagerStatus.IsRunning), // label: running + ) + if err != nil { + log.Printf("cannot collect enterprisePoolManagerStatus metric: %s", err) + continue + } + ch <- enterprisePoolManagerStatus + } +} diff --git a/metrics/health.go b/metrics/health.go new file mode 100644 index 00000000..4cc9e83a --- /dev/null +++ b/metrics/health.go @@ -0,0 +1,22 @@ +package metrics + +import ( + "log" + + "github.com/prometheus/client_golang/prometheus" +) + +func (c *GarmCollector) CollectHealthMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) { + m, err := 
prometheus.NewConstMetric( + c.healthMetric, + prometheus.GaugeValue, + 1, + hostname, + controllerID, + ) + if err != nil { + log.Printf("error on creating health metric: %s", err) + return + } + ch <- m +} diff --git a/metrics/instance.go b/metrics/instance.go new file mode 100644 index 00000000..f25ab52b --- /dev/null +++ b/metrics/instance.go @@ -0,0 +1,79 @@ +package metrics + +import ( + "log" + + "github.com/cloudbase/garm/auth" + "github.com/prometheus/client_golang/prometheus" +) + +// CollectInstanceMetric collects the metrics for the runner instances +// reflecting the statuses and the pool they belong to. +func (c *GarmCollector) CollectInstanceMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) { + ctx := auth.GetAdminContext() + + instances, err := c.runner.ListAllInstances(ctx) + if err != nil { + log.Printf("cannot collect metrics, listing instances: %s", err) + return + } + + pools, err := c.runner.ListAllPools(ctx) + if err != nil { + log.Printf("listing pools: %s", err) + // continue anyway + } + + type poolInfo struct { + Name string + Type string + ProviderName string + } + + poolNames := make(map[string]poolInfo) + for _, pool := range pools { + if pool.EnterpriseName != "" { + poolNames[pool.ID] = poolInfo{ + Name: pool.EnterpriseName, + Type: string(pool.PoolType()), + ProviderName: pool.ProviderName, + } + } else if pool.OrgName != "" { + poolNames[pool.ID] = poolInfo{ + Name: pool.OrgName, + Type: string(pool.PoolType()), + ProviderName: pool.ProviderName, + } + } else { + poolNames[pool.ID] = poolInfo{ + Name: pool.RepoName, + Type: string(pool.PoolType()), + ProviderName: pool.ProviderName, + } + } + } + + for _, instance := range instances { + + m, err := prometheus.NewConstMetric( + c.instanceMetric, + prometheus.GaugeValue, + 1, + instance.Name, // label: name + string(instance.Status), // label: status + string(instance.RunnerStatus), // label: runner_status + poolNames[instance.PoolID].Name, // label: 
pool_owner + poolNames[instance.PoolID].Type, // label: pool_type + instance.PoolID, // label: pool_id + hostname, // label: hostname + controllerID, // label: controller_id + poolNames[instance.PoolID].ProviderName, // label: provider + ) + + if err != nil { + log.Printf("cannot collect runner metric: %s", err) + continue + } + ch <- m + } +} diff --git a/metrics/metrics.go b/metrics/metrics.go index 04e218a6..1032a88f 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -11,6 +11,15 @@ import ( "github.com/prometheus/client_golang/prometheus" ) +const metricsNamespace = "garm_" +const metricsRunnerSubsystem = "runner_" +const metricsPoolSubsystem = "pool_" +const metricsProviderSubsystem = "provider_" +const metricsOrganizationSubsystem = "organization_" +const metricsRepositorySubsystem = "repository_" +const metricsEnterpriseSubsystem = "enterprise_" +const metricsWebhookSubsystem = "webhook_" + var webhooksReceived *prometheus.CounterVec = nil // RecordWebhookWithLabels will increment a webhook metric identified by specific @@ -48,7 +57,7 @@ func RegisterCollectors(runner *runner.Runner) error { // at this point the webhook is not yet authenticated and // we don't know if it's meant for us or not webhooksReceived = prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "garm_webhooks_received", + Name: metricsNamespace + metricsWebhookSubsystem + "received", Help: "The total number of webhooks received", }, []string{"valid", "reason", "hostname", "controller_id"}) @@ -59,6 +68,31 @@ func RegisterCollectors(runner *runner.Runner) error { return nil } +type GarmCollector struct { + healthMetric *prometheus.Desc + instanceMetric *prometheus.Desc + + // pool metrics + poolInfo *prometheus.Desc + poolStatus *prometheus.Desc + poolMaxRunners *prometheus.Desc + poolMinIdleRunners *prometheus.Desc + poolBootstrapTimeout *prometheus.Desc + + // provider metrics + providerInfo *prometheus.Desc + + organizationInfo *prometheus.Desc + 
organizationPoolManagerStatus *prometheus.Desc + repositoryInfo *prometheus.Desc + repositoryPoolManagerStatus *prometheus.Desc + enterpriseInfo *prometheus.Desc + enterprisePoolManagerStatus *prometheus.Desc + + runner *runner.Runner + cachedControllerInfo params.ControllerInfo +} + func NewGarmCollector(r *runner.Runner) (*GarmCollector, error) { controllerInfo, err := r.GetControllerInfo(auth.GetAdminContext()) if err != nil { @@ -67,29 +101,92 @@ func NewGarmCollector(r *runner.Runner) (*GarmCollector, error) { return &GarmCollector{ runner: r, instanceMetric: prometheus.NewDesc( - "garm_runner_status", + metricsNamespace+metricsRunnerSubsystem+"status", "Status of the runner", - []string{"name", "status", "runner_status", "pool_owner", "pool_type", "pool_id", "hostname", "controller_id"}, nil, + []string{"name", "status", "runner_status", "pool_owner", "pool_type", "pool_id", "hostname", "controller_id", "provider"}, nil, ), healthMetric: prometheus.NewDesc( - "garm_health", + metricsNamespace+"health", "Health of the runner", []string{"hostname", "controller_id"}, nil, ), + poolInfo: prometheus.NewDesc( + metricsNamespace+metricsPoolSubsystem+"info", + "Information of the pool", + []string{"id", "image", "flavor", "prefix", "os_type", "os_arch", "tags", "provider", "pool_owner", "pool_type"}, nil, + ), + poolStatus: prometheus.NewDesc( + metricsNamespace+metricsPoolSubsystem+"status", + "Status of the pool", + []string{"id", "enabled"}, nil, + ), + poolMaxRunners: prometheus.NewDesc( + metricsNamespace+metricsPoolSubsystem+"max_runners", + "Max runners of the pool", + []string{"id"}, nil, + ), + poolMinIdleRunners: prometheus.NewDesc( + metricsNamespace+metricsPoolSubsystem+"min_idle_runners", + "Min idle runners of the pool", + []string{"id"}, nil, + ), + poolBootstrapTimeout: prometheus.NewDesc( + metricsNamespace+metricsPoolSubsystem+"bootstrap_timeout", + "Bootstrap timeout of the pool", + []string{"id"}, nil, + ), + providerInfo: prometheus.NewDesc( + 
metricsNamespace+metricsProviderSubsystem+"info", + "Info of the provider", + []string{"name", "type", "description"}, nil, + ), + organizationInfo: prometheus.NewDesc( + metricsNamespace+metricsOrganizationSubsystem+"info", + "Info of the organization", + []string{"name", "id"}, nil, + ), + organizationPoolManagerStatus: prometheus.NewDesc( + metricsNamespace+metricsOrganizationSubsystem+"pool_manager_status", + "Status of the organization pool manager", + []string{"name", "id", "running"}, nil, + ), + repositoryInfo: prometheus.NewDesc( + metricsNamespace+metricsRepositorySubsystem+"info", + "Info of the organization", + []string{"name", "owner", "id"}, nil, + ), + repositoryPoolManagerStatus: prometheus.NewDesc( + metricsNamespace+metricsRepositorySubsystem+"pool_manager_status", + "Status of the repository pool manager", + []string{"name", "id", "running"}, nil, + ), + enterpriseInfo: prometheus.NewDesc( + metricsNamespace+metricsEnterpriseSubsystem+"info", + "Info of the organization", + []string{"name", "id"}, nil, + ), + enterprisePoolManagerStatus: prometheus.NewDesc( + metricsNamespace+metricsEnterpriseSubsystem+"pool_manager_status", + "Status of the enterprise pool manager", + []string{"name", "id", "running"}, nil, + ), + cachedControllerInfo: controllerInfo, }, nil } -type GarmCollector struct { - healthMetric *prometheus.Desc - instanceMetric *prometheus.Desc - runner *runner.Runner - cachedControllerInfo params.ControllerInfo -} - func (c *GarmCollector) Describe(ch chan<- *prometheus.Desc) { ch <- c.instanceMetric ch <- c.healthMetric + ch <- c.poolInfo + ch <- c.poolStatus + ch <- c.poolMaxRunners + ch <- c.poolMinIdleRunners + ch <- c.providerInfo + ch <- c.organizationInfo + ch <- c.organizationPoolManagerStatus + ch <- c.enterpriseInfo + ch <- c.enterprisePoolManagerStatus } func (c *GarmCollector) Collect(ch chan<- prometheus.Metric) { @@ -98,87 +195,12 @@ func (c *GarmCollector) Collect(ch chan<- prometheus.Metric) { log.Printf("failed to get 
controller info: %s", err) return } + c.CollectInstanceMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String()) c.CollectHealthMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String()) -} - -func (c *GarmCollector) CollectHealthMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) { - m, err := prometheus.NewConstMetric( - c.healthMetric, - prometheus.GaugeValue, - 1, - hostname, - controllerID, - ) - if err != nil { - log.Printf("error on creating health metric: %s", err) - return - } - ch <- m -} - -// CollectInstanceMetric collects the metrics for the runner instances -// reflecting the statuses and the pool they belong to. -func (c *GarmCollector) CollectInstanceMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) { - ctx := auth.GetAdminContext() - - instances, err := c.runner.ListAllInstances(ctx) - if err != nil { - log.Printf("cannot collect metrics, listing instances: %s", err) - return - } - - pools, err := c.runner.ListAllPools(ctx) - if err != nil { - log.Printf("listing pools: %s", err) - // continue anyway - } - - type poolInfo struct { - Name string - Type string - } - - poolNames := make(map[string]poolInfo) - for _, pool := range pools { - if pool.EnterpriseName != "" { - poolNames[pool.ID] = poolInfo{ - Name: pool.EnterpriseName, - Type: string(pool.PoolType()), - } - } else if pool.OrgName != "" { - poolNames[pool.ID] = poolInfo{ - Name: pool.OrgName, - Type: string(pool.PoolType()), - } - } else { - poolNames[pool.ID] = poolInfo{ - Name: pool.RepoName, - Type: string(pool.PoolType()), - } - } - } - - for _, instance := range instances { - - m, err := prometheus.NewConstMetric( - c.instanceMetric, - prometheus.GaugeValue, - 1, - instance.Name, - string(instance.Status), - string(instance.RunnerStatus), - poolNames[instance.PoolID].Name, - poolNames[instance.PoolID].Type, - instance.PoolID, - hostname, - controllerID, - ) - - if err != nil { - log.Printf("cannot collect 
metrics, creating metric: %s", err) - continue - } - ch <- m - } + c.CollectPoolMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String()) + c.CollectProviderMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String()) + c.CollectOrganizationMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String()) + c.CollectRepositoryMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String()) + c.CollectEnterpriseMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String()) } diff --git a/metrics/organization.go b/metrics/organization.go new file mode 100644 index 00000000..06c8343e --- /dev/null +++ b/metrics/organization.go @@ -0,0 +1,50 @@ +package metrics + +import ( + "log" + "strconv" + + "github.com/cloudbase/garm/auth" + "github.com/prometheus/client_golang/prometheus" +) + +// CollectOrganizationMetric collects the metrics for the organization objects +func (c *GarmCollector) CollectOrganizationMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) { + ctx := auth.GetAdminContext() + + organizations, err := c.runner.ListOrganizations(ctx) + if err != nil { + log.Printf("listing providers: %s", err) + // continue anyway + } + + for _, organization := range organizations { + + organizationInfo, err := prometheus.NewConstMetric( + c.organizationInfo, + prometheus.GaugeValue, + 1, + organization.Name, // label: name + organization.ID, // label: id + ) + if err != nil { + log.Printf("cannot collect organizationInfo metric: %s", err) + continue + } + ch <- organizationInfo + + organizationPoolManagerStatus, err := prometheus.NewConstMetric( + c.organizationPoolManagerStatus, + prometheus.GaugeValue, + bool2float64(organization.PoolManagerStatus.IsRunning), + organization.Name, // label: name + organization.ID, // label: id + strconv.FormatBool(organization.PoolManagerStatus.IsRunning), // label: running + ) + if err != nil { + log.Printf("cannot collect organizationPoolManagerStatus 
metric: %s", err) + continue + } + ch <- organizationPoolManagerStatus + } +} diff --git a/metrics/pool.go b/metrics/pool.go new file mode 100644 index 00000000..a7a11fe1 --- /dev/null +++ b/metrics/pool.go @@ -0,0 +1,121 @@ +package metrics + +import ( + "log" + "strconv" + "strings" + + "github.com/cloudbase/garm/auth" + "github.com/prometheus/client_golang/prometheus" +) + +// CollectPoolMetric collects the metrics for the pool objects +func (c *GarmCollector) CollectPoolMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) { + ctx := auth.GetAdminContext() + + pools, err := c.runner.ListAllPools(ctx) + if err != nil { + log.Printf("listing pools: %s", err) + // continue anyway + } + + type poolInfo struct { + Name string + Type string + } + + poolNames := make(map[string]poolInfo) + for _, pool := range pools { + if pool.EnterpriseName != "" { + poolNames[pool.ID] = poolInfo{ + Name: pool.EnterpriseName, + Type: string(pool.PoolType()), + } + } else if pool.OrgName != "" { + poolNames[pool.ID] = poolInfo{ + Name: pool.OrgName, + Type: string(pool.PoolType()), + } + } else { + poolNames[pool.ID] = poolInfo{ + Name: pool.RepoName, + Type: string(pool.PoolType()), + } + } + + var poolTags []string + for _, tag := range pool.Tags { + poolTags = append(poolTags, tag.Name) + } + + poolInfo, err := prometheus.NewConstMetric( + c.poolInfo, + prometheus.GaugeValue, + 1, + pool.ID, // label: id + pool.Image, // label: image + pool.Flavor, // label: flavor + pool.Prefix, // label: prefix + string(pool.OSType), // label: os_type + string(pool.OSArch), // label: os_arch + strings.Join(poolTags, ","), // label: tags + pool.ProviderName, // label: provider + poolNames[pool.ID].Name, // label: pool_owner + poolNames[pool.ID].Type, // label: pool_type + ) + if err != nil { + log.Printf("cannot collect poolInfo metric: %s", err) + continue + } + ch <- poolInfo + + poolStatus, err := prometheus.NewConstMetric( + c.poolStatus, + prometheus.GaugeValue, + 
bool2float64(pool.Enabled), + pool.ID, // label: id + strconv.FormatBool(pool.Enabled), // label: enabled + ) + if err != nil { + log.Printf("cannot collect poolStatus metric: %s", err) + continue + } + ch <- poolStatus + + poolMaxRunners, err := prometheus.NewConstMetric( + c.poolMaxRunners, + prometheus.GaugeValue, + float64(pool.MaxRunners), + pool.ID, // label: id + ) + if err != nil { + log.Printf("cannot collect poolMaxRunners metric: %s", err) + continue + } + ch <- poolMaxRunners + + poolMinIdleRunners, err := prometheus.NewConstMetric( + c.poolMinIdleRunners, + prometheus.GaugeValue, + float64(pool.MinIdleRunners), + pool.ID, // label: id + ) + if err != nil { + log.Printf("cannot collect poolMinIdleRunners metric: %s", err) + continue + } + ch <- poolMinIdleRunners + + poolBootstrapTimeout, err := prometheus.NewConstMetric( + c.poolBootstrapTimeout, + prometheus.GaugeValue, + float64(pool.RunnerBootstrapTimeout), + pool.ID, // label: id + ) + if err != nil { + log.Printf("cannot collect poolBootstrapTimeout metric: %s", err) + continue + } + ch <- poolBootstrapTimeout + } +} diff --git a/metrics/provider.go b/metrics/provider.go new file mode 100644 index 00000000..328845da --- /dev/null +++ b/metrics/provider.go @@ -0,0 +1,36 @@ +package metrics + +import ( + "log" + + "github.com/cloudbase/garm/auth" + "github.com/prometheus/client_golang/prometheus" +) + +// CollectPoolMetric collects the metrics for the pool objects +func (c *GarmCollector) CollectProviderMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) { + ctx := auth.GetAdminContext() + + providers, err := c.runner.ListProviders(ctx) + if err != nil { + log.Printf("listing providers: %s", err) + // continue anyway + } + + for _, provider := range providers { + + providerInfo, err := prometheus.NewConstMetric( + c.providerInfo, + prometheus.GaugeValue, + 1, + provider.Name, // label: name + string(provider.ProviderType), // label: type + provider.Description, // label: 
description + ) + if err != nil { + log.Printf("cannot collect providerInfo metric: %s", err) + continue + } + ch <- providerInfo + } +} diff --git a/metrics/repository.go b/metrics/repository.go new file mode 100644 index 00000000..7a7ce930 --- /dev/null +++ b/metrics/repository.go @@ -0,0 +1,51 @@ +package metrics + +import ( + "log" + "strconv" + + "github.com/cloudbase/garm/auth" + "github.com/prometheus/client_golang/prometheus" +) + +// CollectOrganizationMetric collects the metrics for the repository objects +func (c *GarmCollector) CollectRepositoryMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) { + ctx := auth.GetAdminContext() + + repositories, err := c.runner.ListRepositories(ctx) + if err != nil { + log.Printf("listing providers: %s", err) + // continue anyway + } + + for _, repository := range repositories { + + repositoryInfo, err := prometheus.NewConstMetric( + c.repositoryInfo, + prometheus.GaugeValue, + 1, + repository.Name, // label: name + repository.Owner, // label: owner + repository.ID, // label: id + ) + if err != nil { + log.Printf("cannot collect repositoryInfo metric: %s", err) + continue + } + ch <- repositoryInfo + + repositoryPoolManagerStatus, err := prometheus.NewConstMetric( + c.repositoryPoolManagerStatus, + prometheus.GaugeValue, + bool2float64(repository.PoolManagerStatus.IsRunning), + repository.Name, // label: name + repository.ID, // label: id + strconv.FormatBool(repository.PoolManagerStatus.IsRunning), // label: running + ) + if err != nil { + log.Printf("cannot collect repositoryPoolManagerStatus metric: %s", err) + continue + } + ch <- repositoryPoolManagerStatus + } +} diff --git a/metrics/util.go b/metrics/util.go new file mode 100644 index 00000000..ae2d7087 --- /dev/null +++ b/metrics/util.go @@ -0,0 +1,8 @@ +package metrics + +func bool2float64(b bool) float64 { + if b { + return 1 + } + return 0 +}