-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add pull mount histogram metrics (#79)
* feat: wip add histogram and counter metrics for pull and mount operations
* refactor: move registering metrics and starting metrics server to a separate fn
  - so that it is easier to use in tests
  - fix bug where `err` is used instead of `e`
  - remove metric for mount and pull wait timeout (never really recorded because the kubelet timeout is exhausted first)
* test: wip test for metrics
* test: add e2e test
  - hook it into CI/CD
  - finish up unit tests
  - fix nil ptr error for async errors
* refactor: remove debug statement
  - not needed anymore
* fix: metrics test gha step name for
* chore: rename metrics test job `error-compatible-ephemeral-volume` -> `error-ephemeral-volume`
* debug: add ssh action
* fix: ci/cd test failing
  - because the script assumes warm metal driver is deployed in `default` namespace (it is deployed in `kube-system` namespace)
* docs: fix typo in code comments
* chore: bump release version to `v0.9.0`
- Loading branch information
1 parent
6061d3a
commit 8c0cf34
Showing
16 changed files
with
429 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# Integration workflow: builds the driver image, installs the Helm chart into a
# kind cluster, and runs the metrics test script against it.
name: test-metrics-5m
on:
  push:
    branches: [master]
  pull_request:
    branches: [master]
  workflow_dispatch:
jobs:
  integration:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Start a kind cluster with containerd
        uses: helm/kind-action@v1.4.0
        with:
          cluster_name: kind-${{ github.run_id }}
          kubectl_version: "v1.25.2"
          config: ./hack/ci/containerd-cluster-conf.yaml
      - name: Build image
        run: ./hack/ci/build.sh
      # Export the values later steps read from the environment.
      - name: Set image version
        run: |
          echo "VALUE_FILE=charts/warm-metal-csi-driver/values.yaml" >> "$GITHUB_ENV"
          echo "IMAGE_TAG=$(git rev-parse --short HEAD)" >> "$GITHUB_ENV"
          echo "HELM_NAME=wm-csi-integration-tests" >> "$GITHUB_ENV"
      - name: Install the CSI Driver
        run: |
          # Dump pod state in kube-system if any command in this step fails.
          trap "kubectl -n kube-system describe po" ERR
          helm install ${HELM_NAME} charts/warm-metal-csi-driver -n kube-system \
            -f ${VALUE_FILE} \
            --set csiPlugin.image.tag=${IMAGE_TAG} \
            --wait \
            --debug
      - name: Test metrics
        run: ./test/integration/test-metrics.sh
      - name: Uninstall the CSI Driver
        run: helm uninstall -n kube-system ${HELM_NAME} --wait
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
VERSION ?= v0.8.2 | ||
VERSION ?= v0.9.0 | ||
|
||
IMAGE_BUILDER ?= docker | ||
IMAGE_BUILD_CMD ?= buildx | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
package metrics | ||
|
||
import ( | ||
"net/http" | ||
|
||
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/prometheus/client_golang/prometheus/promhttp" | ||
"k8s.io/klog/v2" | ||
) | ||
|
||
const (
	// Async and Sync name the two operation modes.
	// NOTE(review): presumably used as values of the "operation_type" label
	// on the metrics below; confirm at the call sites.
	Async = "async"
	Sync  = "sync"

	// ImagePullTimeKey is the metric name of the image pull duration histogram.
	ImagePullTimeKey = "pull_duration_seconds"
	// ImageMountTimeKey is the metric name of the image mount duration histogram.
	ImageMountTimeKey = "mount_duration_seconds"
	// OperationErrorsCountKey is the metric name of the operation error counter.
	OperationErrorsCountKey = "operation_errors_total"
)
|
||
var ImagePullTime = prometheus.NewHistogramVec( | ||
prometheus.HistogramOpts{ | ||
Subsystem: "warm_metal", | ||
Name: ImagePullTimeKey, | ||
Help: "The time it took to pull an image", | ||
Buckets: []float64{0, 1, 5, 10, 15, 30, 60, 120, 180}, | ||
}, | ||
[]string{"operation_type"}, | ||
) | ||
|
||
var ImageMountTime = prometheus.NewHistogramVec( | ||
prometheus.HistogramOpts{ | ||
Subsystem: "warm_metal", | ||
Name: ImageMountTimeKey, | ||
Help: "The time it took to mount an image", | ||
Buckets: []float64{0, 1, 5, 10, 15, 30, 60, 120, 180}, | ||
}, | ||
[]string{"operation_type"}, | ||
) | ||
|
||
var OperationErrorsCount = prometheus.NewCounterVec( | ||
prometheus.CounterOpts{ | ||
Subsystem: "warm_metal", | ||
Name: OperationErrorsCountKey, | ||
Help: "Cumulative number of operation (pull,mount,unmount) errors in the driver", | ||
}, | ||
[]string{"operation_type"}, | ||
) | ||
|
||
func RegisterMetrics() *prometheus.Registry { | ||
reg := prometheus.NewRegistry() | ||
reg.MustRegister(ImagePullTime) | ||
reg.MustRegister(ImageMountTime) | ||
reg.MustRegister(OperationErrorsCount) | ||
|
||
return reg | ||
} | ||
|
||
func StartMetricsServer(reg *prometheus.Registry) { | ||
go func() { | ||
http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg})) | ||
klog.Info("serving internal metrics at port 8080") | ||
klog.Fatal(http.ListenAndServe(":8080", nil)) | ||
}() | ||
} |
Oops, something went wrong.