From 1fd9a0a6a75af0ec3456e7deb8c56e0c54fc6d23 Mon Sep 17 00:00:00 2001
From: robertdavidsmith <34475852+robertdavidsmith@users.noreply.github.com>
Date: Fri, 19 Jul 2024 09:48:52 +0100
Subject: [PATCH] Scheduler pprof support in helm chart (#3799)

Co-authored-by: Robert Smith <robert.smith@gresearch.co.uk>
---
 .../scheduler-profiling-ingress.yaml          | 41 +++++++++++++++++++
 .../scheduler-profiling-service.yaml          | 22 ++++++++++
 docs/developer/pprof.md                       |  7 ++--
 3 files changed, 67 insertions(+), 3 deletions(-)
 create mode 100644 deployment/scheduler/templates/scheduler-profiling-ingress.yaml
 create mode 100644 deployment/scheduler/templates/scheduler-profiling-service.yaml

diff --git a/deployment/scheduler/templates/scheduler-profiling-ingress.yaml b/deployment/scheduler/templates/scheduler-profiling-ingress.yaml
new file mode 100644
index 00000000000..64edffb1724
--- /dev/null
+++ b/deployment/scheduler/templates/scheduler-profiling-ingress.yaml
@@ -0,0 +1,41 @@
+{{- if and .Values.scheduler.applicationConfig.profiling .Values.scheduler.applicationConfig.profiling.port }}
+{{- $root := . -}}
+{{- range $i := until (int .Values.scheduler.replicas) }}
+apiVersion: networking.k8s.io/v1
+kind: Ingress  # one Ingress per scheduler replica index, backed by that replica's profiling Service
+metadata:
+  name: {{ $root.Values.scheduler.ingress.nameOverride | default (include "armada-scheduler.name" $root) }}-{{ $i }}-profiling
+  namespace: {{ $root.Release.Namespace }}
+  annotations:
+    certmanager.k8s.io/cluster-issuer: {{ required "A value is required for $root.Values.scheduler.clusterIssuer" $root.Values.scheduler.clusterIssuer }}
+    cert-manager.io/cluster-issuer: {{ required "A value is required for $root.Values.scheduler.clusterIssuer" $root.Values.scheduler.clusterIssuer }}
+  labels:
+    {{- include "armada-scheduler.labels.all" $root | nindent 4 }}
+spec:
+  rules:  # each configured hostname becomes "<first-label>-<replica>-profiling[.<remaining-labels>]"
+  {{- range required "A value is required for .Values.scheduler.hostnames" $root.Values.scheduler.hostnames }}
+  {{- $splits := splitList "." . -}}
+  {{- $hostname := prepend (rest $splits) (printf "%s-%v-profiling" (first $splits) $i) | join "." }}
+  - host: {{ $hostname }}
+    http:
+      paths:
+        - path: /
+          pathType: Prefix
+          backend:
+            service:
+              name: {{ include "armada-scheduler.name" $root }}-{{ $i }}-profiling
+              port:
+                number: {{ $root.Values.scheduler.applicationConfig.profiling.port }}
+  {{ end -}}
+  tls:  # hosts are derived exactly as in the rules section, so single-label hostnames get no trailing dot
+    - hosts:
+       {{- range required "A value is required for .Values.scheduler.hostnames" $root.Values.scheduler.hostnames }}
+       {{- $splits := splitList "." . -}}
+       {{- $hostname := prepend (rest $splits) (printf "%s-%v-profiling" (first $splits) $i) | join "." }}
+      - {{ $hostname -}}
+       {{ end }}
+      secretName: armada-scheduler-{{ $i }}-profiling-service-tls
+
+---
+{{- end }}
+{{- end }}
diff --git a/deployment/scheduler/templates/scheduler-profiling-service.yaml b/deployment/scheduler/templates/scheduler-profiling-service.yaml
new file mode 100644
index 00000000000..51b665f2940
--- /dev/null
+++ b/deployment/scheduler/templates/scheduler-profiling-service.yaml
@@ -0,0 +1,22 @@
+{{- if and .Values.scheduler.applicationConfig.profiling .Values.scheduler.applicationConfig.profiling.port }}
+{{- $profilingPort := .Values.scheduler.applicationConfig.profiling.port -}}
+{{- range $replica := until (int .Values.scheduler.replicas) }}
+apiVersion: v1
+kind: Service  # one Service per scheduler replica index, exposing the profiling port
+metadata:
+  name: {{ include "armada-scheduler.name" $ }}-{{ $replica }}-profiling
+  namespace: {{ $.Release.Namespace }}
+  labels:
+    {{- include "armada-scheduler.labels.all" $ | nindent 4 }}
+    name: {{ include "armada-scheduler.name" $ }}-{{ $replica }}-profiling
+spec:
+  selector:  # matches only the pod whose pod-name label is "<scheduler-name>-<replica>"
+    statefulset.kubernetes.io/pod-name: {{ include "armada-scheduler.name" $ }}-{{ $replica }}
+    {{- include "armada-scheduler.labels.identity" $ | nindent 4 }}
+  ports:
+    - name: profiling
+      protocol: TCP
+      port: {{ $profilingPort }}
+---
+{{- end }}
+{{- end }}
diff --git a/docs/developer/pprof.md b/docs/developer/pprof.md
index f5f2ddb0989..0d1c22d14a2 100644
--- a/docs/developer/pprof.md
+++ b/docs/developer/pprof.md
@@ -1,7 +1,7 @@
 # Use of pprof
 
 - Go provides a profiling tool called pprof. It's documented at https://pkg.go.dev/net/http/pprof.
-- If you wish to use this with Armada, enable the profiling socket with the following config (this should be under `applicationConfig` if using the helm charts). This config will listen on the specified port with no auth.
+- To use pprof with Armada, enable the profiling socket with the following config (this should be under `applicationConfig` if using the helm charts). This config will listen on port `6060` with no auth.
   ```
   profiling:
     port: 6060
@@ -10,5 +10,6 @@
       permissionGroupMapping:
         pprof: ["everyone"]
   ```
-- It's possible to put pprof behind auth if you want, see [api.md#authentication](./api.md#authentication) and [oidc.md](./oidc.md).
-- The helm charts do not currently expose the profiling port via a service and ingress. You can use `kubectl port-forward` to access them.
+- It's possible to put pprof behind auth; see [api.md#authentication](./api.md#authentication) and [oidc.md](./oidc.md).
+- For the scheduler, the helm chart creates a Service and an Ingress for each scheduler pod when `profiling.port` is set. These are named `armada-scheduler-0-profiling`, `armada-scheduler-1-profiling`, and so on.
+- For other services, the helm charts do not currently expose the profiling port. You can use `kubectl port-forward` to access these.