From 4ad4333f6e812905de9ba54cf70ea8f4049df357 Mon Sep 17 00:00:00 2001 From: Julianne DeMars Date: Thu, 31 Aug 2023 23:48:58 +0000 Subject: [PATCH 1/9] Add basic experimental cache support --- cmd/sidecar_mounter/main.go | 7 ++ .../jupyter-experimental-readcache.yaml | 85 +++++++++++++++++++ pkg/cloud_provider/clientset/fake.go | 6 +- pkg/webhook/mutatingwebhook.go | 16 +++- pkg/webhook/sidecar_spec.go | 36 ++++++-- 5 files changed, 137 insertions(+), 13 deletions(-) create mode 100644 examples/jupyter/jupyter-experimental-readcache.yaml diff --git a/cmd/sidecar_mounter/main.go b/cmd/sidecar_mounter/main.go index 8b10b7e8a..949ae049b 100644 --- a/cmd/sidecar_mounter/main.go +++ b/cmd/sidecar_mounter/main.go @@ -25,6 +25,7 @@ import ( "os" "os/signal" "path/filepath" + "strings" "sync" "syscall" "time" @@ -189,5 +190,11 @@ func prepareMountConfig(sp string) (*sidecarmounter.MountConfig, error) { return nil, fmt.Errorf("failed to fetch bucket name from CSI driver") } + for _, opt := range mc.Options{ + if strings.Contains(opt, "experimental-local-file-cache") { + mc.TempDir = "/cache/gcsfuse-tmp" + } + } + return &mc, nil } diff --git a/examples/jupyter/jupyter-experimental-readcache.yaml b/examples/jupyter/jupyter-experimental-readcache.yaml new file mode 100644 index 000000000..dcc1965ea --- /dev/null +++ b/examples/jupyter/jupyter-experimental-readcache.yaml @@ -0,0 +1,85 @@ +# Tensorflow/Jupyter StatefulSet +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: tensorflow + namespace: example +spec: + selector: + matchLabels: + pod: tensorflow-pod + serviceName: tensorflow + replicas: 1 + template: + metadata: + annotations: + gke-gcsfuse/volumes: "true" + gke-gcsfuse/cpu-limit: 500m + gke-gcsfuse/memory-limit: 10Gi + gke-gcsfuse/ephemeral-storage-limit: 30Gi + labels: + pod: tensorflow-pod + spec: + serviceAccountName: gcsfuse-ksa + nodeSelector: + cloud.google.com/gke-accelerator: nvidia-tesla-t4 + terminationGracePeriodSeconds: 30 + containers: + - name: tensorflow-container + securityContext: + privileged: true + image: tensorflow/tensorflow:2.13.0-gpu-jupyter + volumeMounts: + - name: tensorflow-pvc + mountPath: /tf/saved + resources: + limits: + nvidia.com/gpu: "1" + ephemeral-storage: 30Gi + memory: 10Gi + requests: + nvidia.com/gpu: "1" + ephemeral-storage: 30Gi + memory: 10Gi + env: + - name: JUPYTER_TOKEN + value: "jupyter" + volumes: + - name: tensorflow-pvc + csi: + driver: gcsfuse.csi.storage.gke.io + volumeAttributes: + bucketName: # unique bucket name + # update your experimental cache file options according to flags + # from + # https://github.com/GoogleCloudPlatform/gcsfuse/blob/19ed094b6612789b09ad4a1df3a2314099c65129/flags.go#L233C1-L236 + mountOptions: "experimental-local-file-cache,stat-cache-ttl=240m0s,type-cache-ttl=240m0s,stat-cache-capacity=5000000000" + +--- +# Headless service for the above StatefulSet +apiVersion: v1 +kind: Service +metadata: + name: tensorflow + namespace: example +spec: + ports: + - port: 8888 + clusterIP: None + selector: + pod: tensorflow-pod +--- +# External service +apiVersion: "v1" +kind: "Service" +metadata: + name: tensorflow-jupyter + namespace: example +spec: + ports: + - protocol: "TCP" + port: 80 + targetPort: 8888 + selector: + pod: tensorflow-pod + type: LoadBalancer diff --git a/pkg/cloud_provider/clientset/fake.go b/pkg/cloud_provider/clientset/fake.go index f92f12a5c..dc1aedafa 100644 --- a/pkg/cloud_provider/clientset/fake.go +++ b/pkg/cloud_provider/clientset/fake.go @@ -38,11 +38,9 @@ func (c *FakeClientset) GetPod(_ context.Context, namespace, name string) (*v1.P }, Spec: v1.PodSpec{ Containers: []v1.Container{ - webhook.GetSidecarContainerSpec(config), - }, - Volumes: []v1.Volume{ - webhook.GetSidecarContainerVolumeSpec(), + webhook.GetSidecarContainerSpec(config, false), }, + Volumes: webhook.GetSidecarContainerVolumeSpec(false), }, } diff --git a/pkg/webhook/mutatingwebhook.go b/pkg/webhook/mutatingwebhook.go index 2f8b6599d..3a22e4fe6 100644 --- a/pkg/webhook/mutatingwebhook.go +++ b/pkg/webhook/mutatingwebhook.go @@ -109,8 +109,20 @@ func (si *SidecarInjector) Handle(_ context.Context, req admission.Request) admi klog.Infof("mutating Pod: Name %q, GenerateName %q, Namespace %q, CPU limit %q, memory limit %q, ephemeral storage limit %q", pod.Name, pod.GenerateName, pod.Namespace, configCopy.CPULimit.String(), configCopy.MemoryLimit.String(), configCopy.EphemeralStorageLimit.String()) // the gcsfuse sidecar container has to before the containers that consume the gcsfuse volume - pod.Spec.Containers = append([]corev1.Container{GetSidecarContainerSpec(configCopy)}, pod.Spec.Containers...) - pod.Spec.Volumes = append([]corev1.Volume{GetSidecarContainerVolumeSpec()}, pod.Spec.Volumes...) + useExperimentalLocalFileCache := false + for _, v := range pod.Spec.Volumes{ + if v.CSI == nil || v.CSI.VolumeAttributes == nil { + continue + } + if val, ok := v.CSI.VolumeAttributes["mountOptions"]; ok{ + if strings.Contains(val, "experimental-local-file-cache"){ + useExperimentalLocalFileCache = true + } + } + } + pod.Spec.Containers = append([]corev1.Container{GetSidecarContainerSpec(configCopy, useExperimentalLocalFileCache)}, pod.Spec.Containers...) + + pod.Spec.Volumes = append(GetSidecarContainerVolumeSpec(useExperimentalLocalFileCache), pod.Spec.Volumes...) marshaledPod, err := json.Marshal(pod) if err != nil { return admission.Errored(http.StatusBadRequest, fmt.Errorf("failed to marshal pod: %w", err)) diff --git a/pkg/webhook/sidecar_spec.go b/pkg/webhook/sidecar_spec.go index 54e2cd512..f735c399b 100644 --- a/pkg/webhook/sidecar_spec.go +++ b/pkg/webhook/sidecar_spec.go @@ -34,10 +34,11 @@ const ( NobodyGID = 65534 ) -func GetSidecarContainerSpec(c *Config) v1.Container { + +func GetSidecarContainerSpec(c *Config, useExperimentalLocalFileCache bool) v1.Container { // The sidecar container follows Restricted Pod Security Standard, // see https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted - return v1.Container{ + toReturn := v1.Container{ Name: SidecarContainerName, Image: c.ContainerImage, ImagePullPolicy: v1.PullPolicy(c.ImagePullPolicy), @@ -74,15 +75,36 @@ func GetSidecarContainerSpec(c *Config) v1.Container { }, }, } + + if useExperimentalLocalFileCache { + toReturn.VolumeMounts = append(toReturn.VolumeMounts, v1.VolumeMount{ + Name: "scratch-volume", + MountPath: "/cache", + }) + } + return toReturn } -func GetSidecarContainerVolumeSpec() v1.Volume { - return v1.Volume{ - Name: SidecarContainerVolumeName, - VolumeSource: v1.VolumeSource{ - EmptyDir: &v1.EmptyDirVolumeSource{}, +func GetSidecarContainerVolumeSpec(useExperimentalLocalFileCache bool) []v1.Volume { + toReturn := []v1.Volume { + { + Name: SidecarContainerVolumeName, + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }, }, } + if useExperimentalLocalFileCache { + toReturn = append(toReturn, v1.Volume{ + Name: "scratch-volume", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/mnt/stateful_partition/kube-ephemeral-ssd", + }, + }, + }) + } + return toReturn } // ValidatePodHasSidecarContainerInjected validates the following: From 31e29eabc87bb21b25472f9c1f8ff84af6859fe6 Mon Sep 17 00:00:00 2001 From: Julianne DeMars Date: Tue, 5 Sep 2023 17:24:43 +0000 Subject: [PATCH 2/9] Run formatter --- cmd/sidecar_mounter/main.go | 2 +- pkg/webhook/mutatingwebhook.go | 6 +++--- pkg/webhook/sidecar_spec.go | 15 +++++++-------- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/cmd/sidecar_mounter/main.go b/cmd/sidecar_mounter/main.go index 949ae049b..5cf35635d 100644 --- a/cmd/sidecar_mounter/main.go +++ b/cmd/sidecar_mounter/main.go @@ -190,7 +190,7 @@ func prepareMountConfig(sp string) (*sidecarmounter.MountConfig, error) { return nil, fmt.Errorf("failed to fetch bucket name from CSI driver") } - for _, opt := range mc.Options{ + for _, opt := range mc.Options { if strings.Contains(opt, "experimental-local-file-cache") { mc.TempDir = "/cache/gcsfuse-tmp" } diff --git a/pkg/webhook/mutatingwebhook.go b/pkg/webhook/mutatingwebhook.go index 3a22e4fe6..5533fbedc 100644 --- a/pkg/webhook/mutatingwebhook.go +++ b/pkg/webhook/mutatingwebhook.go @@ -110,12 +110,12 @@ func (si *SidecarInjector) Handle(_ context.Context, req admission.Request) admi klog.Infof("mutating Pod: Name %q, GenerateName %q, Namespace %q, CPU limit %q, memory limit %q, ephemeral storage limit %q", pod.Name, pod.GenerateName, pod.Namespace, configCopy.CPULimit.String(), configCopy.MemoryLimit.String(), configCopy.EphemeralStorageLimit.String()) // the gcsfuse sidecar container has to before the containers that consume the gcsfuse volume useExperimentalLocalFileCache := false - for _, v := range pod.Spec.Volumes{ + for _, v := range pod.Spec.Volumes { if v.CSI == nil || v.CSI.VolumeAttributes == nil { continue } - if val, ok := v.CSI.VolumeAttributes["mountOptions"]; ok{ - if strings.Contains(val, "experimental-local-file-cache"){ + if val, ok := v.CSI.VolumeAttributes["mountOptions"]; ok { + if strings.Contains(val, "experimental-local-file-cache") { useExperimentalLocalFileCache = true } } diff --git a/pkg/webhook/sidecar_spec.go b/pkg/webhook/sidecar_spec.go index f735c399b..9d509a75d 100644 --- a/pkg/webhook/sidecar_spec.go +++ b/pkg/webhook/sidecar_spec.go @@ -34,7 +34,6 @@ const ( NobodyGID = 65534 ) - func GetSidecarContainerSpec(c *Config, useExperimentalLocalFileCache bool) v1.Container { // The sidecar container follows Restricted Pod Security Standard, // see https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted @@ -86,7 +85,7 @@ func GetSidecarContainerSpec(c *Config, useExperimentalLocalFileCache bool) v1.C } func GetSidecarContainerVolumeSpec(useExperimentalLocalFileCache bool) []v1.Volume { - toReturn := []v1.Volume { + toReturn := []v1.Volume{ { Name: SidecarContainerVolumeName, VolumeSource: v1.VolumeSource{ @@ -96,13 +95,13 @@ func GetSidecarContainerVolumeSpec(useExperimentalLocalFileCache bool) []v1.Volu } if useExperimentalLocalFileCache { toReturn = append(toReturn, v1.Volume{ - Name: "scratch-volume", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{ - Path: "/mnt/stateful_partition/kube-ephemeral-ssd", - }, + Name: "scratch-volume", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/mnt/stateful_partition/kube-ephemeral-ssd", }, - }) + }, + }) } return toReturn } From a43c6965a23eb14ef15c35d416e63e40b2c33131 Mon Sep 17 00:00:00 2001 From: Julianne DeMars Date: Tue, 5 Sep 2023 19:50:48 +0000 Subject: [PATCH 3/9] allow custom cache --- cmd/sidecar_mounter/main.go | 5 ++++- pkg/webhook/mutatingwebhook.go | 9 +++++++-- pkg/webhook/sidecar_spec.go | 16 +++++++++------- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/cmd/sidecar_mounter/main.go b/cmd/sidecar_mounter/main.go index 5cf35635d..b1d1d7e1f 100644 --- a/cmd/sidecar_mounter/main.go +++ b/cmd/sidecar_mounter/main.go @@ -31,6 +31,8 @@ import ( "time" sidecarmounter "github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/sidecar_mounter" + sidecarspec "github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/webhook" + "github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/util" "k8s.io/klog/v2" ) @@ -192,7 +194,8 @@ func prepareMountConfig(sp string) (*sidecarmounter.MountConfig, error) { for _, opt := range mc.Options { if strings.Contains(opt, "experimental-local-file-cache") { - mc.TempDir = "/cache/gcsfuse-tmp" + + mc.TempDir = filepath.Join(sidecarspec.CacheVolumeMountPath, "gcsfuse-tmp") } } diff --git a/pkg/webhook/mutatingwebhook.go b/pkg/webhook/mutatingwebhook.go index 5533fbedc..d97182a1d 100644 --- a/pkg/webhook/mutatingwebhook.go +++ b/pkg/webhook/mutatingwebhook.go @@ -110,7 +110,11 @@ func (si *SidecarInjector) Handle(_ context.Context, req admission.Request) admi klog.Infof("mutating Pod: Name %q, GenerateName %q, Namespace %q, CPU limit %q, memory limit %q, ephemeral storage limit %q", pod.Name, pod.GenerateName, pod.Namespace, configCopy.CPULimit.String(), configCopy.MemoryLimit.String(), configCopy.EphemeralStorageLimit.String()) // the gcsfuse sidecar container has to before the containers that consume the gcsfuse volume useExperimentalLocalFileCache := false + hasCacheVolume := false for _, v := range pod.Spec.Volumes { + if v.Name == CacheVolumeName { + hasCacheVolume = false + } if v.CSI == nil || v.CSI.VolumeAttributes == nil { continue } @@ -121,8 +125,9 @@ func (si *SidecarInjector) Handle(_ context.Context, req admission.Request) admi } } pod.Spec.Containers = append([]corev1.Container{GetSidecarContainerSpec(configCopy, useExperimentalLocalFileCache)}, pod.Spec.Containers...) - - pod.Spec.Volumes = append(GetSidecarContainerVolumeSpec(useExperimentalLocalFileCache), pod.Spec.Volumes...) + // Add a volume for the experimental read cache if none already exists + addCacheVolume := useExperimentalLocalFileCache && !hasCacheVolume + pod.Spec.Volumes = append(GetSidecarContainerVolumeSpec(addCacheVolume), pod.Spec.Volumes...) marshaledPod, err := json.Marshal(pod) if err != nil { return admission.Errored(http.StatusBadRequest, fmt.Errorf("failed to marshal pod: %w", err)) diff --git a/pkg/webhook/sidecar_spec.go b/pkg/webhook/sidecar_spec.go index 9d509a75d..6be32f877 100644 --- a/pkg/webhook/sidecar_spec.go +++ b/pkg/webhook/sidecar_spec.go @@ -28,13 +28,15 @@ const ( SidecarContainerName = "gke-gcsfuse-sidecar" SidecarContainerVolumeName = "gke-gcsfuse-tmp" SidecarContainerVolumeMountPath = "/gcsfuse-tmp" + CacheVolumeName = "cache-volume" + CacheVolumeMountPath = "/cache" // See the nonroot user discussion: https://github.com/GoogleContainerTools/distroless/issues/443 NobodyUID = 65534 NobodyGID = 65534 ) -func GetSidecarContainerSpec(c *Config, useExperimentalLocalFileCache bool) v1.Container { +func GetSidecarContainerSpec(c *Config, addCacheVolume bool) v1.Container { // The sidecar container follows Restricted Pod Security Standard, // see https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted toReturn := v1.Container{ @@ -75,16 +77,16 @@ func GetSidecarContainerSpec(c *Config, useExperimentalLocalFileCache bool) v1.C }, } - if useExperimentalLocalFileCache { + if addCacheVolume { toReturn.VolumeMounts = append(toReturn.VolumeMounts, v1.VolumeMount{ - Name: "scratch-volume", - MountPath: "/cache", + Name: CacheVolumeName, + MountPath: CacheVolumeMountPath, }) } return toReturn } -func GetSidecarContainerVolumeSpec(useExperimentalLocalFileCache bool) []v1.Volume { +func GetSidecarContainerVolumeSpec(addCacheVolume bool) []v1.Volume { toReturn := []v1.Volume{ { Name: SidecarContainerVolumeName, @@ -93,9 +95,9 @@ func GetSidecarContainerVolumeSpec(useExperimentalLocalFileCache bool) []v1.Volu }, }, } - if useExperimentalLocalFileCache { + if addCacheVolume { toReturn = append(toReturn, v1.Volume{ - Name: "scratch-volume", + Name: CacheVolumeName, VolumeSource: v1.VolumeSource{ HostPath: &v1.HostPathVolumeSource{ Path: "/mnt/stateful_partition/kube-ephemeral-ssd", From 7f537f4107da1ba6cd0741f769b910f8274a61ea Mon Sep 17 00:00:00 2001 From: Julianne DeMars Date: Tue, 5 Sep 2023 19:55:13 +0000 Subject: [PATCH 4/9] fix reference --- pkg/webhook/mutatingwebhook.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/webhook/mutatingwebhook.go b/pkg/webhook/mutatingwebhook.go index d97182a1d..d696f1d02 100644 --- a/pkg/webhook/mutatingwebhook.go +++ b/pkg/webhook/mutatingwebhook.go @@ -124,9 +124,9 @@ func (si *SidecarInjector) Handle(_ context.Context, req admission.Request) admi } } } - pod.Spec.Containers = append([]corev1.Container{GetSidecarContainerSpec(configCopy, useExperimentalLocalFileCache)}, pod.Spec.Containers...) // Add a volume for the experimental read cache if none already exists addCacheVolume := useExperimentalLocalFileCache && !hasCacheVolume + pod.Spec.Containers = append([]corev1.Container{GetSidecarContainerSpec(configCopy, addCacheVolume)}, pod.Spec.Containers...) pod.Spec.Volumes = append(GetSidecarContainerVolumeSpec(addCacheVolume), pod.Spec.Volumes...) marshaledPod, err := json.Marshal(pod) if err != nil { From 27bc440e37ce0a419147b89461b213cae452e03f Mon Sep 17 00:00:00 2001 From: Julianne DeMars Date: Tue, 5 Sep 2023 23:56:18 +0000 Subject: [PATCH 5/9] remove reliance on exp flag --- cmd/sidecar_mounter/main.go | 9 -------- examples/README.md | 31 ++++++++++++++++++++++++++-- pkg/cloud_provider/clientset/fake.go | 4 ++-- pkg/webhook/mutatingwebhook.go | 23 ++++----------------- pkg/webhook/sidecar_spec.go | 20 ++---------------- 5 files changed, 37 insertions(+), 50 deletions(-) diff --git a/cmd/sidecar_mounter/main.go b/cmd/sidecar_mounter/main.go index b1d1d7e1f..2ee80b155 100644 --- a/cmd/sidecar_mounter/main.go +++ b/cmd/sidecar_mounter/main.go @@ -25,13 +25,11 @@ import ( "os" "os/signal" "path/filepath" - "strings" "sync" "syscall" "time" sidecarmounter "github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/sidecar_mounter" - sidecarspec "github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/webhook" "github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/util" "k8s.io/klog/v2" @@ -192,12 +190,5 @@ func prepareMountConfig(sp string) (*sidecarmounter.MountConfig, error) { return nil, fmt.Errorf("failed to fetch bucket name from CSI driver") } - for _, opt := range mc.Options { - if strings.Contains(opt, "experimental-local-file-cache") { - - mc.TempDir = filepath.Join(sidecarspec.CacheVolumeMountPath, "gcsfuse-tmp") - } - } - return &mc, nil } diff --git a/examples/README.md b/examples/README.md index 4309a4469..c2649c557 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,4 +1,4 @@ -