fix: single PV mounted on multiple pods as read-only erroring (#99)
* fix: two pods reading from same read-only volume fails

* refactor: remove redundant code

* refactor: remove debug code

* refactor: remove arch specific go flags

* refactor: remove debug code

* refactor: remove extra newline

* refactor: remove debug code changes

* fix: refactor `run_test_job` to accept multiple jobs
- to fix failing test in CI/CD (one of the manifests has multiple job templates)

* test: fix typo for job name
vadasambar authored Dec 20, 2023
1 parent ac6f2dd commit 6061d3a
Showing 5 changed files with 116 additions and 18 deletions.
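The root cause is visible in the pkg/mountexecutor/mountexecutor.go diff below: mount progress was tracked per volume ID, but two pods publishing the same read-only PV share a single volume ID while each receives its own kubelet target path, so the first pod's in-flight state shadowed the second pod's mount. The following is a minimal, self-contained Go sketch of that keying difference; the map, helper names, and kubelet paths are hypothetical stand-ins for the driver's mountstatus package, not code from this commit.

package main

import "fmt"

// status mirrors the state names visible in the diff below
// (StillMounting, Mounted, Errored).
type status string

const (
    notFound      status = "NotFound"
    stillMounting status = "StillMounting"
    mounted       status = "Mounted"
)

// statuses is a hypothetical stand-in for the mountstatus package's bookkeeping.
var statuses = map[string]status{}

func update(key string, s status) { statuses[key] = s }

func get(key string) status {
    if s, ok := statuses[key]; ok {
        return s
    }
    return notFound
}

func main() {
    // One PV means one volume ID, but kubelet gives every pod its own target path.
    volumeID := "docker.io/warmmetal/csi-image-test:simple-fs"
    targetA := "/var/lib/kubelet/pods/pod-a/volumes/kubernetes.io~csi/pv/mount"
    targetB := "/var/lib/kubelet/pods/pod-b/volumes/kubernetes.io~csi/pv/mount"

    // Keyed by volume ID (before the fix): pod A's in-flight mount makes
    // pod B's publish look like it is already mounting, so pod B is skipped.
    update(volumeID, stillMounting)
    fmt.Println("pod B sees:", get(volumeID)) // StillMounting

    // Keyed by target path (after the fix): pod B is tracked independently
    // and proceeds with its own mount.
    update(targetA, stillMounting)
    fmt.Println("pod B sees:", get(targetB)) // NotFound
}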
2 changes: 1 addition & 1 deletion Makefile
@@ -1,4 +1,4 @@
-VERSION ?= v0.8.1
+VERSION ?= v0.8.2
 
 IMAGE_BUILDER ?= docker
 IMAGE_BUILD_CMD ?= buildx
4 changes: 2 additions & 2 deletions charts/warm-metal-csi-driver/Chart.yaml
@@ -15,9 +15,9 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.8.1
+version: 0.8.2
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
-appVersion: v0.8.1
+appVersion: v0.8.2
25 changes: 19 additions & 6 deletions hack/lib/utils.sh
@@ -24,17 +24,30 @@ function lib::run_test_job() {
   local manifest=$1
   echo "Start job $(basename $manifest)"
 
+  local total_failed=0
   kubectl delete --ignore-not-found -f "$manifest"
   kubectl apply -f "$manifest"
-  local job=$(kubectl get -f "$manifest" --no-headers -o=custom-columns=:metadata.name,:.kind | grep Job | awk '{ print $1 }')
-  while [ "$job" == "" ]; do
+  jobs="$(kubectl get -f "$manifest" --no-headers -o=custom-columns=:metadata.name,:.kind | grep Job | awk '{ print $1 }')"
+  while [ "$jobs" == "" ]; do
     sleep 1
-    job=$(kubectl get -f "$manifest" --no-headers -o=custom-columns=:metadata.name,:.kind | grep Job | awk '{ print $1 }')
+    jobs="$(kubectl get -f "$manifest" --no-headers -o=custom-columns=:metadata.name,:.kind | grep Job | awk '{ print $1 }')"
   done
 
-  kubectl wait --timeout=5m --for=condition=complete job/$job
-  succeeded=$(kubectl get --no-headers -ocustom-columns=:.status.succeeded job/$job)
-  [[ succeeded -eq 1 ]] && kubectl delete -f "$manifest"
+  while read job
+  do
+    kubectl wait --timeout=5m --for=condition=complete job/$job
+    succeeded=$(kubectl get --no-headers -ocustom-columns=:.status.succeeded job/$job)
+
+    if [[ "${succeeded}" != "1" ]]; then
+      echo "Test failed for job/$job"
+      total_failed=$(($total_failed+1))
+    fi
+  done <<< "$jobs" # this is done because `<code> | while read job` creates a subshell
+  # because of which increment in `total_failed` is never reflected outside the loop
+  # more info: https://unix.stackexchange.com/questions/402750/modify-global-variable-in-while-loop
+
+  [[ total_failed -eq 0 ]] && kubectl delete -f "$manifest"
 }
 
 function lib::run_failed_test_job() {
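The herestring (<<< "$jobs") in the rewritten helper is deliberate: piping into `while read job` would run the loop in a subshell, so the increments of `total_failed` would be lost when the loop ends and the manifest would be deleted even after failures. With this change, `lib::run_test_job` copes with manifests that define several Job templates, such as the new two-readonly-single-pv.yaml below, and deletes the manifest only when every job succeeded. For readers who prefer the logic outside bash, here is a rough client-go rendering of the same wait-and-count loop, a sketch only and not part of this commit; the default namespace and the kubeconfig location are assumptions.

package main

import (
    "context"
    "fmt"
    "time"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/util/wait"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/tools/clientcmd"
)

func main() {
    // Assumes a kubeconfig at the default location (~/.kube/config).
    config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
    if err != nil {
        panic(err)
    }
    client, err := kubernetes.NewForConfig(config)
    if err != nil {
        panic(err)
    }

    // Job names taken from the new manifest; the namespace is an assumption.
    jobs := []string{"two-ro-single-pv-read-only-true", "two-ro-single-pv-read-only-false"}
    totalFailed := 0

    for _, name := range jobs {
        // Poll each job until .status.succeeded >= 1, like the kubectl wait
        // plus the succeeded check in the shell helper.
        err := wait.PollImmediate(time.Second, 5*time.Minute, func() (bool, error) {
            job, err := client.BatchV1().Jobs("default").Get(context.TODO(), name, metav1.GetOptions{})
            if err != nil {
                return false, err
            }
            return job.Status.Succeeded >= 1, nil
        })
        if err != nil {
            fmt.Printf("Test failed for job/%s: %v\n", name, err)
            totalFailed++
        }
    }
    fmt.Printf("%d job(s) failed\n", totalFailed)
}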
18 changes: 9 additions & 9 deletions pkg/mountexecutor/mountexecutor.go
@@ -58,10 +58,10 @@ func NewMountExecutor(o *MountExecutorOptions) *MountExecutor {
 // StartMounting starts the mounting
 func (m *MountExecutor) StartMounting(o *MountOptions) error {
 
-    if pullstatus.Get(o.NamedRef) != pullstatus.Pulled || mountstatus.Get(o.VolumeId) == mountstatus.StillMounting {
+    if pullstatus.Get(o.NamedRef) != pullstatus.Pulled || mountstatus.Get(o.TargetPath) == mountstatus.StillMounting {
         klog.Infof("image '%s' hasn't been pulled yet (status: %s) or volume is still mounting (status: %s)",
             o.NamedRef.Name(),
-            pullstatus.Get(o.NamedRef), mountstatus.Get(o.VolumeId))
+            pullstatus.Get(o.NamedRef), mountstatus.Get(o.TargetPath))
         return nil
     }
 
@@ -70,12 +70,12 @@ func (m *MountExecutor) StartMounting(o *MountOptions) error {
         o.VolumeCapability.AccessMode.Mode == csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY
 
     if !m.asyncMount {
-        mountstatus.Update(o.VolumeId, mountstatus.StillMounting)
+        mountstatus.Update(o.TargetPath, mountstatus.StillMounting)
         if err := m.mounter.Mount(o.Context, o.VolumeId, backend.MountTarget(o.TargetPath), o.NamedRef, ro); err != nil {
-            mountstatus.Update(o.VolumeId, mountstatus.Errored)
+            mountstatus.Update(o.TargetPath, mountstatus.Errored)
             return err
         }
-        mountstatus.Update(o.VolumeId, mountstatus.Mounted)
+        mountstatus.Update(o.TargetPath, mountstatus.Mounted)
         return nil
     }
 
@@ -85,14 +85,14 @@ func (m *MountExecutor) StartMounting(o *MountOptions) error {
         ctx, cancel := context.WithTimeout(context.Background(), mountCtxTimeout)
         defer cancel()
 
-        mountstatus.Update(o.VolumeId, mountstatus.StillMounting)
+        mountstatus.Update(o.TargetPath, mountstatus.StillMounting)
         if err := m.mounter.Mount(ctx, o.VolumeId, backend.MountTarget(o.TargetPath), o.NamedRef, ro); err != nil {
             klog.Errorf("mount err: %v", err.Error())
-            mountstatus.Update(o.VolumeId, mountstatus.Errored)
+            mountstatus.Update(o.TargetPath, mountstatus.Errored)
             m.asyncErrs[o.NamedRef] = fmt.Errorf("err: %v: %v", err, m.asyncErrs[o.NamedRef])
             return
         }
-        mountstatus.Update(o.VolumeId, mountstatus.Mounted)
+        mountstatus.Update(o.TargetPath, mountstatus.Mounted)
     }()
 
     return nil
@@ -109,7 +109,7 @@ func (m *MountExecutor) WaitForMount(o *MountOptions) error {
     }
 
     mountCondFn := func() (done bool, err error) {
-        if mountstatus.Get(o.VolumeId) == mountstatus.Mounted {
+        if mountstatus.Get(o.TargetPath) == mountstatus.Mounted {
             return true, nil
         }
         if m.asyncErrs[o.NamedRef] != nil {
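Note that the re-keying runs through `WaitForMount` as well: its poll condition now watches the status of the specific target path being published, so a mount completed for one pod can no longer satisfy, or block, another pod's wait on the same volume.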
85 changes: 85 additions & 0 deletions test/integration/manifests/two-readonly-single-pv.yaml
@@ -0,0 +1,85 @@
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: pv-test-multi-read-simple-fs
+spec:
+  storageClassName: csi-image.warm-metal.tech
+  capacity:
+    storage: 5Mi
+  accessModes:
+    - ReadOnlyMany # does not force volume to be mounted read only
+  persistentVolumeReclaimPolicy: Delete
+  csi:
+    driver: csi-image.warm-metal.tech
+    volumeHandle: "docker.io/warmmetal/csi-image-test:simple-fs"
+    volumeAttributes:
+      pullAlways: "true"
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: pv-test-multi-read-simple-fs
+spec:
+  storageClassName: csi-image.warm-metal.tech
+  accessModes:
+    - ReadOnlyMany
+  resources:
+    requests:
+      storage: 5Mi
+  volumeName: pv-test-multi-read-simple-fs
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: two-ro-single-pv-read-only-true
+spec:
+  template:
+    metadata:
+      name: two-ro-single-pv-read-only-true
+    spec:
+      containers:
+        - name: two-ro-single-pv-read-only-true
+          image: docker.io/warmmetal/csi-image-test:check-fs
+          env:
+            - name: TARGET
+              value: /target1
+            - name: CHECK_RO
+              value: "true"
+          volumeMounts:
+            - mountPath: /target1
+              readOnly: true
+              name: target1
+      restartPolicy: Never
+      volumes:
+        - name: target1
+          persistentVolumeClaim:
+            claimName: pv-test-multi-read-simple-fs
+  backoffLimit: 0
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: two-ro-single-pv-read-only-false
+spec:
+  template:
+    metadata:
+      name: two-ro-single-pv-read-only-false
+    spec:
+      containers:
+        - name: two-ro-single-pv-read-only-false
+          image: docker.io/warmmetal/csi-image-test:check-fs
+          env:
+            - name: TARGET
+              value: /target1
+            - name: CHECK_RO
+              value: "true"
+          volumeMounts:
+            - mountPath: /target1
+              name: target1
+      restartPolicy: Never
+      volumes:
+        - name: target1
+          persistentVolumeClaim:
+            claimName: pv-test-multi-read-simple-fs
+  backoffLimit: 0
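Taken together, the manifest reproduces #99 end to end: both Jobs consume the same ReadOnlyMany PVC backed by a single image-backed PV (one mounts it with readOnly: true, the other relies on the access mode alone), and each container asserts through CHECK_RO that its filesystem is mounted read-only. Before this fix, whichever pod published the shared volume second would fail to mount.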
