fix: single PV mounted on multiple pods as read-only erroring (#99)
* fix: two pods reading from same read-only volume fails

* refactor: remove redundant code

* refactor: remove debug code

* refactor: remove arch specific go flags

* refactor: remove debug code

* refactor: remove extra newline

* refactor: remove debug code changes

* fix: refactor `run_test_job` to accept multiple jobs
- to fix failing test in CI/CD (one of the manifests has multiple job templates)

* test: fix typo for job name
vadasambar authored Dec 20, 2023
1 parent ac6f2dd commit 6061d3a
Showing 5 changed files with 116 additions and 18 deletions.
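The root cause is visible in the pkg/mountexecutor/mountexecutor.go diff below: mount progress was tracked per volume ID, but two pods publishing the same read-only PV share a single volume ID while each receives its own kubelet target path, so the first pod's in-flight state shadowed the second pod's mount. The following is a minimal, self-contained Go sketch of that keying difference; the map, helper names, and kubelet paths are hypothetical stand-ins for the driver's mountstatus package, not code from this commit.

package main

import "fmt"

// status mirrors the state names visible in the diff below
// (StillMounting, Mounted, Errored).
type status string

const (
    notFound      status = "NotFound"
    stillMounting status = "StillMounting"
    mounted       status = "Mounted"
)

// statuses is a hypothetical stand-in for the mountstatus package's bookkeeping.
var statuses = map[string]status{}

func update(key string, s status) { statuses[key] = s }

func get(key string) status {
    if s, ok := statuses[key]; ok {
        return s
    }
    return notFound
}

func main() {
    // One PV means one volume ID, but kubelet gives every pod its own target path.
    volumeID := "docker.io/warmmetal/csi-image-test:simple-fs"
    targetA := "/var/lib/kubelet/pods/pod-a/volumes/kubernetes.io~csi/pv/mount"
    targetB := "/var/lib/kubelet/pods/pod-b/volumes/kubernetes.io~csi/pv/mount"

    // Keyed by volume ID (before the fix): pod A's in-flight mount makes
    // pod B's publish look like it is already mounting, so pod B is skipped.
    update(volumeID, stillMounting)
    fmt.Println("pod B sees:", get(volumeID)) // StillMounting

    // Keyed by target path (after the fix): pod B is tracked independently
    // and proceeds with its own mount.
    update(targetA, stillMounting)
    fmt.Println("pod B sees:", get(targetB)) // NotFound
}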
2 changes: 1 addition & 1 deletion Makefile
@@ -1,4 +1,4 @@
-VERSION ?= v0.8.1
+VERSION ?= v0.8.2
 
 IMAGE_BUILDER ?= docker
 IMAGE_BUILD_CMD ?= buildx
4 changes: 2 additions & 2 deletions charts/warm-metal-csi-driver/Chart.yaml
@@ -15,9 +15,9 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.8.1
+version: 0.8.2
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
-appVersion: v0.8.1
+appVersion: v0.8.2
25 changes: 19 additions & 6 deletions hack/lib/utils.sh
@@ -24,17 +24,30 @@ function lib::run_test_job() {
   local manifest=$1
   echo "Start job $(basename $manifest)"
 
+  local total_failed=0
   kubectl delete --ignore-not-found -f "$manifest"
   kubectl apply -f "$manifest"
-  local job=$(kubectl get -f "$manifest" --no-headers -o=custom-columns=:metadata.name,:.kind | grep Job | awk '{ print $1 }')
-  while [ "$job" == "" ]; do
+  jobs="$(kubectl get -f "$manifest" --no-headers -o=custom-columns=:metadata.name,:.kind | grep Job | awk '{ print $1 }')"
+  while [ "$jobs" == "" ]; do
     sleep 1
-    job=$(kubectl get -f "$manifest" --no-headers -o=custom-columns=:metadata.name,:.kind | grep Job | awk '{ print $1 }')
+    jobs="$(kubectl get -f "$manifest" --no-headers -o=custom-columns=:metadata.name,:.kind | grep Job | awk '{ print $1 }')"
   done
 
-  kubectl wait --timeout=5m --for=condition=complete job/$job
-  succeeded=$(kubectl get --no-headers -ocustom-columns=:.status.succeeded job/$job)
-  [[ succeeded -eq 1 ]] && kubectl delete -f "$manifest"
+  while read job
+  do
+    kubectl wait --timeout=5m --for=condition=complete job/$job
+    succeeded=$(kubectl get --no-headers -ocustom-columns=:.status.succeeded job/$job)
+
+    if [[ "${succeeded}" != "1" ]]; then
+      echo "Test failed for job/$job"
+      total_failed=$(($total_failed+1))
+    fi
+  done <<< "$jobs" # this is done because `<code> | while read job` creates a subshell
+  # because of which increment in `total_failed` is never reflected outside the loop
+  # more info: https://unix.stackexchange.com/questions/402750/modify-global-variable-in-while-loop
+
+  [[ total_failed -eq 0 ]] && kubectl delete -f "$manifest"
 }
 
 function lib::run_failed_test_job() {
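The herestring (<<< "$jobs") in the rewritten helper is deliberate: piping into `while read job` would run the loop in a subshell, so the increments of `total_failed` would be lost when the loop ends and the manifest would be deleted even after failures. With this change, `lib::run_test_job` copes with manifests that define several Job templates, such as the new two-readonly-single-pv.yaml below, and deletes the manifest only when every job succeeded. For readers who prefer the logic outside bash, here is a rough client-go rendering of the same wait-and-count loop, a sketch only and not part of this commit; the default namespace and the kubeconfig location are assumptions.

package main

import (
    "context"
    "fmt"
    "time"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/util/wait"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/tools/clientcmd"
)

func main() {
    // Assumes a kubeconfig at the default location (~/.kube/config).
    config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
    if err != nil {
        panic(err)
    }
    client, err := kubernetes.NewForConfig(config)
    if err != nil {
        panic(err)
    }

    // Job names taken from the new manifest; the namespace is an assumption.
    jobs := []string{"two-ro-single-pv-read-only-true", "two-ro-single-pv-read-only-false"}
    totalFailed := 0

    for _, name := range jobs {
        // Poll each job until .status.succeeded >= 1, like the kubectl wait
        // plus the succeeded check in the shell helper.
        err := wait.PollImmediate(time.Second, 5*time.Minute, func() (bool, error) {
            job, err := client.BatchV1().Jobs("default").Get(context.TODO(), name, metav1.GetOptions{})
            if err != nil {
                return false, err
            }
            return job.Status.Succeeded >= 1, nil
        })
        if err != nil {
            fmt.Printf("Test failed for job/%s: %v\n", name, err)
            totalFailed++
        }
    }
    fmt.Printf("%d job(s) failed\n", totalFailed)
}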
18 changes: 9 additions & 9 deletions pkg/mountexecutor/mountexecutor.go
@@ -58,10 +58,10 @@ func NewMountExecutor(o *MountExecutorOptions) *MountExecutor {
 // StartMounting starts the mounting
 func (m *MountExecutor) StartMounting(o *MountOptions) error {
 
-    if pullstatus.Get(o.NamedRef) != pullstatus.Pulled || mountstatus.Get(o.VolumeId) == mountstatus.StillMounting {
+    if pullstatus.Get(o.NamedRef) != pullstatus.Pulled || mountstatus.Get(o.TargetPath) == mountstatus.StillMounting {
         klog.Infof("image '%s' hasn't been pulled yet (status: %s) or volume is still mounting (status: %s)",
             o.NamedRef.Name(),
-            pullstatus.Get(o.NamedRef), mountstatus.Get(o.VolumeId))
+            pullstatus.Get(o.NamedRef), mountstatus.Get(o.TargetPath))
         return nil
     }
 
@@ -70,12 +70,12 @@ func (m *MountExecutor) StartMounting(o *MountOptions) error {
         o.VolumeCapability.AccessMode.Mode == csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY
 
     if !m.asyncMount {
-        mountstatus.Update(o.VolumeId, mountstatus.StillMounting)
+        mountstatus.Update(o.TargetPath, mountstatus.StillMounting)
         if err := m.mounter.Mount(o.Context, o.VolumeId, backend.MountTarget(o.TargetPath), o.NamedRef, ro); err != nil {
-            mountstatus.Update(o.VolumeId, mountstatus.Errored)
+            mountstatus.Update(o.TargetPath, mountstatus.Errored)
             return err
         }
-        mountstatus.Update(o.VolumeId, mountstatus.Mounted)
+        mountstatus.Update(o.TargetPath, mountstatus.Mounted)
         return nil
     }
 
@@ -85,14 +85,14 @@ func (m *MountExecutor) StartMounting(o *MountOptions) error {
         ctx, cancel := context.WithTimeout(context.Background(), mountCtxTimeout)
         defer cancel()
 
-        mountstatus.Update(o.VolumeId, mountstatus.StillMounting)
+        mountstatus.Update(o.TargetPath, mountstatus.StillMounting)
         if err := m.mounter.Mount(ctx, o.VolumeId, backend.MountTarget(o.TargetPath), o.NamedRef, ro); err != nil {
             klog.Errorf("mount err: %v", err.Error())
-            mountstatus.Update(o.VolumeId, mountstatus.Errored)
+            mountstatus.Update(o.TargetPath, mountstatus.Errored)
             m.asyncErrs[o.NamedRef] = fmt.Errorf("err: %v: %v", err, m.asyncErrs[o.NamedRef])
             return
         }
-        mountstatus.Update(o.VolumeId, mountstatus.Mounted)
+        mountstatus.Update(o.TargetPath, mountstatus.Mounted)
     }()
 
     return nil
@@ -109,7 +109,7 @@ func (m *MountExecutor) WaitForMount(o *MountOptions) error {
     }
 
     mountCondFn := func() (done bool, err error) {
-        if mountstatus.Get(o.VolumeId) == mountstatus.Mounted {
+        if mountstatus.Get(o.TargetPath) == mountstatus.Mounted {
             return true, nil
         }
         if m.asyncErrs[o.NamedRef] != nil {
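Note that the re-keying runs through `WaitForMount` as well: its poll condition now watches the status of the specific target path being published, so a mount completed for one pod can no longer satisfy, or block, another pod's wait on the same volume.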
85 changes: 85 additions & 0 deletions test/integration/manifests/two-readonly-single-pv.yaml
@@ -0,0 +1,85 @@
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: pv-test-multi-read-simple-fs
+spec:
+  storageClassName: csi-image.warm-metal.tech
+  capacity:
+    storage: 5Mi
+  accessModes:
+    - ReadOnlyMany # does not force volume to be mounted read only
+  persistentVolumeReclaimPolicy: Delete
+  csi:
+    driver: csi-image.warm-metal.tech
+    volumeHandle: "docker.io/warmmetal/csi-image-test:simple-fs"
+    volumeAttributes:
+      pullAlways: "true"
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: pv-test-multi-read-simple-fs
+spec:
+  storageClassName: csi-image.warm-metal.tech
+  accessModes:
+    - ReadOnlyMany
+  resources:
+    requests:
+      storage: 5Mi
+  volumeName: pv-test-multi-read-simple-fs
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: two-ro-single-pv-read-only-true
+spec:
+  template:
+    metadata:
+      name: two-ro-single-pv-read-only-true
+    spec:
+      containers:
+        - name: two-ro-single-pv-read-only-true
+          image: docker.io/warmmetal/csi-image-test:check-fs
+          env:
+            - name: TARGET
+              value: /target1
+            - name: CHECK_RO
+              value: "true"
+          volumeMounts:
+            - mountPath: /target1
+              readOnly: true
+              name: target1
+      restartPolicy: Never
+      volumes:
+        - name: target1
+          persistentVolumeClaim:
+            claimName: pv-test-multi-read-simple-fs
+  backoffLimit: 0
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: two-ro-single-pv-read-only-false
+spec:
+  template:
+    metadata:
+      name: two-ro-single-pv-read-only-false
+    spec:
+      containers:
+        - name: two-ro-single-pv-read-only-false
+          image: docker.io/warmmetal/csi-image-test:check-fs
+          env:
+            - name: TARGET
+              value: /target1
+            - name: CHECK_RO
+              value: "true"
+          volumeMounts:
+            - mountPath: /target1
+              name: target1
+      restartPolicy: Never
+      volumes:
+        - name: target1
+          persistentVolumeClaim:
+            claimName: pv-test-multi-read-simple-fs
+  backoffLimit: 0
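Taken together, the manifest reproduces #99 end to end: both Jobs consume the same ReadOnlyMany PVC backed by a single image-backed PV (one mounts it with readOnly: true, the other relies on the access mode alone), and each container asserts through CHECK_RO that its filesystem is mounted read-only. Before this fix, whichever pod published the shared volume second would fail to mount.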
