diff --git a/utils/devices.go b/utils/devices.go index a607aba8b..3126bb120 100644 --- a/utils/devices.go +++ b/utils/devices.go @@ -12,6 +12,7 @@ import ( "strings" "time" + "github.com/spf13/afero" "golang.org/x/net/context" "github.com/netapp/trident/internal/fiji" @@ -26,7 +27,8 @@ import ( const LUKSMetadataSize = 18874368 const ( - luksDevicePrefix = "luks-" + luksDevicePrefix = "luks-" + devicesRemovalMaxWaitTime = 5 * time.Second ) var ( @@ -35,6 +37,8 @@ var ( beforeRemoveFile = fiji.Register("beforeRemoveFile", "devices") LuksCloseDurations = durations.TimeDuration{} + + osFs = afero.NewOsFs() ) // waitForDevice accepts a device name and checks if it is present @@ -175,6 +179,30 @@ func removeDevice(ctx context.Context, deviceInfo *models.ScsiDeviceInfo, ignore return nil } +// waitForDevicesRemoval waits for devices to be removed from the system. +func waitForDevicesRemoval(ctx context.Context, osFs afero.Fs, devicePathPrefix string, deviceNames []string, + maxWaitTime time.Duration, +) error { + startTime := time.Now() + for time.Since(startTime) < maxWaitTime { + anyExist := false + for _, device := range deviceNames { + path := filepath.Join(devicePathPrefix, device) + if _, err := osFs.Stat(path); !os.IsNotExist(err) { + anyExist = true + break + } + } + if !anyExist { + return nil + } + time.Sleep(50 * time.Millisecond) + } + + Logc(ctx).WithField("devices", deviceNames).Debug("Timed out waiting for devices to be removed.") + return errors.TimeoutError("timed out waiting for devices to be removed") +} + // canFlushMultipathDevice determines whether device can be flushed. // 1. Check the health of path by executing 'multipath -C ' // 2. If no error, return nil. @@ -911,8 +939,13 @@ func removeSCSIDevice(ctx context.Context, deviceInfo *models.ScsiDeviceInfo, ig return false, err } - // Give the host a chance to fully process the removal - time.Sleep(time.Second) + // Wait for device to be removed. Do not ignore errors here as we need the device removed + // for the force removal of the multipath device to succeed. + err = waitForDevicesRemoval(ctx, osFs, iscsi.DevPrefix, deviceInfo.Devices, devicesRemovalMaxWaitTime) + if err != nil { + return false, err + } + listAllISCSIDevices(ctx) // If ignoreErrors was set to true while entering into this function and diff --git a/utils/devices_linux.go b/utils/devices_linux.go index 3241caeb4..a1a9f5fb5 100644 --- a/utils/devices_linux.go +++ b/utils/devices_linux.go @@ -14,7 +14,6 @@ import ( "unsafe" log "github.com/sirupsen/logrus" - "github.com/spf13/afero" "golang.org/x/net/context" "golang.org/x/sys/unix" @@ -49,8 +48,6 @@ var ( duringOpenBeforeCryptSetupOpen = fiji.Register("duringOpenBeforeCryptSetupOpen", "devices_linux") duringRotatePassphraseBeforeLuksKeyChange = fiji.Register("duringRotatePassphraseBeforeLuksKeyChange", "devices_linux") - - osFs = afero.NewOsFs() ) // flushOneDevice flushes any outstanding I/O to a disk diff --git a/utils/devices_test.go b/utils/devices_test.go index 59489cbe3..100edb256 100644 --- a/utils/devices_test.go +++ b/utils/devices_test.go @@ -8,11 +8,13 @@ import ( "testing" "time" + "github.com/spf13/afero" "github.com/stretchr/testify/assert" "go.uber.org/mock/gomock" mockexec "github.com/netapp/trident/mocks/mock_utils/mock_exec" "github.com/netapp/trident/mocks/mock_utils/mock_models/mock_luks" + "github.com/netapp/trident/utils/errors" ) // //////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -266,3 +268,85 @@ func TestRemoveMultipathDeviceMapping(t *testing.T) { }) } } + +func TestWaitForDevicesRemoval(t *testing.T) { + errMsg := "timed out waiting for devices to be removed" + tests := map[string]struct { + name string + devicePathPrefix string + deviceNames []string + getOsFs func() (afero.Fs, error) + maxWaitTime time.Duration + expectedError error + }{ + "Devices removed successfully": { + devicePathPrefix: "/dev", + deviceNames: []string{"sda", "sdb"}, + getOsFs: func() (afero.Fs, error) { + return afero.NewMemMapFs(), nil + }, + maxWaitTime: 1 * time.Second, + expectedError: nil, + }, + "Timeout waiting for devices to be removed": { + devicePathPrefix: "/dev", + deviceNames: []string{"sda", "sdb"}, + getOsFs: func() (afero.Fs, error) { + osFs := afero.NewMemMapFs() + _, err := osFs.Create("/dev/sda") + if err != nil { + return nil, err + } + _, err = osFs.Create("/dev/sdb") + if err != nil { + return nil, err + } + return osFs, nil + }, + maxWaitTime: 1 * time.Second, + expectedError: errors.TimeoutError(errMsg), + }, + "Timeout waiting for last device to be removed": { + devicePathPrefix: "/dev", + deviceNames: []string{"sda", "sdb"}, + getOsFs: func() (afero.Fs, error) { + osFs := afero.NewMemMapFs() + _, err := osFs.Create("/dev/sdb") + if err != nil { + return nil, err + } + return osFs, nil + }, + maxWaitTime: 1 * time.Second, + expectedError: errors.TimeoutError(errMsg), + }, + "Timeout waiting for first device to be removed": { + devicePathPrefix: "/dev", + deviceNames: []string{"sda", "sdb"}, + getOsFs: func() (afero.Fs, error) { + osFs := afero.NewMemMapFs() + _, err := osFs.Create("/dev/sda") + if err != nil { + return nil, err + } + return osFs, nil + }, + maxWaitTime: 1 * time.Second, + expectedError: errors.TimeoutError(errMsg), + }, + } + + for name, params := range tests { + t.Run(name, func(t *testing.T) { + fs, err := params.getOsFs() + assert.NoError(t, err) + err = waitForDevicesRemoval(context.Background(), fs, params.devicePathPrefix, params.deviceNames, + params.maxWaitTime) + if params.expectedError != nil { + assert.EqualError(t, err, params.expectedError.Error()) + } else { + assert.NoError(t, err) + } + }) + } +}