deflake: Add retry with timeout to wait for final conditions
This commit is contained in:
parent
8b2dae57d4
commit
c14068c202
@ -18,6 +18,7 @@ package reconciler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@ -47,6 +48,7 @@ const (
|
||||
syncLoopPeriod = 100 * time.Minute
|
||||
maxWaitForUnmountDuration = 50 * time.Millisecond
|
||||
maxLongWaitForUnmountDuration = 4200 * time.Second
|
||||
volumeAttachedCheckTimeout = 5 * time.Second
|
||||
)
|
||||
|
||||
// Calls Run()
|
||||
@ -604,7 +606,7 @@ func Test_Run_OneVolumeAttachAndDetachUncertainNodesWithReadWriteOnce(t *testing
|
||||
// Volume is added to asw. Because attach operation fails, volume should not be reported as attached to the node.
|
||||
waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
|
||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw)
|
||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
|
||||
|
||||
// When volume is added to the node, it is set to mounted by default. Then the status will be updated by checking node status VolumeInUse.
|
||||
// Without this, the delete operation will be delayed due to mounted status
|
||||
@ -665,7 +667,7 @@ func Test_Run_UpdateNodeStatusFailBeforeOneVolumeDetachNodeWithReadWriteOnce(t *
|
||||
// Volume is added to asw, volume should be reported as attached to the node.
|
||||
waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
|
||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw)
|
||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
|
||||
|
||||
// Delete the pod
|
||||
dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
|
||||
@ -683,7 +685,7 @@ func Test_Run_UpdateNodeStatusFailBeforeOneVolumeDetachNodeWithReadWriteOnce(t *
|
||||
// in node status. By calling this function (GetVolumesToReportAttached), node status should be updated, and the volume
|
||||
// will not need to be updated until new changes are applied (detach is triggered again)
|
||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw)
|
||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
|
||||
|
||||
}
|
||||
|
||||
@ -728,11 +730,10 @@ func Test_Run_OneVolumeDetachFailNodeWithReadWriteOnce(t *testing.T) {
|
||||
t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
|
||||
}
|
||||
|
||||
time.Sleep(1000 * time.Millisecond)
|
||||
// Volume is added to asw, volume should be reported as attached to the node.
|
||||
waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
|
||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw)
|
||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
|
||||
|
||||
// Delete the pod, but detach will fail
|
||||
dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
|
||||
@ -741,28 +742,29 @@ func Test_Run_OneVolumeDetachFailNodeWithReadWriteOnce(t *testing.T) {
|
||||
// Right before detach operation is performed, the volume will be first removed from being reported
|
||||
// as attached on node status (RemoveVolumeFromReportAsAttached). After detach operation which is expected to fail,
|
||||
// controller then added the volume back as attached.
|
||||
// Here it sleeps 100ms so that detach should be triggered already at this point.
|
||||
// Here verifyVolumeReportedAsAttachedToNode will wait until the detach is triggered.
|
||||
// verifyVolumeReportedAsAttachedToNode will check volume is in the list of volume attached that needs to be updated
|
||||
// in node status. By calling this function (GetVolumesToReportAttached), node status should be updated, and the volume
|
||||
// will not need to be updated until new changes are applied (detach is triggered again)
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw)
|
||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
|
||||
|
||||
// After the first detach fails, reconciler will wait for a period of time before retrying to detach.
|
||||
// The wait time is increasing exponentially from initial value of 0.5s (0.5, 1, 2, 4, ...).
|
||||
// The test here waits for 100 Millisecond to make sure it is in exponential backoff period after
|
||||
// the first detach operation. At this point, volumes status should not be updated
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||
verifyVolumeNoStatusUpdateNeeded(t, logger, generatedVolumeName, nodeName1, asw)
|
||||
// The below verifies check intermediate state and is flaky but useful
|
||||
//
|
||||
// // After the first detach fails, reconciler will wait for a period of time before retrying to detach.
|
||||
// // The wait time is increasing exponentially from initial value of 0.5s (0.5, 1, 2, 4, ...).
|
||||
// // The test here waits for 100 Millisecond to make sure it is in exponential backoff period after
|
||||
// // the first detach operation. At this point, volumes status should not be updated
|
||||
// time.Sleep(100 * time.Millisecond)
|
||||
// verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||
// verifyVolumeNoStatusUpdateNeeded(t, logger, generatedVolumeName, nodeName1, asw)
|
||||
|
||||
// Wait for 600ms to make sure second detach operation triggered. Again, The volume will be
|
||||
// removed from being reported as attached on node status and then added back as attached.
|
||||
// The volume will be in the list of attached volumes that need to be updated to node status.
|
||||
time.Sleep(600 * time.Millisecond)
|
||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw)
|
||||
// // Wait for 600ms to make sure second detach operation triggered. Again, The volume will be
|
||||
// // removed from being reported as attached on node status and then added back as attached.
|
||||
// // The volume will be in the list of attached volumes that need to be updated to node status.
|
||||
// time.Sleep(600 * time.Millisecond)
|
||||
// verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||
// verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
|
||||
|
||||
// Add a second pod which tries to attach the volume to the same node.
|
||||
// After adding pod to the same node, detach will not be triggered any more.
|
||||
@ -770,8 +772,7 @@ func Test_Run_OneVolumeDetachFailNodeWithReadWriteOnce(t *testing.T) {
|
||||
if podAddErr != nil {
|
||||
t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
|
||||
}
|
||||
// Sleep 1s to verify no detach are triggered after second pod is added in the future.
|
||||
time.Sleep(1000 * time.Millisecond)
|
||||
// verify no detach are triggered after second pod is added in the future.
|
||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||
verifyVolumeNoStatusUpdateNeeded(t, logger, generatedVolumeName, nodeName1, asw)
|
||||
|
||||
@ -834,7 +835,7 @@ func Test_Run_OneVolumeAttachAndDetachTimeoutNodesWithReadWriteOnce(t *testing.T
|
||||
// Volume is added to asw. Because attach operation fails, volume should not be reported as attached to the node.
|
||||
waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
|
||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateUncertain, asw)
|
||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, false, asw)
|
||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, false, asw, volumeAttachedCheckTimeout)
|
||||
|
||||
// When volume is added to the node, it is set to mounted by default. Then the status will be updated by checking node status VolumeInUse.
|
||||
// Without this, the delete operation will be delayed due to mounted status
|
||||
@ -1612,8 +1613,11 @@ func verifyVolumeReportedAsAttachedToNode(
|
||||
nodeName k8stypes.NodeName,
|
||||
isAttached bool,
|
||||
asw cache.ActualStateOfWorld,
|
||||
timeout time.Duration,
|
||||
) {
|
||||
result := false
|
||||
var result bool
|
||||
var lastErr error
|
||||
err := wait.PollUntilContextTimeout(context.TODO(), 50*time.Millisecond, timeout, false, func(context.Context) (done bool, err error) {
|
||||
volumes := asw.GetVolumesToReportAttached(logger)
|
||||
for _, volume := range volumes[nodeName] {
|
||||
if volume.Name == volumeName {
|
||||
@ -1623,13 +1627,18 @@ func verifyVolumeReportedAsAttachedToNode(
|
||||
|
||||
if result == isAttached {
|
||||
t.Logf("Volume <%v> is reported as attached to node <%v>: %v", volumeName, nodeName, result)
|
||||
return
|
||||
return true, nil
|
||||
}
|
||||
t.Fatalf("Check volume <%v> is reported as attached to node <%v>, got %v, expected %v",
|
||||
lastErr = fmt.Errorf("Check volume <%v> is reported as attached to node <%v>, got %v, expected %v",
|
||||
volumeName,
|
||||
nodeName,
|
||||
result,
|
||||
isAttached)
|
||||
return false, nil
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("last error: %q, wait timeout: %q", lastErr, err.Error())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user