deflake: Add retry with timeout to wait for final conditions
This commit is contained in:
parent
8b2dae57d4
commit
c14068c202
@ -18,6 +18,7 @@ package reconciler
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -47,6 +48,7 @@ const (
|
|||||||
syncLoopPeriod = 100 * time.Minute
|
syncLoopPeriod = 100 * time.Minute
|
||||||
maxWaitForUnmountDuration = 50 * time.Millisecond
|
maxWaitForUnmountDuration = 50 * time.Millisecond
|
||||||
maxLongWaitForUnmountDuration = 4200 * time.Second
|
maxLongWaitForUnmountDuration = 4200 * time.Second
|
||||||
|
volumeAttachedCheckTimeout = 5 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
// Calls Run()
|
// Calls Run()
|
||||||
@ -604,7 +606,7 @@ func Test_Run_OneVolumeAttachAndDetachUncertainNodesWithReadWriteOnce(t *testing
|
|||||||
// Volume is added to asw. Because attach operation fails, volume should not be reported as attached to the node.
|
// Volume is added to asw. Because attach operation fails, volume should not be reported as attached to the node.
|
||||||
waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
|
waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
|
||||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw)
|
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
|
||||||
|
|
||||||
// When volume is added to the node, it is set to mounted by default. Then the status will be updated by checking node status VolumeInUse.
|
// When volume is added to the node, it is set to mounted by default. Then the status will be updated by checking node status VolumeInUse.
|
||||||
// Without this, the delete operation will be delayed due to mounted status
|
// Without this, the delete operation will be delayed due to mounted status
|
||||||
@ -665,7 +667,7 @@ func Test_Run_UpdateNodeStatusFailBeforeOneVolumeDetachNodeWithReadWriteOnce(t *
|
|||||||
// Volume is added to asw, volume should be reported as attached to the node.
|
// Volume is added to asw, volume should be reported as attached to the node.
|
||||||
waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
|
waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
|
||||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw)
|
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
|
||||||
|
|
||||||
// Delete the pod
|
// Delete the pod
|
||||||
dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
|
dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
|
||||||
@ -683,7 +685,7 @@ func Test_Run_UpdateNodeStatusFailBeforeOneVolumeDetachNodeWithReadWriteOnce(t *
|
|||||||
// in node status. By calling this function (GetVolumesToReportAttached), node status should be updated, and the volume
|
// in node status. By calling this function (GetVolumesToReportAttached), node status should be updated, and the volume
|
||||||
// will not need to be updated until new changes are applied (detach is triggered again)
|
// will not need to be updated until new changes are applied (detach is triggered again)
|
||||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw)
|
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -728,11 +730,10 @@ func Test_Run_OneVolumeDetachFailNodeWithReadWriteOnce(t *testing.T) {
|
|||||||
t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
|
t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
time.Sleep(1000 * time.Millisecond)
|
|
||||||
// Volume is added to asw, volume should be reported as attached to the node.
|
// Volume is added to asw, volume should be reported as attached to the node.
|
||||||
waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
|
waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
|
||||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw)
|
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
|
||||||
|
|
||||||
// Delete the pod, but detach will fail
|
// Delete the pod, but detach will fail
|
||||||
dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
|
dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
|
||||||
@ -741,28 +742,29 @@ func Test_Run_OneVolumeDetachFailNodeWithReadWriteOnce(t *testing.T) {
|
|||||||
// Right before detach operation is performed, the volume will be first removed from being reported
|
// Right before detach operation is performed, the volume will be first removed from being reported
|
||||||
// as attached on node status (RemoveVolumeFromReportAsAttached). After detach operation which is expected to fail,
|
// as attached on node status (RemoveVolumeFromReportAsAttached). After detach operation which is expected to fail,
|
||||||
// controller then added the volume back as attached.
|
// controller then added the volume back as attached.
|
||||||
// Here it sleeps 100ms so that detach should be triggered already at this point.
|
// Here verifyVolumeReportedAsAttachedToNode will wait until the detach is triggered.
|
||||||
// verifyVolumeReportedAsAttachedToNode will check volume is in the list of volume attached that needs to be updated
|
// verifyVolumeReportedAsAttachedToNode will check volume is in the list of volume attached that needs to be updated
|
||||||
// in node status. By calling this function (GetVolumesToReportAttached), node status should be updated, and the volume
|
// in node status. By calling this function (GetVolumesToReportAttached), node status should be updated, and the volume
|
||||||
// will not need to be updated until new changes are applied (detach is triggered again)
|
// will not need to be updated until new changes are applied (detach is triggered again)
|
||||||
time.Sleep(100 * time.Millisecond)
|
|
||||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw)
|
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
|
||||||
|
|
||||||
// After the first detach fails, reconciler will wait for a period of time before retrying to detach.
|
// The below verifies check intermediate state and is flaky but useful
|
||||||
// The wait time is increasing exponentially from initial value of 0.5s (0.5, 1, 2, 4, ...).
|
//
|
||||||
// The test here waits for 100 Millisecond to make sure it is in exponential backoff period after
|
// // After the first detach fails, reconciler will wait for a period of time before retrying to detach.
|
||||||
// the first detach operation. At this point, volumes status should not be updated
|
// // The wait time is increasing exponentially from initial value of 0.5s (0.5, 1, 2, 4, ...).
|
||||||
time.Sleep(100 * time.Millisecond)
|
// // The test here waits for 100 Millisecond to make sure it is in exponential backoff period after
|
||||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
// // the first detach operation. At this point, volumes status should not be updated
|
||||||
verifyVolumeNoStatusUpdateNeeded(t, logger, generatedVolumeName, nodeName1, asw)
|
// time.Sleep(100 * time.Millisecond)
|
||||||
|
// verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||||
|
// verifyVolumeNoStatusUpdateNeeded(t, logger, generatedVolumeName, nodeName1, asw)
|
||||||
|
|
||||||
// Wait for 600ms to make sure second detach operation triggered. Again, The volume will be
|
// // Wait for 600ms to make sure second detach operation triggered. Again, The volume will be
|
||||||
// removed from being reported as attached on node status and then added back as attached.
|
// // removed from being reported as attached on node status and then added back as attached.
|
||||||
// The volume will be in the list of attached volumes that need to be updated to node status.
|
// // The volume will be in the list of attached volumes that need to be updated to node status.
|
||||||
time.Sleep(600 * time.Millisecond)
|
// time.Sleep(600 * time.Millisecond)
|
||||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
// verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw)
|
// verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
|
||||||
|
|
||||||
// Add a second pod which tries to attach the volume to the same node.
|
// Add a second pod which tries to attach the volume to the same node.
|
||||||
// After adding pod to the same node, detach will not be triggered any more.
|
// After adding pod to the same node, detach will not be triggered any more.
|
||||||
@ -770,8 +772,7 @@ func Test_Run_OneVolumeDetachFailNodeWithReadWriteOnce(t *testing.T) {
|
|||||||
if podAddErr != nil {
|
if podAddErr != nil {
|
||||||
t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
|
t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
|
||||||
}
|
}
|
||||||
// Sleep 1s to verify no detach are triggered after second pod is added in the future.
|
// verify no detach are triggered after second pod is added in the future.
|
||||||
time.Sleep(1000 * time.Millisecond)
|
|
||||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
|
||||||
verifyVolumeNoStatusUpdateNeeded(t, logger, generatedVolumeName, nodeName1, asw)
|
verifyVolumeNoStatusUpdateNeeded(t, logger, generatedVolumeName, nodeName1, asw)
|
||||||
|
|
||||||
@ -834,7 +835,7 @@ func Test_Run_OneVolumeAttachAndDetachTimeoutNodesWithReadWriteOnce(t *testing.T
|
|||||||
// Volume is added to asw. Because attach operation fails, volume should not be reported as attached to the node.
|
// Volume is added to asw. Because attach operation fails, volume should not be reported as attached to the node.
|
||||||
waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
|
waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
|
||||||
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateUncertain, asw)
|
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateUncertain, asw)
|
||||||
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, false, asw)
|
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, false, asw, volumeAttachedCheckTimeout)
|
||||||
|
|
||||||
// When volume is added to the node, it is set to mounted by default. Then the status will be updated by checking node status VolumeInUse.
|
// When volume is added to the node, it is set to mounted by default. Then the status will be updated by checking node status VolumeInUse.
|
||||||
// Without this, the delete operation will be delayed due to mounted status
|
// Without this, the delete operation will be delayed due to mounted status
|
||||||
@ -1612,24 +1613,32 @@ func verifyVolumeReportedAsAttachedToNode(
|
|||||||
nodeName k8stypes.NodeName,
|
nodeName k8stypes.NodeName,
|
||||||
isAttached bool,
|
isAttached bool,
|
||||||
asw cache.ActualStateOfWorld,
|
asw cache.ActualStateOfWorld,
|
||||||
|
timeout time.Duration,
|
||||||
) {
|
) {
|
||||||
result := false
|
var result bool
|
||||||
volumes := asw.GetVolumesToReportAttached(logger)
|
var lastErr error
|
||||||
for _, volume := range volumes[nodeName] {
|
err := wait.PollUntilContextTimeout(context.TODO(), 50*time.Millisecond, timeout, false, func(context.Context) (done bool, err error) {
|
||||||
if volume.Name == volumeName {
|
volumes := asw.GetVolumesToReportAttached(logger)
|
||||||
result = true
|
for _, volume := range volumes[nodeName] {
|
||||||
|
if volume.Name == volumeName {
|
||||||
|
result = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if result == isAttached {
|
if result == isAttached {
|
||||||
t.Logf("Volume <%v> is reported as attached to node <%v>: %v", volumeName, nodeName, result)
|
t.Logf("Volume <%v> is reported as attached to node <%v>: %v", volumeName, nodeName, result)
|
||||||
return
|
return true, nil
|
||||||
|
}
|
||||||
|
lastErr = fmt.Errorf("Check volume <%v> is reported as attached to node <%v>, got %v, expected %v",
|
||||||
|
volumeName,
|
||||||
|
nodeName,
|
||||||
|
result,
|
||||||
|
isAttached)
|
||||||
|
return false, nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("last error: %q, wait timeout: %q", lastErr, err.Error())
|
||||||
}
|
}
|
||||||
t.Fatalf("Check volume <%v> is reported as attached to node <%v>, got %v, expected %v",
|
|
||||||
volumeName,
|
|
||||||
nodeName,
|
|
||||||
result,
|
|
||||||
isAttached)
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user