controller: fix requeueing progressing deployments

Drop the secondary queue and add either ratelimited or after the
required amount of time that we need to wait directly in the main
queue. In this way we can always be sure that we will sync back
the Deployment if its progress has yet to resolve into a complete
(NewReplicaSetAvailable) or TimedOut condition.
This commit is contained in:
Michail Kargakis
2017-02-15 22:01:41 +02:00
parent 1ad5cea24e
commit 7a8259c6a1
4 changed files with 225 additions and 84 deletions

View File

@@ -150,21 +150,8 @@ func (dc *DeploymentController) syncRolloutStatus(allRSs []*extensions.ReplicaSe
// Do not update if there is nothing new to add.
if reflect.DeepEqual(d.Status, newStatus) {
// If there is no sign of progress at this point then there is a high chance that the
// deployment is stuck. We should resync this deployment at some point[1] in the future
// and check if it has timed out. We definitely need this, otherwise we depend on the
// controller resync interval. See https://github.com/kubernetes/kubernetes/issues/34458.
//
// [1] time.Now() + progressDeadlineSeconds - lastUpdateTime (of the Progressing condition).
if d.Spec.ProgressDeadlineSeconds != nil &&
!util.DeploymentComplete(d, &newStatus) &&
!util.DeploymentTimedOut(d, &newStatus) &&
currentCond != nil {
after := time.Now().Add(time.Duration(*d.Spec.ProgressDeadlineSeconds) * time.Second).Sub(currentCond.LastUpdateTime.Time)
glog.V(2).Infof("Queueing up deployment %q for a progress check after %ds", d.Name, int(after.Seconds()))
dc.checkProgressAfter(d, after)
}
// Requeue the deployment if required.
dc.requeueStuckDeployment(d, newStatus)
return nil
}
@@ -207,3 +194,48 @@ func (dc *DeploymentController) getReplicaFailures(allRSs []*extensions.ReplicaS
}
return conditions
}
// used for unit testing
var nowFn = func() time.Time { return time.Now() }
// requeueStuckDeployment checks whether the provided deployment needs to be synced for a progress
// check. It returns the time after the deployment will be requeued for the progress check, 0 if it
// will be requeued now, or -1 if it does not need to be requeued.
func (dc *DeploymentController) requeueStuckDeployment(d *extensions.Deployment, newStatus extensions.DeploymentStatus) time.Duration {
currentCond := util.GetDeploymentCondition(d.Status, extensions.DeploymentProgressing)
// Can't estimate progress if there is no deadline in the spec or progressing condition in the current status.
if d.Spec.ProgressDeadlineSeconds == nil || currentCond == nil {
return time.Duration(-1)
}
// No need to estimate progress if the rollout is complete or already timed out.
if util.DeploymentComplete(d, &newStatus) || currentCond.Reason == util.TimedOutReason {
return time.Duration(-1)
}
// If there is no sign of progress at this point then there is a high chance that the
// deployment is stuck. We should resync this deployment at some point in the future[1]
// and check whether it has timed out. We definitely need this, otherwise we depend on the
// controller resync interval. See https://github.com/kubernetes/kubernetes/issues/34458.
//
// [1] ProgressingCondition.LastUpdatedTime + progressDeadlineSeconds - time.Now()
//
// For example, if a Deployment updated its Progressing condition 3 minutes ago and has a
// deadline of 10 minutes, it would need to be resynced for a progress check after 7 minutes.
//
// lastUpdated: 00:00:00
// now: 00:03:00
// progressDeadlineSeconds: 600 (10 minutes)
//
// lastUpdated + progressDeadlineSeconds - now => 00:00:00 + 00:10:00 - 00:03:00 => 07:00
after := currentCond.LastUpdateTime.Time.Add(time.Duration(*d.Spec.ProgressDeadlineSeconds) * time.Second).Sub(nowFn())
// If the remaining time is less than a second, then requeue the deployment immediately.
// Make it ratelimited so we stay on the safe side, eventually the Deployment should
// transition either to a Complete or to a TimedOut condition.
if after < time.Second {
glog.V(2).Infof("Queueing up deployment %q for a progress check now", d.Name)
dc.enqueueRateLimited(d)
return time.Duration(0)
}
glog.V(2).Infof("Queueing up deployment %q for a progress check after %ds", d.Name, int(after.Seconds()))
dc.enqueueAfter(d, after)
return after
}