Merge pull request #42097 from kargakis/address-mismatched-available-replicas

Automatic merge from submit-queue

Enqueue controllers after MinReadySeconds when all pods are ready

@janetkuo this should address https://github.com/kubernetes/kubernetes/issues/41697#issuecomment-281851377. It is impossible to unit test, but it should stabilize some of our deployment e2e tests that occasionally fail because availableReplicas is not updated in time.

It should also fix https://github.com/kubernetes/kubernetes/issues/41641

Eventually I would like AddAfter to be able to cancel previous invocations of the same key, so I opened https://github.com/kubernetes/client-go/issues/131
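For context on the AddAfter point: client-go's delaying workqueue can schedule a key for later processing, but it exposes no handle to cancel a pending delayed add for the same key — that is what kubernetes/client-go#131 asks for. A minimal, self-contained sketch of the interface as this PR uses it (the key and the 10s delay are made up; this is not the controller's actual wiring):

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/workqueue"
)

func main() {
	// A rate-limiting controller queue embeds DelayingInterface,
	// so AddAfter is available for delayed requeues.
	queue := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
	defer queue.ShutDown()

	// Schedule the controller key for reprocessing after its
	// MinReadySeconds window. There is no way to cancel this once
	// scheduled, which is what kubernetes/client-go#131 asks for.
	queue.AddAfter("default/my-rc", 10*time.Second)

	key, shutdown := queue.Get() // blocks until the delay elapses
	if !shutdown {
		fmt.Println("reprocess:", key)
		queue.Done(key)
	}
}
```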

@kubernetes/sig-apps-bugs
Author: Kubernetes Submit Queue
Committed: 2017-02-27 22:09:46 -08:00 (by GitHub)
4 changed files with 47 additions and 20 deletions


@@ -633,10 +633,16 @@ func (rm *ReplicationManager) syncReplicationController(key string) error {
 	newStatus := calculateStatus(rc, filteredPods, manageReplicasErr)
 	// Always updates status as pods come up or die.
-	if err := updateReplicationControllerStatus(rm.kubeClient.Core().ReplicationControllers(rc.Namespace), *rc, newStatus); err != nil {
+	updatedRC, err := updateReplicationControllerStatus(rm.kubeClient.Core().ReplicationControllers(rc.Namespace), *rc, newStatus)
+	if err != nil {
 		// Multiple things could lead to this update failing. Returning an error causes a requeue without forcing a hotloop
 		return err
 	}
+	// Resync the ReplicationController after MinReadySeconds as a last line of defense to guard against clock-skew.
+	if manageReplicasErr == nil && updatedRC.Spec.MinReadySeconds > 0 &&
+		updatedRC.Status.ReadyReplicas == *(updatedRC.Spec.Replicas) &&
+		updatedRC.Status.AvailableReplicas != *(updatedRC.Spec.Replicas) {
+		rm.enqueueControllerAfter(updatedRC, time.Duration(updatedRC.Spec.MinReadySeconds)*time.Second)
+	}
 	return manageReplicasErr
 }
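The new block matters because availableReplicas is time-driven rather than event-driven: a Ready pod counts as available only once it has been Ready for MinReadySeconds, and no watch event fires at the moment that window elapses. Re-enqueueing the controller MinReadySeconds later guarantees a sync runs right after the last pod can cross the threshold, even under clock skew. A simplified sketch of the availability rule (a hypothetical helper, not the actual pod utility):

```go
package main

import (
	"fmt"
	"time"
)

// isAvailable sketches the rule: a Ready pod counts toward
// availableReplicas only once it has been Ready for at least
// MinReadySeconds.
func isAvailable(readySince, now time.Time, minReadySeconds int32) bool {
	return minReadySeconds == 0 ||
		now.Sub(readySince) >= time.Duration(minReadySeconds)*time.Second
}

func main() {
	readySince := time.Now()
	fmt.Println(isAvailable(readySince, readySince.Add(5*time.Second), 10))  // false
	fmt.Println(isAvailable(readySince, readySince.Add(15*time.Second), 10)) // true
}
```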


@@ -30,7 +30,7 @@ import (
 )
 // updateReplicationControllerStatus attempts to update the Status.Replicas of the given controller, with a single GET/PUT retry.
-func updateReplicationControllerStatus(c v1core.ReplicationControllerInterface, rc v1.ReplicationController, newStatus v1.ReplicationControllerStatus) (updateErr error) {
+func updateReplicationControllerStatus(c v1core.ReplicationControllerInterface, rc v1.ReplicationController, newStatus v1.ReplicationControllerStatus) (*v1.ReplicationController, error) {
 	// This is the steady state. It happens when the rc doesn't have any expectations, since
 	// we do a periodic relist every 30s. If the generations differ but the replicas are
 	// the same, a caller might've resized to the same replica count.
@@ -40,7 +40,7 @@ func updateReplicationControllerStatus(c v1core.ReplicationControllerInterface,
 		rc.Status.AvailableReplicas == newStatus.AvailableReplicas &&
 		rc.Generation == rc.Status.ObservedGeneration &&
 		reflect.DeepEqual(rc.Status.Conditions, newStatus.Conditions) {
-		return nil
+		return &rc, nil
 	}
 	// Save the generation number we acted on, otherwise we might wrongfully indicate
@@ -48,9 +48,10 @@ func updateReplicationControllerStatus(c v1core.ReplicationControllerInterface,
 	// same status.
 	newStatus.ObservedGeneration = rc.Generation
-	var getErr error
+	var getErr, updateErr error
+	var updatedRC *v1.ReplicationController
 	for i, rc := 0, &rc; ; i++ {
-		glog.V(4).Infof(fmt.Sprintf("Updating replica count for rc: %s/%s, ", rc.Namespace, rc.Name) +
+		glog.V(4).Infof(fmt.Sprintf("Updating status for rc: %s/%s, ", rc.Namespace, rc.Name) +
 			fmt.Sprintf("replicas %d->%d (need %d), ", rc.Status.Replicas, newStatus.Replicas, *(rc.Spec.Replicas)) +
 			fmt.Sprintf("fullyLabeledReplicas %d->%d, ", rc.Status.FullyLabeledReplicas, newStatus.FullyLabeledReplicas) +
 			fmt.Sprintf("readyReplicas %d->%d, ", rc.Status.ReadyReplicas, newStatus.ReadyReplicas) +
@@ -58,17 +59,23 @@ func updateReplicationControllerStatus(c v1core.ReplicationControllerInterface,
 			fmt.Sprintf("sequence No: %v->%v", rc.Status.ObservedGeneration, newStatus.ObservedGeneration))
 		rc.Status = newStatus
-		_, updateErr = c.UpdateStatus(rc)
-		if updateErr == nil || i >= statusUpdateRetries {
-			return updateErr
+		updatedRC, updateErr = c.UpdateStatus(rc)
+		if updateErr == nil {
+			return updatedRC, nil
+		}
+		// Stop retrying if we exceed statusUpdateRetries - the replicationController will be requeued with a rate limit.
+		if i >= statusUpdateRetries {
+			break
 		}
 		// Update the controller with the latest resource version for the next poll
 		if rc, getErr = c.Get(rc.Name, metav1.GetOptions{}); getErr != nil {
 			// If the GET fails we can't trust status.Replicas anymore. This error
 			// is bound to be more interesting than the update failure.
-			return nil, getErr
+			return nil, getErr
 		}
 	}
+	return nil, updateErr
 }
// OverlappingControllers sorts a list of controllers by creation timestamp, using their names as a tie breaker.
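Stripped of the Kubernetes types, the rewritten function is the standard optimistic-concurrency shape: try the status update, and on failure refetch the object to pick up the latest resourceVersion before trying again, now returning the fresh object on success so the caller can inspect it. A self-contained sketch of that control flow with hypothetical names (strings stand in for the controller object):

```go
package main

import (
	"errors"
	"fmt"
)

const statusUpdateRetries = 1 // a single GET/PUT retry, as the doc comment above says

// updateWithRetry mirrors the loop above: try the update; on failure,
// refetch to pick up the latest resourceVersion and try once more.
func updateWithRetry(
	update func(obj string) (string, error),
	get func() (string, error),
	obj string,
) (string, error) {
	var updateErr error
	for i := 0; ; i++ {
		updated, err := update(obj)
		if err == nil {
			return updated, nil // success: hand the fresh object back to the caller
		}
		updateErr = err
		if i >= statusUpdateRetries {
			break // give up; the caller requeues with a rate limit
		}
		fresh, getErr := get()
		if getErr != nil {
			return "", getErr // a failed GET is more interesting than the update failure
		}
		obj = fresh
	}
	return "", updateErr
}

func main() {
	calls := 0
	update := func(obj string) (string, error) {
		calls++
		if calls == 1 {
			return "", errors.New("conflict: stale resourceVersion")
		}
		return obj + " (updated)", nil
	}
	get := func() (string, error) { return "rc@v2", nil }

	out, err := updateWithRetry(update, get, "rc@v1")
	fmt.Println(out, err) // rc@v2 (updated) <nil>
}
```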