controller: support perma-failed deployments

This commit adds support for failing deployments based on a timeout
parameter defined in the spec. If there is no progress for the amount
of time defined as progressDeadlineSeconds then the deployment will be
marked as failed by adding a condition with a ProgressDeadlineExceeded
reason in it. Progress in the context of a deployment means the creation
or adoption of a new replica set, scaling up new pods, and scaling down
old pods.
This commit is contained in:
Michail Kargakis
2016-09-15 17:57:53 +02:00
parent c4ff44b66d
commit a5029bf373
10 changed files with 834 additions and 25 deletions

View File

@@ -70,8 +70,111 @@ const (
// TODO: Delete this annotation when we gracefully handle overlapping selectors.
// See https://github.com/kubernetes/kubernetes/issues/2210
SelectorUpdateAnnotation = "deployment.kubernetes.io/selector-updated-at"
// Reasons for deployment conditions
//
// Progressing:
//
// ReplicaSetUpdatedReason is added in a deployment when one of its replica sets is updated as part
// of the rollout process.
ReplicaSetUpdatedReason = "ReplicaSetUpdated"
// FailedRSCreateReason is added in a deployment when it cannot create a new replica set.
FailedRSCreateReason = "ReplicaSetCreateError"
// NewReplicaSetReason is added in a deployment when it creates a new replica set.
NewReplicaSetReason = "NewReplicaSetCreated"
// FoundNewRSReason is added in a deployment when it adopts an existing replica set.
FoundNewRSReason = "FoundNewReplicaSet"
// NewRSAvailableReason is added in a deployment when its newest replica set is made available
// ie. the number of new pods that have passed readiness checks and run for at least minReadySeconds
// is at least the minimum available pods that need to run for the deployment.
NewRSAvailableReason = "NewReplicaSetAvailable"
// TimedOutReason is added in a deployment when its newest replica set fails to show any progress
// within the given deadline (progressDeadlineSeconds).
TimedOutReason = "ProgressDeadlineExceeded"
// PausedDeployReason is added in a deployment when it is paused. Lack of progress shouldn't be
// estimated once a deployment is paused.
PausedDeployReason = "DeploymentPaused"
// ResumedDeployReason is added in a deployment when it is resumed. Useful for not failing accidentally
// deployments that paused amidst a rollout and are bounded by a deadline.
ResumedDeployReason = "DeploymentResumed"
//
// Available:
//
// MinimumReplicasAvailable is added in a deployment when it has its minimum replicas required available.
MinimumReplicasAvailable = "MinimumReplicasAvailable"
// MinimumReplicasUnavailable is added in a deployment when it doesn't have the minimum required replicas
// available.
MinimumReplicasUnavailable = "MinimumReplicasUnavailable"
)
// NewDeploymentCondition creates a new deployment condition.
func NewDeploymentCondition(condType extensions.DeploymentConditionType, status api.ConditionStatus, reason, message string) *extensions.DeploymentCondition {
return &extensions.DeploymentCondition{
Type: condType,
Status: status,
LastUpdateTime: unversioned.Now(),
LastTransitionTime: unversioned.Now(),
Reason: reason,
Message: message,
}
}
// GetDeploymentCondition returns the condition with the provided type.
func GetDeploymentCondition(status extensions.DeploymentStatus, condType extensions.DeploymentConditionType) *extensions.DeploymentCondition {
for i := range status.Conditions {
c := status.Conditions[i]
if c.Type == condType {
return &c
}
}
return nil
}
// SetDeploymentCondition updates the deployment to include the provided condition. If the condition that
// we are about to add already exists and has the same status and reason then we are not going to update.
func SetDeploymentCondition(status *extensions.DeploymentStatus, condition extensions.DeploymentCondition) {
currentCond := GetDeploymentCondition(*status, condition.Type)
if currentCond != nil && currentCond.Status == condition.Status && currentCond.Reason == condition.Reason {
return
}
// Do not update lastTransitionTime if the status of the condition doesn't change.
if currentCond != nil && currentCond.Status == condition.Status {
condition.LastTransitionTime = currentCond.LastTransitionTime
}
newConditions := filterOutCondition(status.Conditions, condition.Type)
status.Conditions = append(newConditions, condition)
}
// RemoveDeploymentCondition removes the deployment condition with the provided type.
func RemoveDeploymentCondition(status *extensions.DeploymentStatus, condType extensions.DeploymentConditionType) {
status.Conditions = filterOutCondition(status.Conditions, condType)
}
// filterOutCondition returns a new slice of deployment conditions without conditions with the provided type.
func filterOutCondition(conditions []extensions.DeploymentCondition, condType extensions.DeploymentConditionType) []extensions.DeploymentCondition {
var newConditions []extensions.DeploymentCondition
for _, c := range conditions {
if c.Type == condType {
continue
}
newConditions = append(newConditions, c)
}
return newConditions
}
// ReplicaSetToDeploymentCondition converts a replica set condition into a deployment condition.
// Useful for promoting replica set failure conditions into deployments.
func ReplicaSetToDeploymentCondition(cond extensions.ReplicaSetCondition) extensions.DeploymentCondition {
return extensions.DeploymentCondition{
Type: extensions.DeploymentConditionType(cond.Type),
Status: cond.Status,
LastTransitionTime: cond.LastTransitionTime,
LastUpdateTime: cond.LastTransitionTime,
Reason: cond.Reason,
Message: cond.Message,
}
}
// SetDeploymentRevision updates the revision for a deployment.
func SetDeploymentRevision(deployment *extensions.Deployment, revision string) bool {
updated := false
@@ -696,6 +799,56 @@ func IsRollingUpdate(deployment *extensions.Deployment) bool {
return deployment.Spec.Strategy.Type == extensions.RollingUpdateDeploymentStrategyType
}
// DeploymentComplete considers a deployment to be complete once its desired replicas equals its
// updatedReplicas and it doesn't violate minimum availability.
func DeploymentComplete(deployment *extensions.Deployment, newStatus *extensions.DeploymentStatus) bool {
return newStatus.UpdatedReplicas == deployment.Spec.Replicas &&
newStatus.AvailableReplicas >= deployment.Spec.Replicas-MaxUnavailable(*deployment)
}
// DeploymentProgressing reports progress for a deployment. Progress is estimated by comparing the
// current with the new status of the deployment that the controller is observing. The following
// algorithm is already used in the kubectl rolling updater to report lack of progress.
func DeploymentProgressing(deployment *extensions.Deployment, newStatus *extensions.DeploymentStatus) bool {
oldStatus := deployment.Status
// Old replicas that need to be scaled down
oldStatusOldReplicas := oldStatus.Replicas - oldStatus.UpdatedReplicas
newStatusOldReplicas := newStatus.Replicas - newStatus.UpdatedReplicas
return (newStatus.UpdatedReplicas > oldStatus.UpdatedReplicas) || (newStatusOldReplicas < oldStatusOldReplicas)
}
// used for unit testing
var nowFn = func() time.Time { return time.Now() }
// DeploymentTimedOut considers a deployment to have timed out once its condition that reports progress
// is older than progressDeadlineSeconds or a Progressing condition with a TimedOutReason reason already
// exists.
func DeploymentTimedOut(deployment *extensions.Deployment, newStatus *extensions.DeploymentStatus) bool {
if deployment.Spec.ProgressDeadlineSeconds == nil {
return false
}
// Look for the Progressing condition. If it doesn't exist, we have no base to estimate progress.
// If it's already set with a TimedOutReason reason, we have already timed out, no need to check
// again.
condition := GetDeploymentCondition(*newStatus, extensions.DeploymentProgressing)
if condition == nil {
return false
}
if condition.Reason == TimedOutReason {
return true
}
// Look at the difference in seconds between now and the last time we reported any
// progress or tried to create a replica set, or resumed a paused deployment and
// compare against progressDeadlineSeconds.
from := condition.LastTransitionTime
delta := time.Duration(*deployment.Spec.ProgressDeadlineSeconds) * time.Second
return from.Add(delta).Before(nowFn())
}
// NewRSNewReplicas calculates the number of replicas a deployment's new RS should have.
// When one of the followings is true, we're rolling out the deployment; otherwise, we're scaling it.
// 1) The new RS is saturated: newRS's replicas == deployment's replicas