Break deployment controller into separate self-contained files

* rolling.go (has all the logic for rolling deployments) * recreate.go (has all the logic for recreate deployments) * sync.go (has all the logic for getting and scaling replica sets) * rollback.go (has all the logic for rolling back a deployment) * util.go (contains all the utilities used throughout the controller) Leave back at deployment_controller.go all the necessary bits for creating, setting up, and running the controller loop. Also add package documentation.
2016-06-25 11:31:32 +02:00
parent d06359d6a0
commit 332d151d61
9 changed files with 1935 additions and 1776 deletions
--- a/pkg/controller/deployment/rolling.go
+++ b/pkg/controller/deployment/rolling.go
@@ -0,0 +1,243 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package deployment
+
+import (
+	"fmt"
+	"sort"
+
+	"github.com/golang/glog"
+	"k8s.io/kubernetes/pkg/apis/extensions"
+	"k8s.io/kubernetes/pkg/controller"
+	deploymentutil "k8s.io/kubernetes/pkg/util/deployment"
+	"k8s.io/kubernetes/pkg/util/integer"
+)
+
+// rolloutRolling performs one pass of a rolling update for the given deployment.
+// It fetches the new and old replica sets, then tries, in order, to reconcile the
+// new replica set and then the old ones. As soon as either reconcile step reports
+// that it scaled something, the deployment status is updated and the pass ends.
+// When neither step scaled anything, old replica sets are handed to
+// cleanupDeployment and the deployment status is synced.
+func (dc *DeploymentController) rolloutRolling(deployment *extensions.Deployment) error {
+	newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(deployment, true)
+	if err != nil {
+		return err
+	}
+	everyRS := append(oldRSs, newRS)
+
+	// First preference: reconcile the new replica set.
+	grew, err := dc.reconcileNewReplicaSet(everyRS, newRS, deployment)
+	switch {
+	case err != nil:
+		return err
+	case grew:
+		// Something changed, so record it in the DeploymentStatus and stop here.
+		return dc.updateDeploymentStatus(everyRS, newRS, deployment)
+	}
+
+	// Second preference: reconcile the old replica sets that are still active.
+	activeOldRSs := controller.FilterActiveReplicaSets(oldRSs)
+	shrank, err := dc.reconcileOldReplicaSets(everyRS, activeOldRSs, newRS, deployment)
+	switch {
+	case err != nil:
+		return err
+	case shrank:
+		// Something changed, so record it in the DeploymentStatus and stop here.
+		return dc.updateDeploymentStatus(everyRS, newRS, deployment)
+	}
+
+	// Nothing was scaled in this pass.
+	dc.cleanupDeployment(oldRSs, deployment)
+
+	return dc.syncDeploymentStatus(everyRS, newRS, deployment)
+}
+
+// reconcileNewReplicaSet adjusts the size of newRS relative to the replica count
+// requested by the deployment:
+//   - equal sizes: nothing is done and (false, nil) is returned;
+//   - newRS larger than requested: newRS is scaled straight to the requested count;
+//   - newRS smaller than requested: the target size is obtained from
+//     deploymentutil.NewRSNewReplicas and newRS is scaled to it.
+//
+// The boolean result is the first value returned by scaleReplicaSetAndRecordEvent.
+func (dc *DeploymentController) reconcileNewReplicaSet(allRSs []*extensions.ReplicaSet, newRS *extensions.ReplicaSet, deployment *extensions.Deployment) (bool, error) {
+	current, desired := newRS.Spec.Replicas, deployment.Spec.Replicas
+
+	switch {
+	case current == desired:
+		// Already at the requested size.
+		return false, nil
+	case current > desired:
+		// The new replica set overshoots the deployment; shrink it.
+		scaled, _, err := dc.scaleReplicaSetAndRecordEvent(newRS, desired, deployment)
+		return scaled, err
+	}
+
+	// The new replica set is too small; ask the helper how far it may grow.
+	target, err := deploymentutil.NewRSNewReplicas(deployment, allRSs, newRS)
+	if err != nil {
+		return false, err
+	}
+	scaled, _, err := dc.scaleReplicaSetAndRecordEvent(newRS, target, deployment)
+	return scaled, err
+}
+
+// reconcileOldReplicaSets scales down the old replica sets of a deployment as far
+// as the availability budget allows. It first removes unhealthy replicas from the
+// old replica sets and then scales the old replica sets down further, subject to
+// maxUnavailable.
+//
+// It returns true when at least one replica was removed by either step. Any error
+// from counting available pods, cleaning up unhealthy replicas, or scaling down is
+// returned to the caller rather than being reported as "nothing scaled".
+func (dc *DeploymentController) reconcileOldReplicaSets(allRSs []*extensions.ReplicaSet, oldRSs []*extensions.ReplicaSet, newRS *extensions.ReplicaSet, deployment *extensions.Deployment) (bool, error) {
+	oldPodsCount := deploymentutil.GetReplicaCountForReplicaSets(oldRSs)
+	if oldPodsCount == 0 {
+		// Can't scale down further
+		return false, nil
+	}
+
+	minReadySeconds := deployment.Spec.MinReadySeconds
+	allPodsCount := deploymentutil.GetReplicaCountForReplicaSets(allRSs)
+	// TODO: use dc.getAvailablePodsForReplicaSets instead
+	newRSAvailablePodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, deployment, []*extensions.ReplicaSet{newRS}, minReadySeconds)
+	if err != nil {
+		return false, fmt.Errorf("could not find available pods: %v", err)
+	}
+	maxUnavailable := maxUnavailable(*deployment)
+
+	// Check if we can scale down. We can scale down in the following 2 cases:
+	// * Some old replica sets have unhealthy replicas; we can safely scale down those unhealthy replicas since that won't
+	//   further increase unavailability.
+	// * The new replica set has scaled up and its replicas have become ready; then we can scale down old replica sets in a
+	//   further step.
+	//
+	// maxScaledDown := allPodsCount - minAvailable - newReplicaSetPodsUnavailable
+	// takes into account not only maxUnavailable and any surge pods that have been created, but also unavailable pods from
+	// the newRS, so that the unavailable pods from the newRS would not make us scale down old replica sets in a further
+	// step (that would increase unavailability).
+	//
+	// Concrete example:
+	//
+	// * 10 replicas
+	// * 2 maxUnavailable (absolute number, not percent)
+	// * 3 maxSurge (absolute number, not percent)
+	//
+	// case 1:
+	// * Deployment is updated, newRS is created with 3 replicas, oldRS is scaled down to 8, and newRS is scaled up to 5.
+	// * The new replica set pods crashloop and never become available.
+	// * allPodsCount is 13. minAvailable is 8. newRSPodsUnavailable is 5.
+	// * A node fails and causes one of the oldRS pods to become unavailable. However, 13 - 8 - 5 = 0, so the oldRS won't be scaled down.
+	// * The user notices the crashloop and does kubectl rollout undo to rollback.
+	// * newRSPodsUnavailable is 1, since we rolled back to the good replica set, so maxScaledDown = 13 - 8 - 1 = 4. 4 of the crashlooping pods will be scaled down.
+	// * The total number of pods will then be 9 and the newRS can be scaled up to 10.
+	//
+	// case 2:
+	// Same example, but pushing a new pod template instead of rolling back (aka "roll over"):
+	// * The new replica set created must start with 0 replicas because allPodsCount is already at 13.
+	// * However, newRSPodsUnavailable would also be 0, so the 2 old replica sets could be scaled down by 5 (13 - 8 - 0), which would then
+	// allow the new replica set to be scaled up by 5.
+	minAvailable := deployment.Spec.Replicas - maxUnavailable
+	newRSUnavailablePodCount := newRS.Spec.Replicas - newRSAvailablePodCount
+	maxScaledDown := allPodsCount - minAvailable - newRSUnavailablePodCount
+	if maxScaledDown <= 0 {
+		return false, nil
+	}
+
+	// Clean up unhealthy replicas first, otherwise unhealthy replicas will block deployment
+	// and cause timeout. See https://github.com/kubernetes/kubernetes/issues/16737
+	oldRSs, cleanupCount, err := dc.cleanupUnhealthyReplicas(oldRSs, deployment, minReadySeconds, maxScaledDown)
+	if err != nil {
+		// Surface the failure instead of reporting "nothing scaled" so the caller can react to it.
+		return false, err
+	}
+	glog.V(4).Infof("Cleaned up unhealthy replicas from old RSes by %d", cleanupCount)
+
+	// Scale down old replica sets; maxUnavailable must be checked to ensure we can scale down.
+	allRSs = append(oldRSs, newRS)
+	scaledDownCount, err := dc.scaleDownOldReplicaSetsForRollingUpdate(allRSs, oldRSs, deployment)
+	if err != nil {
+		// Surface the failure instead of reporting "nothing scaled" so the caller can react to it.
+		return false, err
+	}
+	glog.V(4).Infof("Scaled down old RSes of deployment %s by %d", deployment.Name, scaledDownCount)
+
+	totalScaledDown := cleanupCount + scaledDownCount
+	return totalScaledDown > 0, nil
+}
+
+// cleanupUnhealthyReplicas will scale down old replica sets with unhealthy replicas, so that all unhealthy replicas will be deleted.
+func (dc *DeploymentController) cleanupUnhealthyReplicas(oldRSs []*extensions.ReplicaSet, deployment *extensions.Deployment, minReadySeconds, maxCleanupCount int32) ([]*extensions.ReplicaSet, int32, error) {
+	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))
+	// Safely scale down all old replica sets with unhealthy replicas. Replica set will sort the pods in the order
+	// such that not-ready < ready, unscheduled < scheduled, and pending < running. This ensures that unhealthy replicas will
+	// been deleted first and won't increase unavailability.
+	totalScaledDown := int32(0)
+	for i, targetRS := range oldRSs {
+		if totalScaledDown >= maxCleanupCount {
+			break
+		}
+		if targetRS.Spec.Replicas == 0 {
+			// cannot scale down this replica set.
+			continue
+		}
+		// TODO: use dc.getAvailablePodsForReplicaSets instead
+		availablePodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, deployment, []*extensions.ReplicaSet{targetRS}, minReadySeconds)
+		if err != nil {
+			return nil, totalScaledDown, fmt.Errorf("could not find available pods: %v", err)
+		}
+		if targetRS.Spec.Replicas == availablePodCount {
+			// no unhealthy replicas found, no scaling required.
+			continue
+		}
+
+		scaledDownCount := int32(integer.IntMin(int(maxCleanupCount-totalScaledDown), int(targetRS.Spec.Replicas-availablePodCount)))
+		newReplicasCount := targetRS.Spec.Replicas - scaledDownCount
+		if newReplicasCount > targetRS.Spec.Replicas {
+			return nil, 0, fmt.Errorf("when cleaning up unhealthy replicas, got invalid request to scale down %s/%s %d -> %d", targetRS.Namespace, targetRS.Name, targetRS.Spec.Replicas, newReplicasCount)
+		}
+		_, updatedOldRS, err := dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment)
+		if err != nil {
+			return nil, totalScaledDown, err
+		}
+		totalScaledDown += scaledDownCount
+		oldRSs[i] = updatedOldRS
+	}
+	return oldRSs, totalScaledDown, nil
+}
+
+// scaleDownOldReplicaSetsForRollingUpdate scales down old replica sets when deployment strategy is "RollingUpdate".
+// Need check maxUnavailable to ensure availability
+func (dc *DeploymentController) scaleDownOldReplicaSetsForRollingUpdate(allRSs []*extensions.ReplicaSet, oldRSs []*extensions.ReplicaSet, deployment *extensions.Deployment) (int32, error) {
+	maxUnavailable := maxUnavailable(*deployment)
+
+	// Check if we can scale down.
+	minAvailable := deployment.Spec.Replicas - maxUnavailable
+	minReadySeconds := deployment.Spec.MinReadySeconds
+	// Find the number of ready pods.
+	// TODO: use dc.getAvailablePodsForReplicaSets instead
+	availablePodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, deployment, allRSs, minReadySeconds)
+	if err != nil {
+		return 0, fmt.Errorf("could not find available pods: %v", err)
+	}
+	if availablePodCount <= minAvailable {
+		// Cannot scale down.
+		return 0, nil
+	}
+	glog.V(4).Infof("Found %d available pods in deployment %s, scaling down old RSes", availablePodCount, deployment.Name)
+
+	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))
+
+	totalScaledDown := int32(0)
+	totalScaleDownCount := availablePodCount - minAvailable
+	for _, targetRS := range oldRSs {
+		if totalScaledDown >= totalScaleDownCount {
+			// No further scaling required.
+			break
+		}
+		if targetRS.Spec.Replicas == 0 {
+			// cannot scale down this ReplicaSet.
+			continue
+		}
+		// Scale down.
+		scaleDownCount := int32(integer.IntMin(int(targetRS.Spec.Replicas), int(totalScaleDownCount-totalScaledDown)))
+		newReplicasCount := targetRS.Spec.Replicas - scaleDownCount
+		if newReplicasCount > targetRS.Spec.Replicas {
+			return 0, fmt.Errorf("when scaling down old RS, got invalid request to scale down %s/%s %d -> %d", targetRS.Namespace, targetRS.Name, targetRS.Spec.Replicas, newReplicasCount)
+		}
+		_, _, err = dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment)
+		if err != nil {
+			return totalScaledDown, err
+		}
+
+		totalScaledDown += scaleDownCount
+	}
+
+	return totalScaledDown, nil
+}