Handle overlapping deployments gracefully

1. When overlapping deployments are discovered, annotate them
2. Expose those overlapping annotations as warnings in kubectl describe
3. Only respect the earliest updated one (skip syncing all other overlapping deployments)
4. Use indexer instead of store for deployment lister
This commit is contained in:
Janet Kuo
2016-08-16 18:47:15 -07:00
parent fe808ec2a4
commit 90557ec56c
8 changed files with 233 additions and 41 deletions

View File

@@ -23,6 +23,7 @@ package deployment
import (
"fmt"
"reflect"
"sort"
"time"
"github.com/golang/glog"
@@ -36,6 +37,7 @@ import (
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/deployment/util"
"k8s.io/kubernetes/pkg/controller/framework"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/util/metrics"
utilruntime "k8s.io/kubernetes/pkg/util/runtime"
@@ -108,7 +110,7 @@ func NewDeploymentController(client clientset.Interface, resyncPeriod controller
queue: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()),
}
dc.dStore.Store, dc.dController = framework.NewInformer(
dc.dStore.Indexer, dc.dController = framework.NewIndexerInformer(
&cache.ListWatch{
ListFunc: func(options api.ListOptions) (runtime.Object, error) {
return dc.client.Extensions().Deployments(api.NamespaceAll).List(options)
@@ -125,6 +127,7 @@ func NewDeploymentController(client clientset.Interface, resyncPeriod controller
// This will enter the sync loop and no-op, because the deployment has been deleted from the store.
DeleteFunc: dc.deleteDeploymentNotification,
},
cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc},
)
dc.rsStore.Store, dc.rsController = framework.NewInformer(
@@ -252,7 +255,6 @@ func (dc *DeploymentController) addReplicaSet(obj interface{}) {
}
// getDeploymentForReplicaSet returns the deployment managing the given ReplicaSet.
// TODO: Surface that we are ignoring multiple deployments for a given ReplicaSet.
func (dc *DeploymentController) getDeploymentForReplicaSet(rs *extensions.ReplicaSet) *extensions.Deployment {
deployments, err := dc.dStore.GetDeploymentsForReplicaSet(rs)
if err != nil || len(deployments) == 0 {
@@ -262,8 +264,11 @@ func (dc *DeploymentController) getDeploymentForReplicaSet(rs *extensions.Replic
// Because all ReplicaSet's belonging to a deployment should have a unique label key,
// there should never be more than one deployment returned by the above method.
// If that happens we should probably dynamically repair the situation by ultimately
// trying to clean up one of the controllers, for now we just return one of the two,
// likely randomly.
// trying to clean up one of the controllers, for now we just return the older one
if len(deployments) > 1 {
sort.Sort(util.BySelectorLastUpdateTime(deployments))
glog.Errorf("user error! more than one deployment is selecting replica set %s/%s with labels: %#v, returning %s/%s", rs.Namespace, rs.Name, rs.Labels, deployments[0].Namespace, deployments[0].Name)
}
return &deployments[0]
}
@@ -321,22 +326,20 @@ func (dc *DeploymentController) deleteReplicaSet(obj interface{}) {
}
}
// getDeploymentForPod returns the deployment managing the ReplicaSet that manages the given Pod.
// TODO: Surface that we are ignoring multiple deployments for a given Pod.
// getDeploymentForPod returns the deployment that manages the given Pod.
// If there are multiple deployments for a given Pod, only return the oldest one.
func (dc *DeploymentController) getDeploymentForPod(pod *api.Pod) *extensions.Deployment {
rss, err := dc.rsStore.GetPodReplicaSets(pod)
if err != nil {
glog.V(4).Infof("Error: %v. No ReplicaSets found for pod %v, deployment controller will avoid syncing.", err, pod.Name)
deployments, err := dc.dStore.GetDeploymentsForPod(pod)
if err != nil || len(deployments) == 0 {
glog.V(4).Infof("Error: %v. No deployment found for Pod %v, deployment controller will avoid syncing.", err, pod.Name)
return nil
}
for _, rs := range rss {
deployments, err := dc.dStore.GetDeploymentsForReplicaSet(&rs)
if err == nil && len(deployments) > 0 {
return &deployments[0]
}
if len(deployments) > 1 {
sort.Sort(util.BySelectorLastUpdateTime(deployments))
glog.Errorf("user error! more than one deployment is selecting pod %s/%s with labels: %#v, returning %s/%s", pod.Namespace, pod.Name, pod.Labels, deployments[0].Namespace, deployments[0].Name)
}
glog.V(4).Infof("No deployments found for pod %v, deployment controller will avoid syncing.", pod.Name)
return nil
return &deployments[0]
}
// When a pod is created, ensure its controller syncs
@@ -407,15 +410,28 @@ func (dc *DeploymentController) enqueueDeployment(deployment *extensions.Deploym
return
}
// TODO: Handle overlapping deployments better. Either disallow them at admission time or
// deterministically avoid syncing deployments that fight over ReplicaSet's. Currently, we
// only ensure that the same deployment is synced for a given ReplicaSet. When we
// periodically relist all deployments there will still be some ReplicaSet instability. One
// way to handle this is by querying the store for all deployments that this deployment
// overlaps, as well as all deployments that overlap this deployments, and sorting them.
dc.queue.Add(key)
}
func (dc *DeploymentController) markDeploymentOverlap(deployment *extensions.Deployment, withDeployment string) (*extensions.Deployment, error) {
if deployment.Annotations[util.OverlapAnnotation] == withDeployment {
return deployment, nil
}
if deployment.Annotations == nil {
deployment.Annotations = make(map[string]string)
}
deployment.Annotations[util.OverlapAnnotation] = withDeployment
return dc.client.Extensions().Deployments(deployment.Namespace).Update(deployment)
}
func (dc *DeploymentController) clearDeploymentOverlap(deployment *extensions.Deployment) (*extensions.Deployment, error) {
if len(deployment.Annotations[util.OverlapAnnotation]) == 0 {
return deployment, nil
}
delete(deployment.Annotations, util.OverlapAnnotation)
return dc.client.Extensions().Deployments(deployment.Namespace).Update(deployment)
}
// worker runs a worker thread that just dequeues items, processes them, and marks them done.
// It enforces that the syncHandler is never invoked concurrently with the same key.
func (dc *DeploymentController) worker() {
@@ -463,7 +479,7 @@ func (dc *DeploymentController) syncDeployment(key string) error {
glog.V(4).Infof("Finished syncing deployment %q (%v)", key, time.Now().Sub(startTime))
}()
obj, exists, err := dc.dStore.Store.GetByKey(key)
obj, exists, err := dc.dStore.Indexer.GetByKey(key)
if err != nil {
glog.Infof("Unable to retrieve deployment %v from store: %v", key, err)
return err
@@ -491,6 +507,11 @@ func (dc *DeploymentController) syncDeployment(key string) error {
return dc.syncStatusOnly(d)
}
// Handle overlapping deployments by deterministically avoid syncing deployments that fight over ReplicaSets.
if err = dc.handleOverlap(d); err != nil {
return err
}
if d.Spec.Paused {
return dc.sync(d)
}
@@ -518,3 +539,40 @@ func (dc *DeploymentController) syncDeployment(key string) error {
}
return fmt.Errorf("unexpected deployment strategy type: %s", d.Spec.Strategy.Type)
}
// handleOverlap relists all deployment in the same namespace for overlaps, and avoid syncing
// the newer overlapping ones (only sync the oldest one). New/old is determined by when the
// deployment's selector is last updated.
func (dc *DeploymentController) handleOverlap(d *extensions.Deployment) error {
selector, err := unversioned.LabelSelectorAsSelector(d.Spec.Selector)
if err != nil {
return fmt.Errorf("deployment %s/%s has invalid label selector: %v", d.Namespace, d.Name, err)
}
deployments, err := dc.dStore.Deployments(d.Namespace).List(labels.Everything())
if err != nil {
return fmt.Errorf("error listing deployments in namespace %s: %v", d.Namespace, err)
}
overlapping := false
for i := range deployments {
other := &deployments[i]
if !selector.Empty() && selector.Matches(labels.Set(other.Spec.Template.Labels)) && d.UID != other.UID {
overlapping = true
// We don't care if the overlapping annotation update failed or not (we don't make decision on it)
d, _ = dc.markDeploymentOverlap(d, other.Name)
other, _ = dc.markDeploymentOverlap(other, d.Name)
// Skip syncing this one if older overlapping one is found
// TODO: figure out a better way to determine which deployment to skip,
// either with controller reference, or with validation.
// Using oldest active replica set to determine which deployment to skip wouldn't make much difference,
// since new replica set hasn't been created after selector update
if util.SelectorUpdatedBefore(other, d) {
return fmt.Errorf("found deployment %s/%s has overlapping selector with an older deployment %s/%s, skip syncing it", d.Namespace, d.Name, other.Namespace, other.Name)
}
}
}
if !overlapping {
// We don't care if the overlapping annotation update failed or not (we don't make decision on it)
d, _ = dc.clearDeploymentOverlap(d)
}
return nil
}