kube-controller-manager: Decouple TaintManager from NodeLifecycleController (KEP-3902)

Andrea Tosatto
2023-07-10 14:02:56 +01:00
parent 38ed3ef7b7
commit ccda2d6fd4
14 changed files with 556 additions and 224 deletions


@@ -37,6 +37,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
appsv1informers "k8s.io/client-go/informers/apps/v1"
coordinformers "k8s.io/client-go/informers/coordination/v1"
coreinformers "k8s.io/client-go/informers/core/v1"
@@ -54,7 +55,9 @@ import (
kubeletapis "k8s.io/kubelet/pkg/apis"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"
"k8s.io/kubernetes/pkg/controller/tainteviction"
controllerutil "k8s.io/kubernetes/pkg/controller/util/node"
"k8s.io/kubernetes/pkg/features"
taintutils "k8s.io/kubernetes/pkg/util/taints"
)
@@ -127,6 +130,13 @@ const (
// podUpdateWorkerSize assumes that in most cases a pod will be handled by the
// monitorNodeHealth pass. Pod update workers will only handle lagging cache pods,
// so 4 workers should be enough.
podUpdateWorkerSize = 4
+// nodeUpdateWorkerSize defines the size of workers for node update and/or pod update.
+nodeUpdateWorkerSize = 8
+
+// taintEvictionController is defined here in order to prevent imports of
+// k8s.io/kubernetes/cmd/kube-controller-manager/names which would result in validation errors.
+// This constant will be removed upon graduation of the SeparateTaintEvictionController feature.
+taintEvictionController = "taint-eviction-controller"
)
// labelReconcileInfo lists Node labels to reconcile, and how to reconcile them.
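
The duplicated name above exists because this package cannot import k8s.io/kubernetes/cmd/kube-controller-manager/names without tripping the import restrictions the comment mentions. A minimal sketch of the cmd-side declaration the local constant mirrors; the surrounding file layout is an assumption here:

```go
// Sketch of the cmd-side name registry the comment refers to; pkg code
// cannot depend on this package, hence the temporary local copy.
package names

// TaintEvictionController is assumed to carry the same string that the
// local taintEvictionController constant duplicates.
const TaintEvictionController = "taint-eviction-controller"
```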
@@ -207,7 +217,7 @@ type podUpdateItem struct {
// Controller is the controller that manages a node's life cycle.
type Controller struct {
-taintManager *scheduler.NoExecuteTaintManager
+taintManager *tainteviction.Controller
podLister corelisters.PodLister
podInformerSynced cache.InformerSynced
@@ -326,7 +336,7 @@ func NewNodeLifecycleController(
nodeMonitorPeriod: nodeMonitorPeriod,
nodeStartupGracePeriod: nodeStartupGracePeriod,
nodeMonitorGracePeriod: nodeMonitorGracePeriod,
-nodeUpdateWorkerSize: scheduler.UpdateWorkerSize,
+nodeUpdateWorkerSize: nodeUpdateWorkerSize,
zoneNoExecuteTainter: make(map[string]*scheduler.RateLimitedTimedQueue),
nodesToRetry: sync.Map{},
zoneStates: make(map[string]ZoneState),
@@ -346,17 +356,11 @@ func NewNodeLifecycleController(
AddFunc: func(obj interface{}) {
pod := obj.(*v1.Pod)
nc.podUpdated(nil, pod)
-if nc.taintManager != nil {
-nc.taintManager.PodUpdated(nil, pod)
-}
},
UpdateFunc: func(prev, obj interface{}) {
prevPod := prev.(*v1.Pod)
newPod := obj.(*v1.Pod)
nc.podUpdated(prevPod, newPod)
-if nc.taintManager != nil {
-nc.taintManager.PodUpdated(prevPod, newPod)
-}
},
DeleteFunc: func(obj interface{}) {
pod, isPod := obj.(*v1.Pod)
@@ -374,9 +378,6 @@ func NewNodeLifecycleController(
}
}
nc.podUpdated(pod, nil)
-if nc.taintManager != nil {
-nc.taintManager.PodUpdated(pod, nil)
-}
},
})
nc.podInformerSynced = podInformer.Informer().HasSynced
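
The deletions above and in the previous hunk drop the forwarding of pod add/update/delete events into the embedded taint manager; once decoupled, the eviction controller has to watch pods through its own informer handlers. A minimal sketch of that wiring, with podUpdated standing in for whatever internal hook tainteviction.Controller actually uses:

```go
package example

import (
	v1 "k8s.io/api/core/v1"
	coreinformers "k8s.io/client-go/informers/core/v1"
	"k8s.io/client-go/tools/cache"
)

// registerPodHandlers shows the handler shape the removed code used to
// provide on the taint manager's behalf. podUpdated is a placeholder for
// the eviction controller's internal update hook, not a real API.
func registerPodHandlers(podInformer coreinformers.PodInformer, podUpdated func(oldPod, newPod *v1.Pod)) {
	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			podUpdated(nil, obj.(*v1.Pod))
		},
		UpdateFunc: func(prev, obj interface{}) {
			podUpdated(prev.(*v1.Pod), obj.(*v1.Pod))
		},
		DeleteFunc: func(obj interface{}) {
			// Tombstones are skipped here for brevity; real handlers also
			// unwrap cache.DeletedFinalStateUnknown.
			if pod, ok := obj.(*v1.Pod); ok {
				podUpdated(pod, nil)
			}
		},
	})
}
```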
@@ -412,21 +413,14 @@ func NewNodeLifecycleController(
nc.podLister = podInformer.Lister()
nc.nodeLister = nodeInformer.Lister()
-nc.taintManager = scheduler.NewNoExecuteTaintManager(ctx, kubeClient, nc.podLister, nc.nodeLister, nc.getPodsAssignedToNode)
-nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
-AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
-nc.taintManager.NodeUpdated(nil, node)
-return nil
-}),
-UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(oldNode, newNode *v1.Node) error {
-nc.taintManager.NodeUpdated(oldNode, newNode)
-return nil
-}),
-DeleteFunc: controllerutil.CreateDeleteNodeHandler(logger, func(node *v1.Node) error {
-nc.taintManager.NodeUpdated(node, nil)
-return nil
-}),
-})
+if !utilfeature.DefaultFeatureGate.Enabled(features.SeparateTaintEvictionController) {
+logger.Info("Running TaintEvictionController as part of NodeLifecycleController")
+tm, err := tainteviction.New(ctx, kubeClient, podInformer, nodeInformer, taintEvictionController)
+if err != nil {
+return nil, err
+}
+nc.taintManager = tm
+}
logger.Info("Controller will reconcile labels")
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
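
With the new block, the eviction logic stays embedded only while the SeparateTaintEvictionController gate is disabled; when the gate is enabled, the same tainteviction.Controller is expected to run as a standalone controller under the name duplicated above. A sketch of how such a gate would be declared in pkg/features; the stage and default shown are assumptions, not taken from this commit:

```go
package features

import "k8s.io/component-base/featuregate"

const (
	// SeparateTaintEvictionController moves taint-based pod eviction out of
	// NodeLifecycleController into its own controller (KEP-3902).
	SeparateTaintEvictionController featuregate.Feature = "SeparateTaintEvictionController"
)

// The gate also needs an entry in the default feature-gate map, e.g.
// (default and maturity level here are illustrative only):
//
//	SeparateTaintEvictionController: {Default: false, PreRelease: featuregate.Alpha},
```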
@@ -480,10 +474,13 @@ func (nc *Controller) Run(ctx context.Context) {
return
}
-go nc.taintManager.Run(ctx)
+if !utilfeature.DefaultFeatureGate.Enabled(features.SeparateTaintEvictionController) {
+logger.Info("Starting", "controller", taintEvictionController)
+go nc.taintManager.Run(ctx)
+}
// Start workers to reconcile labels and/or update NoSchedule taint for nodes.
-for i := 0; i < scheduler.UpdateWorkerSize; i++ {
+for i := 0; i < nodeUpdateWorkerSize; i++ {
// Thanks to "workqueue", each worker just need to get item from queue, because
// the item is flagged when got from queue: if new event come, the new item will
// be re-queued until "Done", so no more than one worker handle the same item and
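
The comment cut off above describes the standard client-go workqueue guarantee: a key returned by Get is marked in-flight, and re-adds of the same key are deferred until Done, so no two workers ever process the same key concurrently. A minimal, self-contained sketch of that worker loop; the names here are illustrative, not from this commit:

```go
package example

import "k8s.io/client-go/util/workqueue"

// runWorker drains keys from a rate-limited workqueue. While a key sits
// between Get and Done, the queue holds back any re-add of the same key,
// so at most one worker processes a given key at a time.
func runWorker(queue workqueue.RateLimitingInterface, process func(key string) error) {
	for {
		item, shutdown := queue.Get()
		if shutdown {
			return
		}
		key := item.(string)
		if err := process(key); err != nil {
			// Retry with backoff; the key becomes eligible again after Done.
			queue.AddRateLimited(key)
		} else {
			queue.Forget(key)
		}
		queue.Done(key)
	}
}
```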