Merge pull request #49257 from k82cn/k8s_42001
Automatic merge from submit-queue (batch tested with PRs 51574, 51534, 49257, 44680, 48836)

Task 1: Taint node by condition.

**What this PR does / why we need it**: Taints nodes by condition for MemoryPressure, OutOfDisk, and so on.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: part of #42001

**Release note**:

```release-note
Taint nodes by their conditions as follows:
* 'node.kubernetes.io/network-unavailable=:NoSchedule' if NetworkUnavailable is true
* 'node.kubernetes.io/disk-pressure=:NoSchedule' if DiskPressure is true
* 'node.kubernetes.io/memory-pressure=:NoSchedule' if MemoryPressure is true
* 'node.kubernetes.io/out-of-disk=:NoSchedule' if OutOfDisk is true
```
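The gist of the release note is a fixed mapping from node conditions to `NoSchedule` taint keys. Below is a minimal, self-contained Go sketch of that mapping; the helper name `noScheduleTaintsFor` and its standalone form are illustrative only and not the controller's actual code (the real mapping is `nodeConditionToTaintKeyMap` in the diff below).

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// conditionTaints mirrors the condition-to-taint-key mapping from the release note.
// Illustrative sketch only; the controller keeps this in nodeConditionToTaintKeyMap.
var conditionTaints = map[v1.NodeConditionType]string{
	v1.NodeNetworkUnavailable:         "node.kubernetes.io/network-unavailable",
	v1.NodeDiskPressure:               "node.kubernetes.io/disk-pressure",
	v1.NodeMemoryPressure:             "node.kubernetes.io/memory-pressure",
	v1.NodeConditionType("OutOfDisk"): "node.kubernetes.io/out-of-disk", // v1.NodeOutOfDisk at the time of this PR
}

// noScheduleTaintsFor returns the NoSchedule taints corresponding to the node
// conditions currently reported as True.
func noScheduleTaintsFor(node *v1.Node) []v1.Taint {
	var taints []v1.Taint
	for _, cond := range node.Status.Conditions {
		if key, ok := conditionTaints[cond.Type]; ok && cond.Status == v1.ConditionTrue {
			taints = append(taints, v1.Taint{Key: key, Effect: v1.TaintEffectNoSchedule})
		}
	}
	return taints
}

func main() {
	node := &v1.Node{Status: v1.NodeStatus{Conditions: []v1.NodeCondition{
		{Type: v1.NodeMemoryPressure, Status: v1.ConditionTrue},
		{Type: v1.NodeDiskPressure, Status: v1.ConditionFalse},
	}}}
	// Prints only the memory-pressure NoSchedule taint.
	fmt.Println(noScheduleTaintsFor(node))
}
```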
@@ -76,6 +76,20 @@ var (
 		Key:    algorithm.TaintNodeNotReady,
 		Effect: v1.TaintEffectNoExecute,
 	}
+
+	nodeConditionToTaintKeyMap = map[v1.NodeConditionType]string{
+		v1.NodeMemoryPressure:     algorithm.TaintNodeMemoryPressure,
+		v1.NodeOutOfDisk:          algorithm.TaintNodeOutOfDisk,
+		v1.NodeDiskPressure:       algorithm.TaintNodeDiskPressure,
+		v1.NodeNetworkUnavailable: algorithm.TaintNodeNetworkUnavailable,
+	}
+
+	taintKeyToNodeConditionMap = map[string]v1.NodeConditionType{
+		algorithm.TaintNodeNetworkUnavailable: v1.NodeNetworkUnavailable,
+		algorithm.TaintNodeMemoryPressure:     v1.NodeMemoryPressure,
+		algorithm.TaintNodeOutOfDisk:          v1.NodeOutOfDisk,
+		algorithm.TaintNodeDiskPressure:       v1.NodeDiskPressure,
+	}
 )
 
 const (
@@ -180,6 +194,10 @@ type NodeController struct {
 	// if set to true NodeController will taint Nodes with 'TaintNodeNotReady' and 'TaintNodeUnreachable'
 	// taints instead of evicting Pods itself.
 	useTaintBasedEvictions bool
+
+	// if set to true, NodeController will taint Nodes based on its condition for 'NetworkUnavailable',
+	// 'MemoryPressure', 'OutOfDisk' and 'DiskPressure'.
+	taintNodeByCondition bool
 }
 
 // NewNodeController returns a new node controller to sync instances from cloudprovider.
@@ -206,7 +224,9 @@ func NewNodeController(
 	allocateNodeCIDRs bool,
 	allocatorType ipam.CIDRAllocatorType,
 	runTaintManager bool,
-	useTaintBasedEvictions bool) (*NodeController, error) {
+	useTaintBasedEvictions bool,
+	taintNodeByCondition bool,
+) (*NodeController, error) {
 	eventBroadcaster := record.NewBroadcaster()
 	recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "controllermanager"})
 	eventBroadcaster.StartLogging(glog.Infof)
@@ -387,6 +407,17 @@ func NewNodeController(
 		nc.taintManager = scheduler.NewNoExecuteTaintManager(kubeClient)
 	}
 
+	if nc.taintNodeByCondition {
+		nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
+			AddFunc: util.CreateAddNodeHandler(func(node *v1.Node) error {
+				return nc.doNoScheduleTaintingPass(node)
+			}),
+			UpdateFunc: util.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
+				return nc.doNoScheduleTaintingPass(newNode)
+			}),
+		})
+	}
+
 	nc.nodeLister = nodeInformer.Lister()
 	nc.nodeInformerSynced = nodeInformer.Informer().HasSynced
 
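The registration above relies on `util.CreateAddNodeHandler` and `util.CreateUpdateNodeHandler` to adapt typed `*v1.Node` callbacks to client-go's untyped event handlers. The following self-contained sketch shows that adapter pattern under the assumption that the wrappers simply type-assert and log errors; the lower-case names and error handling here are illustrative, not the controller's actual helpers.

```go
package main

import (
	"log"

	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/cache"
)

// createAddNodeHandler adapts a typed func(*v1.Node) error to the untyped AddFunc
// that client-go expects. Sketch in the spirit of util.CreateAddNodeHandler.
func createAddNodeHandler(f func(*v1.Node) error) func(obj interface{}) {
	return func(obj interface{}) {
		node, ok := obj.(*v1.Node)
		if !ok {
			return // not a Node; ignored in this sketch
		}
		if err := f(node); err != nil {
			log.Printf("error handling node add: %v", err)
		}
	}
}

// createUpdateNodeHandler does the same for UpdateFunc, passing old and new objects through.
func createUpdateNodeHandler(f func(old, new *v1.Node) error) func(oldObj, newObj interface{}) {
	return func(oldObj, newObj interface{}) {
		oldNode, ok1 := oldObj.(*v1.Node)
		newNode, ok2 := newObj.(*v1.Node)
		if !ok1 || !ok2 {
			return
		}
		if err := f(oldNode, newNode); err != nil {
			log.Printf("error handling node update: %v", err)
		}
	}
}

// handlerFuncs shows how the wrappers plug into cache.ResourceEventHandlerFuncs,
// mirroring the registration in the hunk above.
func handlerFuncs(taintingPass func(*v1.Node) error) cache.ResourceEventHandlerFuncs {
	return cache.ResourceEventHandlerFuncs{
		AddFunc: createAddNodeHandler(taintingPass),
		UpdateFunc: createUpdateNodeHandler(func(_, newNode *v1.Node) error {
			return taintingPass(newNode)
		}),
	}
}

func main() {
	_ = handlerFuncs(func(n *v1.Node) error { return nil })
}
```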
@@ -425,6 +456,34 @@ func (nc *NodeController) doEvictionPass() {
 	}
 }
 
+func (nc *NodeController) doNoScheduleTaintingPass(node *v1.Node) error {
+	// Map the node's conditions to taints.
+	taints := []v1.Taint{}
+	for _, condition := range node.Status.Conditions {
+		if _, found := nodeConditionToTaintKeyMap[condition.Type]; found {
+			if condition.Status == v1.ConditionTrue {
+				taints = append(taints, v1.Taint{
+					Key:    nodeConditionToTaintKeyMap[condition.Type],
+					Effect: v1.TaintEffectNoSchedule,
+				})
+			}
+		}
+	}
+	nodeTaints := taintutils.TaintSetFilter(node.Spec.Taints, func(t *v1.Taint) bool {
+		_, found := taintKeyToNodeConditionMap[t.Key]
+		return found
+	})
+	taintsToAdd, taintsToDel := taintutils.TaintSetDiff(taints, nodeTaints)
+	// If there is nothing to add or delete, return directly.
+	if len(taintsToAdd) == 0 && len(taintsToDel) == 0 {
+		return nil
+	}
+	if !util.SwapNodeControllerTaint(nc.kubeClient, taintsToAdd, taintsToDel, node) {
+		return fmt.Errorf("failed to swap taints of node %+v", node)
+	}
+	return nil
+}
+
 func (nc *NodeController) doNoExecuteTaintingPass() {
 	nc.evictorLock.Lock()
 	defer nc.evictorLock.Unlock()
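`doNoScheduleTaintingPass` builds the desired set of condition taints, filters the node's existing taints down to the condition-derived ones, and reconciles the two sets. A simplified, self-contained sketch of that set-diff step follows; `taintutils.TaintSetFilter`/`TaintSetDiff` are the real helpers, and matching on Key+Effect is an assumption made here for brevity.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// taintSetDiff sketches the reconciliation in the pass above: given the desired
// condition taints and the node's current condition taints, return what must be
// added and what must be removed. Key+Effect matching is a simplification.
func taintSetDiff(desired []v1.Taint, current []*v1.Taint) (toAdd, toDel []*v1.Taint) {
	key := func(t *v1.Taint) string { return t.Key + "/" + string(t.Effect) }

	currentSet := map[string]*v1.Taint{}
	for _, t := range current {
		currentSet[key(t)] = t
	}
	desiredSet := map[string]bool{}
	for i := range desired {
		t := &desired[i]
		desiredSet[key(t)] = true
		if _, ok := currentSet[key(t)]; !ok {
			toAdd = append(toAdd, t)
		}
	}
	for k, t := range currentSet {
		if !desiredSet[k] {
			toDel = append(toDel, t)
		}
	}
	return toAdd, toDel
}

func main() {
	desired := []v1.Taint{{Key: "node.kubernetes.io/memory-pressure", Effect: v1.TaintEffectNoSchedule}}
	current := []*v1.Taint{{Key: "node.kubernetes.io/disk-pressure", Effect: v1.TaintEffectNoSchedule}}
	toAdd, toDel := taintSetDiff(desired, current)
	fmt.Printf("add=%d del=%d\n", len(toAdd), len(toDel)) // add=1 del=1
}
```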
@@ -459,7 +518,7 @@ func (nc *NodeController) doNoExecuteTaintingPass() {
 				return true, 0
 			}
 
-			return util.SwapNodeControllerTaint(nc.kubeClient, &taintToAdd, &oppositeTaint, node), 0
+			return util.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{&oppositeTaint}, node), 0
 		})
 	}
 }
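The `SwapNodeControllerTaint` signature change above (single taint pointers become `[]*v1.Taint` slices) lets callers such as `doNoScheduleTaintingPass` add and remove several taints in one call. Below is a rough sketch of the add-then-remove idea applied to an in-memory node object; the real helper patches the node through the API server and reports success or failure, whereas this local version only mutates `node.Spec.Taints` and its name is illustrative.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// swapTaintsLocally sketches what a "swap" amounts to: drop the taints to remove from the
// node's spec and append the taints to add. Purely local and illustrative; the controller's
// util.SwapNodeControllerTaint performs the equivalent update against the API server.
func swapTaintsLocally(node *v1.Node, taintsToAdd, taintsToRemove []*v1.Taint) {
	remove := map[string]bool{}
	for _, t := range taintsToRemove {
		remove[t.Key+"/"+string(t.Effect)] = true
	}

	var kept []v1.Taint
	for _, t := range node.Spec.Taints {
		if !remove[t.Key+"/"+string(t.Effect)] {
			kept = append(kept, t)
		}
	}
	for _, t := range taintsToAdd {
		kept = append(kept, *t)
	}
	node.Spec.Taints = kept
}

func main() {
	node := &v1.Node{Spec: v1.NodeSpec{Taints: []v1.Taint{
		{Key: "node.kubernetes.io/out-of-disk", Effect: v1.TaintEffectNoSchedule},
	}}}
	swapTaintsLocally(node,
		[]*v1.Taint{{Key: "node.kubernetes.io/memory-pressure", Effect: v1.TaintEffectNoSchedule}},
		[]*v1.Taint{{Key: "node.kubernetes.io/out-of-disk", Effect: v1.TaintEffectNoSchedule}},
	)
	fmt.Println(node.Spec.Taints) // only the memory-pressure taint remains
}
```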
@@ -542,7 +601,7 @@ func (nc *NodeController) monitorNodeStatus() error {
 			nc.knownNodeSet[added[i].Name] = added[i]
 			nc.addPodEvictorForNewZone(added[i])
 			if nc.useTaintBasedEvictions {
-				nc.markNodeAsHealthy(added[i])
+				nc.markNodeAsReachable(added[i])
 			} else {
 				nc.cancelPodEviction(added[i])
 			}
@@ -591,7 +650,7 @@ func (nc *NodeController) monitorNodeStatus() error {
 				// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
 				if taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) {
 					taintToAdd := *NotReadyTaintTemplate
-					if !util.SwapNodeControllerTaint(nc.kubeClient, &taintToAdd, UnreachableTaintTemplate, node) {
+					if !util.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) {
 						glog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.")
 					}
 				} else if nc.markNodeForTainting(node) {
@@ -618,7 +677,7 @@ func (nc *NodeController) monitorNodeStatus() error {
 				// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
 				if taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
 					taintToAdd := *UnreachableTaintTemplate
-					if !util.SwapNodeControllerTaint(nc.kubeClient, &taintToAdd, NotReadyTaintTemplate, node) {
+					if !util.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) {
 						glog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.")
 					}
 				} else if nc.markNodeForTainting(node) {
@@ -642,7 +701,7 @@ func (nc *NodeController) monitorNodeStatus() error {
 			}
 			if observedReadyCondition.Status == v1.ConditionTrue {
 				if nc.useTaintBasedEvictions {
-					removed, err := nc.markNodeAsHealthy(node)
+					removed, err := nc.markNodeAsReachable(node)
 					if err != nil {
 						glog.Errorf("Failed to remove taints from node %v. Will retry in next iteration.", node.Name)
 					}
@@ -737,7 +796,7 @@ func (nc *NodeController) handleDisruption(zoneToNodeConditions map[string][]*v1
 		glog.V(0).Info("NodeController detected that all Nodes are not-Ready. Entering master disruption mode.")
 		for i := range nodes {
 			if nc.useTaintBasedEvictions {
-				_, err := nc.markNodeAsHealthy(nodes[i])
+				_, err := nc.markNodeAsReachable(nodes[i])
 				if err != nil {
 					glog.Errorf("Failed to remove taints from Node %v", nodes[i].Name)
 				}
@@ -1053,7 +1112,7 @@ func (nc *NodeController) markNodeForTainting(node *v1.Node) bool {
 	return nc.zoneNoExecuteTainer[utilnode.GetZoneKey(node)].Add(node.Name, string(node.UID))
 }
 
-func (nc *NodeController) markNodeAsHealthy(node *v1.Node) (bool, error) {
+func (nc *NodeController) markNodeAsReachable(node *v1.Node) (bool, error) {
 	nc.evictorLock.Lock()
 	defer nc.evictorLock.Unlock()
 	err := controller.RemoveTaintOffNode(nc.kubeClient, node.Name, node, UnreachableTaintTemplate)