Merge pull request #49257 from k82cn/k8s_42001

Automatic merge from submit-queue (batch tested with PRs 51574, 51534, 49257, 44680, 48836)

Task 1: Taint nodes by condition.

**What this PR does / why we need it**:
Taints nodes by condition for MemoryPressure, OutOfDisk, and so on.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: part of #42001 

**Release note**:
```release-note
Nodes are now tainted by their conditions as follows:
  * 'node.kubernetes.io/network-unavailable=:NoSchedule' if NetworkUnavailable is true
  * 'node.kubernetes.io/disk-pressure=:NoSchedule' if DiskPressure is true
  * 'node.kubernetes.io/memory-pressure=:NoSchedule' if MemoryPressure is true
  * 'node.kubernetes.io/out-of-disk=:NoSchedule' if OutOfDisk is true
```
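For context (illustrative only, not part of this PR's diff): a pod that must still schedule onto a node carrying one of these condition taints can declare a matching toleration. Below is a minimal sketch built against the core/v1 API types; the `k8s.io/api/core/v1` import path and the memory-pressure taint key are assumptions based on the release note above.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

func main() {
	// Tolerate the NoSchedule taint that the node controller adds when
	// the MemoryPressure condition is true on a node.
	memoryPressureToleration := v1.Toleration{
		Key:      "node.kubernetes.io/memory-pressure",
		Operator: v1.TolerationOpExists, // match regardless of the taint's value
		Effect:   v1.TaintEffectNoSchedule,
	}

	podSpec := v1.PodSpec{
		Tolerations: []v1.Toleration{memoryPressureToleration},
	}
	fmt.Printf("tolerations: %+v\n", podSpec.Tolerations)
}
```

With `Operator: Exists` and an empty `Value`, the toleration matches the valueless taints listed in the release note; leaving `Effect` empty instead would tolerate all effects for that key.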
Authored by Kubernetes Submit Queue on 2017-08-31 23:13:20 -07:00; committed by GitHub.
6 changed files with 302 additions and 25 deletions
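The hunks below reference taint-key constants from the scheduler's `algorithm` package (e.g. `algorithm.TaintNodeMemoryPressure`). For reference, a sketch of those constants with the string values implied by the release note above; the values are assumptions taken from that note, not copied from this diff:

```go
// Assumed string values, matching the taint keys listed in the release note.
const (
	TaintNodeNetworkUnavailable = "node.kubernetes.io/network-unavailable"
	TaintNodeMemoryPressure     = "node.kubernetes.io/memory-pressure"
	TaintNodeDiskPressure       = "node.kubernetes.io/disk-pressure"
	TaintNodeOutOfDisk          = "node.kubernetes.io/out-of-disk"
)
```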


@@ -76,6 +76,20 @@ var (
Key: algorithm.TaintNodeNotReady,
Effect: v1.TaintEffectNoExecute,
}
nodeConditionToTaintKeyMap = map[v1.NodeConditionType]string{
v1.NodeMemoryPressure: algorithm.TaintNodeMemoryPressure,
v1.NodeOutOfDisk: algorithm.TaintNodeOutOfDisk,
v1.NodeDiskPressure: algorithm.TaintNodeDiskPressure,
v1.NodeNetworkUnavailable: algorithm.TaintNodeNetworkUnavailable,
}
taintKeyToNodeConditionMap = map[string]v1.NodeConditionType{
algorithm.TaintNodeNetworkUnavailable: v1.NodeNetworkUnavailable,
algorithm.TaintNodeMemoryPressure: v1.NodeMemoryPressure,
algorithm.TaintNodeOutOfDisk: v1.NodeOutOfDisk,
algorithm.TaintNodeDiskPressure: v1.NodeDiskPressure,
}
)
const (
@@ -180,6 +194,10 @@ type NodeController struct {
// if set to true NodeController will taint Nodes with 'TaintNodeNotReady' and 'TaintNodeUnreachable'
// taints instead of evicting Pods itself.
useTaintBasedEvictions bool
// if set to true, NodeController will taint Nodes based on their conditions for 'NetworkUnavailable',
// 'MemoryPressure', 'OutOfDisk' and 'DiskPressure'.
taintNodeByCondition bool
}
// NewNodeController returns a new node controller to sync instances from cloudprovider.
@@ -206,7 +224,9 @@ func NewNodeController(
allocateNodeCIDRs bool,
allocatorType ipam.CIDRAllocatorType,
runTaintManager bool,
useTaintBasedEvictions bool) (*NodeController, error) {
useTaintBasedEvictions bool,
taintNodeByCondition bool,
) (*NodeController, error) {
eventBroadcaster := record.NewBroadcaster()
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "controllermanager"})
eventBroadcaster.StartLogging(glog.Infof)
@@ -387,6 +407,17 @@ func NewNodeController(
nc.taintManager = scheduler.NewNoExecuteTaintManager(kubeClient)
}
if nc.taintNodeByCondition {
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: util.CreateAddNodeHandler(func(node *v1.Node) error {
return nc.doNoScheduleTaintingPass(node)
}),
UpdateFunc: util.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
return nc.doNoScheduleTaintingPass(newNode)
}),
})
}
nc.nodeLister = nodeInformer.Lister()
nc.nodeInformerSynced = nodeInformer.Informer().HasSynced
@@ -425,6 +456,34 @@ func (nc *NodeController) doEvictionPass() {
}
}
func (nc *NodeController) doNoScheduleTaintingPass(node *v1.Node) error {
// Map node's condition to Taints.
taints := []v1.Taint{}
for _, condition := range node.Status.Conditions {
if _, found := nodeConditionToTaintKeyMap[condition.Type]; found {
if condition.Status == v1.ConditionTrue {
taints = append(taints, v1.Taint{
Key: nodeConditionToTaintKeyMap[condition.Type],
Effect: v1.TaintEffectNoSchedule,
})
}
}
}
nodeTaints := taintutils.TaintSetFilter(node.Spec.Taints, func(t *v1.Taint) bool {
_, found := taintKeyToNodeConditionMap[t.Key]
return found
})
taintsToAdd, taintsToDel := taintutils.TaintSetDiff(taints, nodeTaints)
// If there is nothing to add or delete, return directly.
if len(taintsToAdd) == 0 && len(taintsToDel) == 0 {
return nil
}
if !util.SwapNodeControllerTaint(nc.kubeClient, taintsToAdd, taintsToDel, node) {
return fmt.Errorf("failed to swap taints of node %+v", node)
}
return nil
}
func (nc *NodeController) doNoExecuteTaintingPass() {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
@@ -459,7 +518,7 @@ func (nc *NodeController) doNoExecuteTaintingPass() {
return true, 0
}
return util.SwapNodeControllerTaint(nc.kubeClient, &taintToAdd, &oppositeTaint, node), 0
return util.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{&oppositeTaint}, node), 0
})
}
}
@@ -542,7 +601,7 @@ func (nc *NodeController) monitorNodeStatus() error {
nc.knownNodeSet[added[i].Name] = added[i]
nc.addPodEvictorForNewZone(added[i])
if nc.useTaintBasedEvictions {
nc.markNodeAsHealthy(added[i])
nc.markNodeAsReachable(added[i])
} else {
nc.cancelPodEviction(added[i])
}
@@ -591,7 +650,7 @@ func (nc *NodeController) monitorNodeStatus() error {
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
if taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) {
taintToAdd := *NotReadyTaintTemplate
if !util.SwapNodeControllerTaint(nc.kubeClient, &taintToAdd, UnreachableTaintTemplate, node) {
if !util.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) {
glog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.")
}
} else if nc.markNodeForTainting(node) {
@@ -618,7 +677,7 @@ func (nc *NodeController) monitorNodeStatus() error {
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
if taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
taintToAdd := *UnreachableTaintTemplate
if !util.SwapNodeControllerTaint(nc.kubeClient, &taintToAdd, NotReadyTaintTemplate, node) {
if !util.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) {
glog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.")
}
} else if nc.markNodeForTainting(node) {
@@ -642,7 +701,7 @@ func (nc *NodeController) monitorNodeStatus() error {
}
if observedReadyCondition.Status == v1.ConditionTrue {
if nc.useTaintBasedEvictions {
removed, err := nc.markNodeAsHealthy(node)
removed, err := nc.markNodeAsReachable(node)
if err != nil {
glog.Errorf("Failed to remove taints from node %v. Will retry in next iteration.", node.Name)
}
@@ -737,7 +796,7 @@ func (nc *NodeController) handleDisruption(zoneToNodeConditions map[string][]*v1
glog.V(0).Info("NodeController detected that all Nodes are not-Ready. Entering master disruption mode.")
for i := range nodes {
if nc.useTaintBasedEvictions {
_, err := nc.markNodeAsHealthy(nodes[i])
_, err := nc.markNodeAsReachable(nodes[i])
if err != nil {
glog.Errorf("Failed to remove taints from Node %v", nodes[i].Name)
}
@@ -1053,7 +1112,7 @@ func (nc *NodeController) markNodeForTainting(node *v1.Node) bool {
return nc.zoneNoExecuteTainer[utilnode.GetZoneKey(node)].Add(node.Name, string(node.UID))
}
func (nc *NodeController) markNodeAsHealthy(node *v1.Node) (bool, error) {
func (nc *NodeController) markNodeAsReachable(node *v1.Node) (bool, error) {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
err := controller.RemoveTaintOffNode(nc.kubeClient, node.Name, node, UnreachableTaintTemplate)