Merge pull request #50186 from k82cn/k8s_42001-4
Automatic merge from submit-queue

Task 2: Added tolerations to DaemonSet pods for node condition taints

**What this PR does / why we need it**: If the TaintNodesByCondition feature is enabled, tolerations for node condition taints are added to DaemonSet pods.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: part of #42001

**Release note**:

```release-note
None
```
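Concretely, with the gate enabled every DaemonSet pod ends up carrying Exists-style tolerations for the pressure condition taints, and critical pods additionally tolerate OutOfDisk. A minimal end-state sketch, using plain structs and illustrative taint keys standing in for the `algorithm.TaintNode*` constants used in the diff below:

```go
package main

import "fmt"

// toleration mirrors the v1.Toleration fields this PR sets.
type toleration struct {
	Key      string
	Operator string
	Effect   string
}

func main() {
	// Tolerations added to every DaemonSet pod when TaintNodesByCondition is on.
	// The keys are illustrative stand-ins for the algorithm.TaintNode* constants.
	tols := []toleration{
		{Key: "node.kubernetes.io/disk-pressure", Operator: "Exists", Effect: "NoSchedule"},
		{Key: "node.kubernetes.io/memory-pressure", Operator: "Exists", Effect: "NoSchedule"},
	}
	// Critical pods (ExperimentalCriticalPodAnnotation enabled) additionally
	// tolerate the OutOfDisk condition taint.
	tols = append(tols, toleration{
		Key: "node.kubernetes.io/out-of-disk", Operator: "Exists", Effect: "NoSchedule",
	})
	for _, t := range tols {
		fmt.Printf("%-40s %s:%s\n", t.Key, t.Operator, t.Effect)
	}
}
```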
This commit is contained in: commit 6cbfac2cf5
```
@@ -1053,6 +1053,30 @@ func (dsc *DaemonSetsController) simulate(newPod *v1.Pod, node *v1.Node, ds *ext
		Effect:   v1.TaintEffectNoExecute,
	})

	// According to TaintNodesByCondition, all DaemonSet pods should tolerate the
	// MemoryPressure and DiskPressure taints, and critical pods should additionally
	// tolerate the OutOfDisk taint.
	v1helper.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
		Key:      algorithm.TaintNodeDiskPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	})

	v1helper.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
		Key:      algorithm.TaintNodeMemoryPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	})

	if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
		kubelettypes.IsCriticalPod(newPod) {
		v1helper.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
			Key:      algorithm.TaintNodeOutOfDisk,
			Operator: v1.TolerationOpExists,
			Effect:   v1.TaintEffectNoSchedule,
		})
	}

	pods := []*v1.Pod{}

	podList, err := dsc.podLister.List(labels.Everything())
```
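The add-or-update behavior matters here because `simulate` can run repeatedly against the same pod object, so the helper must stay idempotent rather than appending a duplicate toleration on every call. A minimal sketch of that semantic — a simplified, hypothetical stand-in for `v1helper.AddOrUpdateTolerationInPod` (the real helper matches on more fields than just the key):

```go
package main

import "fmt"

// toleration mirrors the v1.Toleration fields used above.
type toleration struct {
	Key      string
	Operator string
	Effect   string
}

// addOrUpdateToleration is a simplified stand-in for
// v1helper.AddOrUpdateTolerationInPod: if a toleration with the same key
// already exists it is replaced in place, otherwise the new one is
// appended, so repeated calls never accumulate duplicates.
func addOrUpdateToleration(tols []toleration, t toleration) []toleration {
	for i := range tols {
		if tols[i].Key == t.Key {
			tols[i] = t
			return tols
		}
	}
	return append(tols, t)
}

func main() {
	// Illustrative key; the real value comes from algorithm.TaintNodeDiskPressure.
	disk := toleration{Key: "node.kubernetes.io/disk-pressure", Operator: "Exists", Effect: "NoSchedule"}
	var tols []toleration
	tols = addOrUpdateToleration(tols, disk)
	tols = addOrUpdateToleration(tols, disk) // second call updates, does not duplicate
	fmt.Println(len(tols))                   // 1
}
```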
```
@@ -1225,6 +1249,11 @@ func Predicates(pod *v1.Pod, nodeInfo *schedulercache.NodeInfo) (bool, []algorit
func NodeConditionPredicates(nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason) {
	reasons := []algorithm.PredicateFailureReason{}

	// If the TaintNodesByCondition feature is enabled, node conditions are surfaced
	// as taints and the PodToleratesNodeTaints predicate accounts for them.
	if utilfeature.DefaultFeatureGate.Enabled(features.TaintNodesByCondition) {
		return true, nil
	}

	for _, c := range nodeInfo.Node().Status.Conditions {
		// TODO: There are other node statuses that the DaemonSet should ideally respect too,
		// e.g. MemoryPressure and DiskPressure.
```
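The early return above relies on the rest of the scheduling path: once node conditions are mirrored as taints, the generic taint-toleration predicate filters nodes instead of this condition loop. A rough sketch of that matching step, with simplified types and illustrative keys (the real logic lives in the scheduler's taint/toleration helpers):

```go
package main

import "fmt"

type taint struct{ Key, Effect string }
type toleration struct{ Key, Operator, Effect string }

// tolerates is a simplified version of the scheduler's match: an Exists
// toleration covers any taint with the same key and effect.
func tolerates(tol toleration, t taint) bool {
	return tol.Key == t.Key && tol.Operator == "Exists" && tol.Effect == t.Effect
}

// podFits reports whether every taint on the node is tolerated by the pod.
func podFits(tols []toleration, taints []taint) bool {
	for _, t := range taints {
		ok := false
		for _, tol := range tols {
			if tolerates(tol, t) {
				ok = true
				break
			}
		}
		if !ok {
			return false
		}
	}
	return true
}

func main() {
	// Illustrative key; the real constant is algorithm.TaintNodeOutOfDisk.
	taints := []taint{{Key: "node.kubernetes.io/out-of-disk", Effect: "NoSchedule"}}
	tols := []toleration{{Key: "node.kubernetes.io/out-of-disk", Operator: "Exists", Effect: "NoSchedule"}}
	fmt.Println(podFits(tols, taints)) // true: the DaemonSet pod tolerates the condition taint
}
```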
```
@@ -1251,6 +1251,68 @@ func TestOutOfDiskNodeDaemonLaunchesCriticalPod(t *testing.T) {
	}
}

// DaemonSet should launch a critical pod even on a node with the OutOfDisk taint.
func TestTaintOutOfDiskNodeDaemonLaunchesCriticalPod(t *testing.T) {
	for _, strategy := range updateStrategies() {
		ds := newDaemonSet("critical")
		ds.Spec.UpdateStrategy = *strategy
		setDaemonSetCritical(ds)
		manager, podControl, _ := newTestController(ds)

		node := newNode("not-enough-disk", nil)
		node.Status.Conditions = []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}
		node.Spec.Taints = []v1.Taint{{Key: algorithm.TaintNodeOutOfDisk, Effect: v1.TaintEffectNoSchedule}}
		manager.nodeStore.Add(node)

		// NOTE: Whether or not TaintNodesByCondition is enabled, the tolerations
		// are added to DaemonSet pods.

		// Without the critical pod annotation feature gate enabled, the critical pod
		// should not be created.
		utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=False")
		utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=True")
		manager.dsStore.Add(ds)
		syncAndValidateDaemonSets(t, manager, ds, podControl, 0, 0, 0)

		// With the critical pod annotation feature gate enabled, the critical pod
		// is created.
		utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=True")
		utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=False")
		manager.dsStore.Add(ds)
		syncAndValidateDaemonSets(t, manager, ds, podControl, 1, 0, 0)

		// Roll back the feature gates to false.
		utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=False")
		utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=False")
	}
}

// DaemonSet should launch a pod even on a node with MemoryPressure/DiskPressure taints.
func TestTaintPressureNodeDaemonLaunchesPod(t *testing.T) {
	for _, strategy := range updateStrategies() {
		ds := newDaemonSet("critical")
		ds.Spec.UpdateStrategy = *strategy
		setDaemonSetCritical(ds)
		manager, podControl, _ := newTestController(ds)

		node := newNode("resources-pressure", nil)
		node.Status.Conditions = []v1.NodeCondition{
			{Type: v1.NodeDiskPressure, Status: v1.ConditionTrue},
			{Type: v1.NodeMemoryPressure, Status: v1.ConditionTrue},
		}
		node.Spec.Taints = []v1.Taint{
			{Key: algorithm.TaintNodeDiskPressure, Effect: v1.TaintEffectNoSchedule},
			{Key: algorithm.TaintNodeMemoryPressure, Effect: v1.TaintEffectNoSchedule},
		}
		manager.nodeStore.Add(node)

		// Enabling the TaintNodesByCondition feature gate should create the pod,
		// since all DaemonSet pods tolerate the pressure taints.
		utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=True")
		manager.dsStore.Add(ds)
		syncAndValidateDaemonSets(t, manager, ds, podControl, 1, 0, 0)

		// Roll back the feature gate to false.
		utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=False")
	}
}

// DaemonSet should launch a critical pod even when the node has insufficient free resources.
func TestInsufficientCapacityNodeDaemonLaunchesCriticalPod(t *testing.T) {
	for _, strategy := range updateStrategies() {
```
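These tests flip the shared `utilfeature.DefaultFeatureGate` and manually roll it back at the end of each loop iteration. A hedged sketch of a defer-based alternative that restores the gate even if an assertion fails mid-test — `withFeatureGate` is a hypothetical helper name, and it assumes the gate starts out false:

```go
package daemon

import (
	"testing"

	utilfeature "k8s.io/apiserver/pkg/util/feature"
)

// withFeatureGate enables the named gate for the duration of fn and turns it
// back off afterwards, even if fn fails the test early. Sketch only: it
// assumes the "Name=Value" syntax accepted by DefaultFeatureGate.Set in this
// release and that the gate was off beforehand.
func withFeatureGate(t *testing.T, gate string, fn func()) {
	if err := utilfeature.DefaultFeatureGate.Set(gate + "=True"); err != nil {
		t.Fatalf("failed to enable %s: %v", gate, err)
	}
	defer utilfeature.DefaultFeatureGate.Set(gate + "=False")
	fn()
}
```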
```
@@ -15,11 +15,14 @@ go_library(
    deps = [
        "//pkg/api/v1/helper:go_default_library",
        "//pkg/api/v1/pod:go_default_library",
        "//pkg/features:go_default_library",
        "//pkg/kubelet/types:go_default_library",
        "//pkg/util/labels:go_default_library",
        "//plugin/pkg/scheduler/algorithm:go_default_library",
        "//vendor/k8s.io/api/core/v1:go_default_library",
        "//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
        "//vendor/k8s.io/client-go/kubernetes/scheme:go_default_library",
    ],
)
```
```
@@ -22,9 +22,12 @@ import (
	"k8s.io/api/core/v1"
	extensions "k8s.io/api/extensions/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/kubernetes/scheme"
	v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/features"
	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
	labelsutil "k8s.io/kubernetes/pkg/util/labels"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
)
```
```
@@ -55,6 +58,30 @@ func CreatePodTemplate(template v1.PodTemplateSpec, generation int64, hash strin
		Effect:   v1.TaintEffectNoExecute,
	})

	// According to the TaintNodesByCondition feature, all DaemonSet pods should
	// tolerate the MemoryPressure and DiskPressure taints, and critical pods should
	// additionally tolerate the OutOfDisk taint.
	v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
		Key:      algorithm.TaintNodeDiskPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	})

	v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
		Key:      algorithm.TaintNodeMemoryPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	})

	if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
		kubelettypes.IsCritical(newTemplate.Namespace, newTemplate.Annotations) {
		v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
			Key:      algorithm.TaintNodeOutOfDisk,
			Operator: v1.TolerationOpExists,
			Effect:   v1.TaintEffectNoExecute,
		})
	}

	templateGenerationStr := fmt.Sprint(generation)
	newTemplate.ObjectMeta.Labels = labelsutil.CloneAndAddLabel(
		template.ObjectMeta.Labels,
```
```
@@ -141,11 +141,17 @@ func (sp SyncPodType) String() string {
// key. Both the rescheduler and the kubelet use this key to make admission
// and scheduling decisions.
func IsCriticalPod(pod *v1.Pod) bool {
	return IsCritical(pod.Namespace, pod.Annotations)
}

// IsCritical returns true if the given namespace and annotations bear the critical
// pod annotation key. The DaemonSetController uses this key directly to make
// scheduling decisions.
func IsCritical(ns string, annotations map[string]string) bool {
	// Critical pods are restricted to the "kube-system" namespace as of now.
	if ns != kubeapi.NamespaceSystem {
		return false
	}
	val, ok := annotations[CriticalPodAnnotationKey]
	if ok && val == "" {
		return true
	}
```
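For reference, the refactor keeps `IsCriticalPod` behavior identical while letting callers that only have a namespace and annotations (like the DaemonSet pod template path above) reuse the check. A small self-contained sketch of the same logic, with the annotation key written out inline as an assumption about the value of `CriticalPodAnnotationKey`:

```go
package main

import "fmt"

// isCritical is a simplified stand-in for kubelettypes.IsCritical; the real
// version compares against kubeapi.NamespaceSystem and CriticalPodAnnotationKey.
func isCritical(ns string, annotations map[string]string) bool {
	// Critical pods are restricted to the kube-system namespace as of now.
	if ns != "kube-system" {
		return false
	}
	val, ok := annotations["scheduler.alpha.kubernetes.io/critical-pod"]
	return ok && val == ""
}

func main() {
	ann := map[string]string{"scheduler.alpha.kubernetes.io/critical-pod": ""}
	fmt.Println(isCritical("kube-system", ann)) // true
	fmt.Println(isCritical("default", ann))     // false: namespace-restricted
}
```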