Added unschedulable and network-unavailable toleration.

Signed-off-by: Da K. Ma <klaus1982.cn@gmail.com>
This commit is contained in:
Da K. Ma
2018-06-11 14:26:38 +08:00
parent 7b6647a418
commit e39b510726
7 changed files with 181 additions and 115 deletions

View File

@@ -23,7 +23,6 @@ import (
apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
@@ -49,16 +48,13 @@ func GetTemplateGeneration(ds *apps.DaemonSet) (*int64, error) {
return &generation, nil
}
// CreatePodTemplate returns copy of provided template with additional
// label which contains templateGeneration (for backward compatibility),
// hash of provided template and sets default daemon tolerations.
func CreatePodTemplate(template v1.PodTemplateSpec, generation *int64, hash string) v1.PodTemplateSpec {
newTemplate := *template.DeepCopy()
// AddOrUpdateDaemonPodTolerations apply necessary tolerations to DeamonSet Pods, e.g. node.kubernetes.io/not-ready:NoExecute.
func AddOrUpdateDaemonPodTolerations(spec *v1.PodSpec, isCritical bool) {
// DaemonSet pods shouldn't be deleted by NodeController in case of node problems.
// Add infinite toleration for taint notReady:NoExecute here
// to survive taint-based eviction enforced by NodeController
// when node turns not ready.
v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeNotReady,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoExecute,
@@ -68,36 +64,67 @@ func CreatePodTemplate(template v1.PodTemplateSpec, generation *int64, hash stri
// Add infinite toleration for taint unreachable:NoExecute here
// to survive taint-based eviction enforced by NodeController
// when node turns unreachable.
v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeUnreachable,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoExecute,
})
// According to TaintNodesByCondition feature, all DaemonSet pods should tolerate
// MemoryPressure and DisPressure taints, and the critical pods should tolerate
// OutOfDisk taint.
v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
// MemoryPressure, DisPressure, Unschedulable and NetworkUnavailable taints,
// and the critical pods should tolerate OutOfDisk taint.
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeDiskPressure,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})
v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeMemoryPressure,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeUnschedulable,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})
if spec.HostNetwork {
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeNetworkUnavailable,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})
}
// TODO(#48843) OutOfDisk taints will be removed in 1.10
if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
kubelettypes.IsCritical(newTemplate.Namespace, newTemplate.Annotations) {
v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
if isCritical {
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeOutOfDisk,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoExecute,
})
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeOutOfDisk,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})
}
}
// CreatePodTemplate returns copy of provided template with additional
// label which contains templateGeneration (for backward compatibility),
// hash of provided template and sets default daemon tolerations.
func CreatePodTemplate(ns string, template v1.PodTemplateSpec, generation *int64, hash string) v1.PodTemplateSpec {
newTemplate := *template.DeepCopy()
// TODO(k82cn): when removing CritialPod feature, also remove 'ns' parameter.
isCritical := utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
kubelettypes.IsCritical(ns, newTemplate.Annotations)
AddOrUpdateDaemonPodTolerations(&newTemplate.Spec, isCritical)
if newTemplate.ObjectMeta.Labels == nil {
newTemplate.ObjectMeta.Labels = make(map[string]string)
@@ -185,31 +212,6 @@ func ReplaceDaemonSetPodNodeNameNodeAffinity(affinity *v1.Affinity, nodename str
return affinity
}
// AppendNoScheduleTolerationIfNotExist appends unschedulable toleration to `.spec` if not exist; otherwise,
// no changes to `.spec.tolerations`.
func AppendNoScheduleTolerationIfNotExist(tolerations []v1.Toleration) []v1.Toleration {
unschedulableToleration := v1.Toleration{
Key: algorithm.TaintNodeUnschedulable,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
}
unschedulableTaintExist := false
for _, t := range tolerations {
if apiequality.Semantic.DeepEqual(t, unschedulableToleration) {
unschedulableTaintExist = true
break
}
}
if !unschedulableTaintExist {
tolerations = append(tolerations, unschedulableToleration)
}
return tolerations
}
// GetTargetNodeName get the target node name of DaemonSet pods. If `.spec.NodeName` is not empty (nil),
// return `.spec.NodeName`; otherwise, retrieve node name of pending pods from NodeAffinity. Return error
// if failed to retrieve node name from `.spec.NodeName` and NodeAffinity.