feat: graduate ScheduleDaemonSetPods to GA

Author: draveness
Date:   2019-09-18 00:06:37 +08:00
Parent: b084336460
Commit: 35d772e354
9 changed files with 1525 additions and 2153 deletions


@@ -26,7 +26,7 @@ import (
 	"k8s.io/klog"

 	apps "k8s.io/api/apps/v1"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	apiequality "k8s.io/apimachinery/pkg/api/equality"
 	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -35,7 +35,6 @@ import (
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
-	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	appsinformers "k8s.io/client-go/informers/apps/v1"
 	coreinformers "k8s.io/client-go/informers/core/v1"
 	clientset "k8s.io/client-go/kubernetes"
@@ -52,8 +51,6 @@ import (
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 	"k8s.io/kubernetes/pkg/controller"
 	"k8s.io/kubernetes/pkg/controller/daemon/util"
-	"k8s.io/kubernetes/pkg/features"
-	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
 	"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
 	schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
 	"k8s.io/utils/integer"
@@ -973,9 +970,7 @@ func (dsc *DaemonSetsController) manage(ds *apps.DaemonSet, nodeList []*v1.Node,

 	// Remove unscheduled pods assigned to not existing nodes when daemonset pods are scheduled by scheduler.
 	// If node doesn't exist then pods are never scheduled and can't be deleted by PodGCController.
-	if utilfeature.DefaultFeatureGate.Enabled(features.ScheduleDaemonSetPods) {
-		podsToDelete = append(podsToDelete, getUnscheduledPodsWithoutNode(nodeList, nodeToDaemonPods)...)
-	}
+	podsToDelete = append(podsToDelete, getUnscheduledPodsWithoutNode(nodeList, nodeToDaemonPods)...)

 	// Label new pods using the hash label value of the current history when creating them
 	if err = dsc.syncNodes(ds, podsToDelete, nodesNeedingDaemonPods, hash); err != nil {
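Note: getUnscheduledPodsWithoutNode is a pre-existing helper in this file that the GA path now calls unconditionally. A sketch of its shape, reconstructed rather than taken from this diff, so details may differ:

// Sketch of getUnscheduledPodsWithoutNode; assumes v1 "k8s.io/api/core/v1".
func getUnscheduledPodsWithoutNode(runningNodesList []*v1.Node, nodeToDaemonPods map[string][]*v1.Pod) []string {
	var results []string
	isNodeRunning := make(map[string]bool, len(runningNodesList))
	for _, node := range runningNodesList {
		isNodeRunning[node.Name] = true
	}
	for n, pods := range nodeToDaemonPods {
		if isNodeRunning[n] {
			continue
		}
		for _, pod := range pods {
			// An empty NodeName means the pod was never bound to a node, so the
			// PodGCController will not collect it; the controller deletes it here.
			if len(pod.Spec.NodeName) == 0 {
				results = append(results, pod.Name)
			}
		}
	}
	return results
}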
@@ -1033,25 +1028,16 @@ func (dsc *DaemonSetsController) syncNodes(ds *apps.DaemonSet, podsToDelete, nod
 		for i := pos; i < pos+batchSize; i++ {
 			go func(ix int) {
 				defer createWait.Done()
-				var err error

 				podTemplate := template.DeepCopy()
-				if utilfeature.DefaultFeatureGate.Enabled(features.ScheduleDaemonSetPods) {
-					// The pod's NodeAffinity will be updated to make sure the Pod is bound
-					// to the target node by default scheduler. It is safe to do so because there
-					// should be no conflicting node affinity with the target node.
-					podTemplate.Spec.Affinity = util.ReplaceDaemonSetPodNodeNameNodeAffinity(
-						podTemplate.Spec.Affinity, nodesNeedingDaemonPods[ix])
+				// The pod's NodeAffinity will be updated to make sure the Pod is bound
+				// to the target node by default scheduler. It is safe to do so because there
+				// should be no conflicting node affinity with the target node.
+				podTemplate.Spec.Affinity = util.ReplaceDaemonSetPodNodeNameNodeAffinity(
+					podTemplate.Spec.Affinity, nodesNeedingDaemonPods[ix])

-					err = dsc.podControl.CreatePodsWithControllerRef(ds.Namespace, podTemplate,
-						ds, metav1.NewControllerRef(ds, controllerKind))
-				} else {
-					// If pod is scheduled by DaemonSetController, set its '.spec.scheduleName'.
-					podTemplate.Spec.SchedulerName = "kubernetes.io/daemonset-controller"
-
-					err = dsc.podControl.CreatePodsOnNode(nodesNeedingDaemonPods[ix], ds.Namespace, podTemplate,
-						ds, metav1.NewControllerRef(ds, controllerKind))
-				}
+				err := dsc.podControl.CreatePodsWithControllerRef(ds.Namespace, podTemplate,
+					ds, metav1.NewControllerRef(ds, controllerKind))

 				if err != nil && errors.IsTimeout(err) {
 					// Pod is created but its initialization has timed out.
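Note: util.ReplaceDaemonSetPodNodeNameNodeAffinity lives in pkg/controller/daemon/util (imported above). The core idea, shown as a condensed, illustrative sketch only (the real helper also replaces any required node affinity already present on the template), is to pin the pod to one node through a matchFields term on the node-name field, which the default scheduler resolves to exactly that node:

// targetNodeAffinity (hypothetical name) builds the affinity the helper produces;
// assumes v1 "k8s.io/api/core/v1".
func targetNodeAffinity(nodeName string) *v1.Affinity {
	return &v1.Affinity{
		NodeAffinity: &v1.NodeAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
				NodeSelectorTerms: []v1.NodeSelectorTerm{{
					MatchFields: []v1.NodeSelectorRequirement{{
						Key:      "metadata.name", // the node-name field selector key
						Operator: v1.NodeSelectorOpIn,
						Values:   []string{nodeName},
					}},
				}},
			},
		},
	}
}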
@@ -1355,14 +1341,10 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *apps.
 	// TODO(k82cn): When 'ScheduleDaemonSetPods' upgrade to beta or GA, remove unnecessary check on failure reason,
 	// e.g. InsufficientResourceError; and simplify "wantToRun, shouldSchedule, shouldContinueRunning"
 	// into one result, e.g. selectedNode.
-	var insufficientResourceErr error
 	for _, r := range reasons {
 		klog.V(4).Infof("DaemonSet Predicates failed on node %s for ds '%s/%s' for reason: %v", node.Name, ds.ObjectMeta.Namespace, ds.ObjectMeta.Name, r.GetReason())
 		switch reason := r.(type) {
-		case *predicates.InsufficientResourceError:
-			insufficientResourceErr = reason
 		case *predicates.PredicateFailureError:
-			var emitEvent bool
 			// we try to partition predicates into two partitions here: intentional on the part of the operator and not.
 			switch reason {
 			// intentional
@@ -1384,18 +1366,6 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *apps.
 					return false, false, false, nil
 				}
 				wantToRun, shouldSchedule = false, false
-			// unintentional
-			case
-				predicates.ErrDiskConflict,
-				predicates.ErrVolumeZoneConflict,
-				predicates.ErrMaxVolumeCountExceeded,
-				predicates.ErrNodeUnderMemoryPressure,
-				predicates.ErrNodeUnderDiskPressure:
-				// wantToRun and shouldContinueRunning are likely true here. They are
-				// absolutely true at the time of writing the comment. See first comment
-				// of this method.
-				shouldSchedule = false
-				emitEvent = true
 			// unexpected
 			case
 				predicates.ErrPodAffinityNotMatch,
@@ -1405,19 +1375,10 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *apps.
 			default:
 				klog.V(4).Infof("unknown predicate failure reason: %s", reason.GetReason())
 				wantToRun, shouldSchedule, shouldContinueRunning = false, false, false
-				emitEvent = true
 			}
-			if emitEvent {
-				dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, FailedPlacementReason, "failed to place pod on %q: %s", node.ObjectMeta.Name, reason.GetReason())
-			}
 		}
 	}
-	// only emit this event if insufficient resource is the only thing
-	// preventing the daemon pod from scheduling
-	if shouldSchedule && insufficientResourceErr != nil {
-		dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, FailedPlacementReason, "failed to place pod on %q: %s", node.ObjectMeta.Name, insufficientResourceErr.Error())
-		shouldSchedule = false
-	}

 	return
 }
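Note: for context on the three returned booleans, podsShouldBeOnNode (unchanged by this commit) consumes them roughly as follows. An illustrative condensation; the helper name decide is hypothetical:

// decide is a hypothetical condensation of how the DaemonSet controller acts
// on nodeShouldRunDaemonPod's results for one node.
func decide(wantToRun, shouldSchedule, shouldContinueRunning, daemonPodRunning bool) (createPod, deletePod bool) {
	switch {
	case shouldSchedule && !daemonPodRunning:
		createPod = true // node needs a daemon pod but has none
	case !shouldContinueRunning && daemonPodRunning:
		deletePod = true // existing daemon pod may no longer run here
	}
	return
}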
@@ -1471,35 +1432,7 @@ func checkNodeFitness(pod *v1.Pod, meta predicates.PredicateMetadata, nodeInfo *
 func Predicates(pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []predicates.PredicateFailureReason, error) {
 	var predicateFails []predicates.PredicateFailureReason
 	// If ScheduleDaemonSetPods is enabled, only check nodeSelector, nodeAffinity and toleration/taint match.
-	if utilfeature.DefaultFeatureGate.Enabled(features.ScheduleDaemonSetPods) {
-		fit, reasons, err := checkNodeFitness(pod, nil, nodeInfo)
-		if err != nil {
-			return false, predicateFails, err
-		}
-		if !fit {
-			predicateFails = append(predicateFails, reasons...)
-		}
-
-		return len(predicateFails) == 0, predicateFails, nil
-	}
-
-	critical := kubelettypes.IsCriticalPod(pod)
-
-	fit, reasons, err := predicates.PodToleratesNodeTaints(pod, nil, nodeInfo)
-	if err != nil {
-		return false, predicateFails, err
-	}
-	if !fit {
-		predicateFails = append(predicateFails, reasons...)
-	}
-	if critical {
-		// If the pod is marked as critical and support for critical pod annotations is enabled,
-		// check predicates for critical pods only.
-		fit, reasons, err = predicates.EssentialPredicates(pod, nil, nodeInfo)
-	} else {
-		fit, reasons, err = predicates.GeneralPredicates(pod, nil, nodeInfo)
-	}
-
+	fit, reasons, err := checkNodeFitness(pod, nil, nodeInfo)
 	if err != nil {
 		return false, predicateFails, err
 	}
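Note: checkNodeFitness (its signature is truncated in the hunk header above) is now the only predicate path for DaemonSet pods. A reconstructed sketch of its pre-existing shape, not part of this diff: it runs just the node-local predicates — PodFitsHost, PodMatchNodeSelector (nodeSelector plus nodeAffinity), and PodToleratesNodeTaints — and aggregates every failure reason instead of stopping at the first:

// Sketch of checkNodeFitness; assumes v1 "k8s.io/api/core/v1" plus the
// predicates and schedulernodeinfo imports already present in this file.
func checkNodeFitness(pod *v1.Pod, meta predicates.PredicateMetadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []predicates.PredicateFailureReason, error) {
	var predicateFails []predicates.PredicateFailureReason
	fit, reasons, err := predicates.PodFitsHost(pod, meta, nodeInfo)
	if err != nil {
		return false, predicateFails, err
	}
	if !fit {
		predicateFails = append(predicateFails, reasons...)
	}

	fit, reasons, err = predicates.PodMatchNodeSelector(pod, meta, nodeInfo)
	if err != nil {
		return false, predicateFails, err
	}
	if !fit {
		predicateFails = append(predicateFails, reasons...)
	}

	fit, reasons, err = predicates.PodToleratesNodeTaints(pod, meta, nodeInfo)
	if err != nil {
		return false, predicateFails, err
	}
	if !fit {
		predicateFails = append(predicateFails, reasons...)
	}
	return len(predicateFails) == 0, predicateFails, nil
}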