@@ -26,7 +26,7 @@ import (
     "k8s.io/klog"
 
     apps "k8s.io/api/apps/v1"
-    "k8s.io/api/core/v1"
+    v1 "k8s.io/api/core/v1"
     apiequality "k8s.io/apimachinery/pkg/api/equality"
     "k8s.io/apimachinery/pkg/api/errors"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -35,7 +35,6 @@ import (
     utilruntime "k8s.io/apimachinery/pkg/util/runtime"
     "k8s.io/apimachinery/pkg/util/sets"
     "k8s.io/apimachinery/pkg/util/wait"
-    utilfeature "k8s.io/apiserver/pkg/util/feature"
     appsinformers "k8s.io/client-go/informers/apps/v1"
     coreinformers "k8s.io/client-go/informers/core/v1"
     clientset "k8s.io/client-go/kubernetes"
@@ -52,8 +51,6 @@ import (
     podutil "k8s.io/kubernetes/pkg/api/v1/pod"
     "k8s.io/kubernetes/pkg/controller"
     "k8s.io/kubernetes/pkg/controller/daemon/util"
-    "k8s.io/kubernetes/pkg/features"
-    kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
     "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
     schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
     "k8s.io/utils/integer"
@@ -973,9 +970,7 @@ func (dsc *DaemonSetsController) manage(ds *apps.DaemonSet, nodeList []*v1.Node,
 
     // Remove unscheduled pods assigned to not existing nodes when daemonset pods are scheduled by scheduler.
     // If node doesn't exist then pods are never scheduled and can't be deleted by PodGCController.
-    if utilfeature.DefaultFeatureGate.Enabled(features.ScheduleDaemonSetPods) {
-        podsToDelete = append(podsToDelete, getUnscheduledPodsWithoutNode(nodeList, nodeToDaemonPods)...)
-    }
+    podsToDelete = append(podsToDelete, getUnscheduledPodsWithoutNode(nodeList, nodeToDaemonPods)...)
 
     // Label new pods using the hash label value of the current history when creating them
     if err = dsc.syncNodes(ds, podsToDelete, nodesNeedingDaemonPods, hash); err != nil {
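Note on the hunk above: manage() now always prunes daemon pods that were recorded against a node which no longer exists and were never bound, since the PodGCController cannot clean up pods that were never scheduled. A minimal sketch, under assumptions, of what a helper like getUnscheduledPodsWithoutNode is expected to return; the signature and body are inferred from the call site, not copied from the controller.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Sketch (assumed behavior): return the names of daemon pods keyed to a node
// that is no longer in nodeList and that were never bound (spec.nodeName == "").
// Such pods can only be cleaned up by the DaemonSet controller itself, because
// the PodGCController ignores pods that were never scheduled.
func getUnscheduledPodsWithoutNode(nodeList []*v1.Node, nodeToDaemonPods map[string][]*v1.Pod) []string {
	var results []string
	isNode := make(map[string]bool, len(nodeList))
	for _, node := range nodeList {
		isNode[node.Name] = true
	}
	for nodeName, pods := range nodeToDaemonPods {
		if isNode[nodeName] {
			continue
		}
		for _, pod := range pods {
			if pod.Spec.NodeName == "" {
				results = append(results, pod.Name)
			}
		}
	}
	return results
}

func main() {
	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-a"}}
	orphan := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "ds-pod-x"}} // never scheduled
	pods := map[string][]*v1.Pod{"node-gone": {orphan}}
	fmt.Println(getUnscheduledPodsWithoutNode([]*v1.Node{node}, pods)) // [ds-pod-x]
}
```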
@@ -1033,25 +1028,16 @@ func (dsc *DaemonSetsController) syncNodes(ds *apps.DaemonSet, podsToDelete, nod
         for i := pos; i < pos+batchSize; i++ {
             go func(ix int) {
                 defer createWait.Done()
-                var err error
 
                 podTemplate := template.DeepCopy()
-                if utilfeature.DefaultFeatureGate.Enabled(features.ScheduleDaemonSetPods) {
-                    // The pod's NodeAffinity will be updated to make sure the Pod is bound
-                    // to the target node by default scheduler. It is safe to do so because there
-                    // should be no conflicting node affinity with the target node.
-                    podTemplate.Spec.Affinity = util.ReplaceDaemonSetPodNodeNameNodeAffinity(
-                        podTemplate.Spec.Affinity, nodesNeedingDaemonPods[ix])
+                // The pod's NodeAffinity will be updated to make sure the Pod is bound
+                // to the target node by default scheduler. It is safe to do so because there
+                // should be no conflicting node affinity with the target node.
+                podTemplate.Spec.Affinity = util.ReplaceDaemonSetPodNodeNameNodeAffinity(
+                    podTemplate.Spec.Affinity, nodesNeedingDaemonPods[ix])
 
-                    err = dsc.podControl.CreatePodsWithControllerRef(ds.Namespace, podTemplate,
-                        ds, metav1.NewControllerRef(ds, controllerKind))
-                } else {
-                    // If pod is scheduled by DaemonSetController, set its '.spec.scheduleName'.
-                    podTemplate.Spec.SchedulerName = "kubernetes.io/daemonset-controller"
-
-                    err = dsc.podControl.CreatePodsOnNode(nodesNeedingDaemonPods[ix], ds.Namespace, podTemplate,
-                        ds, metav1.NewControllerRef(ds, controllerKind))
-                }
+                err := dsc.podControl.CreatePodsWithControllerRef(ds.Namespace, podTemplate,
+                    ds, metav1.NewControllerRef(ds, controllerKind))
 
                 if err != nil && errors.IsTimeout(err) {
                     // Pod is created but its initialization has timed out.
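Note on the hunk above: the surviving branch relies on util.ReplaceDaemonSetPodNodeNameNodeAffinity to pin each new pod to its target node through node affinity rather than a pre-set spec.nodeName, so the default scheduler still performs the binding. A minimal sketch of the kind of affinity this implies, assuming a single required term with a metadata.name field selector; the real helper also merges any affinity already present on the template, which is omitted here.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// Sketch: build the node affinity a DaemonSet pod needs so that the default
// scheduler can only bind it to the one target node. This mirrors what a
// helper like util.ReplaceDaemonSetPodNodeNameNodeAffinity is expected to
// produce; handling of pre-existing affinity terms is left out.
func nodeNameAffinity(nodeName string) *v1.Affinity {
	return &v1.Affinity{
		NodeAffinity: &v1.NodeAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
				NodeSelectorTerms: []v1.NodeSelectorTerm{{
					// A MatchFields requirement on metadata.name selects exactly one node.
					MatchFields: []v1.NodeSelectorRequirement{{
						Key:      "metadata.name",
						Operator: v1.NodeSelectorOpIn,
						Values:   []string{nodeName},
					}},
				}},
			},
		},
	}
}

func main() {
	fmt.Printf("%+v\n", nodeNameAffinity("node-a"))
}
```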
@@ -1355,14 +1341,10 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *apps.
     // TODO(k82cn): When 'ScheduleDaemonSetPods' upgrade to beta or GA, remove unnecessary check on failure reason,
     // e.g. InsufficientResourceError; and simplify "wantToRun, shouldSchedule, shouldContinueRunning"
     // into one result, e.g. selectedNode.
-    var insufficientResourceErr error
     for _, r := range reasons {
         klog.V(4).Infof("DaemonSet Predicates failed on node %s for ds '%s/%s' for reason: %v", node.Name, ds.ObjectMeta.Namespace, ds.ObjectMeta.Name, r.GetReason())
         switch reason := r.(type) {
-        case *predicates.InsufficientResourceError:
-            insufficientResourceErr = reason
         case *predicates.PredicateFailureError:
-            var emitEvent bool
             // we try to partition predicates into two partitions here: intentional on the part of the operator and not.
             switch reason {
             // intentional
@@ -1384,18 +1366,6 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *apps.
                     return false, false, false, nil
                 }
                 wantToRun, shouldSchedule = false, false
-            // unintentional
-            case
-                predicates.ErrDiskConflict,
-                predicates.ErrVolumeZoneConflict,
-                predicates.ErrMaxVolumeCountExceeded,
-                predicates.ErrNodeUnderMemoryPressure,
-                predicates.ErrNodeUnderDiskPressure:
-                // wantToRun and shouldContinueRunning are likely true here. They are
-                // absolutely true at the time of writing the comment. See first comment
-                // of this method.
-                shouldSchedule = false
-                emitEvent = true
             // unexpected
             case
                 predicates.ErrPodAffinityNotMatch,
@@ -1405,19 +1375,10 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *apps.
             default:
                 klog.V(4).Infof("unknown predicate failure reason: %s", reason.GetReason())
                 wantToRun, shouldSchedule, shouldContinueRunning = false, false, false
-                emitEvent = true
             }
-            if emitEvent {
                 dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, FailedPlacementReason, "failed to place pod on %q: %s", node.ObjectMeta.Name, reason.GetReason())
-            }
         }
     }
-    // only emit this event if insufficient resource is the only thing
-    // preventing the daemon pod from scheduling
-    if shouldSchedule && insufficientResourceErr != nil {
-        dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, FailedPlacementReason, "failed to place pod on %q: %s", node.ObjectMeta.Name, insufficientResourceErr.Error())
-        shouldSchedule = false
-    }
     return
 }
 
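Note on the hunks above: nodeShouldRunDaemonPod still reports its verdict as three booleans, and the callers turn that into per-node create/delete decisions. A minimal sketch of that consumer logic, assuming the usual "schedule if missing, delete if it must not continue running" rule; failed-pod handling and backoff in the real podsShouldBeOnNode are left out.

```go
package main

import "fmt"

// Sketch (assumed caller behaviour): how manage()-style code acts on the
// (wantToRun, shouldSchedule, shouldContinueRunning) triple for one node.
func decide(shouldSchedule, shouldContinueRunning, daemonPodExists bool) (createPod, deletePod bool) {
	switch {
	case shouldSchedule && !daemonPodExists:
		createPod = true // node wants a daemon pod and has none yet
	case !shouldContinueRunning && daemonPodExists:
		deletePod = true // pod is present but must not keep running on this node
	}
	return createPod, deletePod
}

func main() {
	create, del := decide(true, true, false)
	fmt.Println(create, del) // true false
}
```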
@@ -1471,35 +1432,7 @@ func checkNodeFitness(pod *v1.Pod, meta predicates.PredicateMetadata, nodeInfo *
 func Predicates(pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []predicates.PredicateFailureReason, error) {
     var predicateFails []predicates.PredicateFailureReason
 
-    // If ScheduleDaemonSetPods is enabled, only check nodeSelector, nodeAffinity and toleration/taint match.
-    if utilfeature.DefaultFeatureGate.Enabled(features.ScheduleDaemonSetPods) {
-        fit, reasons, err := checkNodeFitness(pod, nil, nodeInfo)
-        if err != nil {
-            return false, predicateFails, err
-        }
-        if !fit {
-            predicateFails = append(predicateFails, reasons...)
-        }
-
-        return len(predicateFails) == 0, predicateFails, nil
-    }
-
-    critical := kubelettypes.IsCriticalPod(pod)
-
-    fit, reasons, err := predicates.PodToleratesNodeTaints(pod, nil, nodeInfo)
-    if err != nil {
-        return false, predicateFails, err
-    }
-    if !fit {
-        predicateFails = append(predicateFails, reasons...)
-    }
-    if critical {
-        // If the pod is marked as critical and support for critical pod annotations is enabled,
-        // check predicates for critical pods only.
-        fit, reasons, err = predicates.EssentialPredicates(pod, nil, nodeInfo)
-    } else {
-        fit, reasons, err = predicates.GeneralPredicates(pod, nil, nodeInfo)
-    }
+    fit, reasons, err := checkNodeFitness(pod, nil, nodeInfo)
     if err != nil {
         return false, predicateFails, err
     }
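Note on the hunk above: with the gate gone, Predicates reduces to checkNodeFitness, i.e. to scheduling-constraint checks (node selector/affinity and taint toleration) rather than resource, pressure, or critical-pod checks. A simplified, self-contained sketch of that kind of fitness test, assuming only label-selector and taint/toleration matching; it is not the controller's checkNodeFitness and skips node affinity and host-name matching.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Sketch: does the pod fit the node purely by scheduling constraints?
func fitsNode(pod *v1.Pod, node *v1.Node) bool {
	// nodeSelector: every requested label must be present with the same value.
	for k, want := range pod.Spec.NodeSelector {
		if node.Labels[k] != want {
			return false
		}
	}
	// taints: every NoSchedule/NoExecute taint must be tolerated.
	for _, taint := range node.Spec.Taints {
		if taint.Effect == v1.TaintEffectPreferNoSchedule {
			continue // soft taint, never blocks placement
		}
		if !tolerated(taint, pod.Spec.Tolerations) {
			return false
		}
	}
	return true
}

func tolerated(taint v1.Taint, tolerations []v1.Toleration) bool {
	for _, t := range tolerations {
		keyOK := t.Key == taint.Key || (t.Key == "" && t.Operator == v1.TolerationOpExists)
		effectOK := t.Effect == "" || t.Effect == taint.Effect
		valueOK := t.Operator == v1.TolerationOpExists || t.Value == taint.Value
		if keyOK && effectOK && valueOK {
			return true
		}
	}
	return false
}

func main() {
	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-a", Labels: map[string]string{"disk": "ssd"}}}
	pod := &v1.Pod{Spec: v1.PodSpec{NodeSelector: map[string]string{"disk": "ssd"}}}
	fmt.Println(fitsNode(pod, node)) // true
}
```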