Merge pull request #50949 from bsalamat/preemption_eviction
Automatic merge from submit-queue
Add pod preemption to the scheduler
**What this PR does / why we need it**:
This is the last in a series of PRs that add priority-based preemption to the scheduler. This PR connects the preemption logic to the scheduler workflow.
**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #48646
**Special notes for your reviewer**:
This PR includes other PRs which are under review (#50805, #50405, #50190). All the new code is located in 43627afdf9.
**Release note**:
```release-note
Add priority-based preemption to the scheduler.
```
ref #47604
/assign @davidopp
@kubernetes/sig-scheduling-pr-reviews
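
For readers who want the gist before reading the diff, here is a minimal, self-contained sketch of the idea the new tests exercise: when a pending high-priority pod does not fit, the scheduler chooses lower-priority victims whose eviction frees enough room. This is illustrative only; the `pod` struct, the `pickVictims` helper, and the CPU-only accounting are assumptions made for the example and are not the real `plugin/pkg/scheduler` API.

```go
package main

import (
    "fmt"
    "sort"
)

// pod is a toy stand-in for a scheduled pod (not the real v1.Pod type).
type pod struct {
    name     string
    priority int32
    milliCPU int64
}

// pickVictims returns the lowest-priority pods (all with priority below the
// preemptor's) whose eviction frees at least `needed` milliCPU, or nil if
// preemption cannot free enough resources.
func pickVictims(running []pod, preemptorPriority int32, needed int64) []pod {
    candidates := make([]pod, 0, len(running))
    for _, p := range running {
        if p.priority < preemptorPriority {
            candidates = append(candidates, p)
        }
    }
    // Evict lower-priority pods first.
    sort.Slice(candidates, func(i, j int) bool {
        return candidates[i].priority < candidates[j].priority
    })
    victims := []pod{}
    freed := int64(0)
    for _, p := range candidates {
        if freed >= needed {
            break
        }
        victims = append(victims, p)
        freed += p.milliCPU
    }
    if freed < needed {
        return nil
    }
    return victims
}

func main() {
    // A full node: a low-priority and a medium-priority pod are running.
    running := []pod{
        {name: "low-pod", priority: 1, milliCPU: 400},
        {name: "medium-pod", priority: 100, milliCPU: 400},
    }
    // A high-priority pod (priority 1000) needs 300m CPU.
    victims := pickVictims(running, 1000, 300)
    fmt.Println("victims:", victims) // expect only the low-priority pod
}
```

The actual behavior is verified end to end by the tests in this diff: victims get a deletion timestamp, and the preemptor pod is annotated with the nominated node name (`core.NominatedNodeAnnotationKey`).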
@@ -15,6 +15,7 @@ go_library(
        "nvidia-gpus.go",
        "opaque_resource.go",
        "predicates.go",
        "preemption.go",
        "priorities.go",
        "rescheduler.go",
    ],
@@ -33,6 +34,7 @@ go_library(
        "//vendor/github.com/stretchr/testify/assert:go_default_library",
        "//vendor/k8s.io/api/core/v1:go_default_library",
        "//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
        "//vendor/k8s.io/api/scheduling/v1alpha1:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
@@ -52,6 +52,7 @@ type pausePodConfig struct {
    NodeName string
    Ports []v1.ContainerPort
    OwnerReferences []metav1.OwnerReference
    PriorityClassName string
}

var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
@@ -555,8 +556,9 @@ func initPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
                    Ports: conf.Ports,
                },
            },
            Tolerations: conf.Tolerations,
            NodeName: conf.NodeName,
            Tolerations: conf.Tolerations,
            NodeName: conf.NodeName,
            PriorityClassName: conf.PriorityClassName,
        },
    }
    if conf.Resources != nil {
test/e2e/scheduling/preemption.go (new file, 128 lines)
@@ -0,0 +1,128 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduling

import (
    "fmt"
    "time"

    "k8s.io/api/core/v1"
    "k8s.io/api/scheduling/v1alpha1"
    "k8s.io/apimachinery/pkg/api/errors"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    clientset "k8s.io/client-go/kubernetes"
    "k8s.io/kubernetes/test/e2e/framework"

    . "github.com/onsi/ginkgo"
    . "github.com/onsi/gomega"
    _ "github.com/stretchr/testify/assert"
)

var _ = SIGDescribe("SchedulerPreemption [Serial] [Feature:PodPreemption]", func() {
    var cs clientset.Interface
    var nodeList *v1.NodeList
    var ns string
    f := framework.NewDefaultFramework("sched-preemption")

    lowPriority, mediumPriority, highPriority := int32(1), int32(100), int32(1000)
    lowPriorityClassName := f.BaseName + "-low-priority"
    mediumPriorityClassName := f.BaseName + "-medium-priority"
    highPriorityClassName := f.BaseName + "-high-priority"

    AfterEach(func() {
    })

    BeforeEach(func() {
        cs = f.ClientSet
        ns = f.Namespace.Name
        nodeList = &v1.NodeList{}

        _, err := f.ClientSet.SchedulingV1alpha1().PriorityClasses().Create(&v1alpha1.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: highPriorityClassName}, Value: highPriority})
        Expect(err == nil || errors.IsAlreadyExists(err)).To(Equal(true))
        _, err = f.ClientSet.SchedulingV1alpha1().PriorityClasses().Create(&v1alpha1.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: mediumPriorityClassName}, Value: mediumPriority})
        Expect(err == nil || errors.IsAlreadyExists(err)).To(Equal(true))
        _, err = f.ClientSet.SchedulingV1alpha1().PriorityClasses().Create(&v1alpha1.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: lowPriorityClassName}, Value: lowPriority})
        Expect(err == nil || errors.IsAlreadyExists(err)).To(Equal(true))

        framework.WaitForAllNodesHealthy(cs, time.Minute)
        masterNodes, nodeList = framework.GetMasterAndWorkerNodesOrDie(cs)

        err = framework.CheckTestingNSDeletedExcept(cs, ns)
        framework.ExpectNoError(err)
    })

    // This test verifies that when a higher priority pod is created and no node with
    // enough resources is found, scheduler preempts a lower priority pod to schedule
    // the high priority pod.
    It("validates basic preemption works", func() {
        var podRes v1.ResourceList
        // Create one pod per node that uses a lot of the node's resources.
        By("Create pods that use 60% of node resources.")
        pods := make([]*v1.Pod, len(nodeList.Items))
        for i, node := range nodeList.Items {
            cpuAllocatable, found := node.Status.Allocatable["cpu"]
            Expect(found).To(Equal(true))
            milliCPU := cpuAllocatable.MilliValue() * 40 / 100
            memAllocatable, found := node.Status.Allocatable["memory"]
            Expect(found).To(Equal(true))
            memory := memAllocatable.Value() * 60 / 100
            podRes = v1.ResourceList{}
            podRes[v1.ResourceCPU] = *resource.NewMilliQuantity(int64(milliCPU), resource.DecimalSI)
            podRes[v1.ResourceMemory] = *resource.NewQuantity(int64(memory), resource.BinarySI)

            // make the first pod low priority and the rest medium priority.
            priorityName := mediumPriorityClassName
            if i == 0 {
                priorityName = lowPriorityClassName
            }
            pods[i] = createPausePod(f, pausePodConfig{
                Name: fmt.Sprintf("pod%d-%v", i, priorityName),
                PriorityClassName: priorityName,
                Resources: &v1.ResourceRequirements{
                    Requests: podRes,
                },
            })
            framework.Logf("Created pod: %v", pods[i].Name)
        }
        By("Wait for pods to be scheduled.")
        for _, pod := range pods {
            framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
        }

        By("Run a high priority pod that use 60% of a node resources.")
        // Create a high priority pod and make sure it is scheduled.
        runPausePod(f, pausePodConfig{
            Name: "preemptor-pod",
            PriorityClassName: highPriorityClassName,
            Resources: &v1.ResourceRequirements{
                Requests: podRes,
            },
        })
        // Make sure that the lowest priority pod is deleted.
        preemptedPod, err := cs.CoreV1().Pods(pods[0].Namespace).Get(pods[0].Name, metav1.GetOptions{})
        podDeleted := (err != nil && errors.IsNotFound(err)) ||
            (err == nil && preemptedPod.DeletionTimestamp != nil)
        Expect(podDeleted).To(BeTrue())
        // Other pods (mid priority ones) should be present.
        for i := 1; i < len(pods); i++ {
            livePod, err := cs.CoreV1().Pods(pods[i].Namespace).Get(pods[i].Name, metav1.GetOptions{})
            framework.ExpectNoError(err)
            Expect(livePod.DeletionTimestamp).To(BeNil())
        }
    })
})
@@ -21,12 +21,14 @@ go_test(
    deps = [
        "//pkg/api:go_default_library",
        "//pkg/api/testapi:go_default_library",
        "//pkg/features:go_default_library",
        "//plugin/cmd/kube-scheduler/app:go_default_library",
        "//plugin/cmd/kube-scheduler/app/options:go_default_library",
        "//plugin/pkg/scheduler:go_default_library",
        "//plugin/pkg/scheduler/algorithm:go_default_library",
        "//plugin/pkg/scheduler/algorithmprovider:go_default_library",
        "//plugin/pkg/scheduler/api:go_default_library",
        "//plugin/pkg/scheduler/core:go_default_library",
        "//plugin/pkg/scheduler/factory:go_default_library",
        "//plugin/pkg/scheduler/schedulercache:go_default_library",
        "//test/e2e/framework:go_default_library",
@@ -37,6 +39,7 @@ go_test(
        "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
        "//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
        "//vendor/k8s.io/client-go/informers:go_default_library",
        "//vendor/k8s.io/client-go/kubernetes:go_default_library",
        "//vendor/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
@@ -51,7 +51,7 @@ func TestNodeAffinity(t *testing.T) {
    }
    // Create a pod with node affinity.
    podName := "pod-with-node-affinity"
    pod, err := runPausePod(context.clientSet, &pausePodConfig{
    pod, err := runPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{
        Name: podName,
        Namespace: context.ns.Name,
        Affinity: &v1.Affinity{
@@ -72,7 +72,7 @@ func TestNodeAffinity(t *testing.T) {
                },
            },
        },
    })
    }))
    if err != nil {
        t.Fatalf("Error running pause pod: %v", err)
    }
@@ -110,11 +110,11 @@ func TestPodAffinity(t *testing.T) {
    // Add a pod with a label and wait for it to schedule.
    labelKey := "service"
    labelValue := "S1"
    _, err = runPausePod(context.clientSet, &pausePodConfig{
    _, err = runPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{
        Name: "attractor-pod",
        Namespace: context.ns.Name,
        Labels: map[string]string{labelKey: labelValue},
    })
    }))
    if err != nil {
        t.Fatalf("Error running the attractor pod: %v", err)
    }
@@ -125,7 +125,7 @@ func TestPodAffinity(t *testing.T) {
    }
    // Add a new pod with affinity to the attractor pod.
    podName := "pod-with-podaffinity"
    pod, err := runPausePod(context.clientSet, &pausePodConfig{
    pod, err := runPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{
        Name: podName,
        Namespace: context.ns.Name,
        Affinity: &v1.Affinity{
@@ -158,7 +158,7 @@ func TestPodAffinity(t *testing.T) {
                },
            },
        },
    })
    }))
    if err != nil {
        t.Fatalf("Error running pause pod: %v", err)
    }
@@ -24,9 +24,11 @@ import (
    "time"

    "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/errors"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/util/wait"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    "k8s.io/client-go/informers"
    clientset "k8s.io/client-go/kubernetes"
    clientv1core "k8s.io/client-go/kubernetes/typed/core/v1"
@@ -36,15 +38,18 @@ import (
    "k8s.io/client-go/tools/record"
    "k8s.io/kubernetes/pkg/api"
    "k8s.io/kubernetes/pkg/api/testapi"
    "k8s.io/kubernetes/pkg/features"
    "k8s.io/kubernetes/plugin/cmd/kube-scheduler/app"
    "k8s.io/kubernetes/plugin/cmd/kube-scheduler/app/options"
    "k8s.io/kubernetes/plugin/pkg/scheduler"
    "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
    _ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/core"
    "k8s.io/kubernetes/plugin/pkg/scheduler/factory"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
    "k8s.io/kubernetes/test/integration/framework"
    testutils "k8s.io/kubernetes/test/utils"
)

const enableEquivalenceCache = true
@@ -56,11 +61,11 @@ type nodeStateManager struct {
    makeUnSchedulable nodeMutationFunc
}

func PredicateOne(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
func PredicateOne(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
    return true, nil, nil
}

func PredicateTwo(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
func PredicateTwo(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
    return true, nil, nil
}
@@ -457,13 +462,13 @@ func TestMultiScheduler(t *testing.T) {
    }

    defaultScheduler := "default-scheduler"
    testPodFitsDefault, err := createPausePod(context.clientSet, &pausePodConfig{Name: "pod-fits-default", Namespace: context.ns.Name, SchedulerName: defaultScheduler})
    testPodFitsDefault, err := createPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{Name: "pod-fits-default", Namespace: context.ns.Name, SchedulerName: defaultScheduler}))
    if err != nil {
        t.Fatalf("Failed to create pod: %v", err)
    }

    fooScheduler := "foo-scheduler"
    testPodFitsFoo, err := createPausePod(context.clientSet, &pausePodConfig{Name: "pod-fits-foo", Namespace: context.ns.Name, SchedulerName: fooScheduler})
    testPodFitsFoo, err := createPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{Name: "pod-fits-foo", Namespace: context.ns.Name, SchedulerName: fooScheduler}))
    if err != nil {
        t.Fatalf("Failed to create pod: %v", err)
    }
@@ -647,3 +652,251 @@ func TestAllocatable(t *testing.T) {
        t.Logf("Test allocatable awareness: %s Pod not scheduled as expected", testAllocPod2.Name)
    }
}

// TestPreemption tests a few preemption scenarios.
func TestPreemption(t *testing.T) {
    // Enable PodPriority feature gate.
    utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority))
    // Initialize scheduler.
    context := initTest(t, "preemption")
    defer cleanupTest(t, context)
    cs := context.clientSet

    lowPriority, mediumPriority, highPriority := int32(100), int32(200), int32(300)
    defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
        v1.ResourceCPU: *resource.NewMilliQuantity(100, resource.DecimalSI),
        v1.ResourceMemory: *resource.NewQuantity(100, resource.BinarySI)},
    }

    tests := []struct {
        description string
        existingPods []*v1.Pod
        pod *v1.Pod
        preemptedPodIndexes map[int]struct{}
    }{
        {
            description: "basic pod preemption",
            existingPods: []*v1.Pod{
                initPausePod(context.clientSet, &pausePodConfig{
                    Name: "victim-pod",
                    Namespace: context.ns.Name,
                    Priority: &lowPriority,
                    Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                        v1.ResourceCPU: *resource.NewMilliQuantity(400, resource.DecimalSI),
                        v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
                    },
                }),
            },
            pod: initPausePod(cs, &pausePodConfig{
                Name: "preemptor-pod",
                Namespace: context.ns.Name,
                Priority: &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU: *resource.NewMilliQuantity(300, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
                },
            }),
            preemptedPodIndexes: map[int]struct{}{0: {}},
        },
        {
            description: "preemption is performed to satisfy anti-affinity",
            existingPods: []*v1.Pod{
                initPausePod(cs, &pausePodConfig{
                    Name: "pod-0", Namespace: context.ns.Name,
                    Priority: &mediumPriority,
                    Labels: map[string]string{"pod": "p0"},
                    Resources: defaultPodRes,
                }),
                initPausePod(cs, &pausePodConfig{
                    Name: "pod-1", Namespace: context.ns.Name,
                    Priority: &lowPriority,
                    Labels: map[string]string{"pod": "p1"},
                    Resources: defaultPodRes,
                    Affinity: &v1.Affinity{
                        PodAntiAffinity: &v1.PodAntiAffinity{
                            RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
                                {
                                    LabelSelector: &metav1.LabelSelector{
                                        MatchExpressions: []metav1.LabelSelectorRequirement{
                                            {
                                                Key: "pod",
                                                Operator: metav1.LabelSelectorOpIn,
                                                Values: []string{"preemptor"},
                                            },
                                        },
                                    },
                                    TopologyKey: "node",
                                },
                            },
                        },
                    },
                }),
            },
            // A higher priority pod with anti-affinity.
            pod: initPausePod(cs, &pausePodConfig{
                Name: "preemptor-pod",
                Namespace: context.ns.Name,
                Priority: &highPriority,
                Labels: map[string]string{"pod": "preemptor"},
                Resources: defaultPodRes,
                Affinity: &v1.Affinity{
                    PodAntiAffinity: &v1.PodAntiAffinity{
                        RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
                            {
                                LabelSelector: &metav1.LabelSelector{
                                    MatchExpressions: []metav1.LabelSelectorRequirement{
                                        {
                                            Key: "pod",
                                            Operator: metav1.LabelSelectorOpIn,
                                            Values: []string{"p0"},
                                        },
                                    },
                                },
                                TopologyKey: "node",
                            },
                        },
                    },
                },
            }),
            preemptedPodIndexes: map[int]struct{}{0: {}, 1: {}},
        },
        {
            // This is similar to the previous case only pod-1 is high priority.
            description: "preemption is not performed when anti-affinity is not satisfied",
            existingPods: []*v1.Pod{
                initPausePod(cs, &pausePodConfig{
                    Name: "pod-0", Namespace: context.ns.Name,
                    Priority: &mediumPriority,
                    Labels: map[string]string{"pod": "p0"},
                    Resources: defaultPodRes,
                }),
                initPausePod(cs, &pausePodConfig{
                    Name: "pod-1", Namespace: context.ns.Name,
                    Priority: &highPriority,
                    Labels: map[string]string{"pod": "p1"},
                    Resources: defaultPodRes,
                    Affinity: &v1.Affinity{
                        PodAntiAffinity: &v1.PodAntiAffinity{
                            RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
                                {
                                    LabelSelector: &metav1.LabelSelector{
                                        MatchExpressions: []metav1.LabelSelectorRequirement{
                                            {
                                                Key: "pod",
                                                Operator: metav1.LabelSelectorOpIn,
                                                Values: []string{"preemptor"},
                                            },
                                        },
                                    },
                                    TopologyKey: "node",
                                },
                            },
                        },
                    },
                }),
            },
            // A higher priority pod with anti-affinity.
            pod: initPausePod(cs, &pausePodConfig{
                Name: "preemptor-pod",
                Namespace: context.ns.Name,
                Priority: &highPriority,
                Labels: map[string]string{"pod": "preemptor"},
                Resources: defaultPodRes,
                Affinity: &v1.Affinity{
                    PodAntiAffinity: &v1.PodAntiAffinity{
                        RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
                            {
                                LabelSelector: &metav1.LabelSelector{
                                    MatchExpressions: []metav1.LabelSelectorRequirement{
                                        {
                                            Key: "pod",
                                            Operator: metav1.LabelSelectorOpIn,
                                            Values: []string{"p0"},
                                        },
                                    },
                                },
                                TopologyKey: "node",
                            },
                        },
                    },
                },
            }),
            preemptedPodIndexes: map[int]struct{}{},
        },
    }
    // Create a node with some resources and a label.
    nodeRes := &v1.ResourceList{
        v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
        v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
        v1.ResourceMemory: *resource.NewQuantity(500, resource.BinarySI),
    }
    node, err := createNode(context.clientSet, "node1", nodeRes)
    if err != nil {
        t.Fatalf("Error creating nodes: %v", err)
    }
    nodeLabels := map[string]string{"node": node.Name}
    if err = testutils.AddLabelsToNode(context.clientSet, node.Name, nodeLabels); err != nil {
        t.Fatalf("Cannot add labels to node: %v", err)
    }
    if err = waitForNodeLabels(context.clientSet, node.Name, nodeLabels); err != nil {
        t.Fatalf("Adding labels to node didn't succeed: %v", err)
    }

    for _, test := range tests {
        pods := make([]*v1.Pod, len(test.existingPods))
        // Create and run existingPods.
        for i, p := range test.existingPods {
            pods[i], err = runPausePod(cs, p)
            if err != nil {
                t.Fatalf("Test [%v]: Error running pause pod: %v", test.description, err)
            }
        }
        // Create the "pod".
        preemptor, err := createPausePod(cs, test.pod)
        if err != nil {
            t.Errorf("Error while creating high priority pod: %v", err)
        }
        // Wait for preemption of pods and make sure the other ones are not preempted.
        for i, p := range pods {
            if _, found := test.preemptedPodIndexes[i]; found {
                if err = wait.Poll(time.Second, wait.ForeverTestTimeout, podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil {
                    t.Errorf("Test [%v]: Pod %v is not getting evicted.", test.description, p.Name)
                }
            } else {
                if p.DeletionTimestamp != nil {
                    t.Errorf("Test [%v]: Didn't expect pod %v to get preempted.", test.description, p.Name)
                }
            }
        }
        // Also check that the preemptor pod gets the annotation for nominated node name.
        if len(test.preemptedPodIndexes) > 0 {
            if err = wait.Poll(time.Second, wait.ForeverTestTimeout, func() (bool, error) {
                pod, err := context.clientSet.CoreV1().Pods(context.ns.Name).Get("preemptor-pod", metav1.GetOptions{})
                if err != nil {
                    t.Errorf("Test [%v]: error getting pod: %v", test.description, err)
                }
                annot, found := pod.Annotations[core.NominatedNodeAnnotationKey]
                if found && len(annot) > 0 {
                    return true, nil
                }
                return false, err
            }); err != nil {
                t.Errorf("Test [%v]: Pod annotation did not get set.", test.description)
            }
        }

        // Cleanup
        pods = append(pods, preemptor)
        for _, p := range pods {
            err = cs.CoreV1().Pods(p.Namespace).Delete(p.Name, metav1.NewDeleteOptions(0))
            if err != nil && !errors.IsNotFound(err) {
                t.Errorf("Test [%v]: error, %v, while deleting pod during test.", test.description, err)
            }
            err = wait.Poll(time.Second, wait.ForeverTestTimeout, podDeleted(cs, p.Namespace, p.Name))
            if err != nil {
                t.Errorf("Test [%v]: error, %v, while waiting for pod to get deleted.", test.description, err)
            }
        }
    }
}
@@ -205,6 +205,7 @@ type pausePodConfig struct {
    Tolerations []v1.Toleration
    NodeName string
    SchedulerName string
    Priority *int32
}

// initPausePod initializes a pod API object from the given config. It is used
@@ -213,6 +214,7 @@ func initPausePod(cs clientset.Interface, conf *pausePodConfig) *v1.Pod {
    pod := &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            Name: conf.Name,
            Namespace: conf.Namespace,
            Labels: conf.Labels,
            Annotations: conf.Annotations,
        },
@@ -228,6 +230,7 @@ func initPausePod(cs clientset.Interface, conf *pausePodConfig) *v1.Pod {
            Tolerations: conf.Tolerations,
            NodeName: conf.NodeName,
            SchedulerName: conf.SchedulerName,
            Priority: conf.Priority,
        },
    }
    if conf.Resources != nil {
@@ -238,9 +241,8 @@ func initPausePod(cs clientset.Interface, conf *pausePodConfig) *v1.Pod {

// createPausePod creates a pod with "Pause" image and the given config and
// return its pointer and error status.
func createPausePod(cs clientset.Interface, conf *pausePodConfig) (*v1.Pod, error) {
    p := initPausePod(cs, conf)
    return cs.CoreV1().Pods(conf.Namespace).Create(p)
func createPausePod(cs clientset.Interface, p *v1.Pod) (*v1.Pod, error) {
    return cs.CoreV1().Pods(p.Namespace).Create(p)
}

// createPausePodWithResource creates a pod with "Pause" image and the given
@@ -262,22 +264,21 @@ func createPausePodWithResource(cs clientset.Interface, podName string, nsName s
            },
        }
    }
    return createPausePod(cs, &conf)
    return createPausePod(cs, initPausePod(cs, &conf))
}
// runPausePod creates a pod with "Pause" image and the given config and waits
// until it is scheduled. It returns its pointer and error status.
func runPausePod(cs clientset.Interface, conf *pausePodConfig) (*v1.Pod, error) {
    p := initPausePod(cs, conf)
    pod, err := cs.CoreV1().Pods(conf.Namespace).Create(p)
func runPausePod(cs clientset.Interface, pod *v1.Pod) (*v1.Pod, error) {
    pod, err := cs.CoreV1().Pods(pod.Namespace).Create(pod)
    if err != nil {
        return nil, fmt.Errorf("Error creating pause pod: %v", err)
    }
    if err = waitForPodToSchedule(cs, pod); err != nil {
        return pod, fmt.Errorf("Pod %v didn't schedule successfully. Error: %v", pod.Name, err)
    }
    if pod, err = cs.CoreV1().Pods(conf.Namespace).Get(conf.Name, metav1.GetOptions{}); err != nil {
        return pod, fmt.Errorf("Error getting pod %v info: %v", conf.Name, err)
    if pod, err = cs.CoreV1().Pods(pod.Namespace).Get(pod.Name, metav1.GetOptions{}); err != nil {
        return pod, fmt.Errorf("Error getting pod %v info: %v", pod.Name, err)
    }
    return pod, nil
}
@@ -285,7 +286,10 @@ func runPausePod(cs clientset.Interface, conf *pausePodConfig) (*v1.Pod, error)
// podDeleted returns true if a pod is not found in the given namespace.
func podDeleted(c clientset.Interface, podNamespace, podName string) wait.ConditionFunc {
    return func() (bool, error) {
        _, err := c.CoreV1().Pods(podNamespace).Get(podName, metav1.GetOptions{})
        pod, err := c.CoreV1().Pods(podNamespace).Get(podName, metav1.GetOptions{})
        if pod.DeletionTimestamp != nil {
            return true, nil
        }
        if errors.IsNotFound(err) {
            return true, nil
        }
@@ -293,6 +297,20 @@ func podDeleted(c clientset.Interface, podNamespace, podName string) wait.Condit
    }
}

// podIsGettingEvicted returns true if the pod's deletion timestamp is set.
func podIsGettingEvicted(c clientset.Interface, podNamespace, podName string) wait.ConditionFunc {
    return func() (bool, error) {
        pod, err := c.CoreV1().Pods(podNamespace).Get(podName, metav1.GetOptions{})
        if err != nil {
            return false, err
        }
        if pod.DeletionTimestamp != nil {
            return true, nil
        }
        return false, nil
    }
}

// podScheduled returns true if a node is assigned to the given pod.
func podScheduled(c clientset.Interface, podNamespace, podName string) wait.ConditionFunc {
    return func() (bool, error) {