diff --git a/plugin/pkg/admission/priority/BUILD b/plugin/pkg/admission/priority/BUILD index 10470288dd6..6489d6830e9 100644 --- a/plugin/pkg/admission/priority/BUILD +++ b/plugin/pkg/admission/priority/BUILD @@ -38,6 +38,7 @@ go_library( "//pkg/kubeapiserver/admission:go_default_library", "//pkg/kubelet/types:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library", "//staging/src/k8s.io/apiserver/pkg/admission:go_default_library", "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library", diff --git a/plugin/pkg/admission/priority/admission.go b/plugin/pkg/admission/priority/admission.go index 8f3d62469b1..c60a01223bc 100644 --- a/plugin/pkg/admission/priority/admission.go +++ b/plugin/pkg/admission/priority/admission.go @@ -21,6 +21,7 @@ import ( "io" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apiserver/pkg/admission" utilfeature "k8s.io/apiserver/pkg/util/feature" @@ -134,6 +135,20 @@ func (p *priorityPlugin) Validate(a admission.Attributes) error { } } +// priorityClassPermittedInNamespace returns true if we allow the given priority class name in the +// given namespace. It currently checks that system priorities are created only in the system namespace. +func priorityClassPermittedInNamespace(priorityClassName string, namespace string) bool { + // Only allow system priorities in the system namespace. This is to prevent abuse or incorrect + // usage of these priorities. Pods created at these priorities could preempt system critical + // components. + for _, spc := range scheduling.SystemPriorityClasses() { + if spc.Name == priorityClassName && namespace != metav1.NamespaceSystem { + return false + } + } + return true +} + // admitPod makes sure a new pod does not set spec.Priority field. It also makes sure that the PriorityClassName exists if it is provided and resolves the pod priority from the PriorityClassName. func (p *priorityPlugin) admitPod(a admission.Attributes) error { operation := a.GetOperation() @@ -162,6 +177,11 @@ func (p *priorityPlugin) admitPod(a admission.Attributes) error { return fmt.Errorf("failed to get default priority class: %v", err) } } else { + pcName := pod.Spec.PriorityClassName + if !priorityClassPermittedInNamespace(pcName, a.GetNamespace()) { + return admission.NewForbidden(a, fmt.Errorf("pods with %v priorityClass is not permitted in %v namespace", pcName, a.GetNamespace())) + } + // Try resolving the priority class name. pc, err := p.lister.Get(pod.Spec.PriorityClassName) if err != nil { diff --git a/plugin/pkg/admission/priority/admission_test.go b/plugin/pkg/admission/priority/admission_test.go index 2a9e3a1b33c..21e5bf2e728 100644 --- a/plugin/pkg/admission/priority/admission_test.go +++ b/plugin/pkg/admission/priority/admission_test.go @@ -314,7 +314,7 @@ func TestPodAdmission(t *testing.T) { { ObjectMeta: metav1.ObjectMeta{ Name: "pod-w-system-priority", - Namespace: "namespace", + Namespace: metav1.NamespaceSystem, }, Spec: api.PodSpec{ Containers: []api.Container{ @@ -329,7 +329,7 @@ func TestPodAdmission(t *testing.T) { { ObjectMeta: metav1.ObjectMeta{ Name: "mirror-pod-w-system-priority", - Namespace: "namespace", + Namespace: metav1.NamespaceSystem, Annotations: map[string]string{api.MirrorPodAnnotationKey: ""}, }, Spec: api.PodSpec{ @@ -374,6 +374,21 @@ func TestPodAdmission(t *testing.T) { }, }, }, + // pod[8]: Pod with a system priority class name in non-system namespace + { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-w-system-priority-in-nonsystem-namespace", + Namespace: "non-system-namespace", + }, + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: containerName, + }, + }, + PriorityClassName: scheduling.SystemClusterCritical, + }, + }, } // Enable PodPriority feature gate. utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority)) @@ -459,6 +474,13 @@ func TestPodAdmission(t *testing.T) { scheduling.SystemCriticalPriority, false, }, + { + "pod with system critical priority in non-system namespace", + []*scheduling.PriorityClass{systemClusterCritical}, + *pods[8], + scheduling.SystemCriticalPriority, + true, + }, } for _, test := range tests { @@ -485,8 +507,7 @@ func TestPodAdmission(t *testing.T) { if !test.expectError { if err != nil { t.Errorf("Test %q: unexpected error received: %v", test.name, err) - } - if *test.pod.Spec.Priority != test.expectedPriority { + } else if *test.pod.Spec.Priority != test.expectedPriority { t.Errorf("Test %q: expected priority is %d, but got %d.", test.name, test.expectedPriority, *test.pod.Spec.Priority) } } diff --git a/test/e2e/scheduling/predicates.go b/test/e2e/scheduling/predicates.go index 761460f9f27..8d789dfb4e6 100644 --- a/test/e2e/scheduling/predicates.go +++ b/test/e2e/scheduling/predicates.go @@ -47,6 +47,7 @@ var masterNodes sets.String type pausePodConfig struct { Name string + Namespace string Affinity *v1.Affinity Annotations, Labels, NodeSelector map[string]string Resources *v1.ResourceRequirements @@ -602,9 +603,11 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() { }) func initPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod { + var gracePeriod = int64(1) pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: conf.Name, + Namespace: conf.Namespace, Labels: conf.Labels, Annotations: conf.Annotations, OwnerReferences: conf.OwnerReferences, @@ -619,9 +622,10 @@ func initPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod { Ports: conf.Ports, }, }, - Tolerations: conf.Tolerations, - NodeName: conf.NodeName, - PriorityClassName: conf.PriorityClassName, + Tolerations: conf.Tolerations, + NodeName: conf.NodeName, + PriorityClassName: conf.PriorityClassName, + TerminationGracePeriodSeconds: &gracePeriod, }, } if conf.Resources != nil { @@ -631,7 +635,11 @@ func initPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod { } func createPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod { - pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(initPausePod(f, conf)) + namespace := conf.Namespace + if len(namespace) == 0 { + namespace = f.Namespace.Name + } + pod, err := f.ClientSet.CoreV1().Pods(namespace).Create(initPausePod(f, conf)) framework.ExpectNoError(err) return pod } @@ -639,7 +647,7 @@ func createPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod { func runPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod { pod := createPausePod(f, conf) framework.ExpectNoError(framework.WaitForPodRunningInNamespace(f.ClientSet, pod)) - pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(conf.Name, metav1.GetOptions{}) + pod, err := f.ClientSet.CoreV1().Pods(pod.Namespace).Get(conf.Name, metav1.GetOptions{}) framework.ExpectNoError(err) return pod } diff --git a/test/e2e/scheduling/preemption.go b/test/e2e/scheduling/preemption.go index d545493bc59..1ee85ede306 100644 --- a/test/e2e/scheduling/preemption.go +++ b/test/e2e/scheduling/preemption.go @@ -167,6 +167,7 @@ var _ = SIGDescribe("SchedulerPreemption [Serial] [Feature:PodPreemption]", func // Create a critical pod and make sure it is scheduled. runPausePod(f, pausePodConfig{ Name: "critical-pod", + Namespace: metav1.NamespaceSystem, PriorityClassName: scheduling.SystemClusterCritical, Resources: &v1.ResourceRequirements{ Requests: podRes, @@ -183,6 +184,9 @@ var _ = SIGDescribe("SchedulerPreemption [Serial] [Feature:PodPreemption]", func framework.ExpectNoError(err) Expect(livePod.DeletionTimestamp).To(BeNil()) } + // Clean-up the critical pod + err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Delete("critical-pod", metav1.NewDeleteOptions(0)) + framework.ExpectNoError(err) }) // This test verifies that when a high priority pod is pending and its @@ -334,10 +338,14 @@ var _ = SIGDescribe("PodPriorityResolution [Serial] [Feature:PodPreemption]", fu for i, spc := range systemPriorityClasses { pod := createPausePod(f, pausePodConfig{ Name: fmt.Sprintf("pod%d-%v", i, spc), + Namespace: metav1.NamespaceSystem, PriorityClassName: spc, }) Expect(pod.Spec.Priority).NotTo(BeNil()) framework.Logf("Created pod: %v", pod.Name) + // Clean-up the pod. + err := f.ClientSet.CoreV1().Pods(pod.Namespace).Delete(pod.Name, metav1.NewDeleteOptions(0)) + framework.ExpectNoError(err) } }) }) diff --git a/test/kubemark/resources/kube_dns_template.yaml b/test/kubemark/resources/kube_dns_template.yaml index 480eff46627..19a16d7d6cc 100644 --- a/test/kubemark/resources/kube_dns_template.yaml +++ b/test/kubemark/resources/kube_dns_template.yaml @@ -60,7 +60,6 @@ spec: annotations: scheduler.alpha.kubernetes.io/critical-pod: '' spec: - priorityClassName: system-cluster-critical tolerations: - key: "CriticalAddonsOnly" operator: "Exists"