Merge pull request #125277 from iholder101/swap/skip_critical_pods
[KEP-2400]: Restrict access to swap for containers in high priority Pods
@@ -180,6 +180,11 @@ func (m *kubeGenericRuntimeManager) configureContainerSwapResources(lcr *runtime
 		return
 	}
 
+	if kubelettypes.IsCriticalPod(pod) {
+		swapConfigurationHelper.ConfigureNoSwap(lcr)
+		return
+	}
+
 	// NOTE(ehashman): Behavior is defined in the opencontainers runtime spec:
 	// https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#memory
 	switch m.memorySwapBehavior {
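For context, the branch added above keys on pod criticality. Below is a minimal sketch of that check, under the assumption that criticality is decided by the pod's resolved priority; the real kubelettypes.IsCriticalPod helper in pkg/kubelet/types additionally treats static and mirror pods as critical. isCriticalByPriority is an illustrative name, not an identifier from this PR.

	// Simplified sketch, not the actual kubelet helper: a pod counts as
	// critical when its resolved priority is at or above
	// scheduling.SystemCriticalPriority, the value the system-node-critical
	// and system-cluster-critical priority classes map to.
	package main

	import (
		"fmt"

		v1 "k8s.io/api/core/v1"
		"k8s.io/kubernetes/pkg/apis/scheduling"
	)

	func isCriticalByPriority(pod *v1.Pod) bool {
		return pod.Spec.Priority != nil && *pod.Spec.Priority >= scheduling.SystemCriticalPriority
	}

	func main() {
		prio := int32(scheduling.SystemCriticalPriority)
		pod := &v1.Pod{Spec: v1.PodSpec{Priority: &prio}}
		// Prints true: with this PR, such a pod's containers get no swap access.
		fmt.Println(isCriticalByPriority(pod))
	}
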
@@ -40,8 +40,10 @@ import (
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	featuregatetesting "k8s.io/component-base/featuregate/testing"
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
+	"k8s.io/kubernetes/pkg/apis/scheduling"
 	"k8s.io/kubernetes/pkg/features"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
+	"k8s.io/utils/ptr"
 )
 
 func makeExpectedConfig(m *kubeGenericRuntimeManager, pod *v1.Pod, containerIndex int, enforceMemoryQoS bool) *runtimeapi.ContainerConfig {
@@ -1031,6 +1033,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
 		swapBehavior                string
 		addContainerWithoutRequests bool
 		addGuaranteedContainer      bool
+		isCriticalPod               bool
 	}{
 		// With cgroup v1
 		{
@@ -1208,6 +1211,16 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
 			addContainerWithoutRequests: true,
 			addGuaranteedContainer:      false,
 		},
+
+		// When the pod is considered critical, disallow swap access
+		{
+			name:                       "Best-effort QoS, cgroups v2, LimitedSwap, critical pod",
+			cgroupVersion:              cgroupV2,
+			qosClass:                   v1.PodQOSBurstable,
+			nodeSwapFeatureGateEnabled: true,
+			swapBehavior:               types.LimitedSwap,
+			isCriticalPod:              true,
+		},
 	} {
 		t.Run(tc.name, func(t *testing.T) {
 			setCgroupVersionDuringTest(tc.cgroupVersion)
@@ -1244,6 +1257,11 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
 			pod.Spec.Containers[0].Resources = resourceReqsC1
 			pod.Spec.Containers[1].Resources = resourceReqsC2
 
+			if tc.isCriticalPod {
+				pod.Spec.Priority = ptr.To(scheduling.SystemCriticalPriority)
+				assert.True(t, types.IsCriticalPod(pod), "pod is expected to be critical")
+			}
+
 			resourcesC1 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[0], false)
 			resourcesC2 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[1], false)
 
@@ -1252,7 +1270,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
 				return
 			}
 
-			if !tc.nodeSwapFeatureGateEnabled || tc.cgroupVersion == cgroupV1 || (tc.swapBehavior == types.LimitedSwap && tc.qosClass != v1.PodQOSBurstable) {
+			if tc.isCriticalPod || !tc.nodeSwapFeatureGateEnabled || tc.cgroupVersion == cgroupV1 || (tc.swapBehavior == types.LimitedSwap && tc.qosClass != v1.PodQOSBurstable) {
 				expectNoSwap(tc.cgroupVersion, resourcesC1, resourcesC2)
 				return
 			}
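With the updated condition above, a critical pod is expected to end up with no swap regardless of QoS class, configured swap behavior, or cgroup version. As a rough sketch of what such an expectation can look like at the CRI level, the helper below assumes MemorySwapLimitInBytes follows memory+swap (memsw) semantics, so "no swap" means it equals the memory limit; assertNoExtraSwap is an illustrative stand-in, not the test file's actual expectNoSwap helper.

	package kuberuntime_sketch

	import (
		"testing"

		"github.com/stretchr/testify/assert"
		runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	)

	// assertNoExtraSwap is illustrative only. Under the assumption that the
	// CRI's MemorySwapLimitInBytes carries memory+swap (memsw) semantics,
	// a container has no swap access when that field equals its memory
	// limit, i.e. there is zero swap headroom beyond memory.
	func assertNoExtraSwap(t *testing.T, resources ...*runtimeapi.LinuxContainerResources) {
		for _, r := range resources {
			assert.Equal(t, r.MemoryLimitInBytes, r.MemorySwapLimitInBytes,
				"swap limit should equal memory limit, leaving no swap headroom")
		}
	}
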
@@ -84,6 +84,19 @@ var _ = SIGDescribe("Swap", "[LinuxOnly]", nodefeature.Swap, framework.WithSeria
 			ginkgo.Entry("QOS Burstable with memory request equals to limit", v1.PodQOSBurstable, true),
 			ginkgo.Entry("QOS Guaranteed", v1.PodQOSGuaranteed, false),
 		)
+
+		ginkgo.It("with a critical pod - should avoid swap", func() {
+			ginkgo.By("Creating a critical pod")
+			const memoryRequestEqualLimit = false
+			pod := getSwapTestPod(f, v1.PodQOSBurstable, memoryRequestEqualLimit)
+			pod.Spec.PriorityClassName = "system-node-critical"
+
+			pod = runPodAndWaitUntilScheduled(f, pod)
+			gomega.Expect(types.IsCriticalPod(pod)).To(gomega.BeTrueBecause("pod should be critical"))
+
+			ginkgo.By("expecting pod to not have swap access")
+			expectNoSwap(f, pod)
+		})
 	})
 
 	f.Context(framework.WithSerial(), func() {
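The e2e entry above schedules a burstable pod under the system-node-critical priority class and then asserts it has no swap access. As an illustration of how such a check could be performed on a cgroup v2 node, here is a hedged sketch that reads memory.swap.max from inside the container; expectNoSwapCgroupV2 and its use of e2epod.ExecCommandInContainer are assumptions made for this sketch, not the suite's actual expectNoSwap implementation.

	package e2enode_sketch

	import (
		"strings"

		"github.com/onsi/gomega"
		v1 "k8s.io/api/core/v1"
		"k8s.io/kubernetes/test/e2e/framework"
		e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	)

	// expectNoSwapCgroupV2 is illustrative only: on a cgroup v2 node, a
	// container with no swap access should see memory.swap.max == 0 in its
	// own cgroup. Assumes the e2e framework's ExecCommandInContainer helper.
	func expectNoSwapCgroupV2(f *framework.Framework, pod *v1.Pod) {
		out := e2epod.ExecCommandInContainer(f, pod.Name, pod.Spec.Containers[0].Name,
			"/bin/sh", "-c", "cat /sys/fs/cgroup/memory.swap.max")
		gomega.Expect(strings.TrimSpace(out)).To(gomega.Equal("0"),
			"critical pod should have no swap available")
	}
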
 