Merge pull request #118764 from iholder101/Swap/burstableQoS-impl

Add full cgroup v2 swap support with automatically calculated swap limit for LimitedSwap and Burstable QoS Pods
Merged by Kubernetes Prow Robot on 2023-07-17 19:49:07 -07:00; committed by GitHub.
10 changed files with 831 additions and 169 deletions.

View File

@@ -596,8 +596,9 @@ const (
// Allow pods to failover to a different node in case of non graceful node shutdown
NodeOutOfServiceVolumeDetach featuregate.Feature = "NodeOutOfServiceVolumeDetach"
// owner: @ehashman
// owner: @iholder101
// alpha: v1.22
// beta1: v1.28. For more info, please look at the KEP: https://kep.k8s.io/2400.
//
// Permits kubelet to run with swap enabled
NodeSwap featuregate.Feature = "NodeSwap"
@@ -1074,7 +1075,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
NodeOutOfServiceVolumeDetach: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.31
NodeSwap: {Default: false, PreRelease: featuregate.Alpha},
NodeSwap: {Default: false, PreRelease: featuregate.Beta},
PDBUnhealthyPodEvictionPolicy: {Default: true, PreRelease: featuregate.Beta},

View File

@@ -45,11 +45,12 @@ import (
const (
// systemdSuffix is the cgroup name suffix for systemd
systemdSuffix string = ".slice"
// MemoryMin is memory.min for cgroup v2
MemoryMin string = "memory.min"
// MemoryHigh is memory.high for cgroup v2
MemoryHigh string = "memory.high"
Cgroup2MaxCpuLimit string = "max"
// Cgroup2MemoryMin is memory.min for cgroup v2
Cgroup2MemoryMin string = "memory.min"
// Cgroup2MemoryHigh is memory.high for cgroup v2
Cgroup2MemoryHigh string = "memory.high"
Cgroup2MaxCpuLimit string = "max"
Cgroup2MaxSwapFilename string = "memory.swap.max"
)
var RootCgroupName = CgroupName([]string{})
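As context for the renames above: these constants are the literal cgroup v2 interface filenames, and they later serve as keys in the CRI Unified resources map. A minimal, self-contained sketch of how such a map might look (the values are illustrative, not taken from this diff):

package main

import "fmt"

// Stand-ins mirroring the constants introduced above.
const (
	Cgroup2MemoryMin       = "memory.min"
	Cgroup2MaxSwapFilename = "memory.swap.max"
)

func main() {
	// A container with memory.min pinned at 128 MiB and swap capped at 512 MiB
	// would carry a cgroup v2 Unified map along these lines.
	unified := map[string]string{
		Cgroup2MemoryMin:       fmt.Sprintf("%d", 128*1024*1024),
		Cgroup2MaxSwapFilename: fmt.Sprintf("%d", 512*1024*1024),
	}
	fmt.Println(unified)
}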

View File

@@ -196,7 +196,7 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64,
}
if memoryMin > 0 {
result.Unified = map[string]string{
MemoryMin: strconv.FormatInt(memoryMin, 10),
Cgroup2MemoryMin: strconv.FormatInt(memoryMin, 10),
}
}
}

View File

@@ -147,7 +147,7 @@ func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.
if rp.Unified == nil {
rp.Unified = make(map[string]string)
}
rp.Unified[MemoryMin] = strconv.FormatInt(*rp.Memory, 10)
rp.Unified[Cgroup2MemoryMin] = strconv.FormatInt(*rp.Memory, 10)
}
}

View File

@@ -292,7 +292,7 @@ func (m *qosContainerManagerImpl) setMemoryQoS(configs map[v1.PodQOSClass]*Cgrou
if configs[v1.PodQOSBurstable].ResourceParameters.Unified == nil {
configs[v1.PodQOSBurstable].ResourceParameters.Unified = make(map[string]string)
}
configs[v1.PodQOSBurstable].ResourceParameters.Unified[MemoryMin] = strconv.FormatInt(burstableMin, 10)
configs[v1.PodQOSBurstable].ResourceParameters.Unified[Cgroup2MemoryMin] = strconv.FormatInt(burstableMin, 10)
klog.V(4).InfoS("MemoryQoS config for qos", "qos", v1.PodQOSBurstable, "memoryMin", burstableMin)
}
@@ -300,7 +300,7 @@ func (m *qosContainerManagerImpl) setMemoryQoS(configs map[v1.PodQOSClass]*Cgrou
if configs[v1.PodQOSGuaranteed].ResourceParameters.Unified == nil {
configs[v1.PodQOSGuaranteed].ResourceParameters.Unified = make(map[string]string)
}
configs[v1.PodQOSGuaranteed].ResourceParameters.Unified[MemoryMin] = strconv.FormatInt(guaranteedMin, 10)
configs[v1.PodQOSGuaranteed].ResourceParameters.Unified[Cgroup2MemoryMin] = strconv.FormatInt(guaranteedMin, 10)
klog.V(4).InfoS("MemoryQoS config for qos", "qos", v1.PodQOSGuaranteed, "memoryMin", guaranteedMin)
}
}

View File

@@ -20,6 +20,9 @@ limitations under the License.
package kuberuntime
import (
"fmt"
cadvisorv1 "github.com/google/cadvisor/info/v1"
kubeapiqos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"math"
"os"
"strconv"
@@ -46,7 +49,7 @@ func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config
enforceMemoryQoS := false
// Set memory.min and memory.high if MemoryQoS enabled with cgroups v2
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) &&
libcontainercgroups.IsCgroup2UnifiedMode() {
isCgroup2UnifiedMode() {
enforceMemoryQoS = true
}
cl, err := m.generateLinuxContainerConfig(container, pod, uid, username, nsTarget, enforceMemoryQoS)
@@ -99,21 +102,17 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod,
lcr.HugepageLimits = GetHugepageLimitsFromResources(container.Resources)
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) {
if swapConfigurationHelper := newSwapConfigurationHelper(*m.machineInfo); utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) {
// NOTE(ehashman): Behaviour is defined in the opencontainers runtime spec:
// https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#memory
switch m.memorySwapBehavior {
case kubelettypes.UnlimitedSwap:
// -1 = unlimited swap
lcr.MemorySwapLimitInBytes = -1
case kubelettypes.LimitedSwap:
fallthrough
swapConfigurationHelper.ConfigureLimitedSwap(lcr, pod, container)
default:
// memorySwapLimit = total permitted memory+swap; if equal to memory limit, => 0 swap above memory limit
// Some swapping is still possible.
// Note that if memory limit is 0, memory swap limit is ignored.
lcr.MemorySwapLimitInBytes = lcr.MemoryLimitInBytes
swapConfigurationHelper.ConfigureUnlimitedSwap(lcr)
}
} else {
swapConfigurationHelper.ConfigureNoSwap(lcr)
}
// Set memory.min and memory.high to enforce MemoryQoS
@@ -122,7 +121,7 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod,
memoryRequest := container.Resources.Requests.Memory().Value()
memoryLimit := container.Resources.Limits.Memory().Value()
if memoryRequest != 0 {
unified[cm.MemoryMin] = strconv.FormatInt(memoryRequest, 10)
unified[cm.Cgroup2MemoryMin] = strconv.FormatInt(memoryRequest, 10)
}
// By their QoS definition, Guaranteed pods require that memory request equals memory limit and cpu request equals cpu limit.
@@ -148,7 +147,7 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod,
}
}
if memoryHigh != 0 && memoryHigh > memoryRequest {
unified[cm.MemoryHigh] = strconv.FormatInt(memoryHigh, 10)
unified[cm.Cgroup2MemoryHigh] = strconv.FormatInt(memoryHigh, 10)
}
}
if len(unified) > 0 {
@@ -171,7 +170,7 @@ func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, cont
enforceMemoryQoS := false
// Set memory.min and memory.high if MemoryQoS enabled with cgroups v2
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) &&
libcontainercgroups.IsCgroup2UnifiedMode() {
isCgroup2UnifiedMode() {
enforceMemoryQoS = true
}
return &runtimeapi.ContainerResources{
@@ -216,7 +215,7 @@ func (m *kubeGenericRuntimeManager) calculateLinuxResources(cpuRequest, cpuLimit
}
// runc requires cgroupv2 for unified mode
if libcontainercgroups.IsCgroup2UnifiedMode() {
if isCgroup2UnifiedMode() {
resources.Unified = map[string]string{
// Ask the kernel to kill all processes in the container cgroup in case of OOM.
// See memory.oom.group in https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html for
@@ -298,3 +297,94 @@ func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *k
}
return cStatusResources
}
// Note: this function variable exists so that unit tests can mock the cgroup
// version by assigning a stub function to it. Without it, the cgroup version
// would depend solely on the environment running the test.
var isCgroup2UnifiedMode = func() bool {
return libcontainercgroups.IsCgroup2UnifiedMode()
}
type swapConfigurationHelper struct {
machineInfo cadvisorv1.MachineInfo
}
func newSwapConfigurationHelper(machineInfo cadvisorv1.MachineInfo) *swapConfigurationHelper {
return &swapConfigurationHelper{machineInfo: machineInfo}
}
func (m swapConfigurationHelper) ConfigureLimitedSwap(lcr *runtimeapi.LinuxContainerResources, pod *v1.Pod, container *v1.Container) {
podQos := kubeapiqos.GetPodQOS(pod)
containerDoesNotRequestMemory := container.Resources.Requests.Memory().IsZero() && container.Resources.Limits.Memory().IsZero()
memoryRequestEqualsToLimit := container.Resources.Requests.Memory().Cmp(*container.Resources.Limits.Memory()) == 0
if podQos != v1.PodQOSBurstable || containerDoesNotRequestMemory || !isCgroup2UnifiedMode() || memoryRequestEqualsToLimit {
m.ConfigureNoSwap(lcr)
return
}
containerMemoryRequest := container.Resources.Requests.Memory()
swapLimit, err := calcSwapForBurstablePods(containerMemoryRequest.Value(), int64(m.machineInfo.MemoryCapacity), int64(m.machineInfo.SwapCapacity))
if err != nil {
klog.ErrorS(err, "cannot calculate swap allocation amount; disallowing swap")
m.ConfigureNoSwap(lcr)
return
}
m.configureSwap(lcr, swapLimit)
}
func (m swapConfigurationHelper) ConfigureNoSwap(lcr *runtimeapi.LinuxContainerResources) {
if !isCgroup2UnifiedMode() {
// memorySwapLimit = total permitted memory+swap; if equal to memory limit, => 0 swap above memory limit
// Some swapping is still possible.
// Note that if memory limit is 0, memory swap limit is ignored.
lcr.MemorySwapLimitInBytes = lcr.MemoryLimitInBytes
return
}
m.configureSwap(lcr, 0)
}
func (m swapConfigurationHelper) ConfigureUnlimitedSwap(lcr *runtimeapi.LinuxContainerResources) {
if !isCgroup2UnifiedMode() {
m.ConfigureNoSwap(lcr)
return
}
if lcr.Unified == nil {
lcr.Unified = map[string]string{}
}
lcr.Unified[cm.Cgroup2MaxSwapFilename] = "max"
}
func (m swapConfigurationHelper) configureSwap(lcr *runtimeapi.LinuxContainerResources, swapMemory int64) {
if !isCgroup2UnifiedMode() {
klog.ErrorS(fmt.Errorf("swap configuration is not supported with cgroup v1"), "swap configuration under cgroup v1 is unexpected")
return
}
if lcr.Unified == nil {
lcr.Unified = map[string]string{}
}
lcr.Unified[cm.Cgroup2MaxSwapFilename] = fmt.Sprintf("%d", swapMemory)
}
// The swap limit is calculated as (<containerMemoryRequest>/<nodeTotalMemory>)*<totalPodsSwapAvailable>.
// For more info, please look at the following KEP: https://kep.k8s.io/2400
func calcSwapForBurstablePods(containerMemoryRequest, nodeTotalMemory, totalPodsSwapAvailable int64) (int64, error) {
if nodeTotalMemory <= 0 {
return 0, fmt.Errorf("total node memory is 0")
}
if containerMemoryRequest > nodeTotalMemory {
return 0, fmt.Errorf("container request %d is larger than total node memory %d", containerMemoryRequest, nodeTotalMemory)
}
containerMemoryProportion := float64(containerMemoryRequest) / float64(nodeTotalMemory)
swapAllocation := containerMemoryProportion * float64(totalPodsSwapAvailable)
return int64(swapAllocation), nil
}

View File

@@ -21,6 +21,9 @@ package kuberuntime
import (
"context"
"fmt"
"k8s.io/kubernetes/pkg/kubelet/cm"
"k8s.io/kubernetes/pkg/kubelet/types"
"math"
"os"
"reflect"
@@ -38,7 +41,6 @@ import (
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
)
func makeExpectedConfig(m *kubeGenericRuntimeManager, pod *v1.Pod, containerIndex int, enforceMemoryQoS bool) *runtimeapi.ContainerConfig {
@@ -244,11 +246,12 @@ func TestCalculateLinuxResources(t *testing.T) {
}
tests := []struct {
name string
cpuReq *resource.Quantity
cpuLim *resource.Quantity
memLim *resource.Quantity
expected *runtimeapi.LinuxContainerResources
name string
cpuReq *resource.Quantity
cpuLim *resource.Quantity
memLim *resource.Quantity
expected *runtimeapi.LinuxContainerResources
cgroupVersion CgroupVersion
}{
{
name: "Request128MBLimit256MB",
@@ -261,6 +264,7 @@ func TestCalculateLinuxResources(t *testing.T) {
CpuShares: 1024,
MemoryLimitInBytes: 134217728,
},
cgroupVersion: cgroupV1,
},
{
name: "RequestNoMemory",
@@ -273,6 +277,7 @@ func TestCalculateLinuxResources(t *testing.T) {
CpuShares: 2048,
MemoryLimitInBytes: 0,
},
cgroupVersion: cgroupV1,
},
{
name: "RequestNilCPU",
@@ -284,6 +289,7 @@ func TestCalculateLinuxResources(t *testing.T) {
CpuShares: 2048,
MemoryLimitInBytes: 0,
},
cgroupVersion: cgroupV1,
},
{
name: "RequestZeroCPU",
@@ -296,9 +302,66 @@ func TestCalculateLinuxResources(t *testing.T) {
CpuShares: 2,
MemoryLimitInBytes: 0,
},
cgroupVersion: cgroupV1,
},
{
name: "Request128MBLimit256MB",
cpuReq: generateResourceQuantity("1"),
cpuLim: generateResourceQuantity("2"),
memLim: generateResourceQuantity("128Mi"),
expected: &runtimeapi.LinuxContainerResources{
CpuPeriod: 100000,
CpuQuota: 200000,
CpuShares: 1024,
MemoryLimitInBytes: 134217728,
Unified: map[string]string{"memory.oom.group": "1"},
},
cgroupVersion: cgroupV2,
},
{
name: "RequestNoMemory",
cpuReq: generateResourceQuantity("2"),
cpuLim: generateResourceQuantity("8"),
memLim: generateResourceQuantity("0"),
expected: &runtimeapi.LinuxContainerResources{
CpuPeriod: 100000,
CpuQuota: 800000,
CpuShares: 2048,
MemoryLimitInBytes: 0,
Unified: map[string]string{"memory.oom.group": "1"},
},
cgroupVersion: cgroupV2,
},
{
name: "RequestNilCPU",
cpuLim: generateResourceQuantity("2"),
memLim: generateResourceQuantity("0"),
expected: &runtimeapi.LinuxContainerResources{
CpuPeriod: 100000,
CpuQuota: 200000,
CpuShares: 2048,
MemoryLimitInBytes: 0,
Unified: map[string]string{"memory.oom.group": "1"},
},
cgroupVersion: cgroupV2,
},
{
name: "RequestZeroCPU",
cpuReq: generateResourceQuantity("0"),
cpuLim: generateResourceQuantity("2"),
memLim: generateResourceQuantity("0"),
expected: &runtimeapi.LinuxContainerResources{
CpuPeriod: 100000,
CpuQuota: 200000,
CpuShares: 2,
MemoryLimitInBytes: 0,
Unified: map[string]string{"memory.oom.group": "1"},
},
cgroupVersion: cgroupV2,
},
}
for _, test := range tests {
setCgroupVersionDuringTest(test.cgroupVersion)
linuxContainerResources := m.calculateLinuxResources(test.cpuReq, test.cpuLim, test.memLim)
assert.Equal(t, test.expected, linuxContainerResources)
}
@@ -634,96 +697,6 @@ func TestGenerateLinuxContainerConfigNamespaces(t *testing.T) {
}
}
func TestGenerateLinuxContainerConfigSwap(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, true)()
_, _, m, err := createTestRuntimeManager()
if err != nil {
t.Fatalf("error creating test RuntimeManager: %v", err)
}
m.machineInfo.MemoryCapacity = 1000000
containerName := "test"
for _, tc := range []struct {
name string
swapSetting string
pod *v1.Pod
expected int64
}{
{
name: "config unset, memory limit set",
// no swap setting
pod: &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{{
Name: containerName,
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
"memory": resource.MustParse("1000"),
},
Requests: v1.ResourceList{
"memory": resource.MustParse("1000"),
},
},
}},
},
},
expected: 1000,
},
{
name: "config unset, no memory limit",
// no swap setting
pod: &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{Name: containerName},
},
},
},
expected: 0,
},
{
// Note: behaviour will be the same as previous two cases
name: "config set to LimitedSwap, memory limit set",
swapSetting: kubelettypes.LimitedSwap,
pod: &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{{
Name: containerName,
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
"memory": resource.MustParse("1000"),
},
Requests: v1.ResourceList{
"memory": resource.MustParse("1000"),
},
},
}},
},
},
expected: 1000,
},
{
name: "UnlimitedSwap enabled",
swapSetting: kubelettypes.UnlimitedSwap,
pod: &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{Name: containerName},
},
},
},
expected: -1,
},
} {
t.Run(tc.name, func(t *testing.T) {
m.memorySwapBehavior = tc.swapSetting
actual, err := m.generateLinuxContainerConfig(&tc.pod.Spec.Containers[0], tc.pod, nil, "", nil, false)
assert.NoError(t, err)
assert.Equal(t, tc.expected, actual.Resources.MemorySwapLimitInBytes, "memory swap config for %s", tc.name)
})
}
}
func TestGenerateLinuxContainerResources(t *testing.T) {
_, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
@@ -875,6 +848,10 @@ func TestGenerateLinuxContainerResources(t *testing.T) {
if tc.scalingFg {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
}
setCgroupVersionDuringTest(cgroupV1)
tc.expected.MemorySwapLimitInBytes = tc.expected.MemoryLimitInBytes
pod.Spec.Containers[0].Resources = v1.ResourceRequirements{Limits: tc.limits, Requests: tc.requests}
if len(tc.cStatus) > 0 {
pod.Status.ContainerStatuses = tc.cStatus
@@ -888,3 +865,289 @@ func TestGenerateLinuxContainerResources(t *testing.T) {
}
//TODO(vinaykul,InPlacePodVerticalScaling): Add unit tests for cgroup v1 & v2
}
func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
_, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
m.machineInfo.MemoryCapacity = 42949672960 // 40 GiB == 40 * 1024^3 bytes
m.machineInfo.SwapCapacity = 5368709120 // 5 GiB == 5 * 1024^3 bytes
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "foo",
Namespace: "bar",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "c1",
},
{
Name: "c2",
},
},
},
Status: v1.PodStatus{},
}
expectNoSwap := func(cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) {
const msg = "container is expected to not have swap access"
for _, r := range resources {
switch cgroupVersion {
case cgroupV1:
assert.Equal(t, r.MemoryLimitInBytes, r.MemorySwapLimitInBytes, msg)
case cgroupV2:
assert.Equal(t, "0", r.Unified[cm.Cgroup2MaxSwapFilename], msg)
}
}
}
expectUnlimitedSwap := func(cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) {
const msg = "container is expected to have unlimited swap access"
for _, r := range resources {
switch cgroupVersion {
case cgroupV1:
assert.Equal(t, int64(-1), r.MemorySwapLimitInBytes, msg)
case cgroupV2:
assert.Equal(t, "max", r.Unified[cm.Cgroup2MaxSwapFilename], msg)
}
}
}
expectSwap := func(cgroupVersion CgroupVersion, swapBytesExpected int64, resources *runtimeapi.LinuxContainerResources) {
msg := fmt.Sprintf("container swap is expected to be limited by %d bytes", swapBytesExpected)
switch cgroupVersion {
case cgroupV1:
assert.Equal(t, resources.MemoryLimitInBytes+swapBytesExpected, resources.MemorySwapLimitInBytes, msg)
case cgroupV2:
assert.Equal(t, fmt.Sprintf("%d", swapBytesExpected), resources.Unified[cm.Cgroup2MaxSwapFilename], msg)
}
}
calcSwapForBurstablePods := func(containerMemoryRequest int64) int64 {
swapSize, err := calcSwapForBurstablePods(containerMemoryRequest, int64(m.machineInfo.MemoryCapacity), int64(m.machineInfo.SwapCapacity))
assert.NoError(t, err)
return swapSize
}
for _, tc := range []struct {
name string
cgroupVersion CgroupVersion
qosClass v1.PodQOSClass
nodeSwapFeatureGateEnabled bool
swapBehavior string
addContainerWithoutRequests bool
addGuaranteedContainer bool
}{
// With cgroup v1
{
name: "cgroups v1, LimitedSwap, Burstable QoS",
cgroupVersion: cgroupV1,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
},
{
name: "cgroups v1, UnlimitedSwap, Burstable QoS",
cgroupVersion: cgroupV1,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
},
{
name: "cgroups v1, LimitedSwap, Best-effort QoS",
cgroupVersion: cgroupV1,
qosClass: v1.PodQOSBestEffort,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
},
// With feature gate turned off
{
name: "NodeSwap feature gate turned off, cgroups v2, LimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: false,
swapBehavior: types.LimitedSwap,
},
{
name: "NodeSwap feature gate turned off, cgroups v2, UnlimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: false,
swapBehavior: types.UnlimitedSwap,
},
// With no swapBehavior, UnlimitedSwap should be the default
{
name: "With no swapBehavior - UnlimitedSwap should be the default",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBestEffort,
nodeSwapFeatureGateEnabled: true,
swapBehavior: "",
},
// With Guaranteed and Best-effort QoS
{
name: "Best-effort Qos, cgroups v2, LimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBestEffort,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
},
{
name: "Best-effort Qos, cgroups v2, UnlimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBestEffort,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
},
{
name: "Guaranteed Qos, cgroups v2, LimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSGuaranteed,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
},
{
name: "Guaranteed Qos, cgroups v2, UnlimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSGuaranteed,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
},
// With a "guaranteed" container (when memory requests equal to limits)
{
name: "Burstable Qos, cgroups v2, LimitedSwap, with a guaranteed container",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
addContainerWithoutRequests: false,
addGuaranteedContainer: true,
},
{
name: "Burstable Qos, cgroups v2, UnlimitedSwap, with a guaranteed container",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
addContainerWithoutRequests: false,
addGuaranteedContainer: true,
},
// Swap is expected to be allocated
{
name: "Burstable Qos, cgroups v2, LimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
addContainerWithoutRequests: false,
addGuaranteedContainer: false,
},
{
name: "Burstable Qos, cgroups v2, UnlimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
addContainerWithoutRequests: false,
addGuaranteedContainer: false,
},
{
name: "Burstable Qos, cgroups v2, LimitedSwap, with a container with no requests",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
addContainerWithoutRequests: true,
addGuaranteedContainer: false,
},
{
name: "Burstable Qos, cgroups v2, UnlimitedSwap, with a container with no requests",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
addContainerWithoutRequests: true,
addGuaranteedContainer: false,
},
} {
t.Run(tc.name, func(t *testing.T) {
setCgroupVersionDuringTest(tc.cgroupVersion)
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, tc.nodeSwapFeatureGateEnabled)()
m.memorySwapBehavior = tc.swapBehavior
var resourceReqsC1, resourceReqsC2 v1.ResourceRequirements
switch tc.qosClass {
case v1.PodQOSBurstable:
resourceReqsC1 = v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")},
}
if !tc.addContainerWithoutRequests {
resourceReqsC2 = v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi")},
}
if tc.addGuaranteedContainer {
resourceReqsC2.Limits = v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi")}
}
}
case v1.PodQOSGuaranteed:
resourceReqsC1 = v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi"), v1.ResourceCPU: resource.MustParse("1")},
Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi"), v1.ResourceCPU: resource.MustParse("1")},
}
resourceReqsC2 = v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi"), v1.ResourceCPU: resource.MustParse("1")},
Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi"), v1.ResourceCPU: resource.MustParse("1")},
}
}
pod.Spec.Containers[0].Resources = resourceReqsC1
pod.Spec.Containers[1].Resources = resourceReqsC2
resourcesC1 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[0], false)
resourcesC2 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[1], false)
if !tc.nodeSwapFeatureGateEnabled || tc.cgroupVersion == cgroupV1 || (tc.swapBehavior == types.LimitedSwap && tc.qosClass != v1.PodQOSBurstable) {
expectNoSwap(tc.cgroupVersion, resourcesC1, resourcesC2)
return
}
if tc.swapBehavior == types.UnlimitedSwap || tc.swapBehavior == "" {
expectUnlimitedSwap(tc.cgroupVersion, resourcesC1, resourcesC2)
return
}
c1ExpectedSwap := calcSwapForBurstablePods(resourceReqsC1.Requests.Memory().Value())
c2ExpectedSwap := int64(0)
if !tc.addContainerWithoutRequests && !tc.addGuaranteedContainer {
c2ExpectedSwap = calcSwapForBurstablePods(resourceReqsC2.Requests.Memory().Value())
}
expectSwap(tc.cgroupVersion, c1ExpectedSwap, resourcesC1)
expectSwap(tc.cgroupVersion, c2ExpectedSwap, resourcesC2)
})
}
}
type CgroupVersion string
const (
cgroupV1 CgroupVersion = "v1"
cgroupV2 CgroupVersion = "v2"
)
func setCgroupVersionDuringTest(version CgroupVersion) {
isCgroup2UnifiedMode = func() bool {
return version == cgroupV2
}
}
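One caveat, noted here as an observation rather than as part of the change: setCgroupVersionDuringTest swaps out the package-level isCgroup2UnifiedMode for the rest of the test binary and never restores the real implementation. A hypothetical defensive variant (assuming a *testing.T is threaded through) would capture the original and register a cleanup:

// Hypothetical variant: restore the real cgroup detection when the test ends.
func setCgroupVersionDuringTestWithCleanup(t *testing.T, version CgroupVersion) {
	original := isCgroup2UnifiedMode
	isCgroup2UnifiedMode = func() bool {
		return version == cgroupV2
	}
	t.Cleanup(func() { isCgroup2UnifiedMode = original })
}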

View File

@@ -38,6 +38,59 @@ func TestApplySandboxResources(t *testing.T) {
Linux: &runtimeapi.LinuxPodSandboxConfig{},
}
getPodWithOverhead := func() *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("128Mi"),
v1.ResourceCPU: resource.MustParse("2"),
},
Limits: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("256Mi"),
v1.ResourceCPU: resource.MustParse("4"),
},
},
},
},
Overhead: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("128Mi"),
v1.ResourceCPU: resource.MustParse("1"),
},
},
}
}
getPodWithoutOverhead := func() *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("128Mi"),
},
Limits: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("256Mi"),
},
},
},
},
},
}
}
require.NoError(t, err)
tests := []struct {
@@ -45,36 +98,11 @@ func TestApplySandboxResources(t *testing.T) {
pod *v1.Pod
expectedResource *runtimeapi.LinuxContainerResources
expectedOverhead *runtimeapi.LinuxContainerResources
cgroupVersion CgroupVersion
}{
{
description: "pod with overhead defined",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("128Mi"),
v1.ResourceCPU: resource.MustParse("2"),
},
Limits: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("256Mi"),
v1.ResourceCPU: resource.MustParse("4"),
},
},
},
},
Overhead: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("128Mi"),
v1.ResourceCPU: resource.MustParse("1"),
},
},
},
pod: getPodWithOverhead(),
expectedResource: &runtimeapi.LinuxContainerResources{
MemoryLimitInBytes: 268435456,
CpuPeriod: 100000,
@@ -87,30 +115,11 @@ func TestApplySandboxResources(t *testing.T) {
CpuQuota: 100000,
CpuShares: 1024,
},
cgroupVersion: cgroupV1,
},
{
description: "pod without overhead defined",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("128Mi"),
},
Limits: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("256Mi"),
},
},
},
},
},
},
pod: getPodWithoutOverhead(),
expectedResource: &runtimeapi.LinuxContainerResources{
MemoryLimitInBytes: 268435456,
CpuPeriod: 100000,
@@ -118,10 +127,45 @@ func TestApplySandboxResources(t *testing.T) {
CpuShares: 2,
},
expectedOverhead: &runtimeapi.LinuxContainerResources{},
cgroupVersion: cgroupV1,
},
{
description: "pod with overhead defined",
pod: getPodWithOverhead(),
expectedResource: &runtimeapi.LinuxContainerResources{
MemoryLimitInBytes: 268435456,
CpuPeriod: 100000,
CpuQuota: 400000,
CpuShares: 2048,
Unified: map[string]string{"memory.oom.group": "1"},
},
expectedOverhead: &runtimeapi.LinuxContainerResources{
MemoryLimitInBytes: 134217728,
CpuPeriod: 100000,
CpuQuota: 100000,
CpuShares: 1024,
Unified: map[string]string{"memory.oom.group": "1"},
},
cgroupVersion: cgroupV2,
},
{
description: "pod without overhead defined",
pod: getPodWithoutOverhead(),
expectedResource: &runtimeapi.LinuxContainerResources{
MemoryLimitInBytes: 268435456,
CpuPeriod: 100000,
CpuQuota: 0,
CpuShares: 2,
Unified: map[string]string{"memory.oom.group": "1"},
},
expectedOverhead: &runtimeapi.LinuxContainerResources{},
cgroupVersion: cgroupV2,
},
}
for i, test := range tests {
setCgroupVersionDuringTest(test.cgroupVersion)
m.applySandboxResources(test.pod, config)
assert.Equal(t, test.expectedResource, config.Linux.Resources, "TestCase[%d]: %s", i, test.description)
assert.Equal(t, test.expectedOverhead, config.Linux.Overhead, "TestCase[%d]: %s", i, test.description)