Merge pull request #124060 from iholder101/swap/tmpfs-noswap
[KEP-2400] Mount tmpfs memory-backed volumes with a noswap option if supported
This commit is contained in:
		| @@ -20,12 +20,10 @@ limitations under the License. | ||||
| package cm | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"path" | ||||
| 	"strings" | ||||
| 	"sync" | ||||
| 	"time" | ||||
|  | ||||
| @@ -59,11 +57,13 @@ import ( | ||||
| 	cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/config" | ||||
| 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/events" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/lifecycle" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/stats/pidlimit" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/status" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/userns/inuserns" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/util/swap" | ||||
| 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" | ||||
| 	"k8s.io/kubernetes/pkg/util/oom" | ||||
| ) | ||||
| @@ -204,25 +204,22 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I | ||||
| 		return nil, fmt.Errorf("failed to get mounted cgroup subsystems: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	if failSwapOn { | ||||
| 		// Check whether swap is enabled. The Kubelet does not support running with swap enabled. | ||||
| 		swapFile := "/proc/swaps" | ||||
| 		swapData, err := os.ReadFile(swapFile) | ||||
| 		if err != nil { | ||||
| 			if os.IsNotExist(err) { | ||||
| 				klog.InfoS("File does not exist, assuming that swap is disabled", "path", swapFile) | ||||
| 			} else { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 		} else { | ||||
| 			swapData = bytes.TrimSpace(swapData) // extra trailing \n | ||||
| 			swapLines := strings.Split(string(swapData), "\n") | ||||
| 	isSwapOn, err := swap.IsSwapOn() | ||||
| 	if err != nil { | ||||
| 		return nil, fmt.Errorf("failed to determine if swap is on: %w", err) | ||||
| 	} | ||||
|  | ||||
| 			// If there is more than one line (table headers) in /proc/swaps, swap is enabled and we should | ||||
| 			// error out unless --fail-swap-on is set to false. | ||||
| 			if len(swapLines) > 1 { | ||||
| 				return nil, fmt.Errorf("running with swap on is not supported, please disable swap! or set --fail-swap-on flag to false. /proc/swaps contained: %v", swapLines) | ||||
| 			} | ||||
| 	if isSwapOn { | ||||
| 		if failSwapOn { | ||||
| 			return nil, fmt.Errorf("running with swap on is not supported, please disable swap or set --fail-swap-on flag to false") | ||||
| 		} | ||||
|  | ||||
| 		if !swap.IsTmpfsNoswapOptionSupported(mountUtil, nodeConfig.KubeletRootDir) { | ||||
| 			nodeRef := nodeRefFromNode(string(nodeConfig.NodeName)) | ||||
| 			recorder.Event(nodeRef, v1.EventTypeWarning, events.PossibleMemoryBackedVolumesOnDisk, | ||||
| 				"The tmpfs noswap option is not supported. Memory-backed volumes (e.g. secrets, emptyDirs, etc.) "+ | ||||
| 					"might be swapped to disk and should no longer be considered secure.", | ||||
| 			) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
|   | ||||
| @@ -84,12 +84,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error { | ||||
| 	} | ||||
|  | ||||
| 	// Using ObjectReference for events as the node maybe not cached; refer to #42701 for detail. | ||||
| 	nodeRef := &v1.ObjectReference{ | ||||
| 		Kind:      "Node", | ||||
| 		Name:      cm.nodeInfo.Name, | ||||
| 		UID:       types.UID(cm.nodeInfo.Name), | ||||
| 		Namespace: "", | ||||
| 	} | ||||
| 	nodeRef := nodeRefFromNode(cm.nodeInfo.Name) | ||||
|  | ||||
| 	// If Node Allocatable is enforced on a node that has not been drained or is updated on an existing node to a lower value, | ||||
| 	// existing memory usage across pods might be higher than current Node Allocatable Memory Limits. | ||||
| @@ -265,3 +260,13 @@ func (cm *containerManagerImpl) validateNodeAllocatable() error { | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Using ObjectReference for events as the node maybe not cached; refer to #42701 for detail. | ||||
| func nodeRefFromNode(nodeName string) *v1.ObjectReference { | ||||
| 	return &v1.ObjectReference{ | ||||
| 		Kind:      "Node", | ||||
| 		Name:      nodeName, | ||||
| 		UID:       types.UID(nodeName), | ||||
| 		Namespace: "", | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -75,6 +75,7 @@ const ( | ||||
| 	FailedStatusPodSandBox               = "FailedPodSandBoxStatus" | ||||
| 	FailedMountOnFilesystemMismatch      = "FailedMountOnFilesystemMismatch" | ||||
| 	FailedPrepareDynamicResources        = "FailedPrepareDynamicResources" | ||||
| 	PossibleMemoryBackedVolumesOnDisk    = "PossibleMemoryBackedVolumesOnDisk" | ||||
| ) | ||||
|  | ||||
| // Image manager event reason list | ||||
|   | ||||
							
								
								
									
										136
									
								
								pkg/kubelet/util/swap/swap_util.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								pkg/kubelet/util/swap/swap_util.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,136 @@ | ||||
| /* | ||||
| Copyright 2024 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package swap | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"errors" | ||||
| 	"os" | ||||
| 	sysruntime "runtime" | ||||
| 	"strings" | ||||
| 	"sync" | ||||
|  | ||||
| 	"k8s.io/apimachinery/pkg/util/version" | ||||
| 	"k8s.io/klog/v2" | ||||
| 	utilkernel "k8s.io/kubernetes/pkg/util/kernel" | ||||
| 	"k8s.io/mount-utils" | ||||
| ) | ||||
|  | ||||
// Package-level caches for the one-time detections performed in this package.
// IsTmpfsNoswapOptionSupported stores its result in tmpfsNoswapOptionSupported
// under tmpfsNoswapOptionAvailabilityOnce; IsSwapOn stores its result in
// swapOn/swapOnErr under swapOnOnce.
var (
	tmpfsNoswapOptionSupported        bool
	tmpfsNoswapOptionAvailabilityOnce sync.Once
	swapOn                            bool
	swapOnErr                         error
	swapOnOnce                        sync.Once
)
|  | ||||
// TmpfsNoswapOption is the tmpfs mount option that IsTmpfsNoswapOptionSupported
// probes for, and that callers add to their tmpfs mount options when it is
// supported.
const TmpfsNoswapOption = "noswap"
|  | ||||
| func IsTmpfsNoswapOptionSupported(mounter mount.Interface, mountPath string) bool { | ||||
| 	isTmpfsNoswapOptionSupportedHelper := func() bool { | ||||
| 		if sysruntime.GOOS == "windows" { | ||||
| 			return false | ||||
| 		} | ||||
|  | ||||
| 		kernelVersion, err := utilkernel.GetVersion() | ||||
| 		if err != nil { | ||||
| 			klog.ErrorS(err, "cannot determine kernel version, unable to determine is tmpfs noswap is supported") | ||||
| 			return false | ||||
| 		} | ||||
|  | ||||
| 		if kernelVersion.AtLeast(version.MustParseGeneric(utilkernel.TmpfsNoswapSupportKernelVersion)) { | ||||
| 			return true | ||||
| 		} | ||||
|  | ||||
| 		if mountPath == "" { | ||||
| 			klog.ErrorS(errors.New("mount path is empty, falling back to /tmp"), "") | ||||
| 		} | ||||
|  | ||||
| 		mountPath, err = os.MkdirTemp(mountPath, "tmpfs-noswap-test-") | ||||
| 		if err != nil { | ||||
| 			klog.InfoS("error creating dir to test if tmpfs noswap is enabled. Assuming not supported", "mount path", mountPath, "error", err) | ||||
| 			return false | ||||
| 		} | ||||
|  | ||||
| 		defer func() { | ||||
| 			err = os.RemoveAll(mountPath) | ||||
| 			if err != nil { | ||||
| 				klog.ErrorS(err, "error removing test tmpfs dir", "mount path", mountPath) | ||||
| 			} | ||||
| 		}() | ||||
|  | ||||
| 		err = mounter.MountSensitiveWithoutSystemd("tmpfs", mountPath, "tmpfs", []string{TmpfsNoswapOption}, nil) | ||||
| 		if err != nil { | ||||
| 			klog.InfoS("error mounting tmpfs with the noswap option. Assuming not supported", "error", err) | ||||
| 			return false | ||||
| 		} | ||||
|  | ||||
| 		err = mounter.Unmount(mountPath) | ||||
| 		if err != nil { | ||||
| 			klog.ErrorS(err, "error unmounting test tmpfs dir", "mount path", mountPath) | ||||
| 		} | ||||
|  | ||||
| 		return true | ||||
| 	} | ||||
|  | ||||
| 	tmpfsNoswapOptionAvailabilityOnce.Do(func() { | ||||
| 		tmpfsNoswapOptionSupported = isTmpfsNoswapOptionSupportedHelper() | ||||
| 	}) | ||||
|  | ||||
| 	return tmpfsNoswapOptionSupported | ||||
| } | ||||
|  | ||||
| // gets /proc/swaps's content as an input, returns true if swap is enabled. | ||||
| func isSwapOnAccordingToProcSwaps(procSwapsContent []byte) bool { | ||||
| 	procSwapsContent = bytes.TrimSpace(procSwapsContent) // extra trailing \n | ||||
| 	procSwapsStr := string(procSwapsContent) | ||||
| 	procSwapsLines := strings.Split(procSwapsStr, "\n") | ||||
|  | ||||
| 	// If there is more than one line (table headers) in /proc/swaps then swap is enabled | ||||
| 	klog.InfoS("Swap is on", "/proc/swaps contents", procSwapsStr) | ||||
| 	return len(procSwapsLines) > 1 | ||||
| } | ||||
|  | ||||
| // IsSwapOn detects whether swap in enabled on the system by inspecting | ||||
| // /proc/swaps. If the file does not exist, an os.NotFound error will be returned. | ||||
| // If running on windows, swap is assumed to always be false. | ||||
| func IsSwapOn() (bool, error) { | ||||
| 	isSwapOnHelper := func() (bool, error) { | ||||
| 		if sysruntime.GOOS == "windows" { | ||||
| 			return false, nil | ||||
| 		} | ||||
|  | ||||
| 		const swapFilePath = "/proc/swaps" | ||||
| 		procSwapsContent, err := os.ReadFile(swapFilePath) | ||||
| 		if err != nil { | ||||
| 			if os.IsNotExist(err) { | ||||
| 				klog.InfoS("File does not exist, assuming that swap is disabled", "path", swapFilePath) | ||||
| 				return false, nil | ||||
| 			} | ||||
|  | ||||
| 			return false, err | ||||
| 		} | ||||
|  | ||||
| 		return isSwapOnAccordingToProcSwaps(procSwapsContent), nil | ||||
| 	} | ||||
|  | ||||
| 	swapOnOnce.Do(func() { | ||||
| 		swapOn, swapOnErr = isSwapOnHelper() | ||||
| 	}) | ||||
|  | ||||
| 	return swapOn, swapOnErr | ||||
| } | ||||
							
								
								
									
										66
									
								
								pkg/kubelet/util/swap/swap_util_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								pkg/kubelet/util/swap/swap_util_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,66 @@ | ||||
| /* | ||||
| Copyright 2024 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package swap | ||||
|  | ||||
| import "testing" | ||||
|  | ||||
| func TestIsSwapEnabled(t *testing.T) { | ||||
| 	testCases := []struct { | ||||
| 		name             string | ||||
| 		procSwapsContent string | ||||
| 		expectedEnabled  bool | ||||
| 	}{ | ||||
| 		{ | ||||
| 			name:             "empty", | ||||
| 			procSwapsContent: "", | ||||
| 			expectedEnabled:  false, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name: "with swap enabled, one partition", | ||||
| 			procSwapsContent: ` | ||||
| Filename				Type		Size		Used		Priority | ||||
| /dev/dm-1               partition	33554428	0		-2 | ||||
| `, | ||||
| 			expectedEnabled: true, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name: "with swap enabled, 2 partitions", | ||||
| 			procSwapsContent: ` | ||||
| Filename				Type		Size		Used		Priority | ||||
| /dev/dm-1               partition	33554428	0		-2 | ||||
| /dev/zram0              partition	8388604		0		100 | ||||
| `, | ||||
| 			expectedEnabled: true, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name: "empty lines", | ||||
| 			procSwapsContent: ` | ||||
|  | ||||
| `, | ||||
| 			expectedEnabled: false, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	for _, tc := range testCases { | ||||
| 		t.Run(tc.name, func(t *testing.T) { | ||||
| 			isEnabled := isSwapOnAccordingToProcSwaps([]byte(tc.procSwapsContent)) | ||||
| 			if isEnabled != tc.expectedEnabled { | ||||
| 				t.Errorf("expected %v, got %v", tc.expectedEnabled, isEnabled) | ||||
| 			} | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
| @@ -47,3 +47,5 @@ const IPVSConnReuseModeFixedKernelVersion = "5.9" | ||||
| // UserNamespacesSupportKernelVersion is the kernel version where idmap for tmpfs support was added | ||||
| // (ref: https://github.com/torvalds/linux/commit/05e6295f7b5e05f09e369a3eb2882ec5b40fff20) | ||||
| const UserNamespacesSupportKernelVersion = "6.3" | ||||
|  | ||||
// TmpfsNoswapSupportKernelVersion is the minimum kernel version at which the
// tmpfs noswap mount option is assumed to be supported without probing.
const TmpfsNoswapSupportKernelVersion = "6.4"
|   | ||||
| @@ -18,6 +18,7 @@ package emptydir | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/util/swap" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
|  | ||||
| @@ -327,11 +328,7 @@ func (ed *emptyDir) setupTmpfs(dir string) error { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	var options []string | ||||
| 	// Linux system default is 50% of capacity. | ||||
| 	if ed.sizeLimit != nil && ed.sizeLimit.Value() > 0 { | ||||
| 		options = []string{fmt.Sprintf("size=%d", ed.sizeLimit.Value())} | ||||
| 	} | ||||
| 	options := ed.generateTmpfsMountOptions(swap.IsTmpfsNoswapOptionSupported(ed.mounter, ed.plugin.host.GetPluginDir(emptyDirPluginName))) | ||||
|  | ||||
| 	klog.V(3).Infof("pod %v: mounting tmpfs for volume %v", ed.pod.UID, ed.volName) | ||||
| 	return ed.mounter.MountSensitiveWithoutSystemd("tmpfs", dir, "tmpfs", options, nil) | ||||
| @@ -555,3 +552,16 @@ func getVolumeSource(spec *volume.Spec) (*v1.EmptyDirVolumeSource, bool) { | ||||
|  | ||||
| 	return volumeSource, readOnly | ||||
| } | ||||
|  | ||||
| func (ed *emptyDir) generateTmpfsMountOptions(noswapSupported bool) (options []string) { | ||||
| 	// Linux system default is 50% of capacity. | ||||
| 	if ed.sizeLimit != nil && ed.sizeLimit.Value() > 0 { | ||||
| 		options = append(options, fmt.Sprintf("size=%d", ed.sizeLimit.Value())) | ||||
| 	} | ||||
|  | ||||
| 	if noswapSupported { | ||||
| 		options = append(options, swap.TmpfsNoswapOption) | ||||
| 	} | ||||
|  | ||||
| 	return options | ||||
| } | ||||
|   | ||||
| @@ -21,8 +21,10 @@ package emptydir | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/util/swap" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"strings" | ||||
| 	"testing" | ||||
|  | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| @@ -1061,3 +1063,57 @@ func TestCalculateEmptyDirMemorySize(t *testing.T) { | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestTmpfsMountOptions(t *testing.T) { | ||||
| 	subQuantity := resource.MustParse("123Ki") | ||||
|  | ||||
| 	doesStringArrayContainSubstring := func(strSlice []string, substr string) bool { | ||||
| 		for _, s := range strSlice { | ||||
| 			if strings.Contains(s, substr) { | ||||
| 				return true | ||||
| 			} | ||||
| 		} | ||||
| 		return false | ||||
| 	} | ||||
|  | ||||
| 	testCases := map[string]struct { | ||||
| 		tmpfsNoswapSupported bool | ||||
| 		sizeLimit            resource.Quantity | ||||
| 	}{ | ||||
| 		"default bahavior": {}, | ||||
| 		"tmpfs noswap is supported": { | ||||
| 			tmpfsNoswapSupported: true, | ||||
| 		}, | ||||
| 		"size limit is non-zero": { | ||||
| 			sizeLimit: subQuantity, | ||||
| 		}, | ||||
| 		"tmpfs noswap is supported and size limit is non-zero": { | ||||
| 			tmpfsNoswapSupported: true, | ||||
| 			sizeLimit:            subQuantity, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	for testCaseName, testCase := range testCases { | ||||
| 		t.Run(testCaseName, func(t *testing.T) { | ||||
| 			emptyDirObj := emptyDir{ | ||||
| 				sizeLimit: &testCase.sizeLimit, | ||||
| 			} | ||||
|  | ||||
| 			options := emptyDirObj.generateTmpfsMountOptions(testCase.tmpfsNoswapSupported) | ||||
|  | ||||
| 			if testCase.tmpfsNoswapSupported && !doesStringArrayContainSubstring(options, swap.TmpfsNoswapOption) { | ||||
| 				t.Errorf("tmpfs noswap option is expected when supported. options: %v", options) | ||||
| 			} | ||||
| 			if !testCase.tmpfsNoswapSupported && doesStringArrayContainSubstring(options, swap.TmpfsNoswapOption) { | ||||
| 				t.Errorf("tmpfs noswap option is not expected when unsupported. options: %v", options) | ||||
| 			} | ||||
|  | ||||
| 			if testCase.sizeLimit.IsZero() && doesStringArrayContainSubstring(options, "size=") { | ||||
| 				t.Errorf("size is not expected when is zero. options: %v", options) | ||||
| 			} | ||||
| 			if expectedOption := fmt.Sprintf("size=%d", testCase.sizeLimit.Value()); !testCase.sizeLimit.IsZero() && !doesStringArrayContainSubstring(options, expectedOption) { | ||||
| 				t.Errorf("size option is not expected when is zero. options: %v", options) | ||||
| 			} | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Prow Robot
					Kubernetes Prow Robot