Merge pull request #125496 from harche/cgroup_imp
KEP-4569: Separate cgroup v1 and v2 manager implementations
This commit is contained in:
		| @@ -17,7 +17,6 @@ limitations under the License. | ||||
| package cm | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"path" | ||||
| @@ -32,13 +31,11 @@ import ( | ||||
| 	"github.com/opencontainers/runc/libcontainer/cgroups/manager" | ||||
| 	cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd" | ||||
| 	libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs" | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/klog/v2" | ||||
| 	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" | ||||
|  | ||||
| 	utilruntime "k8s.io/apimachinery/pkg/util/runtime" | ||||
| 	"k8s.io/apimachinery/pkg/util/sets" | ||||
| 	cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/metrics" | ||||
| ) | ||||
|  | ||||
| @@ -139,11 +136,11 @@ type CgroupSubsystems struct { | ||||
| 	MountPoints map[string]string | ||||
| } | ||||
|  | ||||
| // cgroupManagerImpl implements the CgroupManager interface. | ||||
| // Its a stateless object which can be used to | ||||
| // update,create or delete any number of cgroups | ||||
| // It relies on runc/libcontainer cgroup managers. | ||||
| type cgroupManagerImpl struct { | ||||
| // cgroupCommon implements common tasks | ||||
| // that are valid for both cgroup v1 and v2. | ||||
| // This prevents duplicating the code between | ||||
| // v1 and v2 specific implementations. | ||||
| type cgroupCommon struct { | ||||
| 	// subsystems holds information about all the | ||||
| 	// mounted cgroup subsystems on the node | ||||
| 	subsystems *CgroupSubsystems | ||||
| @@ -152,12 +149,20 @@ type cgroupManagerImpl struct { | ||||
| 	useSystemd bool | ||||
| } | ||||
|  | ||||
| // Make sure that cgroupManagerImpl implements the CgroupManager interface | ||||
| var _ CgroupManager = &cgroupManagerImpl{} | ||||
| // Make sure that cgroupV1impl and cgroupV2impl implement the CgroupManager interface | ||||
| var _ CgroupManager = &cgroupV1impl{} | ||||
| var _ CgroupManager = &cgroupV2impl{} | ||||
|  | ||||
| // NewCgroupManager is a factory method that returns a CgroupManager | ||||
| func NewCgroupManager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager { | ||||
| 	return &cgroupManagerImpl{ | ||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { | ||||
| 		return NewCgroupV2Manager(cs, cgroupDriver) | ||||
| 	} | ||||
| 	return NewCgroupV1Manager(cs, cgroupDriver) | ||||
| } | ||||
|  | ||||
| func newCgroupCommon(cs *CgroupSubsystems, cgroupDriver string) cgroupCommon { | ||||
| 	return cgroupCommon{ | ||||
| 		subsystems: cs, | ||||
| 		useSystemd: cgroupDriver == "systemd", | ||||
| 	} | ||||
| @@ -165,7 +170,7 @@ func NewCgroupManager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager { | ||||
|  | ||||
| // Name converts the cgroup to the driver specific value in cgroupfs form. | ||||
| // This always returns a valid cgroupfs path even when systemd driver is in use! | ||||
| func (m *cgroupManagerImpl) Name(name CgroupName) string { | ||||
| func (m *cgroupCommon) Name(name CgroupName) string { | ||||
| 	if m.useSystemd { | ||||
| 		return name.ToSystemd() | ||||
| 	} | ||||
| @@ -173,7 +178,7 @@ func (m *cgroupManagerImpl) Name(name CgroupName) string { | ||||
| } | ||||
|  | ||||
| // CgroupName converts the literal cgroupfs name on the host to an internal identifier. | ||||
| func (m *cgroupManagerImpl) CgroupName(name string) CgroupName { | ||||
| func (m *cgroupCommon) CgroupName(name string) CgroupName { | ||||
| 	if m.useSystemd { | ||||
| 		return ParseSystemdToCgroupName(name) | ||||
| 	} | ||||
| @@ -181,7 +186,7 @@ func (m *cgroupManagerImpl) CgroupName(name string) CgroupName { | ||||
| } | ||||
|  | ||||
| // buildCgroupPaths builds a path to each cgroup subsystem for the specified name. | ||||
| func (m *cgroupManagerImpl) buildCgroupPaths(name CgroupName) map[string]string { | ||||
| func (m *cgroupCommon) buildCgroupPaths(name CgroupName) map[string]string { | ||||
| 	cgroupFsAdaptedName := m.Name(name) | ||||
| 	cgroupPaths := make(map[string]string, len(m.subsystems.MountPoints)) | ||||
| 	for key, val := range m.subsystems.MountPoints { | ||||
| @@ -190,14 +195,8 @@ func (m *cgroupManagerImpl) buildCgroupPaths(name CgroupName) map[string]string | ||||
| 	return cgroupPaths | ||||
| } | ||||
|  | ||||
| // buildCgroupUnifiedPath builds a path to the specified name. | ||||
| func (m *cgroupManagerImpl) buildCgroupUnifiedPath(name CgroupName) string { | ||||
| 	cgroupFsAdaptedName := m.Name(name) | ||||
| 	return path.Join(cmutil.CgroupRoot, cgroupFsAdaptedName) | ||||
| } | ||||
|  | ||||
| // libctCgroupConfig converts CgroupConfig to libcontainer's Cgroup config. | ||||
| func (m *cgroupManagerImpl) libctCgroupConfig(in *CgroupConfig, needResources bool) *libcontainerconfigs.Cgroup { | ||||
| func (m *cgroupCommon) libctCgroupConfig(in *CgroupConfig, needResources bool) *libcontainerconfigs.Cgroup { | ||||
| 	config := &libcontainerconfigs.Cgroup{ | ||||
| 		Systemd: m.useSystemd, | ||||
| 	} | ||||
| @@ -235,62 +234,8 @@ func (m *cgroupManagerImpl) libctCgroupConfig(in *CgroupConfig, needResources bo | ||||
| 	return config | ||||
| } | ||||
|  | ||||
| // Validate checks if all subsystem cgroups already exist | ||||
| func (m *cgroupManagerImpl) Validate(name CgroupName) error { | ||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { | ||||
| 		cgroupPath := m.buildCgroupUnifiedPath(name) | ||||
| 		neededControllers := getSupportedUnifiedControllers() | ||||
| 		enabledControllers, err := readUnifiedControllers(cgroupPath) | ||||
| 		if err != nil { | ||||
| 			return fmt.Errorf("could not read controllers for cgroup %q: %w", name, err) | ||||
| 		} | ||||
| 		difference := neededControllers.Difference(enabledControllers) | ||||
| 		if difference.Len() > 0 { | ||||
| 			return fmt.Errorf("cgroup %q has some missing controllers: %v", name, strings.Join(sets.List(difference), ", ")) | ||||
| 		} | ||||
| 		return nil // valid V2 cgroup | ||||
| 	} | ||||
|  | ||||
| 	// Get map of all cgroup paths on the system for the particular cgroup | ||||
| 	cgroupPaths := m.buildCgroupPaths(name) | ||||
|  | ||||
| 	// the presence of alternative control groups not known to runc confuses | ||||
| 	// the kubelet existence checks. | ||||
| 	// ideally, we would have a mechanism in runc to support Exists() logic | ||||
| 	// scoped to the set control groups it understands.  this is being discussed | ||||
| 	// in https://github.com/opencontainers/runc/issues/1440 | ||||
| 	// once resolved, we can remove this code. | ||||
| 	allowlistControllers := sets.New[string]("cpu", "cpuacct", "cpuset", "memory", "systemd", "pids") | ||||
|  | ||||
| 	if _, ok := m.subsystems.MountPoints["hugetlb"]; ok { | ||||
| 		allowlistControllers.Insert("hugetlb") | ||||
| 	} | ||||
| 	var missingPaths []string | ||||
| 	// If even one cgroup path doesn't exist, then the cgroup doesn't exist. | ||||
| 	for controller, path := range cgroupPaths { | ||||
| 		// ignore mounts we don't care about | ||||
| 		if !allowlistControllers.Has(controller) { | ||||
| 			continue | ||||
| 		} | ||||
| 		if !libcontainercgroups.PathExists(path) { | ||||
| 			missingPaths = append(missingPaths, path) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if len(missingPaths) > 0 { | ||||
| 		return fmt.Errorf("cgroup %q has some missing paths: %v", name, strings.Join(missingPaths, ", ")) | ||||
| 	} | ||||
|  | ||||
| 	return nil // valid V1 cgroup | ||||
| } | ||||
|  | ||||
| // Exists checks if all subsystem cgroups already exist | ||||
| func (m *cgroupManagerImpl) Exists(name CgroupName) bool { | ||||
| 	return m.Validate(name) == nil | ||||
| } | ||||
|  | ||||
| // Destroy destroys the specified cgroup | ||||
| func (m *cgroupManagerImpl) Destroy(cgroupConfig *CgroupConfig) error { | ||||
| func (m *cgroupCommon) Destroy(cgroupConfig *CgroupConfig) error { | ||||
| 	start := time.Now() | ||||
| 	defer func() { | ||||
| 		metrics.CgroupManagerDuration.WithLabelValues("destroy").Observe(metrics.SinceInSeconds(start)) | ||||
| @@ -321,38 +266,12 @@ func getCPUWeight(cpuShares *uint64) uint64 { | ||||
| 	return 1 + ((*cpuShares-2)*9999)/262142 | ||||
| } | ||||
|  | ||||
| // readUnifiedControllers reads the controllers available at the specified cgroup | ||||
| func readUnifiedControllers(path string) (sets.Set[string], error) { | ||||
| 	controllersFileContent, err := os.ReadFile(filepath.Join(path, "cgroup.controllers")) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	controllers := strings.Fields(string(controllersFileContent)) | ||||
| 	return sets.New(controllers...), nil | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	availableRootControllersOnce sync.Once | ||||
| 	availableRootControllers     sets.Set[string] | ||||
| ) | ||||
|  | ||||
| // getSupportedUnifiedControllers returns a set of supported controllers when running on cgroup v2 | ||||
| func getSupportedUnifiedControllers() sets.Set[string] { | ||||
| 	// This is the set of controllers used by the Kubelet | ||||
| 	supportedControllers := sets.New("cpu", "cpuset", "memory", "hugetlb", "pids") | ||||
| 	// Memoize the set of controllers that are present in the root cgroup | ||||
| 	availableRootControllersOnce.Do(func() { | ||||
| 		var err error | ||||
| 		availableRootControllers, err = readUnifiedControllers(cmutil.CgroupRoot) | ||||
| 		if err != nil { | ||||
| 			panic(fmt.Errorf("cannot read cgroup controllers at %s", cmutil.CgroupRoot)) | ||||
| 		} | ||||
| 	}) | ||||
| 	// Return the set of controllers that are supported both by the Kubelet and by the kernel | ||||
| 	return supportedControllers.Intersection(availableRootControllers) | ||||
| } | ||||
|  | ||||
| func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources { | ||||
| func (m *cgroupCommon) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources { | ||||
| 	resources := &libcontainerconfigs.Resources{ | ||||
| 		SkipDevices:     true, | ||||
| 		SkipFreezeOnSet: true, | ||||
| @@ -394,7 +313,7 @@ func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcont | ||||
| 	return resources | ||||
| } | ||||
|  | ||||
| func (m *cgroupManagerImpl) maybeSetHugetlb(resourceConfig *ResourceConfig, resources *libcontainerconfigs.Resources) { | ||||
| func (m *cgroupCommon) maybeSetHugetlb(resourceConfig *ResourceConfig, resources *libcontainerconfigs.Resources) { | ||||
| 	// Check if hugetlb is supported. | ||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { | ||||
| 		if !getSupportedUnifiedControllers().Has("hugetlb") { | ||||
| @@ -433,7 +352,7 @@ func (m *cgroupManagerImpl) maybeSetHugetlb(resourceConfig *ResourceConfig, reso | ||||
| } | ||||
|  | ||||
| // Update updates the cgroup with the specified Cgroup Configuration | ||||
| func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error { | ||||
| func (m *cgroupCommon) Update(cgroupConfig *CgroupConfig) error { | ||||
| 	start := time.Now() | ||||
| 	defer func() { | ||||
| 		metrics.CgroupManagerDuration.WithLabelValues("update").Observe(metrics.SinceInSeconds(start)) | ||||
| @@ -448,7 +367,7 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error { | ||||
| } | ||||
|  | ||||
| // Create creates the specified cgroup | ||||
| func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error { | ||||
| func (m *cgroupCommon) Create(cgroupConfig *CgroupConfig) error { | ||||
| 	start := time.Now() | ||||
| 	defer func() { | ||||
| 		metrics.CgroupManagerDuration.WithLabelValues("create").Observe(metrics.SinceInSeconds(start)) | ||||
| @@ -480,7 +399,7 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error { | ||||
| } | ||||
|  | ||||
| // Scans through all subsystems to find pids associated with specified cgroup. | ||||
| func (m *cgroupManagerImpl) Pids(name CgroupName) []int { | ||||
| func (m *cgroupCommon) Pids(name CgroupName) []int { | ||||
| 	// we need the driver specific name | ||||
| 	cgroupFsName := m.Name(name) | ||||
|  | ||||
| @@ -530,7 +449,7 @@ func (m *cgroupManagerImpl) Pids(name CgroupName) []int { | ||||
| } | ||||
|  | ||||
| // ReduceCPULimits reduces the cgroup's cpu shares to the lowest possible value | ||||
| func (m *cgroupManagerImpl) ReduceCPULimits(cgroupName CgroupName) error { | ||||
| func (m *cgroupCommon) ReduceCPULimits(cgroupName CgroupName) error { | ||||
| 	// Set lowest possible CpuShares value for the cgroup | ||||
| 	minimumCPUShares := uint64(MinShares) | ||||
| 	resources := &ResourceConfig{ | ||||
| @@ -543,100 +462,7 @@ func (m *cgroupManagerImpl) ReduceCPULimits(cgroupName CgroupName) error { | ||||
| 	return m.Update(containerConfig) | ||||
| } | ||||
|  | ||||
| // MemoryUsage returns the current memory usage of the specified cgroup, | ||||
| // as read from cgroupfs. | ||||
| func (m *cgroupManagerImpl) MemoryUsage(name CgroupName) (int64, error) { | ||||
| 	var path, file string | ||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { | ||||
| 		path = m.buildCgroupUnifiedPath(name) | ||||
| 		file = "memory.current" | ||||
| 	} else { | ||||
| 		mp, ok := m.subsystems.MountPoints["memory"] | ||||
| 		if !ok { // should not happen | ||||
| 			return -1, errors.New("no cgroup v1 mountpoint for memory controller found") | ||||
| 		} | ||||
| 		path = mp + "/" + m.Name(name) | ||||
| 		file = "memory.usage_in_bytes" | ||||
| 	} | ||||
| 	val, err := fscommon.GetCgroupParamUint(path, file) | ||||
| 	return int64(val), err | ||||
| } | ||||
|  | ||||
| // Convert cgroup v1 cpu.shares value to cgroup v2 cpu.weight | ||||
| // https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2 | ||||
| func CpuSharesToCpuWeight(cpuShares uint64) uint64 { | ||||
| 	return uint64((((cpuShares - 2) * 9999) / 262142) + 1) | ||||
| } | ||||
|  | ||||
| // Convert cgroup v2 cpu.weight value to cgroup v1 cpu.shares | ||||
| // https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2 | ||||
| func CpuWeightToCpuShares(cpuWeight uint64) uint64 { | ||||
| 	return uint64((((cpuWeight - 1) * 262142) / 9999) + 2) | ||||
| } | ||||
|  | ||||
| func getCgroupv1CpuConfig(cgroupPath string) (*ResourceConfig, error) { | ||||
| 	cpuQuotaStr, errQ := fscommon.GetCgroupParamString(cgroupPath, "cpu.cfs_quota_us") | ||||
| 	if errQ != nil { | ||||
| 		return nil, fmt.Errorf("failed to read CPU quota for cgroup %v: %v", cgroupPath, errQ) | ||||
| 	} | ||||
| 	cpuQuota, errInt := strconv.ParseInt(cpuQuotaStr, 10, 64) | ||||
| 	if errInt != nil { | ||||
| 		return nil, fmt.Errorf("failed to convert CPU quota as integer for cgroup %v: %v", cgroupPath, errInt) | ||||
| 	} | ||||
| 	cpuPeriod, errP := fscommon.GetCgroupParamUint(cgroupPath, "cpu.cfs_period_us") | ||||
| 	if errP != nil { | ||||
| 		return nil, fmt.Errorf("failed to read CPU period for cgroup %v: %v", cgroupPath, errP) | ||||
| 	} | ||||
| 	cpuShares, errS := fscommon.GetCgroupParamUint(cgroupPath, "cpu.shares") | ||||
| 	if errS != nil { | ||||
| 		return nil, fmt.Errorf("failed to read CPU shares for cgroup %v: %v", cgroupPath, errS) | ||||
| 	} | ||||
| 	return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuQuota, CPUPeriod: &cpuPeriod}, nil | ||||
| } | ||||
|  | ||||
| func getCgroupv2CpuConfig(cgroupPath string) (*ResourceConfig, error) { | ||||
| 	var cpuLimitStr, cpuPeriodStr string | ||||
| 	cpuLimitAndPeriod, err := fscommon.GetCgroupParamString(cgroupPath, "cpu.max") | ||||
| 	if err != nil { | ||||
| 		return nil, fmt.Errorf("failed to read cpu.max file for cgroup %v: %v", cgroupPath, err) | ||||
| 	} | ||||
| 	numItems, errScan := fmt.Sscanf(cpuLimitAndPeriod, "%s %s", &cpuLimitStr, &cpuPeriodStr) | ||||
| 	if errScan != nil || numItems != 2 { | ||||
| 		return nil, fmt.Errorf("failed to correctly parse content of cpu.max file ('%s') for cgroup %v: %v", | ||||
| 			cpuLimitAndPeriod, cgroupPath, errScan) | ||||
| 	} | ||||
| 	cpuLimit := int64(-1) | ||||
| 	if cpuLimitStr != Cgroup2MaxCpuLimit { | ||||
| 		cpuLimit, err = strconv.ParseInt(cpuLimitStr, 10, 64) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("failed to convert CPU limit as integer for cgroup %v: %v", cgroupPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	cpuPeriod, errPeriod := strconv.ParseUint(cpuPeriodStr, 10, 64) | ||||
| 	if errPeriod != nil { | ||||
| 		return nil, fmt.Errorf("failed to convert CPU period as integer for cgroup %v: %v", cgroupPath, errPeriod) | ||||
| 	} | ||||
| 	cpuWeight, errWeight := fscommon.GetCgroupParamUint(cgroupPath, "cpu.weight") | ||||
| 	if errWeight != nil { | ||||
| 		return nil, fmt.Errorf("failed to read CPU weight for cgroup %v: %v", cgroupPath, errWeight) | ||||
| 	} | ||||
| 	cpuShares := CpuWeightToCpuShares(cpuWeight) | ||||
| 	return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuLimit, CPUPeriod: &cpuPeriod}, nil | ||||
| } | ||||
|  | ||||
| func getCgroupCpuConfig(cgroupPath string) (*ResourceConfig, error) { | ||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { | ||||
| 		return getCgroupv2CpuConfig(cgroupPath) | ||||
| 	} else { | ||||
| 		return getCgroupv1CpuConfig(cgroupPath) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func getCgroupMemoryConfig(cgroupPath string) (*ResourceConfig, error) { | ||||
| 	memLimitFile := "memory.limit_in_bytes" | ||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { | ||||
| 		memLimitFile = "memory.max" | ||||
| 	} | ||||
| func readCgroupMemoryConfig(cgroupPath string, memLimitFile string) (*ResourceConfig, error) { | ||||
| 	memLimit, err := fscommon.GetCgroupParamUint(cgroupPath, memLimitFile) | ||||
| 	if err != nil { | ||||
| 		return nil, fmt.Errorf("failed to read %s for cgroup %v: %v", memLimitFile, cgroupPath, err) | ||||
| @@ -647,103 +473,11 @@ func getCgroupMemoryConfig(cgroupPath string) (*ResourceConfig, error) { | ||||
|  | ||||
| } | ||||
|  | ||||
| // Get the resource config values applied to the cgroup for specified resource type | ||||
| func (m *cgroupManagerImpl) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) { | ||||
| 	cgroupPaths := m.buildCgroupPaths(name) | ||||
| 	cgroupResourcePath, found := cgroupPaths[string(resource)] | ||||
| 	if !found { | ||||
| 		return nil, fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) | ||||
| 	} | ||||
| 	switch resource { | ||||
| 	case v1.ResourceCPU: | ||||
| 		return getCgroupCpuConfig(cgroupResourcePath) | ||||
| 	case v1.ResourceMemory: | ||||
| 		return getCgroupMemoryConfig(cgroupResourcePath) | ||||
| 	} | ||||
| 	return nil, fmt.Errorf("unsupported resource %v for cgroup %v", resource, name) | ||||
| } | ||||
|  | ||||
| func setCgroupv1CpuConfig(cgroupPath string, resourceConfig *ResourceConfig) error { | ||||
| 	var cpuQuotaStr, cpuPeriodStr, cpuSharesStr string | ||||
| 	if resourceConfig.CPUQuota != nil { | ||||
| 		cpuQuotaStr = strconv.FormatInt(*resourceConfig.CPUQuota, 10) | ||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.cfs_quota_us"), []byte(cpuQuotaStr), 0700); err != nil { | ||||
| 			return fmt.Errorf("failed to write %v to %v: %v", cpuQuotaStr, cgroupPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	if resourceConfig.CPUPeriod != nil { | ||||
| 		cpuPeriodStr = strconv.FormatUint(*resourceConfig.CPUPeriod, 10) | ||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.cfs_period_us"), []byte(cpuPeriodStr), 0700); err != nil { | ||||
| 			return fmt.Errorf("failed to write %v to %v: %v", cpuPeriodStr, cgroupPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	if resourceConfig.CPUShares != nil { | ||||
| 		cpuSharesStr = strconv.FormatUint(*resourceConfig.CPUShares, 10) | ||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.shares"), []byte(cpuSharesStr), 0700); err != nil { | ||||
| 			return fmt.Errorf("failed to write %v to %v: %v", cpuSharesStr, cgroupPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func setCgroupv2CpuConfig(cgroupPath string, resourceConfig *ResourceConfig) error { | ||||
| 	if resourceConfig.CPUQuota != nil { | ||||
| 		if resourceConfig.CPUPeriod == nil { | ||||
| 			return fmt.Errorf("CpuPeriod must be specified in order to set CpuLimit") | ||||
| 		} | ||||
| 		cpuLimitStr := Cgroup2MaxCpuLimit | ||||
| 		if *resourceConfig.CPUQuota > -1 { | ||||
| 			cpuLimitStr = strconv.FormatInt(*resourceConfig.CPUQuota, 10) | ||||
| 		} | ||||
| 		cpuPeriodStr := strconv.FormatUint(*resourceConfig.CPUPeriod, 10) | ||||
| 		cpuMaxStr := fmt.Sprintf("%s %s", cpuLimitStr, cpuPeriodStr) | ||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.max"), []byte(cpuMaxStr), 0700); err != nil { | ||||
| 			return fmt.Errorf("failed to write %v to %v: %v", cpuMaxStr, cgroupPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	if resourceConfig.CPUShares != nil { | ||||
| 		cpuWeight := CpuSharesToCpuWeight(*resourceConfig.CPUShares) | ||||
| 		cpuWeightStr := strconv.FormatUint(cpuWeight, 10) | ||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.weight"), []byte(cpuWeightStr), 0700); err != nil { | ||||
| 			return fmt.Errorf("failed to write %v to %v: %v", cpuWeightStr, cgroupPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func setCgroupCpuConfig(cgroupPath string, resourceConfig *ResourceConfig) error { | ||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { | ||||
| 		return setCgroupv2CpuConfig(cgroupPath, resourceConfig) | ||||
| 	} else { | ||||
| 		return setCgroupv1CpuConfig(cgroupPath, resourceConfig) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func setCgroupMemoryConfig(cgroupPath string, resourceConfig *ResourceConfig) error { | ||||
| 	memLimitFile := "memory.limit_in_bytes" | ||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { | ||||
| 		memLimitFile = "memory.max" | ||||
| 	} | ||||
| func writeCgroupMemoryLimit(memoryLimitFileLocation string, resourceConfig *ResourceConfig) error { | ||||
| 	memLimit := strconv.FormatInt(*resourceConfig.Memory, 10) | ||||
| 	if err := os.WriteFile(filepath.Join(cgroupPath, memLimitFile), []byte(memLimit), 0700); err != nil { | ||||
| 		return fmt.Errorf("failed to write %v to %v/%v: %v", memLimit, cgroupPath, memLimitFile, err) | ||||
| 	if err := os.WriteFile(memoryLimitFileLocation, []byte(memLimit), 0700); err != nil { | ||||
| 		return fmt.Errorf("failed to write %v to %v: %w", memLimit, memoryLimitFileLocation, err) | ||||
| 	} | ||||
| 	//TODO(vinaykul,InPlacePodVerticalScaling): Add memory request support | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Set resource config for the specified resource type on the cgroup | ||||
| func (m *cgroupManagerImpl) SetCgroupConfig(name CgroupName, resource v1.ResourceName, resourceConfig *ResourceConfig) error { | ||||
| 	cgroupPaths := m.buildCgroupPaths(name) | ||||
| 	cgroupResourcePath, found := cgroupPaths[string(resource)] | ||||
| 	if !found { | ||||
| 		return fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) | ||||
| 	} | ||||
| 	switch resource { | ||||
| 	case v1.ResourceCPU: | ||||
| 		return setCgroupCpuConfig(cgroupResourcePath, resourceConfig) | ||||
| 	case v1.ResourceMemory: | ||||
| 		return setCgroupMemoryConfig(cgroupResourcePath, resourceConfig) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|   | ||||
| @@ -170,7 +170,7 @@ func TestParseSystemdToCgroupName(t *testing.T) { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestCpuSharesToCpuWeight(t *testing.T) { | ||||
| func TestCpuSharesToCPUWeight(t *testing.T) { | ||||
| 	testCases := []struct { | ||||
| 		cpuShares         uint64 | ||||
| 		expectedCpuWeight uint64 | ||||
| @@ -206,14 +206,14 @@ func TestCpuSharesToCpuWeight(t *testing.T) { | ||||
| 	} | ||||
|  | ||||
| 	for _, testCase := range testCases { | ||||
| 		if actual := CpuSharesToCpuWeight(testCase.cpuShares); actual != testCase.expectedCpuWeight { | ||||
| 		if actual := cpuSharesToCPUWeight(testCase.cpuShares); actual != testCase.expectedCpuWeight { | ||||
| 			t.Errorf("cpuShares: %v, expectedCpuWeight: %v, actualCpuWeight: %v", | ||||
| 				testCase.cpuShares, testCase.expectedCpuWeight, actual) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestCpuWeightToCpuShares(t *testing.T) { | ||||
| func TestCpuWeightToCPUShares(t *testing.T) { | ||||
| 	testCases := []struct { | ||||
| 		cpuWeight         uint64 | ||||
| 		expectedCpuShares uint64 | ||||
| @@ -245,7 +245,7 @@ func TestCpuWeightToCpuShares(t *testing.T) { | ||||
| 	} | ||||
|  | ||||
| 	for _, testCase := range testCases { | ||||
| 		if actual := CpuWeightToCpuShares(testCase.cpuWeight); actual != testCase.expectedCpuShares { | ||||
| 		if actual := cpuWeightToCPUShares(testCase.cpuWeight); actual != testCase.expectedCpuShares { | ||||
| 			t.Errorf("cpuWeight: %v, expectedCpuShares: %v, actualCpuShares: %v", | ||||
| 				testCase.cpuWeight, testCase.expectedCpuShares, actual) | ||||
| 		} | ||||
|   | ||||
							
								
								
									
										186
									
								
								pkg/kubelet/cm/cgroup_v1_manager_linux.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										186
									
								
								pkg/kubelet/cm/cgroup_v1_manager_linux.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,186 @@ | ||||
| /* | ||||
| Copyright 2024 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package cm | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
|  | ||||
| 	libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups" | ||||
| 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon" | ||||
|  | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/apimachinery/pkg/util/sets" | ||||
| ) | ||||
|  | ||||
| const cgroupv1MemLimitFile string = "memory.limit_in_bytes" | ||||
|  | ||||
| // cgroupV1impl implements the CgroupManager interface | ||||
| // for cgroup v1. | ||||
| // It's a stateless object which can be used to | ||||
| // update, create or delete any number of cgroups | ||||
| // It relies on runc/libcontainer cgroup managers. | ||||
| type cgroupV1impl struct { | ||||
| 	cgroupCommon | ||||
| } | ||||
|  | ||||
| func NewCgroupV1Manager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager { | ||||
| 	return &cgroupV1impl{ | ||||
| 		cgroupCommon: newCgroupCommon(cs, cgroupDriver), | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Validate checks if all subsystem cgroups are valid | ||||
| func (c *cgroupV1impl) Validate(name CgroupName) error { | ||||
| 	// Get map of all cgroup paths on the system for the particular cgroup | ||||
| 	cgroupPaths := c.buildCgroupPaths(name) | ||||
|  | ||||
| 	// the presence of alternative control groups not known to runc confuses | ||||
| 	// the kubelet existence checks. | ||||
| 	// ideally, we would have a mechanism in runc to support Exists() logic | ||||
| 	// scoped to the set control groups it understands.  this is being discussed | ||||
| 	// in https://github.com/opencontainers/runc/issues/1440 | ||||
| 	// once resolved, we can remove this code. | ||||
| 	allowlistControllers := sets.New[string]("cpu", "cpuacct", "cpuset", "memory", "systemd", "pids") | ||||
|  | ||||
| 	if _, ok := c.subsystems.MountPoints["hugetlb"]; ok { | ||||
| 		allowlistControllers.Insert("hugetlb") | ||||
| 	} | ||||
| 	var missingPaths []string | ||||
| 	// If even one cgroup path doesn't exist, then the cgroup doesn't exist. | ||||
| 	for controller, path := range cgroupPaths { | ||||
| 		// ignore mounts we don't care about | ||||
| 		if !allowlistControllers.Has(controller) { | ||||
| 			continue | ||||
| 		} | ||||
| 		if !libcontainercgroups.PathExists(path) { | ||||
| 			missingPaths = append(missingPaths, path) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if len(missingPaths) > 0 { | ||||
| 		return fmt.Errorf("cgroup %q has some missing paths: %v", name, strings.Join(missingPaths, ", ")) | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Exists checks if all subsystem cgroups already exist | ||||
| func (c *cgroupV1impl) Exists(name CgroupName) bool { | ||||
| 	return c.Validate(name) == nil | ||||
| } | ||||
|  | ||||
| // MemoryUsage returns the current memory usage of the specified cgroup, | ||||
| // as read from cgroupfs. | ||||
| func (c *cgroupV1impl) MemoryUsage(name CgroupName) (int64, error) { | ||||
| 	var path, file string | ||||
| 	mp, ok := c.subsystems.MountPoints["memory"] | ||||
| 	if !ok { // should not happen | ||||
| 		return -1, errors.New("no cgroup v1 mountpoint for memory controller found") | ||||
| 	} | ||||
| 	path = mp + "/" + c.Name(name) | ||||
| 	file = "memory.usage_in_bytes" | ||||
| 	val, err := fscommon.GetCgroupParamUint(path, file) | ||||
| 	return int64(val), err | ||||
| } | ||||
|  | ||||
| // Get the resource config values applied to the cgroup for specified resource type | ||||
| func (c *cgroupV1impl) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) { | ||||
| 	cgroupPaths := c.buildCgroupPaths(name) | ||||
| 	cgroupResourcePath, found := cgroupPaths[string(resource)] | ||||
| 	if !found { | ||||
| 		return nil, fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) | ||||
| 	} | ||||
| 	switch resource { | ||||
| 	case v1.ResourceCPU: | ||||
| 		return c.getCgroupCPUConfig(cgroupResourcePath) | ||||
| 	case v1.ResourceMemory: | ||||
| 		return c.getCgroupMemoryConfig(cgroupResourcePath) | ||||
| 	} | ||||
| 	return nil, fmt.Errorf("unsupported resource %v for cgroup %v", resource, name) | ||||
| } | ||||
|  | ||||
| // Set resource config for the specified resource type on the cgroup | ||||
| func (c *cgroupV1impl) SetCgroupConfig(name CgroupName, resource v1.ResourceName, resourceConfig *ResourceConfig) error { | ||||
| 	cgroupPaths := c.buildCgroupPaths(name) | ||||
| 	cgroupResourcePath, found := cgroupPaths[string(resource)] | ||||
| 	if !found { | ||||
| 		return fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) | ||||
| 	} | ||||
| 	switch resource { | ||||
| 	case v1.ResourceCPU: | ||||
| 		return c.setCgroupCPUConfig(cgroupResourcePath, resourceConfig) | ||||
| 	case v1.ResourceMemory: | ||||
| 		return c.setCgroupMemoryConfig(cgroupResourcePath, resourceConfig) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (c *cgroupV1impl) getCgroupCPUConfig(cgroupPath string) (*ResourceConfig, error) { | ||||
| 	cpuQuotaStr, errQ := fscommon.GetCgroupParamString(cgroupPath, "cpu.cfs_quota_us") | ||||
| 	if errQ != nil { | ||||
| 		return nil, fmt.Errorf("failed to read CPU quota for cgroup %v: %w", cgroupPath, errQ) | ||||
| 	} | ||||
| 	cpuQuota, errInt := strconv.ParseInt(cpuQuotaStr, 10, 64) | ||||
| 	if errInt != nil { | ||||
| 		return nil, fmt.Errorf("failed to convert CPU quota as integer for cgroup %v: %w", cgroupPath, errInt) | ||||
| 	} | ||||
| 	cpuPeriod, errP := fscommon.GetCgroupParamUint(cgroupPath, "cpu.cfs_period_us") | ||||
| 	if errP != nil { | ||||
| 		return nil, fmt.Errorf("failed to read CPU period for cgroup %v: %w", cgroupPath, errP) | ||||
| 	} | ||||
| 	cpuShares, errS := fscommon.GetCgroupParamUint(cgroupPath, "cpu.shares") | ||||
| 	if errS != nil { | ||||
| 		return nil, fmt.Errorf("failed to read CPU shares for cgroup %v: %w", cgroupPath, errS) | ||||
| 	} | ||||
| 	return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuQuota, CPUPeriod: &cpuPeriod}, nil | ||||
| } | ||||
|  | ||||
| func (c *cgroupV1impl) setCgroupCPUConfig(cgroupPath string, resourceConfig *ResourceConfig) error { | ||||
| 	var cpuQuotaStr, cpuPeriodStr, cpuSharesStr string | ||||
| 	if resourceConfig.CPUQuota != nil { | ||||
| 		cpuQuotaStr = strconv.FormatInt(*resourceConfig.CPUQuota, 10) | ||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.cfs_quota_us"), []byte(cpuQuotaStr), 0700); err != nil { | ||||
| 			return fmt.Errorf("failed to write %v to %v: %w", cpuQuotaStr, cgroupPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	if resourceConfig.CPUPeriod != nil { | ||||
| 		cpuPeriodStr = strconv.FormatUint(*resourceConfig.CPUPeriod, 10) | ||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.cfs_period_us"), []byte(cpuPeriodStr), 0700); err != nil { | ||||
| 			return fmt.Errorf("failed to write %v to %v: %w", cpuPeriodStr, cgroupPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	if resourceConfig.CPUShares != nil { | ||||
| 		cpuSharesStr = strconv.FormatUint(*resourceConfig.CPUShares, 10) | ||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.shares"), []byte(cpuSharesStr), 0700); err != nil { | ||||
| 			return fmt.Errorf("failed to write %v to %v: %w", cpuSharesStr, cgroupPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (c *cgroupV1impl) setCgroupMemoryConfig(cgroupPath string, resourceConfig *ResourceConfig) error { | ||||
| 	return writeCgroupMemoryLimit(filepath.Join(cgroupPath, cgroupv1MemLimitFile), resourceConfig) | ||||
| } | ||||
|  | ||||
| func (c *cgroupV1impl) getCgroupMemoryConfig(cgroupPath string) (*ResourceConfig, error) { | ||||
| 	return readCgroupMemoryConfig(cgroupPath, cgroupv1MemLimitFile) | ||||
| } | ||||
							
								
								
									
										217
									
								
								pkg/kubelet/cm/cgroup_v2_manager_linux.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										217
									
								
								pkg/kubelet/cm/cgroup_v2_manager_linux.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,217 @@ | ||||
| /* | ||||
| Copyright 2024 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package cm | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"path" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
|  | ||||
| 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon" | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/apimachinery/pkg/util/sets" | ||||
| 	cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util" | ||||
| ) | ||||
|  | ||||
| const cgroupv2MemLimitFile string = "memory.max" | ||||
|  | ||||
| // cgroupV2impl implements the CgroupManager interface | ||||
| // for cgroup v2. | ||||
| // It's a stateless object which can be used to | ||||
| // update, create or delete any number of cgroups | ||||
| // It relies on runc/libcontainer cgroup managers. | ||||
| type cgroupV2impl struct { | ||||
| 	cgroupCommon | ||||
| } | ||||
|  | ||||
| func NewCgroupV2Manager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager { | ||||
| 	return &cgroupV2impl{ | ||||
| 		cgroupCommon: newCgroupCommon(cs, cgroupDriver), | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Validate checks if all subsystem cgroups are valid | ||||
| func (c *cgroupV2impl) Validate(name CgroupName) error { | ||||
| 	cgroupPath := c.buildCgroupUnifiedPath(name) | ||||
| 	neededControllers := getSupportedUnifiedControllers() | ||||
| 	enabledControllers, err := readUnifiedControllers(cgroupPath) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("could not read controllers for cgroup %q: %w", name, err) | ||||
| 	} | ||||
| 	difference := neededControllers.Difference(enabledControllers) | ||||
| 	if difference.Len() > 0 { | ||||
| 		return fmt.Errorf("cgroup %q has some missing controllers: %v", name, strings.Join(sets.List(difference), ", ")) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Exists checks if all subsystem cgroups already exist | ||||
| func (c *cgroupV2impl) Exists(name CgroupName) bool { | ||||
| 	return c.Validate(name) == nil | ||||
| } | ||||
|  | ||||
| // MemoryUsage returns the current memory usage of the specified cgroup, | ||||
| // as read from cgroupfs. | ||||
| func (c *cgroupV2impl) MemoryUsage(name CgroupName) (int64, error) { | ||||
| 	var path, file string | ||||
| 	path = c.buildCgroupUnifiedPath(name) | ||||
| 	file = "memory.current" | ||||
| 	val, err := fscommon.GetCgroupParamUint(path, file) | ||||
| 	return int64(val), err | ||||
| } | ||||
|  | ||||
| // Get the resource config values applied to the cgroup for specified resource type | ||||
| func (c *cgroupV2impl) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) { | ||||
| 	cgroupPaths := c.buildCgroupPaths(name) | ||||
| 	cgroupResourcePath, found := cgroupPaths[string(resource)] | ||||
| 	if !found { | ||||
| 		return nil, fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) | ||||
| 	} | ||||
| 	switch resource { | ||||
| 	case v1.ResourceCPU: | ||||
| 		return c.getCgroupCPUConfig(cgroupResourcePath) | ||||
| 	case v1.ResourceMemory: | ||||
| 		return c.getCgroupMemoryConfig(cgroupResourcePath) | ||||
| 	} | ||||
| 	return nil, fmt.Errorf("unsupported resource %v for cgroup %v", resource, name) | ||||
| } | ||||
|  | ||||
| // Set resource config for the specified resource type on the cgroup | ||||
| func (c *cgroupV2impl) SetCgroupConfig(name CgroupName, resource v1.ResourceName, resourceConfig *ResourceConfig) error { | ||||
| 	cgroupPaths := c.buildCgroupPaths(name) | ||||
| 	cgroupResourcePath, found := cgroupPaths[string(resource)] | ||||
| 	if !found { | ||||
| 		return fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) | ||||
| 	} | ||||
| 	switch resource { | ||||
| 	case v1.ResourceCPU: | ||||
| 		return c.setCgroupCPUConfig(cgroupResourcePath, resourceConfig) | ||||
| 	case v1.ResourceMemory: | ||||
| 		return c.setCgroupMemoryConfig(cgroupResourcePath, resourceConfig) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (c *cgroupV2impl) getCgroupCPUConfig(cgroupPath string) (*ResourceConfig, error) { | ||||
| 	var cpuLimitStr, cpuPeriodStr string | ||||
| 	cpuLimitAndPeriod, err := fscommon.GetCgroupParamString(cgroupPath, "cpu.max") | ||||
| 	if err != nil { | ||||
| 		return nil, fmt.Errorf("failed to read cpu.max file for cgroup %v: %w", cgroupPath, err) | ||||
| 	} | ||||
| 	numItems, errScan := fmt.Sscanf(cpuLimitAndPeriod, "%s %s", &cpuLimitStr, &cpuPeriodStr) | ||||
| 	if errScan != nil || numItems != 2 { | ||||
| 		return nil, fmt.Errorf("failed to correctly parse content of cpu.max file ('%s') for cgroup %v: %w", | ||||
| 			cpuLimitAndPeriod, cgroupPath, errScan) | ||||
| 	} | ||||
| 	cpuLimit := int64(-1) | ||||
| 	if cpuLimitStr != Cgroup2MaxCpuLimit { | ||||
| 		cpuLimit, err = strconv.ParseInt(cpuLimitStr, 10, 64) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("failed to convert CPU limit as integer for cgroup %v: %w", cgroupPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	cpuPeriod, errPeriod := strconv.ParseUint(cpuPeriodStr, 10, 64) | ||||
| 	if errPeriod != nil { | ||||
| 		return nil, fmt.Errorf("failed to convert CPU period as integer for cgroup %v: %w", cgroupPath, errPeriod) | ||||
| 	} | ||||
| 	cpuWeight, errWeight := fscommon.GetCgroupParamUint(cgroupPath, "cpu.weight") | ||||
| 	if errWeight != nil { | ||||
| 		return nil, fmt.Errorf("failed to read CPU weight for cgroup %v: %w", cgroupPath, errWeight) | ||||
| 	} | ||||
| 	cpuShares := cpuWeightToCPUShares(cpuWeight) | ||||
| 	return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuLimit, CPUPeriod: &cpuPeriod}, nil | ||||
| } | ||||
|  | ||||
| func (c *cgroupV2impl) setCgroupCPUConfig(cgroupPath string, resourceConfig *ResourceConfig) error { | ||||
| 	if resourceConfig.CPUQuota != nil { | ||||
| 		if resourceConfig.CPUPeriod == nil { | ||||
| 			return fmt.Errorf("CpuPeriod must be specified in order to set CpuLimit") | ||||
| 		} | ||||
| 		cpuLimitStr := Cgroup2MaxCpuLimit | ||||
| 		if *resourceConfig.CPUQuota > -1 { | ||||
| 			cpuLimitStr = strconv.FormatInt(*resourceConfig.CPUQuota, 10) | ||||
| 		} | ||||
| 		cpuPeriodStr := strconv.FormatUint(*resourceConfig.CPUPeriod, 10) | ||||
| 		cpuMaxStr := fmt.Sprintf("%s %s", cpuLimitStr, cpuPeriodStr) | ||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.max"), []byte(cpuMaxStr), 0700); err != nil { | ||||
| 			return fmt.Errorf("failed to write %v to %v: %w", cpuMaxStr, cgroupPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	if resourceConfig.CPUShares != nil { | ||||
| 		cpuWeight := cpuSharesToCPUWeight(*resourceConfig.CPUShares) | ||||
| 		cpuWeightStr := strconv.FormatUint(cpuWeight, 10) | ||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.weight"), []byte(cpuWeightStr), 0700); err != nil { | ||||
| 			return fmt.Errorf("failed to write %v to %v: %w", cpuWeightStr, cgroupPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (c *cgroupV2impl) setCgroupMemoryConfig(cgroupPath string, resourceConfig *ResourceConfig) error { | ||||
| 	return writeCgroupMemoryLimit(filepath.Join(cgroupPath, cgroupv2MemLimitFile), resourceConfig) | ||||
| } | ||||
|  | ||||
| func (c *cgroupV2impl) getCgroupMemoryConfig(cgroupPath string) (*ResourceConfig, error) { | ||||
| 	return readCgroupMemoryConfig(cgroupPath, cgroupv2MemLimitFile) | ||||
| } | ||||
|  | ||||
| // getSupportedUnifiedControllers returns a set of supported controllers when running on cgroup v2 | ||||
| func getSupportedUnifiedControllers() sets.Set[string] { | ||||
| 	// This is the set of controllers used by the Kubelet | ||||
| 	supportedControllers := sets.New("cpu", "cpuset", "memory", "hugetlb", "pids") | ||||
| 	// Memoize the set of controllers that are present in the root cgroup | ||||
| 	availableRootControllersOnce.Do(func() { | ||||
| 		var err error | ||||
| 		availableRootControllers, err = readUnifiedControllers(cmutil.CgroupRoot) | ||||
| 		if err != nil { | ||||
| 			panic(fmt.Errorf("cannot read cgroup controllers at %s", cmutil.CgroupRoot)) | ||||
| 		} | ||||
| 	}) | ||||
| 	// Return the set of controllers that are supported both by the Kubelet and by the kernel | ||||
| 	return supportedControllers.Intersection(availableRootControllers) | ||||
| } | ||||
|  | ||||
| // readUnifiedControllers reads the controllers available at the specified cgroup | ||||
| func readUnifiedControllers(path string) (sets.Set[string], error) { | ||||
| 	controllersFileContent, err := os.ReadFile(filepath.Join(path, "cgroup.controllers")) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	controllers := strings.Fields(string(controllersFileContent)) | ||||
| 	return sets.New(controllers...), nil | ||||
| } | ||||
|  | ||||
| // buildCgroupUnifiedPath builds a path to the specified name. | ||||
| func (c *cgroupV2impl) buildCgroupUnifiedPath(name CgroupName) string { | ||||
| 	cgroupFsAdaptedName := c.Name(name) | ||||
| 	return path.Join(cmutil.CgroupRoot, cgroupFsAdaptedName) | ||||
| } | ||||
|  | ||||
| // Convert cgroup v1 cpu.shares value to cgroup v2 cpu.weight | ||||
| // https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2 | ||||
| func cpuSharesToCPUWeight(cpuShares uint64) uint64 { | ||||
| 	return uint64((((cpuShares - 2) * 9999) / 262142) + 1) | ||||
| } | ||||
|  | ||||
| // Convert cgroup v2 cpu.weight value to cgroup v1 cpu.shares | ||||
| // https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2 | ||||
| func cpuWeightToCPUShares(cpuWeight uint64) uint64 { | ||||
| 	return uint64((((cpuWeight - 1) * 262142) / 9999) + 2) | ||||
| } | ||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Prow Robot
					Kubernetes Prow Robot