Merge pull request #125496 from harche/cgroup_imp
KEP-4569: Separate cgroup v1 and v2 manager implementations
This commit is contained in:
		| @@ -17,7 +17,6 @@ limitations under the License. | |||||||
| package cm | package cm | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"errors" |  | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"os" | 	"os" | ||||||
| 	"path" | 	"path" | ||||||
| @@ -32,13 +31,11 @@ import ( | |||||||
| 	"github.com/opencontainers/runc/libcontainer/cgroups/manager" | 	"github.com/opencontainers/runc/libcontainer/cgroups/manager" | ||||||
| 	cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd" | 	cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd" | ||||||
| 	libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs" | 	libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs" | ||||||
| 	v1 "k8s.io/api/core/v1" |  | ||||||
| 	"k8s.io/klog/v2" | 	"k8s.io/klog/v2" | ||||||
| 	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" | 	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" | ||||||
|  |  | ||||||
| 	utilruntime "k8s.io/apimachinery/pkg/util/runtime" | 	utilruntime "k8s.io/apimachinery/pkg/util/runtime" | ||||||
| 	"k8s.io/apimachinery/pkg/util/sets" | 	"k8s.io/apimachinery/pkg/util/sets" | ||||||
| 	cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util" |  | ||||||
| 	"k8s.io/kubernetes/pkg/kubelet/metrics" | 	"k8s.io/kubernetes/pkg/kubelet/metrics" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| @@ -139,11 +136,11 @@ type CgroupSubsystems struct { | |||||||
| 	MountPoints map[string]string | 	MountPoints map[string]string | ||||||
| } | } | ||||||
|  |  | ||||||
| // cgroupManagerImpl implements the CgroupManager interface. | // cgroupCommon implements common tasks | ||||||
| // Its a stateless object which can be used to | // that are valid for both cgroup v1 and v2. | ||||||
| // update,create or delete any number of cgroups | // This prevents duplicating the code between | ||||||
| // It relies on runc/libcontainer cgroup managers. | // v1 and v2 specific implementations. | ||||||
| type cgroupManagerImpl struct { | type cgroupCommon struct { | ||||||
| 	// subsystems holds information about all the | 	// subsystems holds information about all the | ||||||
| 	// mounted cgroup subsystems on the node | 	// mounted cgroup subsystems on the node | ||||||
| 	subsystems *CgroupSubsystems | 	subsystems *CgroupSubsystems | ||||||
| @@ -152,12 +149,20 @@ type cgroupManagerImpl struct { | |||||||
| 	useSystemd bool | 	useSystemd bool | ||||||
| } | } | ||||||
|  |  | ||||||
| // Make sure that cgroupManagerImpl implements the CgroupManager interface | // Make sure that cgroupV1impl and cgroupV2impl implement the CgroupManager interface | ||||||
| var _ CgroupManager = &cgroupManagerImpl{} | var _ CgroupManager = &cgroupV1impl{} | ||||||
|  | var _ CgroupManager = &cgroupV2impl{} | ||||||
|  |  | ||||||
| // NewCgroupManager is a factory method that returns a CgroupManager | // NewCgroupManager is a factory method that returns a CgroupManager | ||||||
| func NewCgroupManager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager { | func NewCgroupManager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager { | ||||||
| 	return &cgroupManagerImpl{ | 	if libcontainercgroups.IsCgroup2UnifiedMode() { | ||||||
|  | 		return NewCgroupV2Manager(cs, cgroupDriver) | ||||||
|  | 	} | ||||||
|  | 	return NewCgroupV1Manager(cs, cgroupDriver) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func newCgroupCommon(cs *CgroupSubsystems, cgroupDriver string) cgroupCommon { | ||||||
|  | 	return cgroupCommon{ | ||||||
| 		subsystems: cs, | 		subsystems: cs, | ||||||
| 		useSystemd: cgroupDriver == "systemd", | 		useSystemd: cgroupDriver == "systemd", | ||||||
| 	} | 	} | ||||||
| @@ -165,7 +170,7 @@ func NewCgroupManager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager { | |||||||
|  |  | ||||||
| // Name converts the cgroup to the driver specific value in cgroupfs form. | // Name converts the cgroup to the driver specific value in cgroupfs form. | ||||||
| // This always returns a valid cgroupfs path even when systemd driver is in use! | // This always returns a valid cgroupfs path even when systemd driver is in use! | ||||||
| func (m *cgroupManagerImpl) Name(name CgroupName) string { | func (m *cgroupCommon) Name(name CgroupName) string { | ||||||
| 	if m.useSystemd { | 	if m.useSystemd { | ||||||
| 		return name.ToSystemd() | 		return name.ToSystemd() | ||||||
| 	} | 	} | ||||||
| @@ -173,7 +178,7 @@ func (m *cgroupManagerImpl) Name(name CgroupName) string { | |||||||
| } | } | ||||||
|  |  | ||||||
| // CgroupName converts the literal cgroupfs name on the host to an internal identifier. | // CgroupName converts the literal cgroupfs name on the host to an internal identifier. | ||||||
| func (m *cgroupManagerImpl) CgroupName(name string) CgroupName { | func (m *cgroupCommon) CgroupName(name string) CgroupName { | ||||||
| 	if m.useSystemd { | 	if m.useSystemd { | ||||||
| 		return ParseSystemdToCgroupName(name) | 		return ParseSystemdToCgroupName(name) | ||||||
| 	} | 	} | ||||||
| @@ -181,7 +186,7 @@ func (m *cgroupManagerImpl) CgroupName(name string) CgroupName { | |||||||
| } | } | ||||||
|  |  | ||||||
| // buildCgroupPaths builds a path to each cgroup subsystem for the specified name. | // buildCgroupPaths builds a path to each cgroup subsystem for the specified name. | ||||||
| func (m *cgroupManagerImpl) buildCgroupPaths(name CgroupName) map[string]string { | func (m *cgroupCommon) buildCgroupPaths(name CgroupName) map[string]string { | ||||||
| 	cgroupFsAdaptedName := m.Name(name) | 	cgroupFsAdaptedName := m.Name(name) | ||||||
| 	cgroupPaths := make(map[string]string, len(m.subsystems.MountPoints)) | 	cgroupPaths := make(map[string]string, len(m.subsystems.MountPoints)) | ||||||
| 	for key, val := range m.subsystems.MountPoints { | 	for key, val := range m.subsystems.MountPoints { | ||||||
| @@ -190,14 +195,8 @@ func (m *cgroupManagerImpl) buildCgroupPaths(name CgroupName) map[string]string | |||||||
| 	return cgroupPaths | 	return cgroupPaths | ||||||
| } | } | ||||||
|  |  | ||||||
| // buildCgroupUnifiedPath builds a path to the specified name. |  | ||||||
| func (m *cgroupManagerImpl) buildCgroupUnifiedPath(name CgroupName) string { |  | ||||||
| 	cgroupFsAdaptedName := m.Name(name) |  | ||||||
| 	return path.Join(cmutil.CgroupRoot, cgroupFsAdaptedName) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // libctCgroupConfig converts CgroupConfig to libcontainer's Cgroup config. | // libctCgroupConfig converts CgroupConfig to libcontainer's Cgroup config. | ||||||
| func (m *cgroupManagerImpl) libctCgroupConfig(in *CgroupConfig, needResources bool) *libcontainerconfigs.Cgroup { | func (m *cgroupCommon) libctCgroupConfig(in *CgroupConfig, needResources bool) *libcontainerconfigs.Cgroup { | ||||||
| 	config := &libcontainerconfigs.Cgroup{ | 	config := &libcontainerconfigs.Cgroup{ | ||||||
| 		Systemd: m.useSystemd, | 		Systemd: m.useSystemd, | ||||||
| 	} | 	} | ||||||
| @@ -235,62 +234,8 @@ func (m *cgroupManagerImpl) libctCgroupConfig(in *CgroupConfig, needResources bo | |||||||
| 	return config | 	return config | ||||||
| } | } | ||||||
|  |  | ||||||
| // Validate checks if all subsystem cgroups already exist |  | ||||||
| func (m *cgroupManagerImpl) Validate(name CgroupName) error { |  | ||||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { |  | ||||||
| 		cgroupPath := m.buildCgroupUnifiedPath(name) |  | ||||||
| 		neededControllers := getSupportedUnifiedControllers() |  | ||||||
| 		enabledControllers, err := readUnifiedControllers(cgroupPath) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return fmt.Errorf("could not read controllers for cgroup %q: %w", name, err) |  | ||||||
| 		} |  | ||||||
| 		difference := neededControllers.Difference(enabledControllers) |  | ||||||
| 		if difference.Len() > 0 { |  | ||||||
| 			return fmt.Errorf("cgroup %q has some missing controllers: %v", name, strings.Join(sets.List(difference), ", ")) |  | ||||||
| 		} |  | ||||||
| 		return nil // valid V2 cgroup |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// Get map of all cgroup paths on the system for the particular cgroup |  | ||||||
| 	cgroupPaths := m.buildCgroupPaths(name) |  | ||||||
|  |  | ||||||
| 	// the presence of alternative control groups not known to runc confuses |  | ||||||
| 	// the kubelet existence checks. |  | ||||||
| 	// ideally, we would have a mechanism in runc to support Exists() logic |  | ||||||
| 	// scoped to the set control groups it understands.  this is being discussed |  | ||||||
| 	// in https://github.com/opencontainers/runc/issues/1440 |  | ||||||
| 	// once resolved, we can remove this code. |  | ||||||
| 	allowlistControllers := sets.New[string]("cpu", "cpuacct", "cpuset", "memory", "systemd", "pids") |  | ||||||
|  |  | ||||||
| 	if _, ok := m.subsystems.MountPoints["hugetlb"]; ok { |  | ||||||
| 		allowlistControllers.Insert("hugetlb") |  | ||||||
| 	} |  | ||||||
| 	var missingPaths []string |  | ||||||
| 	// If even one cgroup path doesn't exist, then the cgroup doesn't exist. |  | ||||||
| 	for controller, path := range cgroupPaths { |  | ||||||
| 		// ignore mounts we don't care about |  | ||||||
| 		if !allowlistControllers.Has(controller) { |  | ||||||
| 			continue |  | ||||||
| 		} |  | ||||||
| 		if !libcontainercgroups.PathExists(path) { |  | ||||||
| 			missingPaths = append(missingPaths, path) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	if len(missingPaths) > 0 { |  | ||||||
| 		return fmt.Errorf("cgroup %q has some missing paths: %v", name, strings.Join(missingPaths, ", ")) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return nil // valid V1 cgroup |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Exists checks if all subsystem cgroups already exist |  | ||||||
| func (m *cgroupManagerImpl) Exists(name CgroupName) bool { |  | ||||||
| 	return m.Validate(name) == nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Destroy destroys the specified cgroup | // Destroy destroys the specified cgroup | ||||||
| func (m *cgroupManagerImpl) Destroy(cgroupConfig *CgroupConfig) error { | func (m *cgroupCommon) Destroy(cgroupConfig *CgroupConfig) error { | ||||||
| 	start := time.Now() | 	start := time.Now() | ||||||
| 	defer func() { | 	defer func() { | ||||||
| 		metrics.CgroupManagerDuration.WithLabelValues("destroy").Observe(metrics.SinceInSeconds(start)) | 		metrics.CgroupManagerDuration.WithLabelValues("destroy").Observe(metrics.SinceInSeconds(start)) | ||||||
| @@ -321,38 +266,12 @@ func getCPUWeight(cpuShares *uint64) uint64 { | |||||||
| 	return 1 + ((*cpuShares-2)*9999)/262142 | 	return 1 + ((*cpuShares-2)*9999)/262142 | ||||||
| } | } | ||||||
|  |  | ||||||
| // readUnifiedControllers reads the controllers available at the specified cgroup |  | ||||||
| func readUnifiedControllers(path string) (sets.Set[string], error) { |  | ||||||
| 	controllersFileContent, err := os.ReadFile(filepath.Join(path, "cgroup.controllers")) |  | ||||||
| 	if err != nil { |  | ||||||
| 		return nil, err |  | ||||||
| 	} |  | ||||||
| 	controllers := strings.Fields(string(controllersFileContent)) |  | ||||||
| 	return sets.New(controllers...), nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| var ( | var ( | ||||||
| 	availableRootControllersOnce sync.Once | 	availableRootControllersOnce sync.Once | ||||||
| 	availableRootControllers     sets.Set[string] | 	availableRootControllers     sets.Set[string] | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // getSupportedUnifiedControllers returns a set of supported controllers when running on cgroup v2 | func (m *cgroupCommon) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources { | ||||||
| func getSupportedUnifiedControllers() sets.Set[string] { |  | ||||||
| 	// This is the set of controllers used by the Kubelet |  | ||||||
| 	supportedControllers := sets.New("cpu", "cpuset", "memory", "hugetlb", "pids") |  | ||||||
| 	// Memoize the set of controllers that are present in the root cgroup |  | ||||||
| 	availableRootControllersOnce.Do(func() { |  | ||||||
| 		var err error |  | ||||||
| 		availableRootControllers, err = readUnifiedControllers(cmutil.CgroupRoot) |  | ||||||
| 		if err != nil { |  | ||||||
| 			panic(fmt.Errorf("cannot read cgroup controllers at %s", cmutil.CgroupRoot)) |  | ||||||
| 		} |  | ||||||
| 	}) |  | ||||||
| 	// Return the set of controllers that are supported both by the Kubelet and by the kernel |  | ||||||
| 	return supportedControllers.Intersection(availableRootControllers) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources { |  | ||||||
| 	resources := &libcontainerconfigs.Resources{ | 	resources := &libcontainerconfigs.Resources{ | ||||||
| 		SkipDevices:     true, | 		SkipDevices:     true, | ||||||
| 		SkipFreezeOnSet: true, | 		SkipFreezeOnSet: true, | ||||||
| @@ -394,7 +313,7 @@ func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcont | |||||||
| 	return resources | 	return resources | ||||||
| } | } | ||||||
|  |  | ||||||
| func (m *cgroupManagerImpl) maybeSetHugetlb(resourceConfig *ResourceConfig, resources *libcontainerconfigs.Resources) { | func (m *cgroupCommon) maybeSetHugetlb(resourceConfig *ResourceConfig, resources *libcontainerconfigs.Resources) { | ||||||
| 	// Check if hugetlb is supported. | 	// Check if hugetlb is supported. | ||||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { | 	if libcontainercgroups.IsCgroup2UnifiedMode() { | ||||||
| 		if !getSupportedUnifiedControllers().Has("hugetlb") { | 		if !getSupportedUnifiedControllers().Has("hugetlb") { | ||||||
| @@ -433,7 +352,7 @@ func (m *cgroupManagerImpl) maybeSetHugetlb(resourceConfig *ResourceConfig, reso | |||||||
| } | } | ||||||
|  |  | ||||||
| // Update updates the cgroup with the specified Cgroup Configuration | // Update updates the cgroup with the specified Cgroup Configuration | ||||||
| func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error { | func (m *cgroupCommon) Update(cgroupConfig *CgroupConfig) error { | ||||||
| 	start := time.Now() | 	start := time.Now() | ||||||
| 	defer func() { | 	defer func() { | ||||||
| 		metrics.CgroupManagerDuration.WithLabelValues("update").Observe(metrics.SinceInSeconds(start)) | 		metrics.CgroupManagerDuration.WithLabelValues("update").Observe(metrics.SinceInSeconds(start)) | ||||||
| @@ -448,7 +367,7 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error { | |||||||
| } | } | ||||||
|  |  | ||||||
| // Create creates the specified cgroup | // Create creates the specified cgroup | ||||||
| func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error { | func (m *cgroupCommon) Create(cgroupConfig *CgroupConfig) error { | ||||||
| 	start := time.Now() | 	start := time.Now() | ||||||
| 	defer func() { | 	defer func() { | ||||||
| 		metrics.CgroupManagerDuration.WithLabelValues("create").Observe(metrics.SinceInSeconds(start)) | 		metrics.CgroupManagerDuration.WithLabelValues("create").Observe(metrics.SinceInSeconds(start)) | ||||||
| @@ -480,7 +399,7 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error { | |||||||
| } | } | ||||||
|  |  | ||||||
| // Scans through all subsystems to find pids associated with specified cgroup. | // Scans through all subsystems to find pids associated with specified cgroup. | ||||||
| func (m *cgroupManagerImpl) Pids(name CgroupName) []int { | func (m *cgroupCommon) Pids(name CgroupName) []int { | ||||||
| 	// we need the driver specific name | 	// we need the driver specific name | ||||||
| 	cgroupFsName := m.Name(name) | 	cgroupFsName := m.Name(name) | ||||||
|  |  | ||||||
| @@ -530,7 +449,7 @@ func (m *cgroupManagerImpl) Pids(name CgroupName) []int { | |||||||
| } | } | ||||||
|  |  | ||||||
| // ReduceCPULimits reduces the cgroup's cpu shares to the lowest possible value | // ReduceCPULimits reduces the cgroup's cpu shares to the lowest possible value | ||||||
| func (m *cgroupManagerImpl) ReduceCPULimits(cgroupName CgroupName) error { | func (m *cgroupCommon) ReduceCPULimits(cgroupName CgroupName) error { | ||||||
| 	// Set lowest possible CpuShares value for the cgroup | 	// Set lowest possible CpuShares value for the cgroup | ||||||
| 	minimumCPUShares := uint64(MinShares) | 	minimumCPUShares := uint64(MinShares) | ||||||
| 	resources := &ResourceConfig{ | 	resources := &ResourceConfig{ | ||||||
| @@ -543,100 +462,7 @@ func (m *cgroupManagerImpl) ReduceCPULimits(cgroupName CgroupName) error { | |||||||
| 	return m.Update(containerConfig) | 	return m.Update(containerConfig) | ||||||
| } | } | ||||||
|  |  | ||||||
| // MemoryUsage returns the current memory usage of the specified cgroup, | func readCgroupMemoryConfig(cgroupPath string, memLimitFile string) (*ResourceConfig, error) { | ||||||
| // as read from cgroupfs. |  | ||||||
| func (m *cgroupManagerImpl) MemoryUsage(name CgroupName) (int64, error) { |  | ||||||
| 	var path, file string |  | ||||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { |  | ||||||
| 		path = m.buildCgroupUnifiedPath(name) |  | ||||||
| 		file = "memory.current" |  | ||||||
| 	} else { |  | ||||||
| 		mp, ok := m.subsystems.MountPoints["memory"] |  | ||||||
| 		if !ok { // should not happen |  | ||||||
| 			return -1, errors.New("no cgroup v1 mountpoint for memory controller found") |  | ||||||
| 		} |  | ||||||
| 		path = mp + "/" + m.Name(name) |  | ||||||
| 		file = "memory.usage_in_bytes" |  | ||||||
| 	} |  | ||||||
| 	val, err := fscommon.GetCgroupParamUint(path, file) |  | ||||||
| 	return int64(val), err |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Convert cgroup v1 cpu.shares value to cgroup v2 cpu.weight |  | ||||||
| // https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2 |  | ||||||
| func CpuSharesToCpuWeight(cpuShares uint64) uint64 { |  | ||||||
| 	return uint64((((cpuShares - 2) * 9999) / 262142) + 1) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Convert cgroup v2 cpu.weight value to cgroup v1 cpu.shares |  | ||||||
| // https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2 |  | ||||||
| func CpuWeightToCpuShares(cpuWeight uint64) uint64 { |  | ||||||
| 	return uint64((((cpuWeight - 1) * 262142) / 9999) + 2) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func getCgroupv1CpuConfig(cgroupPath string) (*ResourceConfig, error) { |  | ||||||
| 	cpuQuotaStr, errQ := fscommon.GetCgroupParamString(cgroupPath, "cpu.cfs_quota_us") |  | ||||||
| 	if errQ != nil { |  | ||||||
| 		return nil, fmt.Errorf("failed to read CPU quota for cgroup %v: %v", cgroupPath, errQ) |  | ||||||
| 	} |  | ||||||
| 	cpuQuota, errInt := strconv.ParseInt(cpuQuotaStr, 10, 64) |  | ||||||
| 	if errInt != nil { |  | ||||||
| 		return nil, fmt.Errorf("failed to convert CPU quota as integer for cgroup %v: %v", cgroupPath, errInt) |  | ||||||
| 	} |  | ||||||
| 	cpuPeriod, errP := fscommon.GetCgroupParamUint(cgroupPath, "cpu.cfs_period_us") |  | ||||||
| 	if errP != nil { |  | ||||||
| 		return nil, fmt.Errorf("failed to read CPU period for cgroup %v: %v", cgroupPath, errP) |  | ||||||
| 	} |  | ||||||
| 	cpuShares, errS := fscommon.GetCgroupParamUint(cgroupPath, "cpu.shares") |  | ||||||
| 	if errS != nil { |  | ||||||
| 		return nil, fmt.Errorf("failed to read CPU shares for cgroup %v: %v", cgroupPath, errS) |  | ||||||
| 	} |  | ||||||
| 	return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuQuota, CPUPeriod: &cpuPeriod}, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func getCgroupv2CpuConfig(cgroupPath string) (*ResourceConfig, error) { |  | ||||||
| 	var cpuLimitStr, cpuPeriodStr string |  | ||||||
| 	cpuLimitAndPeriod, err := fscommon.GetCgroupParamString(cgroupPath, "cpu.max") |  | ||||||
| 	if err != nil { |  | ||||||
| 		return nil, fmt.Errorf("failed to read cpu.max file for cgroup %v: %v", cgroupPath, err) |  | ||||||
| 	} |  | ||||||
| 	numItems, errScan := fmt.Sscanf(cpuLimitAndPeriod, "%s %s", &cpuLimitStr, &cpuPeriodStr) |  | ||||||
| 	if errScan != nil || numItems != 2 { |  | ||||||
| 		return nil, fmt.Errorf("failed to correctly parse content of cpu.max file ('%s') for cgroup %v: %v", |  | ||||||
| 			cpuLimitAndPeriod, cgroupPath, errScan) |  | ||||||
| 	} |  | ||||||
| 	cpuLimit := int64(-1) |  | ||||||
| 	if cpuLimitStr != Cgroup2MaxCpuLimit { |  | ||||||
| 		cpuLimit, err = strconv.ParseInt(cpuLimitStr, 10, 64) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, fmt.Errorf("failed to convert CPU limit as integer for cgroup %v: %v", cgroupPath, err) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	cpuPeriod, errPeriod := strconv.ParseUint(cpuPeriodStr, 10, 64) |  | ||||||
| 	if errPeriod != nil { |  | ||||||
| 		return nil, fmt.Errorf("failed to convert CPU period as integer for cgroup %v: %v", cgroupPath, errPeriod) |  | ||||||
| 	} |  | ||||||
| 	cpuWeight, errWeight := fscommon.GetCgroupParamUint(cgroupPath, "cpu.weight") |  | ||||||
| 	if errWeight != nil { |  | ||||||
| 		return nil, fmt.Errorf("failed to read CPU weight for cgroup %v: %v", cgroupPath, errWeight) |  | ||||||
| 	} |  | ||||||
| 	cpuShares := CpuWeightToCpuShares(cpuWeight) |  | ||||||
| 	return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuLimit, CPUPeriod: &cpuPeriod}, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func getCgroupCpuConfig(cgroupPath string) (*ResourceConfig, error) { |  | ||||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { |  | ||||||
| 		return getCgroupv2CpuConfig(cgroupPath) |  | ||||||
| 	} else { |  | ||||||
| 		return getCgroupv1CpuConfig(cgroupPath) |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func getCgroupMemoryConfig(cgroupPath string) (*ResourceConfig, error) { |  | ||||||
| 	memLimitFile := "memory.limit_in_bytes" |  | ||||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { |  | ||||||
| 		memLimitFile = "memory.max" |  | ||||||
| 	} |  | ||||||
| 	memLimit, err := fscommon.GetCgroupParamUint(cgroupPath, memLimitFile) | 	memLimit, err := fscommon.GetCgroupParamUint(cgroupPath, memLimitFile) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, fmt.Errorf("failed to read %s for cgroup %v: %v", memLimitFile, cgroupPath, err) | 		return nil, fmt.Errorf("failed to read %s for cgroup %v: %v", memLimitFile, cgroupPath, err) | ||||||
| @@ -647,103 +473,11 @@ func getCgroupMemoryConfig(cgroupPath string) (*ResourceConfig, error) { | |||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
| // Get the resource config values applied to the cgroup for specified resource type | func writeCgroupMemoryLimit(memoryLimitFileLocation string, resourceConfig *ResourceConfig) error { | ||||||
| func (m *cgroupManagerImpl) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) { |  | ||||||
| 	cgroupPaths := m.buildCgroupPaths(name) |  | ||||||
| 	cgroupResourcePath, found := cgroupPaths[string(resource)] |  | ||||||
| 	if !found { |  | ||||||
| 		return nil, fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) |  | ||||||
| 	} |  | ||||||
| 	switch resource { |  | ||||||
| 	case v1.ResourceCPU: |  | ||||||
| 		return getCgroupCpuConfig(cgroupResourcePath) |  | ||||||
| 	case v1.ResourceMemory: |  | ||||||
| 		return getCgroupMemoryConfig(cgroupResourcePath) |  | ||||||
| 	} |  | ||||||
| 	return nil, fmt.Errorf("unsupported resource %v for cgroup %v", resource, name) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func setCgroupv1CpuConfig(cgroupPath string, resourceConfig *ResourceConfig) error { |  | ||||||
| 	var cpuQuotaStr, cpuPeriodStr, cpuSharesStr string |  | ||||||
| 	if resourceConfig.CPUQuota != nil { |  | ||||||
| 		cpuQuotaStr = strconv.FormatInt(*resourceConfig.CPUQuota, 10) |  | ||||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.cfs_quota_us"), []byte(cpuQuotaStr), 0700); err != nil { |  | ||||||
| 			return fmt.Errorf("failed to write %v to %v: %v", cpuQuotaStr, cgroupPath, err) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	if resourceConfig.CPUPeriod != nil { |  | ||||||
| 		cpuPeriodStr = strconv.FormatUint(*resourceConfig.CPUPeriod, 10) |  | ||||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.cfs_period_us"), []byte(cpuPeriodStr), 0700); err != nil { |  | ||||||
| 			return fmt.Errorf("failed to write %v to %v: %v", cpuPeriodStr, cgroupPath, err) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	if resourceConfig.CPUShares != nil { |  | ||||||
| 		cpuSharesStr = strconv.FormatUint(*resourceConfig.CPUShares, 10) |  | ||||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.shares"), []byte(cpuSharesStr), 0700); err != nil { |  | ||||||
| 			return fmt.Errorf("failed to write %v to %v: %v", cpuSharesStr, cgroupPath, err) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func setCgroupv2CpuConfig(cgroupPath string, resourceConfig *ResourceConfig) error { |  | ||||||
| 	if resourceConfig.CPUQuota != nil { |  | ||||||
| 		if resourceConfig.CPUPeriod == nil { |  | ||||||
| 			return fmt.Errorf("CpuPeriod must be specified in order to set CpuLimit") |  | ||||||
| 		} |  | ||||||
| 		cpuLimitStr := Cgroup2MaxCpuLimit |  | ||||||
| 		if *resourceConfig.CPUQuota > -1 { |  | ||||||
| 			cpuLimitStr = strconv.FormatInt(*resourceConfig.CPUQuota, 10) |  | ||||||
| 		} |  | ||||||
| 		cpuPeriodStr := strconv.FormatUint(*resourceConfig.CPUPeriod, 10) |  | ||||||
| 		cpuMaxStr := fmt.Sprintf("%s %s", cpuLimitStr, cpuPeriodStr) |  | ||||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.max"), []byte(cpuMaxStr), 0700); err != nil { |  | ||||||
| 			return fmt.Errorf("failed to write %v to %v: %v", cpuMaxStr, cgroupPath, err) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	if resourceConfig.CPUShares != nil { |  | ||||||
| 		cpuWeight := CpuSharesToCpuWeight(*resourceConfig.CPUShares) |  | ||||||
| 		cpuWeightStr := strconv.FormatUint(cpuWeight, 10) |  | ||||||
| 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.weight"), []byte(cpuWeightStr), 0700); err != nil { |  | ||||||
| 			return fmt.Errorf("failed to write %v to %v: %v", cpuWeightStr, cgroupPath, err) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func setCgroupCpuConfig(cgroupPath string, resourceConfig *ResourceConfig) error { |  | ||||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { |  | ||||||
| 		return setCgroupv2CpuConfig(cgroupPath, resourceConfig) |  | ||||||
| 	} else { |  | ||||||
| 		return setCgroupv1CpuConfig(cgroupPath, resourceConfig) |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func setCgroupMemoryConfig(cgroupPath string, resourceConfig *ResourceConfig) error { |  | ||||||
| 	memLimitFile := "memory.limit_in_bytes" |  | ||||||
| 	if libcontainercgroups.IsCgroup2UnifiedMode() { |  | ||||||
| 		memLimitFile = "memory.max" |  | ||||||
| 	} |  | ||||||
| 	memLimit := strconv.FormatInt(*resourceConfig.Memory, 10) | 	memLimit := strconv.FormatInt(*resourceConfig.Memory, 10) | ||||||
| 	if err := os.WriteFile(filepath.Join(cgroupPath, memLimitFile), []byte(memLimit), 0700); err != nil { | 	if err := os.WriteFile(memoryLimitFileLocation, []byte(memLimit), 0700); err != nil { | ||||||
| 		return fmt.Errorf("failed to write %v to %v/%v: %v", memLimit, cgroupPath, memLimitFile, err) | 		return fmt.Errorf("failed to write %v to %v: %w", memLimit, memoryLimitFileLocation, err) | ||||||
| 	} | 	} | ||||||
| 	//TODO(vinaykul,InPlacePodVerticalScaling): Add memory request support | 	//TODO(vinaykul,InPlacePodVerticalScaling): Add memory request support | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
| // Set resource config for the specified resource type on the cgroup |  | ||||||
| func (m *cgroupManagerImpl) SetCgroupConfig(name CgroupName, resource v1.ResourceName, resourceConfig *ResourceConfig) error { |  | ||||||
| 	cgroupPaths := m.buildCgroupPaths(name) |  | ||||||
| 	cgroupResourcePath, found := cgroupPaths[string(resource)] |  | ||||||
| 	if !found { |  | ||||||
| 		return fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) |  | ||||||
| 	} |  | ||||||
| 	switch resource { |  | ||||||
| 	case v1.ResourceCPU: |  | ||||||
| 		return setCgroupCpuConfig(cgroupResourcePath, resourceConfig) |  | ||||||
| 	case v1.ResourceMemory: |  | ||||||
| 		return setCgroupMemoryConfig(cgroupResourcePath, resourceConfig) |  | ||||||
| 	} |  | ||||||
| 	return nil |  | ||||||
| } |  | ||||||
|   | |||||||
| @@ -170,7 +170,7 @@ func TestParseSystemdToCgroupName(t *testing.T) { | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| func TestCpuSharesToCpuWeight(t *testing.T) { | func TestCpuSharesToCPUWeight(t *testing.T) { | ||||||
| 	testCases := []struct { | 	testCases := []struct { | ||||||
| 		cpuShares         uint64 | 		cpuShares         uint64 | ||||||
| 		expectedCpuWeight uint64 | 		expectedCpuWeight uint64 | ||||||
| @@ -206,14 +206,14 @@ func TestCpuSharesToCpuWeight(t *testing.T) { | |||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	for _, testCase := range testCases { | 	for _, testCase := range testCases { | ||||||
| 		if actual := CpuSharesToCpuWeight(testCase.cpuShares); actual != testCase.expectedCpuWeight { | 		if actual := cpuSharesToCPUWeight(testCase.cpuShares); actual != testCase.expectedCpuWeight { | ||||||
| 			t.Errorf("cpuShares: %v, expectedCpuWeight: %v, actualCpuWeight: %v", | 			t.Errorf("cpuShares: %v, expectedCpuWeight: %v, actualCpuWeight: %v", | ||||||
| 				testCase.cpuShares, testCase.expectedCpuWeight, actual) | 				testCase.cpuShares, testCase.expectedCpuWeight, actual) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| func TestCpuWeightToCpuShares(t *testing.T) { | func TestCpuWeightToCPUShares(t *testing.T) { | ||||||
| 	testCases := []struct { | 	testCases := []struct { | ||||||
| 		cpuWeight         uint64 | 		cpuWeight         uint64 | ||||||
| 		expectedCpuShares uint64 | 		expectedCpuShares uint64 | ||||||
| @@ -245,7 +245,7 @@ func TestCpuWeightToCpuShares(t *testing.T) { | |||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	for _, testCase := range testCases { | 	for _, testCase := range testCases { | ||||||
| 		if actual := CpuWeightToCpuShares(testCase.cpuWeight); actual != testCase.expectedCpuShares { | 		if actual := cpuWeightToCPUShares(testCase.cpuWeight); actual != testCase.expectedCpuShares { | ||||||
| 			t.Errorf("cpuWeight: %v, expectedCpuShares: %v, actualCpuShares: %v", | 			t.Errorf("cpuWeight: %v, expectedCpuShares: %v, actualCpuShares: %v", | ||||||
| 				testCase.cpuWeight, testCase.expectedCpuShares, actual) | 				testCase.cpuWeight, testCase.expectedCpuShares, actual) | ||||||
| 		} | 		} | ||||||
|   | |||||||
							
								
								
									
										186
									
								
								pkg/kubelet/cm/cgroup_v1_manager_linux.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										186
									
								
								pkg/kubelet/cm/cgroup_v1_manager_linux.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,186 @@ | |||||||
|  | /* | ||||||
|  | Copyright 2024 The Kubernetes Authors. | ||||||
|  |  | ||||||
|  | Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | you may not use this file except in compliance with the License. | ||||||
|  | You may obtain a copy of the License at | ||||||
|  |  | ||||||
|  |     http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  |  | ||||||
|  | Unless required by applicable law or agreed to in writing, software | ||||||
|  | distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | See the License for the specific language governing permissions and | ||||||
|  | limitations under the License. | ||||||
|  | */ | ||||||
|  |  | ||||||
|  | package cm | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"errors" | ||||||
|  | 	"fmt" | ||||||
|  | 	"os" | ||||||
|  | 	"path/filepath" | ||||||
|  | 	"strconv" | ||||||
|  | 	"strings" | ||||||
|  |  | ||||||
|  | 	libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups" | ||||||
|  | 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon" | ||||||
|  |  | ||||||
|  | 	v1 "k8s.io/api/core/v1" | ||||||
|  | 	"k8s.io/apimachinery/pkg/util/sets" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | const cgroupv1MemLimitFile string = "memory.limit_in_bytes" | ||||||
|  |  | ||||||
|  | // cgroupV1impl implements the CgroupManager interface | ||||||
|  | // for cgroup v1. | ||||||
|  | // It's a stateless object which can be used to | ||||||
|  | // update, create or delete any number of cgroups | ||||||
|  | // It relies on runc/libcontainer cgroup managers. | ||||||
|  | type cgroupV1impl struct { | ||||||
|  | 	cgroupCommon | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func NewCgroupV1Manager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager { | ||||||
|  | 	return &cgroupV1impl{ | ||||||
|  | 		cgroupCommon: newCgroupCommon(cs, cgroupDriver), | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Validate checks if all subsystem cgroups are valid | ||||||
|  | func (c *cgroupV1impl) Validate(name CgroupName) error { | ||||||
|  | 	// Get map of all cgroup paths on the system for the particular cgroup | ||||||
|  | 	cgroupPaths := c.buildCgroupPaths(name) | ||||||
|  |  | ||||||
|  | 	// the presence of alternative control groups not known to runc confuses | ||||||
|  | 	// the kubelet existence checks. | ||||||
|  | 	// ideally, we would have a mechanism in runc to support Exists() logic | ||||||
|  | 	// scoped to the set control groups it understands.  this is being discussed | ||||||
|  | 	// in https://github.com/opencontainers/runc/issues/1440 | ||||||
|  | 	// once resolved, we can remove this code. | ||||||
|  | 	allowlistControllers := sets.New[string]("cpu", "cpuacct", "cpuset", "memory", "systemd", "pids") | ||||||
|  |  | ||||||
|  | 	if _, ok := c.subsystems.MountPoints["hugetlb"]; ok { | ||||||
|  | 		allowlistControllers.Insert("hugetlb") | ||||||
|  | 	} | ||||||
|  | 	var missingPaths []string | ||||||
|  | 	// If even one cgroup path doesn't exist, then the cgroup doesn't exist. | ||||||
|  | 	for controller, path := range cgroupPaths { | ||||||
|  | 		// ignore mounts we don't care about | ||||||
|  | 		if !allowlistControllers.Has(controller) { | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  | 		if !libcontainercgroups.PathExists(path) { | ||||||
|  | 			missingPaths = append(missingPaths, path) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if len(missingPaths) > 0 { | ||||||
|  | 		return fmt.Errorf("cgroup %q has some missing paths: %v", name, strings.Join(missingPaths, ", ")) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Exists checks if all subsystem cgroups already exist | ||||||
|  | func (c *cgroupV1impl) Exists(name CgroupName) bool { | ||||||
|  | 	return c.Validate(name) == nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // MemoryUsage returns the current memory usage of the specified cgroup, | ||||||
|  | // as read from cgroupfs. | ||||||
|  | func (c *cgroupV1impl) MemoryUsage(name CgroupName) (int64, error) { | ||||||
|  | 	var path, file string | ||||||
|  | 	mp, ok := c.subsystems.MountPoints["memory"] | ||||||
|  | 	if !ok { // should not happen | ||||||
|  | 		return -1, errors.New("no cgroup v1 mountpoint for memory controller found") | ||||||
|  | 	} | ||||||
|  | 	path = mp + "/" + c.Name(name) | ||||||
|  | 	file = "memory.usage_in_bytes" | ||||||
|  | 	val, err := fscommon.GetCgroupParamUint(path, file) | ||||||
|  | 	return int64(val), err | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Get the resource config values applied to the cgroup for specified resource type | ||||||
|  | func (c *cgroupV1impl) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) { | ||||||
|  | 	cgroupPaths := c.buildCgroupPaths(name) | ||||||
|  | 	cgroupResourcePath, found := cgroupPaths[string(resource)] | ||||||
|  | 	if !found { | ||||||
|  | 		return nil, fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) | ||||||
|  | 	} | ||||||
|  | 	switch resource { | ||||||
|  | 	case v1.ResourceCPU: | ||||||
|  | 		return c.getCgroupCPUConfig(cgroupResourcePath) | ||||||
|  | 	case v1.ResourceMemory: | ||||||
|  | 		return c.getCgroupMemoryConfig(cgroupResourcePath) | ||||||
|  | 	} | ||||||
|  | 	return nil, fmt.Errorf("unsupported resource %v for cgroup %v", resource, name) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Set resource config for the specified resource type on the cgroup | ||||||
|  | func (c *cgroupV1impl) SetCgroupConfig(name CgroupName, resource v1.ResourceName, resourceConfig *ResourceConfig) error { | ||||||
|  | 	cgroupPaths := c.buildCgroupPaths(name) | ||||||
|  | 	cgroupResourcePath, found := cgroupPaths[string(resource)] | ||||||
|  | 	if !found { | ||||||
|  | 		return fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) | ||||||
|  | 	} | ||||||
|  | 	switch resource { | ||||||
|  | 	case v1.ResourceCPU: | ||||||
|  | 		return c.setCgroupCPUConfig(cgroupResourcePath, resourceConfig) | ||||||
|  | 	case v1.ResourceMemory: | ||||||
|  | 		return c.setCgroupMemoryConfig(cgroupResourcePath, resourceConfig) | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (c *cgroupV1impl) getCgroupCPUConfig(cgroupPath string) (*ResourceConfig, error) { | ||||||
|  | 	cpuQuotaStr, errQ := fscommon.GetCgroupParamString(cgroupPath, "cpu.cfs_quota_us") | ||||||
|  | 	if errQ != nil { | ||||||
|  | 		return nil, fmt.Errorf("failed to read CPU quota for cgroup %v: %w", cgroupPath, errQ) | ||||||
|  | 	} | ||||||
|  | 	cpuQuota, errInt := strconv.ParseInt(cpuQuotaStr, 10, 64) | ||||||
|  | 	if errInt != nil { | ||||||
|  | 		return nil, fmt.Errorf("failed to convert CPU quota as integer for cgroup %v: %w", cgroupPath, errInt) | ||||||
|  | 	} | ||||||
|  | 	cpuPeriod, errP := fscommon.GetCgroupParamUint(cgroupPath, "cpu.cfs_period_us") | ||||||
|  | 	if errP != nil { | ||||||
|  | 		return nil, fmt.Errorf("failed to read CPU period for cgroup %v: %w", cgroupPath, errP) | ||||||
|  | 	} | ||||||
|  | 	cpuShares, errS := fscommon.GetCgroupParamUint(cgroupPath, "cpu.shares") | ||||||
|  | 	if errS != nil { | ||||||
|  | 		return nil, fmt.Errorf("failed to read CPU shares for cgroup %v: %w", cgroupPath, errS) | ||||||
|  | 	} | ||||||
|  | 	return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuQuota, CPUPeriod: &cpuPeriod}, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (c *cgroupV1impl) setCgroupCPUConfig(cgroupPath string, resourceConfig *ResourceConfig) error { | ||||||
|  | 	var cpuQuotaStr, cpuPeriodStr, cpuSharesStr string | ||||||
|  | 	if resourceConfig.CPUQuota != nil { | ||||||
|  | 		cpuQuotaStr = strconv.FormatInt(*resourceConfig.CPUQuota, 10) | ||||||
|  | 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.cfs_quota_us"), []byte(cpuQuotaStr), 0700); err != nil { | ||||||
|  | 			return fmt.Errorf("failed to write %v to %v: %w", cpuQuotaStr, cgroupPath, err) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	if resourceConfig.CPUPeriod != nil { | ||||||
|  | 		cpuPeriodStr = strconv.FormatUint(*resourceConfig.CPUPeriod, 10) | ||||||
|  | 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.cfs_period_us"), []byte(cpuPeriodStr), 0700); err != nil { | ||||||
|  | 			return fmt.Errorf("failed to write %v to %v: %w", cpuPeriodStr, cgroupPath, err) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	if resourceConfig.CPUShares != nil { | ||||||
|  | 		cpuSharesStr = strconv.FormatUint(*resourceConfig.CPUShares, 10) | ||||||
|  | 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.shares"), []byte(cpuSharesStr), 0700); err != nil { | ||||||
|  | 			return fmt.Errorf("failed to write %v to %v: %w", cpuSharesStr, cgroupPath, err) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (c *cgroupV1impl) setCgroupMemoryConfig(cgroupPath string, resourceConfig *ResourceConfig) error { | ||||||
|  | 	return writeCgroupMemoryLimit(filepath.Join(cgroupPath, cgroupv1MemLimitFile), resourceConfig) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (c *cgroupV1impl) getCgroupMemoryConfig(cgroupPath string) (*ResourceConfig, error) { | ||||||
|  | 	return readCgroupMemoryConfig(cgroupPath, cgroupv1MemLimitFile) | ||||||
|  | } | ||||||
							
								
								
									
										217
									
								
								pkg/kubelet/cm/cgroup_v2_manager_linux.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										217
									
								
								pkg/kubelet/cm/cgroup_v2_manager_linux.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,217 @@ | |||||||
|  | /* | ||||||
|  | Copyright 2024 The Kubernetes Authors. | ||||||
|  |  | ||||||
|  | Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | you may not use this file except in compliance with the License. | ||||||
|  | You may obtain a copy of the License at | ||||||
|  |  | ||||||
|  |     http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  |  | ||||||
|  | Unless required by applicable law or agreed to in writing, software | ||||||
|  | distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | See the License for the specific language governing permissions and | ||||||
|  | limitations under the License. | ||||||
|  | */ | ||||||
|  |  | ||||||
|  | package cm | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"fmt" | ||||||
|  | 	"os" | ||||||
|  | 	"path" | ||||||
|  | 	"path/filepath" | ||||||
|  | 	"strconv" | ||||||
|  | 	"strings" | ||||||
|  |  | ||||||
|  | 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon" | ||||||
|  | 	v1 "k8s.io/api/core/v1" | ||||||
|  | 	"k8s.io/apimachinery/pkg/util/sets" | ||||||
|  | 	cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | const cgroupv2MemLimitFile string = "memory.max" | ||||||
|  |  | ||||||
|  | // cgroupV2impl implements the CgroupManager interface | ||||||
|  | // for cgroup v2. | ||||||
|  | // It's a stateless object which can be used to | ||||||
|  | // update, create or delete any number of cgroups | ||||||
|  | // It relies on runc/libcontainer cgroup managers. | ||||||
|  | type cgroupV2impl struct { | ||||||
|  | 	cgroupCommon | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func NewCgroupV2Manager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager { | ||||||
|  | 	return &cgroupV2impl{ | ||||||
|  | 		cgroupCommon: newCgroupCommon(cs, cgroupDriver), | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Validate checks if all subsystem cgroups are valid | ||||||
|  | func (c *cgroupV2impl) Validate(name CgroupName) error { | ||||||
|  | 	cgroupPath := c.buildCgroupUnifiedPath(name) | ||||||
|  | 	neededControllers := getSupportedUnifiedControllers() | ||||||
|  | 	enabledControllers, err := readUnifiedControllers(cgroupPath) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return fmt.Errorf("could not read controllers for cgroup %q: %w", name, err) | ||||||
|  | 	} | ||||||
|  | 	difference := neededControllers.Difference(enabledControllers) | ||||||
|  | 	if difference.Len() > 0 { | ||||||
|  | 		return fmt.Errorf("cgroup %q has some missing controllers: %v", name, strings.Join(sets.List(difference), ", ")) | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Exists checks if all subsystem cgroups already exist | ||||||
|  | func (c *cgroupV2impl) Exists(name CgroupName) bool { | ||||||
|  | 	return c.Validate(name) == nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // MemoryUsage returns the current memory usage of the specified cgroup, | ||||||
|  | // as read from cgroupfs. | ||||||
|  | func (c *cgroupV2impl) MemoryUsage(name CgroupName) (int64, error) { | ||||||
|  | 	var path, file string | ||||||
|  | 	path = c.buildCgroupUnifiedPath(name) | ||||||
|  | 	file = "memory.current" | ||||||
|  | 	val, err := fscommon.GetCgroupParamUint(path, file) | ||||||
|  | 	return int64(val), err | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Get the resource config values applied to the cgroup for specified resource type | ||||||
|  | func (c *cgroupV2impl) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) { | ||||||
|  | 	cgroupPaths := c.buildCgroupPaths(name) | ||||||
|  | 	cgroupResourcePath, found := cgroupPaths[string(resource)] | ||||||
|  | 	if !found { | ||||||
|  | 		return nil, fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) | ||||||
|  | 	} | ||||||
|  | 	switch resource { | ||||||
|  | 	case v1.ResourceCPU: | ||||||
|  | 		return c.getCgroupCPUConfig(cgroupResourcePath) | ||||||
|  | 	case v1.ResourceMemory: | ||||||
|  | 		return c.getCgroupMemoryConfig(cgroupResourcePath) | ||||||
|  | 	} | ||||||
|  | 	return nil, fmt.Errorf("unsupported resource %v for cgroup %v", resource, name) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Set resource config for the specified resource type on the cgroup | ||||||
|  | func (c *cgroupV2impl) SetCgroupConfig(name CgroupName, resource v1.ResourceName, resourceConfig *ResourceConfig) error { | ||||||
|  | 	cgroupPaths := c.buildCgroupPaths(name) | ||||||
|  | 	cgroupResourcePath, found := cgroupPaths[string(resource)] | ||||||
|  | 	if !found { | ||||||
|  | 		return fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) | ||||||
|  | 	} | ||||||
|  | 	switch resource { | ||||||
|  | 	case v1.ResourceCPU: | ||||||
|  | 		return c.setCgroupCPUConfig(cgroupResourcePath, resourceConfig) | ||||||
|  | 	case v1.ResourceMemory: | ||||||
|  | 		return c.setCgroupMemoryConfig(cgroupResourcePath, resourceConfig) | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (c *cgroupV2impl) getCgroupCPUConfig(cgroupPath string) (*ResourceConfig, error) { | ||||||
|  | 	var cpuLimitStr, cpuPeriodStr string | ||||||
|  | 	cpuLimitAndPeriod, err := fscommon.GetCgroupParamString(cgroupPath, "cpu.max") | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, fmt.Errorf("failed to read cpu.max file for cgroup %v: %w", cgroupPath, err) | ||||||
|  | 	} | ||||||
|  | 	numItems, errScan := fmt.Sscanf(cpuLimitAndPeriod, "%s %s", &cpuLimitStr, &cpuPeriodStr) | ||||||
|  | 	if errScan != nil || numItems != 2 { | ||||||
|  | 		return nil, fmt.Errorf("failed to correctly parse content of cpu.max file ('%s') for cgroup %v: %w", | ||||||
|  | 			cpuLimitAndPeriod, cgroupPath, errScan) | ||||||
|  | 	} | ||||||
|  | 	cpuLimit := int64(-1) | ||||||
|  | 	if cpuLimitStr != Cgroup2MaxCpuLimit { | ||||||
|  | 		cpuLimit, err = strconv.ParseInt(cpuLimitStr, 10, 64) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, fmt.Errorf("failed to convert CPU limit as integer for cgroup %v: %w", cgroupPath, err) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	cpuPeriod, errPeriod := strconv.ParseUint(cpuPeriodStr, 10, 64) | ||||||
|  | 	if errPeriod != nil { | ||||||
|  | 		return nil, fmt.Errorf("failed to convert CPU period as integer for cgroup %v: %w", cgroupPath, errPeriod) | ||||||
|  | 	} | ||||||
|  | 	cpuWeight, errWeight := fscommon.GetCgroupParamUint(cgroupPath, "cpu.weight") | ||||||
|  | 	if errWeight != nil { | ||||||
|  | 		return nil, fmt.Errorf("failed to read CPU weight for cgroup %v: %w", cgroupPath, errWeight) | ||||||
|  | 	} | ||||||
|  | 	cpuShares := cpuWeightToCPUShares(cpuWeight) | ||||||
|  | 	return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuLimit, CPUPeriod: &cpuPeriod}, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (c *cgroupV2impl) setCgroupCPUConfig(cgroupPath string, resourceConfig *ResourceConfig) error { | ||||||
|  | 	if resourceConfig.CPUQuota != nil { | ||||||
|  | 		if resourceConfig.CPUPeriod == nil { | ||||||
|  | 			return fmt.Errorf("CpuPeriod must be specified in order to set CpuLimit") | ||||||
|  | 		} | ||||||
|  | 		cpuLimitStr := Cgroup2MaxCpuLimit | ||||||
|  | 		if *resourceConfig.CPUQuota > -1 { | ||||||
|  | 			cpuLimitStr = strconv.FormatInt(*resourceConfig.CPUQuota, 10) | ||||||
|  | 		} | ||||||
|  | 		cpuPeriodStr := strconv.FormatUint(*resourceConfig.CPUPeriod, 10) | ||||||
|  | 		cpuMaxStr := fmt.Sprintf("%s %s", cpuLimitStr, cpuPeriodStr) | ||||||
|  | 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.max"), []byte(cpuMaxStr), 0700); err != nil { | ||||||
|  | 			return fmt.Errorf("failed to write %v to %v: %w", cpuMaxStr, cgroupPath, err) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	if resourceConfig.CPUShares != nil { | ||||||
|  | 		cpuWeight := cpuSharesToCPUWeight(*resourceConfig.CPUShares) | ||||||
|  | 		cpuWeightStr := strconv.FormatUint(cpuWeight, 10) | ||||||
|  | 		if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.weight"), []byte(cpuWeightStr), 0700); err != nil { | ||||||
|  | 			return fmt.Errorf("failed to write %v to %v: %w", cpuWeightStr, cgroupPath, err) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (c *cgroupV2impl) setCgroupMemoryConfig(cgroupPath string, resourceConfig *ResourceConfig) error { | ||||||
|  | 	return writeCgroupMemoryLimit(filepath.Join(cgroupPath, cgroupv2MemLimitFile), resourceConfig) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (c *cgroupV2impl) getCgroupMemoryConfig(cgroupPath string) (*ResourceConfig, error) { | ||||||
|  | 	return readCgroupMemoryConfig(cgroupPath, cgroupv2MemLimitFile) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // getSupportedUnifiedControllers returns a set of supported controllers when running on cgroup v2 | ||||||
|  | func getSupportedUnifiedControllers() sets.Set[string] { | ||||||
|  | 	// This is the set of controllers used by the Kubelet | ||||||
|  | 	supportedControllers := sets.New("cpu", "cpuset", "memory", "hugetlb", "pids") | ||||||
|  | 	// Memoize the set of controllers that are present in the root cgroup | ||||||
|  | 	availableRootControllersOnce.Do(func() { | ||||||
|  | 		var err error | ||||||
|  | 		availableRootControllers, err = readUnifiedControllers(cmutil.CgroupRoot) | ||||||
|  | 		if err != nil { | ||||||
|  | 			panic(fmt.Errorf("cannot read cgroup controllers at %s", cmutil.CgroupRoot)) | ||||||
|  | 		} | ||||||
|  | 	}) | ||||||
|  | 	// Return the set of controllers that are supported both by the Kubelet and by the kernel | ||||||
|  | 	return supportedControllers.Intersection(availableRootControllers) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // readUnifiedControllers reads the controllers available at the specified cgroup | ||||||
|  | func readUnifiedControllers(path string) (sets.Set[string], error) { | ||||||
|  | 	controllersFileContent, err := os.ReadFile(filepath.Join(path, "cgroup.controllers")) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  | 	controllers := strings.Fields(string(controllersFileContent)) | ||||||
|  | 	return sets.New(controllers...), nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // buildCgroupUnifiedPath builds a path to the specified name. | ||||||
|  | func (c *cgroupV2impl) buildCgroupUnifiedPath(name CgroupName) string { | ||||||
|  | 	cgroupFsAdaptedName := c.Name(name) | ||||||
|  | 	return path.Join(cmutil.CgroupRoot, cgroupFsAdaptedName) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Convert cgroup v1 cpu.shares value to cgroup v2 cpu.weight | ||||||
|  | // https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2 | ||||||
|  | func cpuSharesToCPUWeight(cpuShares uint64) uint64 { | ||||||
|  | 	return uint64((((cpuShares - 2) * 9999) / 262142) + 1) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Convert cgroup v2 cpu.weight value to cgroup v1 cpu.shares | ||||||
|  | // https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2 | ||||||
|  | func cpuWeightToCPUShares(cpuWeight uint64) uint64 { | ||||||
|  | 	return uint64((((cpuWeight - 1) * 262142) / 9999) + 2) | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Prow Robot
					Kubernetes Prow Robot