containerd/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
Markus Lehtonen ed47d6ba76 cri: implement RuntimeConfig rpc
The rpc only reports one field, i.e. the cgroup driver, to kubelet.
Containerd determines the effective cgroup driver by looking at all
runtime handlers, starting from the default runtime handler (the rest in
alphabetical order), and returning the cgroup driver setting of the
first runtime handler that supports one. If no runtime handler supports
cgroup driver (i.e. has a config option for it) containerd falls back to
auto-detection, returning systemd if systemd is running and cgroupfs
otherwise.

This patch implements the CRI server side of Kubernetes KEP-4033:
https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/4033-group-driver-detection-over-cri

Signed-off-by: Markus Lehtonen <markus.lehtonen@intel.com>
2023-07-28 13:50:43 +03:00

291 lines
6.7 KiB
Go

package cgroups
import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"syscall"
securejoin "github.com/cyphar/filepath-securejoin"
"github.com/moby/sys/mountinfo"
"golang.org/x/sys/unix"
)
// Code in this source file are specific to cgroup v1,
// and must not be used from any cgroup v2 code.
const (
CgroupNamePrefix = "name="
defaultPrefix = "/sys/fs/cgroup"
)
var (
errUnified = errors.New("not implemented for cgroup v2 unified hierarchy")
ErrV1NoUnified = errors.New("invalid configuration: cannot use unified on cgroup v1")
readMountinfoOnce sync.Once
readMountinfoErr error
cgroupMountinfo []*mountinfo.Info
)
type NotFoundError struct {
Subsystem string
}
func (e *NotFoundError) Error() string {
return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
}
func NewNotFoundError(sub string) error {
return &NotFoundError{
Subsystem: sub,
}
}
func IsNotFound(err error) bool {
var nfErr *NotFoundError
return errors.As(err, &nfErr)
}
func tryDefaultPath(cgroupPath, subsystem string) string {
if !strings.HasPrefix(defaultPrefix, cgroupPath) {
return ""
}
// remove possible prefix
subsystem = strings.TrimPrefix(subsystem, CgroupNamePrefix)
// Make sure we're still under defaultPrefix, and resolve
// a possible symlink (like cpu -> cpu,cpuacct).
path, err := securejoin.SecureJoin(defaultPrefix, subsystem)
if err != nil {
return ""
}
// (1) path should be a directory.
st, err := os.Lstat(path)
if err != nil || !st.IsDir() {
return ""
}
// (2) path should be a mount point.
pst, err := os.Lstat(filepath.Dir(path))
if err != nil {
return ""
}
if st.Sys().(*syscall.Stat_t).Dev == pst.Sys().(*syscall.Stat_t).Dev {
// parent dir has the same dev -- path is not a mount point
return ""
}
// (3) path should have 'cgroup' fs type.
fst := unix.Statfs_t{}
err = unix.Statfs(path, &fst)
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
return ""
}
return path
}
// readCgroupMountinfo returns a list of cgroup v1 mounts (i.e. the ones
// with fstype of "cgroup") for the current running process.
//
// The results are cached (to avoid re-reading mountinfo which is relatively
// expensive), so it is assumed that cgroup mounts are not being changed.
func readCgroupMountinfo() ([]*mountinfo.Info, error) {
readMountinfoOnce.Do(func() {
cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts(
mountinfo.FSTypeFilter("cgroup"),
)
})
return cgroupMountinfo, readMountinfoErr
}
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
// If subsystem is empty, we look for the cgroupv2 hybrid path.
if len(subsystem) == 0 {
return hybridMountpoint, nil
}
// Avoid parsing mountinfo by trying the default path first, if possible.
if path := tryDefaultPath(cgroupPath, subsystem); path != "" {
return path, nil
}
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
return mnt, err
}
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
if IsCgroup2UnifiedMode() {
return "", "", errUnified
}
mi, err := readCgroupMountinfo()
if err != nil {
return "", "", err
}
return findCgroupMountpointAndRootFromMI(mi, cgroupPath, subsystem)
}
func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, subsystem string) (string, string, error) {
for _, mi := range mounts {
if strings.HasPrefix(mi.Mountpoint, cgroupPath) {
for _, opt := range strings.Split(mi.VFSOptions, ",") {
if opt == subsystem {
return mi.Mountpoint, mi.Root, nil
}
}
}
}
return "", "", NewNotFoundError(subsystem)
}
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", errors.New("no subsystem for mount")
}
return getControllerPath(m.Subsystems[0], cgroups)
}
func getCgroupMountsHelper(ss map[string]bool, mounts []*mountinfo.Info, all bool) ([]Mount, error) {
res := make([]Mount, 0, len(ss))
numFound := 0
for _, mi := range mounts {
m := Mount{
Mountpoint: mi.Mountpoint,
Root: mi.Root,
}
for _, opt := range strings.Split(mi.VFSOptions, ",") {
seen, known := ss[opt]
if !known || (!all && seen) {
continue
}
ss[opt] = true
opt = strings.TrimPrefix(opt, CgroupNamePrefix)
m.Subsystems = append(m.Subsystems, opt)
numFound++
}
if len(m.Subsystems) > 0 || all {
res = append(res, m)
}
if !all && numFound >= len(ss) {
break
}
}
return res, nil
}
func getCgroupMountsV1(all bool) ([]Mount, error) {
mi, err := readCgroupMountinfo()
if err != nil {
return nil, err
}
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
}
allMap := make(map[string]bool)
for s := range allSubsystems {
allMap[s] = false
}
return getCgroupMountsHelper(allMap, mi, all)
}
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
func GetOwnCgroup(subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
}
return getControllerPath(subsystem, cgroups)
}
func GetOwnCgroupPath(subsystem string) (string, error) {
cgroup, err := GetOwnCgroup(subsystem)
if err != nil {
return "", err
}
// If subsystem is empty, we look for the cgroupv2 hybrid path.
if len(subsystem) == 0 {
return hybridMountpoint, nil
}
return getCgroupPathHelper(subsystem, cgroup)
}
func GetInitCgroup(subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
}
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupPath(subsystem string) (string, error) {
cgroup, err := GetInitCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see paths from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}
return filepath.Join(mnt, relCgroup), nil
}
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
if p, ok := cgroups[subsystem]; ok {
return p, nil
}
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
return p, nil
}
return "", NewNotFoundError(subsystem)
}