
The RPC only reports one field, i.e. the cgroup driver, to kubelet. Containerd determines the effective cgroup driver by examining all runtime handlers, starting from the default runtime handler (the rest in alphabetical order), and returning the cgroup driver setting of the first runtime handler that supports one. If no runtime handler supports a cgroup driver setting (i.e. has a config option for it), containerd falls back to auto-detection, returning systemd if systemd is running and cgroupfs otherwise.

This patch implements the CRI server side of Kubernetes KEP-4033:
https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/4033-group-driver-detection-over-cri

Signed-off-by: Markus Lehtonen <markus.lehtonen@intel.com>
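For illustration, here is a minimal Go sketch of the detection order described above. The runtimeHandler type, its optional driver field, and the /run/systemd/system check are assumptions made for this example, not containerd's actual types or config schema:

package main

import (
	"fmt"
	"os"
	"sort"
)

type cgroupDriver string

const (
	driverCgroupfs cgroupDriver = "cgroupfs"
	driverSystemd  cgroupDriver = "systemd"
)

// runtimeHandler is a hypothetical stand-in for a configured runtime
// handler; driver is nil when the handler has no cgroup driver option.
type runtimeHandler struct {
	driver *cgroupDriver
}

// effectiveCgroupDriver walks the handlers starting from the default one
// (the rest in alphabetical order) and returns the first explicit cgroup
// driver setting, falling back to auto-detection when none is found.
func effectiveCgroupDriver(handlers map[string]runtimeHandler, defaultHandler string) cgroupDriver {
	names := make([]string, 0, len(handlers))
	for name := range handlers {
		if name != defaultHandler {
			names = append(names, name)
		}
	}
	sort.Strings(names)
	names = append([]string{defaultHandler}, names...)

	for _, name := range names {
		if h, ok := handlers[name]; ok && h.driver != nil {
			return *h.driver
		}
	}
	// No handler has an explicit setting: report systemd if systemd is
	// the init system (judged here by the conventional
	// /run/systemd/system check), cgroupfs otherwise.
	if st, err := os.Stat("/run/systemd/system"); err == nil && st.IsDir() {
		return driverSystemd
	}
	return driverCgroupfs
}

func main() {
	systemd := driverSystemd
	handlers := map[string]runtimeHandler{
		"kata": {},                 // no cgroup driver config option
		"runc": {driver: &systemd}, // explicit systemd
	}
	// "kata" sorts before "runc", but the default handler is tried first.
	fmt.Println(effectiveCgroupDriver(handlers, "runc")) // systemd
}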
package fs

import (
	"errors"
	"fmt"
	"os"
	"strings"
	"time"

	"github.com/opencontainers/runc/libcontainer/cgroups"
	"github.com/opencontainers/runc/libcontainer/configs"
	"github.com/sirupsen/logrus"
	"golang.org/x/sys/unix"
)

type FreezerGroup struct{}

func (s *FreezerGroup) Name() string {
	return "freezer"
}

func (s *FreezerGroup) Apply(path string, _ *configs.Resources, pid int) error {
	return apply(path, pid)
}

func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
	switch r.Freezer {
	case configs.Frozen:
		defer func() {
			if Err != nil {
				// Freezing failed, and it is bad and dangerous
				// to leave the cgroup in FROZEN or FREEZING
				// state, so (try to) thaw it back.
				_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
			}
		}()

		// As per older kernel docs (freezer-subsystem.txt before
		// kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
		// userspace should either retry or thaw. While current
		// kernel cgroup v1 docs no longer mention a need to retry,
		// even a recent kernel (v5.4, Ubuntu 20.04) can't reliably
		// freeze a cgroup v1 while new processes keep appearing in it
		// (either via fork/clone or by writing new PIDs to
		// cgroup.procs).
		//
		// The numbers below are empirically chosen to have a decent
		// chance to succeed in various scenarios ("runc pause/unpause
		// with parallel runc exec" and "bare freeze/unfreeze on a very
		// slow system"), tested on RHEL7 and Ubuntu 20.04 kernels.
		//
		// Adding any amount of sleep in between retries did not
		// increase the chances of successful freeze in "pause/unpause
		// with parallel exec" reproducer. OTOH, adding an occasional
		// sleep helped for the case where the system is extremely slow
		// (CentOS 7 VM on GHA CI).
		//
		// Alas, this is still a game of chances, since the real fix
		// belongs to the kernel (cgroup v2 does not have this bug).

		for i := 0; i < 1000; i++ {
			if i%50 == 49 {
				// Occasional thaw and sleep improves
				// the chances to succeed in freezing
				// in case new processes keep appearing
				// in the cgroup.
				_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
				time.Sleep(10 * time.Millisecond)
			}

			if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
				return err
			}

			if i%25 == 24 {
				// Occasional short sleep before reading
				// the state back also improves the chances to
				// succeed in freezing in case of a very slow
				// system.
				time.Sleep(10 * time.Microsecond)
			}
			state, err := cgroups.ReadFile(path, "freezer.state")
			if err != nil {
				return err
			}
			state = strings.TrimSpace(state)
			switch state {
			case "FREEZING":
				continue
			case string(configs.Frozen):
				if i > 1 {
					logrus.Debugf("frozen after %d retries", i)
				}
				return nil
			default:
				// should never happen
				return fmt.Errorf("unexpected state %s while freezing", state)
			}
		}
		// Despite our best efforts, it got stuck in FREEZING.
		return errors.New("unable to freeze")
	case configs.Thawed:
		return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
	case configs.Undefined:
		return nil
	default:
		return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer))
	}
}

func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
	return nil
}

func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
	for {
		state, err := cgroups.ReadFile(path, "freezer.state")
		if err != nil {
			// If the kernel is too old, then we just treat the freezer as
			// being in an "undefined" state.
			if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
				err = nil
			}
			return configs.Undefined, err
		}
		switch strings.TrimSpace(state) {
		case "THAWED":
			return configs.Thawed, nil
		case "FROZEN":
			// Find out whether the cgroup is frozen directly,
			// or indirectly via an ancestor.
			self, err := cgroups.ReadFile(path, "freezer.self_freezing")
			if err != nil {
				// If the kernel is too old, then we just treat
				// it as being frozen.
				if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) {
					err = nil
				}
				return configs.Frozen, err
			}
			switch self {
			case "0\n":
				return configs.Thawed, nil
			case "1\n":
				return configs.Frozen, nil
			default:
				return configs.Undefined, fmt.Errorf(`unknown "freezer.self_freezing" state: %q`, self)
			}
		case "FREEZING":
			// Make sure we get a stable freezer state, so retry if the cgroup
			// is still undergoing freezing. This should be a temporary delay.
			time.Sleep(1 * time.Millisecond)
			continue
		default:
			return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state)
		}
	}
}
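For context, a hedged sketch of how a caller might drive the freezer subsystem above. The cgroup path is hypothetical and must already exist as a cgroup v1 freezer directory; error handling is abbreviated:

package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
	"github.com/opencontainers/runc/libcontainer/configs"
)

func main() {
	// Hypothetical cgroup v1 freezer path, e.g. created beforehand by
	// the caller; all tasks in it are frozen and thawed as a unit.
	const path = "/sys/fs/cgroup/freezer/mycontainer"

	fg := &fs.FreezerGroup{}
	if err := fg.Set(path, &configs.Resources{Freezer: configs.Frozen}); err != nil {
		fmt.Println("freeze failed:", err)
		return
	}
	state, _ := fg.GetState(path)
	fmt.Println("state:", state) // expect configs.Frozen

	// Thaw the cgroup back.
	_ = fg.Set(path, &configs.Resources{Freezer: configs.Thawed})
}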