vendor: update github.com/opencontainers/runc

when the systemd cgroup manager is used, controllers not handled by
systemd are created manually afterwards.
libcontainer didn't correctly cleanup these cgroups that were leaked
on cgroup v1.

Closes: https://github.com/kubernetes/kubernetes/issues/92766

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
Giuseppe Scrivano
2020-07-09 09:37:05 +02:00
parent 896da2253c
commit c655a5b636
44 changed files with 1122 additions and 722 deletions

View File

@@ -23,6 +23,9 @@ func (s *DevicesGroup) Name() string {
}
func (s *DevicesGroup) Apply(d *cgroupData) error {
if d.config.SkipDevices {
return nil
}
_, err := d.join("devices")
if err != nil {
// We will return error even it's `not found` error, devices
@@ -52,7 +55,7 @@ func buildEmulator(rules []*configs.DeviceRule) (*devices.Emulator, error) {
}
func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
if system.RunningInUserNS() {
if system.RunningInUserNS() || cgroup.SkipDevices {
return nil
}

View File

@@ -204,7 +204,7 @@ func (m *manager) Apply(pid int) (err error) {
if err != nil {
// The non-presence of the devices subsystem is
// considered fatal for security reasons.
if cgroups.IsNotFound(err) && sys.Name() != "devices" {
if cgroups.IsNotFound(err) && (c.SkipDevices || sys.Name() != "devices") {
continue
}
return err
@@ -298,7 +298,7 @@ func (m *manager) Set(container *configs.Config) error {
// Freeze toggles the container's freezer cgroup depending on the state
// provided
func (m *manager) Freeze(state configs.FreezerState) (Err error) {
func (m *manager) Freeze(state configs.FreezerState) error {
path := m.Path("freezer")
if m.cgroups == nil || path == "" {
return errors.New("cannot toggle freezer: cgroups not configured for container")
@@ -306,17 +306,9 @@ func (m *manager) Freeze(state configs.FreezerState) (Err error) {
prevState := m.cgroups.Resources.Freezer
m.cgroups.Resources.Freezer = state
defer func() {
if Err != nil {
m.cgroups.Resources.Freezer = prevState
}
}()
freezer, err := m.getSubsystems().Get("freezer")
if err != nil {
return err
}
freezer := &FreezerGroup{}
if err := freezer.Set(path, m.cgroups); err != nil {
m.cgroups.Resources.Freezer = prevState
return err
}
return nil
@@ -359,14 +351,14 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
}
func (raw *cgroupData) path(subsystem string) (string, error) {
mnt, err := cgroups.FindCgroupMountpoint(raw.root, subsystem)
// If we didn't mount the subsystem, there is no point we make the path.
if err != nil {
return "", err
}
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
if filepath.IsAbs(raw.innerPath) {
mnt, err := cgroups.FindCgroupMountpoint(raw.root, subsystem)
// If we didn't mount the subsystem, there is no point we make the path.
if err != nil {
return "", err
}
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
}
@@ -418,13 +410,12 @@ func (m *manager) GetCgroups() (*configs.Cgroup, error) {
func (m *manager) GetFreezerState() (configs.FreezerState, error) {
dir := m.Path("freezer")
freezer, err := m.getSubsystems().Get("freezer")
// If the container doesn't have the freezer cgroup, say it's undefined.
if err != nil || dir == "" {
if dir == "" {
return configs.Undefined, nil
}
return freezer.(*FreezerGroup).GetState(dir)
freezer := &FreezerGroup{}
return freezer.GetState(dir)
}
func (m *manager) Exists() bool {

View File

@@ -37,6 +37,9 @@ func canSkipEBPFError(cgroup *configs.Cgroup) bool {
}
func setDevices(dirPath string, cgroup *configs.Cgroup) error {
if cgroup.SkipDevices {
return nil
}
// XXX: This is currently a white-list (but all callers pass a blacklist of
// devices). This is bad for a whole variety of reasons, but will need
// to be fixed with co-ordinated effort with downstreams.

View File

@@ -27,6 +27,9 @@ var (
versionOnce sync.Once
version int
versionErr error
isRunningSystemdOnce sync.Once
isRunningSystemd bool
)
// NOTE: This function comes from package github.com/coreos/go-systemd/util
@@ -37,11 +40,11 @@ var (
// checks whether /run/systemd/system/ exists and is a directory.
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
func IsRunningSystemd() bool {
fi, err := os.Lstat("/run/systemd/system")
if err != nil {
return false
}
return fi.IsDir()
isRunningSystemdOnce.Do(func() {
fi, err := os.Lstat("/run/systemd/system")
isRunningSystemd = err == nil && fi.IsDir()
})
return isRunningSystemd
}
// systemd represents slice hierarchy using `-`, so we need to follow suit when

View File

@@ -222,7 +222,14 @@ func (m *legacyManager) Destroy() error {
return err
}
unitName := getUnitName(m.cgroups)
if err := stopUnit(dbusConnection, unitName); err != nil {
err = stopUnit(dbusConnection, unitName)
// Both on success and on error, cleanup all the cgroups we are aware of.
// Some of them were created directly by Apply() and are not managed by systemd.
if err := cgroups.RemovePaths(m.paths); err != nil {
return err
}
if err != nil {
return err
}
m.paths = make(map[string]string)
@@ -319,10 +326,7 @@ func (m *legacyManager) Freeze(state configs.FreezerState) error {
}
prevState := m.cgroups.Resources.Freezer
m.cgroups.Resources.Freezer = state
freezer, err := legacySubsystems.Get("freezer")
if err != nil {
return err
}
freezer := &fs.FreezerGroup{}
err = freezer.Set(path, m.cgroups)
if err != nil {
m.cgroups.Resources.Freezer = prevState
@@ -379,24 +383,27 @@ func (m *legacyManager) Set(container *configs.Config) error {
return err
}
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err := m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
// We have to freeze the container while systemd sets the cgroup settings.
// The reason for this is that systemd's application of DeviceAllow rules
// is done disruptively, resulting in spurrious errors to common devices
// (unlike our fs driver, they will happily write deny-all rules to running
// containers). So we freeze the container to avoid them hitting the cgroup
// error. But if the freezer cgroup isn't supported, we just warn about it.
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
targetFreezerState := configs.Undefined
if !m.cgroups.SkipDevices {
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err = m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
}
}
if err := dbusConnection.SetUnitProperties(getUnitName(container.Cgroups), true, properties...); err != nil {
@@ -458,11 +465,8 @@ func (m *legacyManager) GetFreezerState() (configs.FreezerState, error) {
if err != nil && !cgroups.IsNotFound(err) {
return configs.Undefined, err
}
freezer, err := legacySubsystems.Get("freezer")
if err != nil {
return configs.Undefined, err
}
return freezer.(*fs.FreezerGroup).GetState(path)
freezer := &fs.FreezerGroup{}
return freezer.GetState(path)
}
func (m *legacyManager) Exists() bool {

View File

@@ -298,24 +298,27 @@ func (m *unifiedManager) Set(container *configs.Config) error {
return err
}
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err := m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
// We have to freeze the container while systemd sets the cgroup settings.
// The reason for this is that systemd's application of DeviceAllow rules
// is done disruptively, resulting in spurrious errors to common devices
// (unlike our fs driver, they will happily write deny-all rules to running
// containers). So we freeze the container to avoid them hitting the cgroup
// error. But if the freezer cgroup isn't supported, we just warn about it.
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
targetFreezerState := configs.Undefined
if !m.cgroups.SkipDevices {
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err = m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
}
}
if err := dbusConnection.SetUnitProperties(getUnitName(m.cgroups), true, properties...); err != nil {

View File

@@ -126,4 +126,11 @@ type Resources struct {
// CpuWeight sets a proportional bandwidth limit.
CpuWeight uint64 `json:"cpu_weight"`
// SkipDevices allows to skip configuring device permissions.
// Used by e.g. kubelet while creating a parent cgroup (kubepods)
// common for many containers.
//
// NOTE it is impossible to start a container which has this flag set.
SkipDevices bool `json:"skip_devices"`
}

View File

@@ -251,6 +251,9 @@ func (c *linuxContainer) Set(config configs.Config) error {
func (c *linuxContainer) Start(process *Process) error {
c.m.Lock()
defer c.m.Unlock()
if c.config.Cgroups.Resources.SkipDevices {
return newGenericError(errors.New("can't start container with SkipDevices set"), ConfigInvalid)
}
if process.Init {
if err := c.createExecFifo(); err != nil {
return err

View File

@@ -184,6 +184,9 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error {
return err
}
// After we return from here, we don't need the console anymore.
defer pty.Close()
if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 {
err = pty.Resize(console.WinSize{
Height: config.ConsoleHeight,
@@ -195,9 +198,6 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error {
}
}
// After we return from here, we don't need the console anymore.
defer pty.Close()
// Mount the console inside our rootfs.
if mount {
if err := mountConsole(slavePath); err != nil {

View File

@@ -60,7 +60,7 @@ type Group struct {
// groupFromOS converts an os/user.(*Group) to local Group
//
// (This does not include Pass, Shell or Gecos)
// (This does not include Pass or List)
func groupFromOS(g *user.Group) (Group, error) {
newGroup := Group{
Name: g.Name,