deps: update runc to 1.1.0

This updates vendored runc/libcontainer to 1.1.0, and google/cadvisor to
a version updated to runc 1.1.0 (google/cadvisor#3048).

Changes in vendor are generated by (roughly):

    ./hack/pin-dependency.sh github.com/google/cadvisor v0.44.0
    ./hack/pin-dependency.sh github.com/opencontainers/runc v1.1.0
    ./hack/update-vendor.sh
    ./hack/lint-dependencies.sh # And follow all its recommendations.
    ./hack/update-vendor.sh
    ./hack/update-internal-modules.sh
    ./hack/lint-dependencies.sh # Re-check everything again.

Co-Authored-By: Kir Kolyshkin <kolyshkin@gmail.com>
2022-03-28 11:32:04 -07:00
parent 41830a1f79
commit 07af1bab70
245 changed files with 6520 additions and 5250 deletions
--- a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go
@@ -3,7 +3,6 @@ package apparmor
 import (
 	"errors"
 	"fmt"
-	"io/ioutil"
 	"os"
 	"sync"

@@ -19,7 +18,7 @@ var (
 func isEnabled() bool {
 	checkAppArmor.Do(func() {
 		if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil {
-			buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
+			buf, err := os.ReadFile("/sys/module/apparmor/parameters/enabled")
 			appArmorEnabled = err == nil && len(buf) > 1 && buf[0] == 'Y'
 		}
 	})
@@ -52,7 +51,7 @@ func setProcAttr(attr, value string) error {
 // changeOnExec reimplements aa_change_onexec from libapparmor in Go
 func changeOnExec(name string) error {
 	if err := setProcAttr("exec", "exec "+name); err != nil {
-		return fmt.Errorf("apparmor failed to apply profile: %s", err)
+		return fmt.Errorf("apparmor failed to apply profile: %w", err)
 	}
 	return nil
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go
@@ -1,3 +1,4 @@
+//go:build !linux
 // +build !linux

 package apparmor
--- a/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go
@@ -1,3 +1,4 @@
+//go:build linux
 // +build linux

 package capabilities
@@ -34,6 +35,17 @@ func init() {
 	}
 }

+// KnownCapabilities returns the name of every capability reported by
+// capability.List, upper-cased and prefixed with "CAP_". Used by `runc features`.
+func KnownCapabilities() []string {
+	list := capability.List()
+	res := make([]string, len(list))
+	for i, c := range list {
+		res[i] = "CAP_" + strings.ToUpper(c.String())
+	}
+	return res
+}
+
 // New creates a new Caps from the given Capabilities config. Unknown Capabilities
 // or Capabilities that are unavailable in the current environment are ignored,
 // printing a warning instead.
--- a/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities_unsupported.go
@@ -1,3 +1,4 @@
+//go:build !linux
 // +build !linux

 package capabilities
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package cgroups

 import (
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go
@@ -1,3 +0,0 @@
-// +build !linux
-
-package cgroups
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go
@@ -1,5 +1,3 @@
-// +build linux
-
 // SPDX-License-Identifier: Apache-2.0
 /*
 * Copyright (C) 2020 Aleksa Sarai <cyphar@cyphar.com>
@@ -22,14 +20,13 @@ package devices

 import (
 	"bufio"
+	"fmt"
 	"io"
-	"regexp"
 	"sort"
 	"strconv"
+	"strings"

 	"github.com/opencontainers/runc/libcontainer/devices"
-
-	"github.com/pkg/errors"
 )

 // deviceMeta is a Rule without the Allow or Permissions fields, and no
@@ -79,19 +76,21 @@ func (e *Emulator) IsAllowAll() bool {
 	return e.IsBlacklist() && len(e.rules) == 0
 }

-var devicesListRegexp = regexp.MustCompile(`^([abc])\s+(\d+|\*):(\d+|\*)\s+([rwm]+)$`)
-
 func parseLine(line string) (*deviceRule, error) {
-	matches := devicesListRegexp.FindStringSubmatch(line)
-	if matches == nil {
-		return nil, errors.Errorf("line doesn't match devices.list format")
+	// Input: node major:minor perms.
+	fields := strings.FieldsFunc(line, func(r rune) bool {
+		return r == ' ' || r == ':'
+	})
+	if len(fields) != 4 {
+		return nil, fmt.Errorf("malformed devices.list rule %s", line)
 	}
+
 	var (
 		rule  deviceRule
-		node  = matches[1]
-		major = matches[2]
-		minor = matches[3]
-		perms = matches[4]
+		node  = fields[0]
+		major = fields[1]
+		minor = fields[2]
+		perms = fields[3]
 	)

 	// Parse the node type.
@@ -107,8 +106,7 @@ func parseLine(line string) (*deviceRule, error) {
 	case "c":
 		rule.meta.node = devices.CharDevice
 	default:
-		// Should never happen!
-		return nil, errors.Errorf("unknown device type %q", node)
+		return nil, fmt.Errorf("unknown device type %q", node)
 	}

 	// Parse the major number.
@@ -117,7 +115,7 @@ func parseLine(line string) (*deviceRule, error) {
 	} else {
 		val, err := strconv.ParseUint(major, 10, 32)
 		if err != nil {
-			return nil, errors.Wrap(err, "parse major number")
+			return nil, fmt.Errorf("invalid major number: %w", err)
 		}
 		rule.meta.major = int64(val)
 	}
@@ -128,7 +126,7 @@ func parseLine(line string) (*deviceRule, error) {
 	} else {
 		val, err := strconv.ParseUint(minor, 10, 32)
 		if err != nil {
-			return nil, errors.Wrap(err, "parse minor number")
+			return nil, fmt.Errorf("invalid minor number: %w", err)
 		}
 		rule.meta.minor = int64(val)
 	}
@@ -136,13 +134,12 @@ func parseLine(line string) (*deviceRule, error) {
 	// Parse the access permissions.
 	rule.perms = devices.Permissions(perms)
 	if !rule.perms.IsValid() || rule.perms.IsEmpty() {
-		// Should never happen!
-		return nil, errors.Errorf("parse access mode: contained unknown modes or is empty: %q", perms)
+		return nil, fmt.Errorf("parse access mode: contained unknown modes or is empty: %q", perms)
 	}
 	return &rule, nil
 }

-func (e *Emulator) addRule(rule deviceRule) error {
+func (e *Emulator) addRule(rule deviceRule) error { //nolint:unparam
 	if e.rules == nil {
 		e.rules = make(map[deviceMeta]devices.Permissions)
 	}
@@ -180,7 +177,7 @@ func (e *Emulator) rmRule(rule deviceRule) error {
 		// Only give an error if the set of permissions overlap.
 		partialPerms := e.rules[partialMeta]
 		if !partialPerms.Intersection(rule.perms).IsEmpty() {
-			return errors.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms)
+			return fmt.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms)
 		}
 	}

@@ -212,9 +209,9 @@ func (e *Emulator) allow(rule *deviceRule) error {

 	var err error
 	if e.defaultAllow {
-		err = errors.Wrap(e.rmRule(*rule), "remove 'deny' exception")
+		err = wrapErr(e.rmRule(*rule), "unable to remove 'deny' exception")
 	} else {
-		err = errors.Wrap(e.addRule(*rule), "add 'allow' exception")
+		err = wrapErr(e.addRule(*rule), "unable to add 'allow' exception")
 	}
 	return err
 }
@@ -232,16 +229,16 @@ func (e *Emulator) deny(rule *deviceRule) error {

 	var err error
 	if e.defaultAllow {
-		err = errors.Wrap(e.addRule(*rule), "add 'deny' exception")
+		err = wrapErr(e.addRule(*rule), "unable to add 'deny' exception")
 	} else {
-		err = errors.Wrap(e.rmRule(*rule), "remove 'allow' exception")
+		err = wrapErr(e.rmRule(*rule), "unable to remove 'allow' exception")
 	}
 	return err
 }

 func (e *Emulator) Apply(rule devices.Rule) error {
 	if !rule.Type.CanCgroup() {
-		return errors.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type)
+		return fmt.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type)
 	}

 	innerRule := &deviceRule{
@@ -283,17 +280,17 @@ func EmulatorFromList(list io.Reader) (*Emulator, error) {
 		line := s.Text()
 		deviceRule, err := parseLine(line)
 		if err != nil {
-			return nil, errors.Wrapf(err, "parsing line %q", line)
+			return nil, fmt.Errorf("error parsing line %q: %w", line, err)
 		}
 		// "devices.list" is an allow list. Note that this means that in
 		// black-list mode, we have no idea what rules are in play. As a
 		// result, we need to be very careful in Transition().
 		if err := e.allow(deviceRule); err != nil {
-			return nil, errors.Wrapf(err, "adding devices.list rule")
+			return nil, fmt.Errorf("error adding devices.list rule: %w", err)
 		}
 	}
 	if err := s.Err(); err != nil {
-		return nil, errors.Wrap(err, "reading devices.list lines")
+		return nil, fmt.Errorf("error reading devices.list lines: %w", err)
 	}
 	return e, nil
 }
@@ -305,7 +302,7 @@ func EmulatorFromList(list io.Reader) (*Emulator, error) {
 // necessary.
 //
 // This function is the sole reason for all of Emulator -- to allow us
-// to figure out how to update a containers' cgroups without causing spurrious
+// to figure out how to update a containers' cgroups without causing spurious
 // device errors (if possible).
 func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) {
 	var transitionRules []*devices.Rule
@@ -380,3 +377,10 @@ func (e *Emulator) Rules() ([]*devices.Rule, error) {
 	defaultCgroup := &Emulator{defaultAllow: false}
 	return defaultCgroup.Transition(e)
 }
+
+func wrapErr(err error, text string) error { // prefixes a non-nil err with text, keeping it unwrappable via %w
+	if err == nil {
+		return nil // pass nil through so callers can wrap a call's result unconditionally
+	}
+	return fmt.Errorf(text+": %w", err)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
@@ -7,13 +7,14 @@
 package devicefilter

 import (
+	"errors"
+	"fmt"
 	"math"
 	"strconv"

 	"github.com/cilium/ebpf/asm"
 	devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
 	"github.com/opencontainers/runc/libcontainer/devices"
-	"github.com/pkg/errors"
 	"golang.org/x/sys/unix"
 )

@@ -51,21 +52,20 @@ func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
 			// only be one (at most) at the very start to instruct cgroupv1 to
 			// go into allow-list mode. However we do double-check this here.
 			if idx != 0 || rule.Allow != emu.IsBlacklist() {
-				return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
+				return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
 			}
 			continue
 		}
 		if rule.Allow == p.defaultAllow {
 			// There should be no rules which have an action equal to the
 			// default action, the emulator removes those.
-			return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
+			return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
 		}
 		if err := p.appendRule(rule); err != nil {
 			return nil, "", err
 		}
 	}
-	insts, err := p.finalize()
-	return insts, license, err
+	return p.finalize(), license, nil
 }

 type program struct {
@@ -118,13 +118,13 @@ func (p *program) appendRule(rule *devices.Rule) error {
 		bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
 	default:
 		// We do not permit 'a', nor any other types we don't know about.
-		return errors.Errorf("invalid type %q", string(rule.Type))
+		return fmt.Errorf("invalid type %q", string(rule.Type))
 	}
 	if rule.Major > math.MaxUint32 {
-		return errors.Errorf("invalid major %d", rule.Major)
+		return fmt.Errorf("invalid major %d", rule.Major)
 	}
 	if rule.Minor > math.MaxUint32 {
-		return errors.Errorf("invalid minor %d", rule.Major)
+		return fmt.Errorf("invalid minor %d", rule.Major)
 	}
 	hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
 	hasMinor := rule.Minor >= 0
@@ -138,7 +138,7 @@ func (p *program) appendRule(rule *devices.Rule) error {
 		case 'm':
 			bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
 		default:
-			return errors.Errorf("unknown device access %v", r)
+			return fmt.Errorf("unknown device access %v", r)
 		}
 	}
 	// If the access is rwm, skip the check.
@@ -180,7 +180,7 @@ func (p *program) appendRule(rule *devices.Rule) error {
 	return nil
 }

-func (p *program) finalize() (asm.Instructions, error) {
+func (p *program) finalize() asm.Instructions {
 	var v int32
 	if p.defaultAllow {
 		v = 1
@@ -192,7 +192,7 @@ func (p *program) finalize() (asm.Instructions, error) {
 		asm.Return(),
 	)
 	p.blockID = -1
-	return p.insts, nil
+	return p.insts
 }

 func acceptBlock(accept bool) asm.Instructions {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go
@@ -1,6 +1,7 @@
 package ebpf

 import (
+	"errors"
 	"fmt"
 	"os"
 	"runtime"
@@ -10,7 +11,6 @@ import (
 	"github.com/cilium/ebpf"
 	"github.com/cilium/ebpf/asm"
 	"github.com/cilium/ebpf/link"
-	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )
@@ -134,7 +134,7 @@ func haveBpfProgReplace() bool {
 			// not supported
 			return
 		}
-		// attach_flags test succeded.
+		// attach_flags test succeeded.
 		if !errors.Is(err, unix.EBADF) {
 			logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err)
 		}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
@@ -2,20 +2,27 @@ package cgroups

 import (
 	"bytes"
+	"errors"
+	"fmt"
 	"os"
+	"path"
+	"strconv"
 	"strings"
 	"sync"

-	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )

 // OpenFile opens a cgroup file in a given dir with given flags.
-// It is supposed to be used for cgroup files only.
+// It is supposed to be used for cgroup files only, and returns
+// an error if the file is not a cgroup file.
+//
+// Arguments dir and file are joined together to form an absolute path
+// to a file being opened.
 func OpenFile(dir, file string, flags int) (*os.File, error) {
 	if dir == "" {
-		return nil, errors.Errorf("no directory specified for %s", file)
+		return nil, fmt.Errorf("no directory specified for %s", file)
 	}
 	return openFile(dir, file, flags)
 }
@@ -43,7 +50,8 @@ func WriteFile(dir, file, data string) error {
 	}
 	defer fd.Close()
 	if err := retryingWriteFile(fd, data); err != nil {
-		return errors.Wrapf(err, "failed to write %q", data)
+		// Having data in the error message helps in debugging.
+		return fmt.Errorf("failed to write %q: %w", data, err)
 	}
 	return nil
 }
@@ -81,7 +89,7 @@ func prepareOpenat2() error {
 		})
 		if err != nil {
 			prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
-			if err != unix.ENOSYS {
+			if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare
 				logrus.Warnf("falling back to securejoin: %s", prepErr)
 			} else {
 				logrus.Debug("openat2 not available, falling back to securejoin")
@@ -107,8 +115,6 @@ func prepareOpenat2() error {
 	return prepErr
 }

-// OpenFile opens a cgroup file in a given dir with given flags.
-// It is supposed to be used for cgroup files only.
 func openFile(dir, file string, flags int) (*os.File, error) {
 	mode := os.FileMode(0)
 	if TestMode && flags&os.O_WRONLY != 0 {
@@ -116,34 +122,52 @@ func openFile(dir, file string, flags int) (*os.File, error) {
 		flags |= os.O_TRUNC | os.O_CREATE
 		mode = 0o600
 	}
+	path := path.Join(dir, file)
 	if prepareOpenat2() != nil {
-		return openFallback(dir, file, flags, mode)
+		return openFallback(path, flags, mode)
 	}
-	reldir := strings.TrimPrefix(dir, cgroupfsPrefix)
-	if len(reldir) == len(dir) { // non-standard path, old system?
-		return openFallback(dir, file, flags, mode)
+	relPath := strings.TrimPrefix(path, cgroupfsPrefix)
+	if len(relPath) == len(path) { // non-standard path, old system?
+		return openFallback(path, flags, mode)
 	}

-	relname := reldir + "/" + file
-	fd, err := unix.Openat2(cgroupFd, relname,
+	fd, err := unix.Openat2(cgroupFd, relPath,
 		&unix.OpenHow{
 			Resolve: resolveFlags,
 			Flags:   uint64(flags) | unix.O_CLOEXEC,
 			Mode:    uint64(mode),
 		})
 	if err != nil {
-		return nil, &os.PathError{Op: "openat2", Path: dir + "/" + file, Err: err}
+		err = &os.PathError{Op: "openat2", Path: path, Err: err}
+		// Check if cgroupFd is still opened to cgroupfsDir
+		// (happens when this package is incorrectly used
+		// across the chroot/pivot_root/mntns boundary, or
+		// when /sys/fs/cgroup is remounted).
+		//
+		// TODO: if such usage will ever be common, amend this
+		// to reopen cgroupFd and retry openat2.
+		fdStr := strconv.Itoa(cgroupFd)
+		fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
+		if fdDest != cgroupfsDir {
+			// Wrap the error so it is clear that cgroupFd
+			// is opened to an unexpected/wrong directory.
+			err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w",
+				fdStr, fdDest, cgroupfsDir, err)
+		}
+		return nil, err
 	}

-	return os.NewFile(uintptr(fd), cgroupfsPrefix+relname), nil
+	return os.NewFile(uintptr(fd), path), nil
 }

 var errNotCgroupfs = errors.New("not a cgroup file")

-// openFallback is used when openat2(2) is not available. It checks the opened
+// Can be changed by unit tests.
+var openFallback = openAndCheck
+
+// openAndCheck is used when openat2(2) is not available. It checks the opened
 // file is on cgroupfs, returning an error otherwise.
-func openFallback(dir, file string, flags int, mode os.FileMode) (*os.File, error) {
-	path := dir + "/" + file
+func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) {
 	fd, err := os.OpenFile(path, flags, mode)
 	if err != nil {
 		return nil, err
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
@@ -1,10 +1,7 @@
-// +build linux
-
 package fs

 import (
 	"bufio"
-	"fmt"
 	"os"
 	"path/filepath"
 	"strconv"
@@ -23,8 +20,8 @@ func (s *BlkioGroup) Name() string {
 	return "blkio"
 }

-func (s *BlkioGroup) Apply(path string, d *cgroupData) error {
-	return join(path, d.pid)
+func (s *BlkioGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
 }

 func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
@@ -131,19 +128,19 @@ func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
 				// skip total line
 				continue
 			} else {
-				return nil, fmt.Errorf("Invalid line found while parsing %s/%s: %s", dir, file, sc.Text())
+				return nil, malformedLine(dir, file, sc.Text())
 			}
 		}

 		v, err := strconv.ParseUint(fields[0], 10, 64)
 		if err != nil {
-			return nil, err
+			return nil, &parseError{Path: dir, File: file, Err: err}
 		}
 		major := v

 		v, err = strconv.ParseUint(fields[1], 10, 64)
 		if err != nil {
-			return nil, err
+			return nil, &parseError{Path: dir, File: file, Err: err}
 		}
 		minor := v

@@ -155,10 +152,13 @@ func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
 		}
 		v, err = strconv.ParseUint(fields[valueField], 10, 64)
 		if err != nil {
-			return nil, err
+			return nil, &parseError{Path: dir, File: file, Err: err}
 		}
 		blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
 	}
+	if err := sc.Err(); err != nil {
+		return nil, &parseError{Path: dir, File: file, Err: err}
+	}

 	return blkioStats, nil
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package fs

 import (
@@ -21,24 +19,19 @@ func (s *CpuGroup) Name() string {
 	return "cpu"
 }

-func (s *CpuGroup) Apply(path string, d *cgroupData) error {
-	// This might happen if we have no cpu cgroup mounted.
-	// Just do nothing and don't fail.
-	if path == "" {
-		return nil
-	}
+func (s *CpuGroup) Apply(path string, r *configs.Resources, pid int) error {
 	if err := os.MkdirAll(path, 0o755); err != nil {
 		return err
 	}
 	// We should set the real-Time group scheduling settings before moving
 	// in the process because if the process is already in SCHED_RR mode
 	// and no RT bandwidth is set, adding it will fail.
-	if err := s.SetRtSched(path, d.config.Resources); err != nil {
+	if err := s.SetRtSched(path, r); err != nil {
 		return err
 	}
-	// Since we are not using join(), we need to place the pid
-	// into the procs file unlike other subsystems.
-	return cgroups.WriteCgroupProc(path, d.pid)
+	// Since we are not using apply(), we need to place the pid
+	// into the procs file.
+	return cgroups.WriteCgroupProc(path, pid)
 }

 func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
@@ -105,7 +98,8 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error {
 }

 func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
-	f, err := cgroups.OpenFile(path, "cpu.stat", os.O_RDONLY)
+	const file = "cpu.stat"
+	f, err := cgroups.OpenFile(path, file, os.O_RDONLY)
 	if err != nil {
 		if os.IsNotExist(err) {
 			return nil
@@ -118,7 +112,7 @@ func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
 	for sc.Scan() {
 		t, v, err := fscommon.ParseKeyValue(sc.Text())
 		if err != nil {
-			return err
+			return &parseError{Path: path, File: file, Err: err}
 		}
 		switch t {
 		case "nr_periods":
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
@@ -1,12 +1,8 @@
-// +build linux
-
 package fs

 import (
 	"bufio"
-	"fmt"
 	"os"
-	"path/filepath"
 	"strconv"
 	"strings"

@@ -38,8 +34,8 @@ func (s *CpuacctGroup) Name() string {
 	return "cpuacct"
 }

-func (s *CpuacctGroup) Apply(path string, d *cgroupData) error {
-	return join(path, d.pid)
+func (s *CpuacctGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
 }

 func (s *CpuacctGroup) Set(_ string, _ *configs.Resources) error {
@@ -85,45 +81,43 @@ func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
 	const (
 		userField   = "user"
 		systemField = "system"
+		file        = cgroupCpuacctStat
 	)

 	// Expected format:
 	// user <usage in ticks>
 	// system <usage in ticks>
-	data, err := cgroups.ReadFile(path, cgroupCpuacctStat)
+	data, err := cgroups.ReadFile(path, file)
 	if err != nil {
 		return 0, 0, err
 	}
+	// TODO: use strings.SplitN instead.
 	fields := strings.Fields(data)
-	if len(fields) < 4 {
-		return 0, 0, fmt.Errorf("failure - %s is expected to have at least 4 fields", filepath.Join(path, cgroupCpuacctStat))
-	}
-	if fields[0] != userField {
-		return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField)
-	}
-	if fields[2] != systemField {
-		return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField)
+	if len(fields) < 4 || fields[0] != userField || fields[2] != systemField {
+		return 0, 0, malformedLine(path, file, data)
 	}
 	if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
-		return 0, 0, err
+		return 0, 0, &parseError{Path: path, File: file, Err: err}
 	}
 	if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
-		return 0, 0, err
+		return 0, 0, &parseError{Path: path, File: file, Err: err}
 	}

 	return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
 }

 func getPercpuUsage(path string) ([]uint64, error) {
+	const file = "cpuacct.usage_percpu"
 	percpuUsage := []uint64{}
-	data, err := cgroups.ReadFile(path, "cpuacct.usage_percpu")
+	data, err := cgroups.ReadFile(path, file)
 	if err != nil {
 		return percpuUsage, err
 	}
+	// TODO: use strings.SplitN instead.
 	for _, value := range strings.Fields(data) {
 		value, err := strconv.ParseUint(value, 10, 64)
 		if err != nil {
-			return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err)
+			return percpuUsage, &parseError{Path: path, File: file, Err: err}
 		}
 		percpuUsage = append(percpuUsage, value)
 	}
@@ -133,16 +127,17 @@ func getPercpuUsage(path string) ([]uint64, error) {
 func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
 	usageKernelMode := []uint64{}
 	usageUserMode := []uint64{}
+	const file = cgroupCpuacctUsageAll

-	file, err := cgroups.OpenFile(path, cgroupCpuacctUsageAll, os.O_RDONLY)
+	fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
 	if os.IsNotExist(err) {
 		return usageKernelMode, usageUserMode, nil
 	} else if err != nil {
 		return nil, nil, err
 	}
-	defer file.Close()
+	defer fd.Close()

-	scanner := bufio.NewScanner(file)
+	scanner := bufio.NewScanner(fd)
 	scanner.Scan() // skipping header line

 	for scanner.Scan() {
@@ -153,19 +148,18 @@ func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {

 		usageInKernelMode, err := strconv.ParseUint(lineFields[kernelModeColumn], 10, 64)
 		if err != nil {
-			return nil, nil, fmt.Errorf("Unable to convert CPU usage in kernel mode to uint64: %s", err)
+			return nil, nil, &parseError{Path: path, File: file, Err: err}
 		}
 		usageKernelMode = append(usageKernelMode, usageInKernelMode)

 		usageInUserMode, err := strconv.ParseUint(lineFields[userModeColumn], 10, 64)
 		if err != nil {
-			return nil, nil, fmt.Errorf("Unable to convert CPU usage in user mode to uint64: %s", err)
+			return nil, nil, &parseError{Path: path, File: file, Err: err}
 		}
 		usageUserMode = append(usageUserMode, usageInUserMode)
 	}
-
 	if err := scanner.Err(); err != nil {
-		return nil, nil, fmt.Errorf("Problem in reading %s line by line, %s", cgroupCpuacctUsageAll, err)
+		return nil, nil, &parseError{Path: path, File: file, Err: err}
 	}

 	return usageKernelMode, usageUserMode, nil
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
@@ -1,19 +1,17 @@
-// +build linux
-
 package fs

 import (
-	"fmt"
+	"errors"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"

+	"golang.org/x/sys/unix"
+
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/pkg/errors"
-	"golang.org/x/sys/unix"
 )

 type CpusetGroup struct{}
@@ -22,8 +20,8 @@ func (s *CpusetGroup) Name() string {
 	return "cpuset"
 }

-func (s *CpusetGroup) Apply(path string, d *cgroupData) error {
-	return s.ApplyDir(path, d.config.Resources, d.pid)
+func (s *CpusetGroup) Apply(path string, r *configs.Resources, pid int) error {
+	return s.ApplyDir(path, r, pid)
 }

 func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
@@ -40,32 +38,32 @@ func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
 	return nil
 }

-func getCpusetStat(path string, filename string) ([]uint16, error) {
+func getCpusetStat(path string, file string) ([]uint16, error) {
 	var extracted []uint16
-	fileContent, err := fscommon.GetCgroupParamString(path, filename)
+	fileContent, err := fscommon.GetCgroupParamString(path, file)
 	if err != nil {
 		return extracted, err
 	}
 	if len(fileContent) == 0 {
-		return extracted, fmt.Errorf("%s found to be empty", filepath.Join(path, filename))
+		return extracted, &parseError{Path: path, File: file, Err: errors.New("empty file")}
 	}

 	for _, s := range strings.Split(fileContent, ",") {
-		splitted := strings.SplitN(s, "-", 3)
-		switch len(splitted) {
+		sp := strings.SplitN(s, "-", 3)
+		switch len(sp) {
 		case 3:
-			return extracted, fmt.Errorf("invalid values in %s", filepath.Join(path, filename))
+			return extracted, &parseError{Path: path, File: file, Err: errors.New("extra dash")}
 		case 2:
-			min, err := strconv.ParseUint(splitted[0], 10, 16)
+			min, err := strconv.ParseUint(sp[0], 10, 16)
 			if err != nil {
-				return extracted, err
+				return extracted, &parseError{Path: path, File: file, Err: err}
 			}
-			max, err := strconv.ParseUint(splitted[1], 10, 16)
+			max, err := strconv.ParseUint(sp[1], 10, 16)
 			if err != nil {
-				return extracted, err
+				return extracted, &parseError{Path: path, File: file, Err: err}
 			}
 			if min > max {
-				return extracted, fmt.Errorf("invalid values in %s", filepath.Join(path, filename))
+				return extracted, &parseError{Path: path, File: file, Err: errors.New("invalid values, min > max")}
 			}
 			for i := min; i <= max; i++ {
 				extracted = append(extracted, uint16(i))
@@ -73,7 +71,7 @@ func getCpusetStat(path string, filename string) ([]uint16, error) {
 		case 1:
 			value, err := strconv.ParseUint(s, 10, 16)
 			if err != nil {
-				return extracted, err
+				return extracted, &parseError{Path: path, File: file, Err: err}
 			}
 			extracted = append(extracted, uint16(value))
 		}
@@ -168,9 +166,8 @@ func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error
 	if err := s.ensureCpusAndMems(dir, r); err != nil {
 		return err
 	}
-
-	// because we are not using d.join we need to place the pid into the procs file
-	// unlike the other subsystems
+	// Since we are not using apply(), we need to place the pid
+	// into the procs file.
 	return cgroups.WriteCgroupProc(dir, pid)
 }

@@ -198,7 +195,7 @@ func cpusetEnsureParent(current string) error {
 	}
 	// Treat non-existing directory as cgroupfs as it will be created,
 	// and the root cpuset directory obviously exists.
-	if err != nil && err != unix.ENOENT {
+	if err != nil && err != unix.ENOENT { //nolint:errorlint // unix errors are bare
 		return &os.PathError{Op: "statfs", Path: parent, Err: err}
 	}

@@ -224,12 +221,12 @@ func cpusetCopyIfNeeded(current, parent string) error {
 	}

 	if isEmptyCpuset(currentCpus) {
-		if err := cgroups.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
+		if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil {
 			return err
 		}
 	}
 	if isEmptyCpuset(currentMems) {
-		if err := cgroups.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
+		if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil {
 			return err
 		}
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package fs

 import (
@@ -15,15 +13,15 @@ import (
 )

 type DevicesGroup struct {
-	testingSkipFinalCheck bool
+	TestingSkipFinalCheck bool
 }

 func (s *DevicesGroup) Name() string {
 	return "devices"
 }

-func (s *DevicesGroup) Apply(path string, d *cgroupData) error {
-	if d.config.SkipDevices {
+func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error {
+	if r.SkipDevices {
 		return nil
 	}
 	if path == "" {
@@ -31,7 +29,8 @@ func (s *DevicesGroup) Apply(path string, d *cgroupData) error {
 		// is a hard requirement for container's security.
 		return errSubsystemDoesNotExist
 	}
-	return join(path, d.pid)
+
+	return apply(path, pid)
 }

 func loadEmulator(path string) (*cgroupdevices.Emulator, error) {
@@ -91,7 +90,7 @@ func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
 	//
 	// This safety-check is skipped for the unit tests because we cannot
 	// currently mock devices.list correctly.
-	if !s.testingSkipFinalCheck {
+	if !s.TestingSkipFinalCheck {
 		currentAfter, err := loadEmulator(path)
 		if err != nil {
 			return err
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
@@ -0,0 +1,15 @@
+package fs
+
+import (
+	"fmt"
+
+	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+)
+
+type parseError = fscommon.ParseError
+
+// malformedLine is used by all cgroupfs file parsers that expect a line
+// in a particular format but get some garbage instead.
+func malformedLine(path, file, line string) error {
+	return &parseError{Path: path, File: file, Err: fmt.Errorf("malformed line: %s", line)}
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package fs

 import (
@@ -21,8 +19,8 @@ func (s *FreezerGroup) Name() string {
 	return "freezer"
 }

-func (s *FreezerGroup) Apply(path string, d *cgroupData) error {
-	return join(path, d.pid)
+func (s *FreezerGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
 }

 func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
@@ -1,165 +1,86 @@
-// +build linux
-
 package fs

 import (
+	"errors"
 	"fmt"
 	"os"
-	"path/filepath"
 	"sync"

+	"golang.org/x/sys/unix"
+
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
-	libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
-	"github.com/pkg/errors"
-	"golang.org/x/sys/unix"
 )

-var (
-	subsystems = []subsystem{
-		&CpusetGroup{},
-		&DevicesGroup{},
-		&MemoryGroup{},
-		&CpuGroup{},
-		&CpuacctGroup{},
-		&PidsGroup{},
-		&BlkioGroup{},
-		&HugetlbGroup{},
-		&NetClsGroup{},
-		&NetPrioGroup{},
-		&PerfEventGroup{},
-		&FreezerGroup{},
-		&NameGroup{GroupName: "name=systemd", Join: true},
-	}
-	HugePageSizes, _ = cgroups.GetHugePageSize()
-)
+var subsystems = []subsystem{
+	&CpusetGroup{},
+	&DevicesGroup{},
+	&MemoryGroup{},
+	&CpuGroup{},
+	&CpuacctGroup{},
+	&PidsGroup{},
+	&BlkioGroup{},
+	&HugetlbGroup{},
+	&NetClsGroup{},
+	&NetPrioGroup{},
+	&PerfEventGroup{},
+	&FreezerGroup{},
+	&RdmaGroup{},
+	&NameGroup{GroupName: "name=systemd", Join: true},
+}

 var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")

+func init() {
+	// If using cgroups-hybrid mode then add a "" controller indicating
+	// it should join the cgroups v2.
+	if cgroups.IsCgroup2HybridMode() {
+		subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true})
+	}
+}
+
 type subsystem interface {
 	// Name returns the name of the subsystem.
 	Name() string
-	// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
+	// GetStats fills in the stats for the subsystem.
 	GetStats(path string, stats *cgroups.Stats) error
-	// Creates and joins the cgroup represented by 'cgroupData'.
-	Apply(path string, c *cgroupData) error
+	// Apply creates and joins a cgroup, adding pid into it. Some
+	// subsystems use resources to pre-configure the cgroup parents
+	// before creating or joining it.
+	Apply(path string, r *configs.Resources, pid int) error
 	// Set sets the cgroup resources.
 	Set(path string, r *configs.Resources) error
 }

 type manager struct {
-	mu       sync.Mutex
-	cgroups  *configs.Cgroup
-	rootless bool // ignore permission-related errors
-	paths    map[string]string
+	mu      sync.Mutex
+	cgroups *configs.Cgroup
+	paths   map[string]string
 }

-func NewManager(cg *configs.Cgroup, paths map[string]string, rootless bool) cgroups.Manager {
+func NewManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
+	// Some v1 controllers (cpu, cpuset, and devices) expect
+	// configs.Resources to not be nil in Apply.
+	if cg.Resources == nil {
+		return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation")
+	}
+	if cg.Resources.Unified != nil {
+		return nil, cgroups.ErrV1NoUnified
+	}
+
+	if paths == nil {
+		var err error
+		paths, err = initPaths(cg)
+		if err != nil {
+			return nil, err
+		}
+	}
+
 	return &manager{
-		cgroups:  cg,
-		paths:    paths,
-		rootless: rootless,
-	}
-}
-
-// The absolute path to the root of the cgroup hierarchies.
-var (
-	cgroupRootLock sync.Mutex
-	cgroupRoot     string
-)
-
-const defaultCgroupRoot = "/sys/fs/cgroup"
-
-func tryDefaultCgroupRoot() string {
-	var st, pst unix.Stat_t
-
-	// (1) it should be a directory...
-	err := unix.Lstat(defaultCgroupRoot, &st)
-	if err != nil || st.Mode&unix.S_IFDIR == 0 {
-		return ""
-	}
-
-	// (2) ... and a mount point ...
-	err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
-	if err != nil {
-		return ""
-	}
-
-	if st.Dev == pst.Dev {
-		// parent dir has the same dev -- not a mount point
-		return ""
-	}
-
-	// (3) ... of 'tmpfs' fs type.
-	var fst unix.Statfs_t
-	err = unix.Statfs(defaultCgroupRoot, &fst)
-	if err != nil || fst.Type != unix.TMPFS_MAGIC {
-		return ""
-	}
-
-	// (4) it should have at least 1 entry ...
-	dir, err := os.Open(defaultCgroupRoot)
-	if err != nil {
-		return ""
-	}
-	names, err := dir.Readdirnames(1)
-	if err != nil {
-		return ""
-	}
-	if len(names) < 1 {
-		return ""
-	}
-	// ... which is a cgroup mount point.
-	err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
-	if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
-		return ""
-	}
-
-	return defaultCgroupRoot
-}
-
-// Gets the cgroupRoot.
-func getCgroupRoot() (string, error) {
-	cgroupRootLock.Lock()
-	defer cgroupRootLock.Unlock()
-
-	if cgroupRoot != "" {
-		return cgroupRoot, nil
-	}
-
-	// fast path
-	cgroupRoot = tryDefaultCgroupRoot()
-	if cgroupRoot != "" {
-		return cgroupRoot, nil
-	}
-
-	// slow path: parse mountinfo
-	mi, err := cgroups.GetCgroupMounts(false)
-	if err != nil {
-		return "", err
-	}
-	if len(mi) < 1 {
-		return "", errors.New("no cgroup mount found in mountinfo")
-	}
-
-	// Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"),
-	// use its parent directory.
-	root := filepath.Dir(mi[0].Mountpoint)
-
-	if _, err := os.Stat(root); err != nil {
-		return "", err
-	}
-
-	cgroupRoot = root
-	return cgroupRoot, nil
-}
-
-type cgroupData struct {
-	root      string
-	innerPath string
-	config    *configs.Cgroup
-	pid       int
+		cgroups: cg,
+		paths:   paths,
+	}, nil
 }

 // isIgnorableError returns whether err is a permission error (in the loose
@@ -171,8 +92,6 @@ func isIgnorableError(rootless bool, err error) bool {
 	if !rootless {
 		return false
 	}
-	// TODO: rm errors.Cause once we switch to %w everywhere
-	err = errors.Cause(err)
 	// Is it an ordinary EPERM?
 	if errors.Is(err, os.ErrPermission) {
 		return true
@@ -186,56 +105,30 @@ func isIgnorableError(rootless bool, err error) bool {
 }

 func (m *manager) Apply(pid int) (err error) {
-	if m.cgroups == nil {
-		return nil
-	}
 	m.mu.Lock()
 	defer m.mu.Unlock()

 	c := m.cgroups
-	if c.Resources.Unified != nil {
-		return cgroups.ErrV1NoUnified
-	}
-
-	m.paths = make(map[string]string)
-	if c.Paths != nil {
-		cgMap, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
-		if err != nil {
-			return err
-		}
-		for name, path := range c.Paths {
-			// XXX(kolyshkin@): why this check is needed?
-			if _, ok := cgMap[name]; ok {
-				m.paths[name] = path
-			}
-		}
-		return cgroups.EnterPid(m.paths, pid)
-	}
-
-	d, err := getCgroupData(m.cgroups, pid)
-	if err != nil {
-		return err
-	}

 	for _, sys := range subsystems {
-		p, err := d.path(sys.Name())
-		if err != nil {
-			// The non-presence of the devices subsystem is
-			// considered fatal for security reasons.
-			if cgroups.IsNotFound(err) && (c.SkipDevices || sys.Name() != "devices") {
-				continue
-			}
-			return err
+		name := sys.Name()
+		p, ok := m.paths[name]
+		if !ok {
+			continue
 		}
-		m.paths[sys.Name()] = p

-		if err := sys.Apply(p, d); err != nil {
+		if err := sys.Apply(p, c.Resources, pid); err != nil {
 			// In the case of rootless (including euid=0 in userns), where an
 			// explicit cgroup path hasn't been set, we don't bail on error in
-			// case of permission problems. Cases where limits have been set
-			// (and we couldn't create our own cgroup) are handled by Set.
-			if isIgnorableError(m.rootless, err) && m.cgroups.Path == "" {
-				delete(m.paths, sys.Name())
+			// case of permission problems here, but do delete the path from
+			// the m.paths map, since it is either non-existent and could not
+			// be created, or the pid could not be added to it.
+			//
+			// Cases where limits for the subsystem have been set are handled
+			// later by Set, which fails with a friendly error (see
+			// if path == "" in Set).
+			if isIgnorableError(c.Rootless, err) && c.Path == "" {
+				delete(m.paths, name)
 				continue
 			}
 			return err
@@ -246,9 +139,6 @@ func (m *manager) Apply(pid int) (err error) {
 }

 func (m *manager) Destroy() error {
-	if m.cgroups == nil || m.cgroups.Paths != nil {
-		return nil
-	}
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	return cgroups.RemovePaths(m.paths)
@@ -281,11 +171,6 @@ func (m *manager) Set(r *configs.Resources) error {
 		return nil
 	}

-	// If Paths are set, then we are just joining cgroups paths
-	// and there is no need to set any values.
-	if m.cgroups != nil && m.cgroups.Paths != nil {
-		return nil
-	}
 	if r.Unified != nil {
 		return cgroups.ErrV1NoUnified
 	}
@@ -295,10 +180,11 @@ func (m *manager) Set(r *configs.Resources) error {
 	for _, sys := range subsystems {
 		path := m.paths[sys.Name()]
 		if err := sys.Set(path, r); err != nil {
-			if m.rootless && sys.Name() == "devices" {
+			// When rootless is true, errors from the device subsystem
+			// are ignored, as it is really not expected to work.
+			if m.cgroups.Rootless && sys.Name() == "devices" {
 				continue
 			}
-			// When m.rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
 			// However, errors from other subsystems are not ignored.
 			// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
 			if path == "" {
@@ -317,7 +203,7 @@ func (m *manager) Set(r *configs.Resources) error {
 // provided
 func (m *manager) Freeze(state configs.FreezerState) error {
 	path := m.Path("freezer")
-	if m.cgroups == nil || path == "" {
+	if path == "" {
 		return errors.New("cannot toggle freezer: cgroups not configured for container")
 	}

@@ -339,68 +225,6 @@ func (m *manager) GetAllPids() ([]int, error) {
 	return cgroups.GetAllPids(m.Path("devices"))
 }

-func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
-	root, err := getCgroupRoot()
-	if err != nil {
-		return nil, err
-	}
-
-	if (c.Name != "" || c.Parent != "") && c.Path != "" {
-		return nil, errors.New("cgroup: either Path or Name and Parent should be used")
-	}
-
-	// XXX: Do not remove this code. Path safety is important! -- cyphar
-	cgPath := libcontainerUtils.CleanPath(c.Path)
-	cgParent := libcontainerUtils.CleanPath(c.Parent)
-	cgName := libcontainerUtils.CleanPath(c.Name)
-
-	innerPath := cgPath
-	if innerPath == "" {
-		innerPath = filepath.Join(cgParent, cgName)
-	}
-
-	return &cgroupData{
-		root:      root,
-		innerPath: innerPath,
-		config:    c,
-		pid:       pid,
-	}, nil
-}
-
-func (raw *cgroupData) path(subsystem string) (string, error) {
-	// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
-	if filepath.IsAbs(raw.innerPath) {
-		mnt, err := cgroups.FindCgroupMountpoint(raw.root, subsystem)
-		// If we didn't mount the subsystem, there is no point we make the path.
-		if err != nil {
-			return "", err
-		}
-
-		// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
-		return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
-	}
-
-	// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
-	// process could in container and shared pid namespace with host, and
-	// /proc/1/cgroup could point to whole other world of cgroups.
-	parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
-	if err != nil {
-		return "", err
-	}
-
-	return filepath.Join(parentPath, raw.innerPath), nil
-}
-
-func join(path string, pid int) error {
-	if path == "" {
-		return nil
-	}
-	if err := os.MkdirAll(path, 0o755); err != nil {
-		return err
-	}
-	return cgroups.WriteCgroupProc(path, pid)
-}
-
 func (m *manager) GetPaths() map[string]string {
 	m.mu.Lock()
 	defer m.mu.Unlock()
@@ -432,7 +256,7 @@ func OOMKillCount(path string) (uint64, error) {
 func (m *manager) OOMKillCount() (uint64, error) {
 	c, err := OOMKillCount(m.Path("memory"))
 	// Ignore ENOENT when rootless as it couldn't create cgroup.
-	if err != nil && m.rootless && os.IsNotExist(err) {
+	if err != nil && m.cgroups.Rootless && os.IsNotExist(err) {
 		err = nil
 	}

--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
@@ -1,9 +1,6 @@
-// +build linux
-
 package fs

 import (
-	"fmt"
 	"strconv"

 	"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -17,8 +14,8 @@ func (s *HugetlbGroup) Name() string {
 	return "hugetlb"
 }

-func (s *HugetlbGroup) Apply(path string, d *cgroupData) error {
-	return join(path, d.pid)
+func (s *HugetlbGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
 }

 func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
@@ -32,29 +29,29 @@ func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
 }

 func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
-	hugetlbStats := cgroups.HugetlbStats{}
 	if !cgroups.PathExists(path) {
 		return nil
 	}
-	for _, pageSize := range HugePageSizes {
+	hugetlbStats := cgroups.HugetlbStats{}
+	for _, pageSize := range cgroups.HugePageSizes() {
 		usage := "hugetlb." + pageSize + ".usage_in_bytes"
 		value, err := fscommon.GetCgroupParamUint(path, usage)
 		if err != nil {
-			return fmt.Errorf("failed to parse %s - %v", usage, err)
+			return err
 		}
 		hugetlbStats.Usage = value

 		maxUsage := "hugetlb." + pageSize + ".max_usage_in_bytes"
 		value, err = fscommon.GetCgroupParamUint(path, maxUsage)
 		if err != nil {
-			return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
+			return err
 		}
 		hugetlbStats.MaxUsage = value

 		failcnt := "hugetlb." + pageSize + ".failcnt"
 		value, err = fscommon.GetCgroupParamUint(path, failcnt)
 		if err != nil {
-			return fmt.Errorf("failed to parse %s - %v", failcnt, err)
+			return err
 		}
 		hugetlbStats.Failcnt = value

--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
@@ -1,9 +1,8 @@
-// +build linux
-
 package fs

 import (
 	"bufio"
+	"errors"
 	"fmt"
 	"math"
 	"os"
@@ -11,11 +10,11 @@ import (
 	"strconv"
 	"strings"

+	"golang.org/x/sys/unix"
+
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/pkg/errors"
-	"golang.org/x/sys/unix"
 )

 const (
@@ -31,8 +30,8 @@ func (s *MemoryGroup) Name() string {
 	return "memory"
 }

-func (s *MemoryGroup) Apply(path string, d *cgroupData) (err error) {
-	return join(path, d.pid)
+func (s *MemoryGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
 }

 func setMemory(path string, val int64) error {
@@ -56,7 +55,7 @@ func setMemory(path string, val int64) error {
 		return err
 	}

-	return errors.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
+	return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
 }

 func setSwap(path string, val int64) error {
@@ -134,15 +133,15 @@ func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
 			return err
 		}
 	} else {
-		return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *r.MemorySwappiness)
+		return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *r.MemorySwappiness)
 	}

 	return nil
 }

 func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
-	// Set stats from memory.stat.
-	statsFile, err := cgroups.OpenFile(path, "memory.stat", os.O_RDONLY)
+	const file = "memory.stat"
+	statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY)
 	if err != nil {
 		if os.IsNotExist(err) {
 			return nil
@@ -155,7 +154,7 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
 	for sc.Scan() {
 		t, v, err := fscommon.ParseKeyValue(sc.Text())
 		if err != nil {
-			return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
+			return &parseError{Path: path, File: file, Err: err}
 		}
 		stats.MemoryStats.Stats[t] = v
 	}
@@ -220,42 +219,42 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
 			// are optional in the kernel.
 			return cgroups.MemoryData{}, nil
 		}
-		return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
+		return cgroups.MemoryData{}, err
 	}
 	memoryData.Usage = value
 	value, err = fscommon.GetCgroupParamUint(path, maxUsage)
 	if err != nil {
-		return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
+		return cgroups.MemoryData{}, err
 	}
 	memoryData.MaxUsage = value
 	value, err = fscommon.GetCgroupParamUint(path, failcnt)
 	if err != nil {
-		return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
+		return cgroups.MemoryData{}, err
 	}
 	memoryData.Failcnt = value
 	value, err = fscommon.GetCgroupParamUint(path, limit)
 	if err != nil {
-		return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err)
+		return cgroups.MemoryData{}, err
 	}
 	memoryData.Limit = value

 	return memoryData, nil
 }

-func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
+func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) {
 	const (
 		maxColumns = math.MaxUint8 + 1
-		filename   = "memory.numa_stat"
+		file       = "memory.numa_stat"
 	)
 	stats := cgroups.PageUsageByNUMA{}

-	file, err := cgroups.OpenFile(cgroupPath, filename, os.O_RDONLY)
+	fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
 	if os.IsNotExist(err) {
 		return stats, nil
 	} else if err != nil {
 		return stats, err
 	}
-	defer file.Close()
+	defer fd.Close()

 	// File format is documented in linux/Documentation/cgroup-v1/memory.txt
 	// and it looks like this:
@@ -266,7 +265,7 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
 	// unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
 	// hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...

-	scanner := bufio.NewScanner(file)
+	scanner := bufio.NewScanner(fd)
 	for scanner.Scan() {
 		var field *cgroups.PageStats

@@ -284,8 +283,7 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
 				} else {
 					// The first column was already validated,
 					// so be strict to the rest.
-					return stats, fmt.Errorf("malformed line %q in %s",
-						line, filename)
+					return stats, malformedLine(path, file, line)
 				}
 			}
 			key, val := byNode[0], byNode[1]
@@ -296,24 +294,23 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
 				}
 				field.Total, err = strconv.ParseUint(val, 0, 64)
 				if err != nil {
-					return stats, err
+					return stats, &parseError{Path: path, File: file, Err: err}
 				}
 				field.Nodes = map[uint8]uint64{}
 			} else { // Subsequent columns: key is N<id>, val is usage.
 				if len(key) < 2 || key[0] != 'N' {
 					// This is definitely an error.
-					return stats, fmt.Errorf("malformed line %q in %s",
-						line, filename)
+					return stats, malformedLine(path, file, line)
 				}

 				n, err := strconv.ParseUint(key[1:], 10, 8)
 				if err != nil {
-					return cgroups.PageUsageByNUMA{}, err
+					return stats, &parseError{Path: path, File: file, Err: err}
 				}

 				usage, err := strconv.ParseUint(val, 10, 64)
 				if err != nil {
-					return cgroups.PageUsageByNUMA{}, err
+					return stats, &parseError{Path: path, File: file, Err: err}
 				}

 				field.Nodes[uint8(n)] = usage
@@ -321,9 +318,8 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {

 		}
 	}
-	err = scanner.Err()
-	if err != nil {
-		return cgroups.PageUsageByNUMA{}, err
+	if err := scanner.Err(); err != nil {
+		return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err}
 	}

 	return stats, nil
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package fs

 import (
@@ -16,10 +14,10 @@ func (s *NameGroup) Name() string {
 	return s.GroupName
 }

-func (s *NameGroup) Apply(path string, d *cgroupData) error {
+func (s *NameGroup) Apply(path string, _ *configs.Resources, pid int) error {
 	if s.Join {
-		// ignore errors if the named cgroup does not exist
-		_ = join(path, d.pid)
+		// Ignore errors if the named cgroup does not exist.
+		_ = apply(path, pid)
 	}
 	return nil
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package fs

 import (
@@ -15,8 +13,8 @@ func (s *NetClsGroup) Name() string {
 	return "net_cls"
 }

-func (s *NetClsGroup) Apply(path string, d *cgroupData) error {
-	return join(path, d.pid)
+func (s *NetClsGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
 }

 func (s *NetClsGroup) Set(path string, r *configs.Resources) error {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package fs

 import (
@@ -13,8 +11,8 @@ func (s *NetPrioGroup) Name() string {
 	return "net_prio"
 }

-func (s *NetPrioGroup) Apply(path string, d *cgroupData) error {
-	return join(path, d.pid)
+func (s *NetPrioGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
 }

 func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
@@ -0,0 +1,186 @@
+package fs
+
+import (
+	"errors"
+	"os"
+	"path/filepath"
+	"sync"
+
+	"golang.org/x/sys/unix"
+
+	"github.com/opencontainers/runc/libcontainer/cgroups"
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/utils"
+)
+
+// The absolute path to the root of the cgroup hierarchies.
+var (
+	cgroupRootLock sync.Mutex
+	cgroupRoot     string
+)
+
+const defaultCgroupRoot = "/sys/fs/cgroup"
+
+func initPaths(cg *configs.Cgroup) (map[string]string, error) {
+	root, err := rootPath()
+	if err != nil {
+		return nil, err
+	}
+
+	inner, err := innerPath(cg)
+	if err != nil {
+		return nil, err
+	}
+
+	paths := make(map[string]string)
+	for _, sys := range subsystems {
+		name := sys.Name()
+		path, err := subsysPath(root, inner, name)
+		if err != nil {
+			// The non-presence of the devices subsystem
+			// is considered fatal for security reasons.
+			if cgroups.IsNotFound(err) && (cg.SkipDevices || name != "devices") {
+				continue
+			}
+
+			return nil, err
+		}
+		paths[name] = path
+	}
+
+	return paths, nil
+}
+
+func tryDefaultCgroupRoot() string {
+	var st, pst unix.Stat_t
+
+	// (1) it should be a directory...
+	err := unix.Lstat(defaultCgroupRoot, &st)
+	if err != nil || st.Mode&unix.S_IFDIR == 0 {
+		return ""
+	}
+
+	// (2) ... and a mount point ...
+	err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
+	if err != nil {
+		return ""
+	}
+
+	if st.Dev == pst.Dev {
+		// parent dir has the same dev -- not a mount point
+		return ""
+	}
+
+	// (3) ... of 'tmpfs' fs type.
+	var fst unix.Statfs_t
+	err = unix.Statfs(defaultCgroupRoot, &fst)
+	if err != nil || fst.Type != unix.TMPFS_MAGIC {
+		return ""
+	}
+
+	// (4) it should have at least 1 entry ...
+	dir, err := os.Open(defaultCgroupRoot)
+	if err != nil {
+		return ""
+	}
+	names, err := dir.Readdirnames(1)
+	if err != nil {
+		return ""
+	}
+	if len(names) < 1 {
+		return ""
+	}
+	// ... which is a cgroup mount point.
+	err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
+	if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
+		return ""
+	}
+
+	return defaultCgroupRoot
+}
+
+// rootPath finds and returns path to the root of the cgroup hierarchies.
+func rootPath() (string, error) {
+	cgroupRootLock.Lock()
+	defer cgroupRootLock.Unlock()
+
+	if cgroupRoot != "" {
+		return cgroupRoot, nil
+	}
+
+	// fast path
+	cgroupRoot = tryDefaultCgroupRoot()
+	if cgroupRoot != "" {
+		return cgroupRoot, nil
+	}
+
+	// slow path: parse mountinfo
+	mi, err := cgroups.GetCgroupMounts(false)
+	if err != nil {
+		return "", err
+	}
+	if len(mi) < 1 {
+		return "", errors.New("no cgroup mount found in mountinfo")
+	}
+
+	// Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"),
+	// use its parent directory.
+	root := filepath.Dir(mi[0].Mountpoint)
+
+	if _, err := os.Stat(root); err != nil {
+		return "", err
+	}
+
+	cgroupRoot = root
+	return cgroupRoot, nil
+}
+
+func innerPath(c *configs.Cgroup) (string, error) {
+	if (c.Name != "" || c.Parent != "") && c.Path != "" {
+		return "", errors.New("cgroup: either Path or Name and Parent should be used")
+	}
+
+	// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
+	innerPath := utils.CleanPath(c.Path)
+	if innerPath == "" {
+		cgParent := utils.CleanPath(c.Parent)
+		cgName := utils.CleanPath(c.Name)
+		innerPath = filepath.Join(cgParent, cgName)
+	}
+
+	return innerPath, nil
+}
+
+func subsysPath(root, inner, subsystem string) (string, error) {
+	// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
+	if filepath.IsAbs(inner) {
+		mnt, err := cgroups.FindCgroupMountpoint(root, subsystem)
+		// If we didn't mount the subsystem, there is no point in making the path.
+		if err != nil {
+			return "", err
+		}
+
+		// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
+		return filepath.Join(root, filepath.Base(mnt), inner), nil
+	}
+
+	// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
+	// process could be in a container and share the pid namespace with the
+	// host, and /proc/1/cgroup could point to a whole other world of cgroups.
+	parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
+	if err != nil {
+		return "", err
+	}
+
+	return filepath.Join(parentPath, inner), nil
+}
+
+func apply(path string, pid int) error {
+	if path == "" {
+		return nil
+	}
+	if err := os.MkdirAll(path, 0o755); err != nil {
+		return err
+	}
+	return cgroups.WriteCgroupProc(path, pid)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package fs

 import (
@@ -13,8 +11,8 @@ func (s *PerfEventGroup) Name() string {
 	return "perf_event"
 }

-func (s *PerfEventGroup) Apply(path string, d *cgroupData) error {
-	return join(path, d.pid)
+func (s *PerfEventGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
 }

 func (s *PerfEventGroup) Set(_ string, _ *configs.Resources) error {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
@@ -1,10 +1,7 @@
-// +build linux
-
 package fs

 import (
-	"fmt"
-	"path/filepath"
+	"math"
 	"strconv"

 	"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -18,8 +15,8 @@ func (s *PidsGroup) Name() string {
 	return "pids"
 }

-func (s *PidsGroup) Apply(path string, d *cgroupData) error {
-	return join(path, d.pid)
+func (s *PidsGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
 }

 func (s *PidsGroup) Set(path string, r *configs.Resources) error {
@@ -45,21 +42,18 @@ func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
 	}
 	current, err := fscommon.GetCgroupParamUint(path, "pids.current")
 	if err != nil {
-		return fmt.Errorf("failed to parse pids.current - %s", err)
+		return err
 	}

-	maxString, err := fscommon.GetCgroupParamString(path, "pids.max")
+	max, err := fscommon.GetCgroupParamUint(path, "pids.max")
 	if err != nil {
-		return fmt.Errorf("failed to parse pids.max - %s", err)
+		return err
 	}
-
-	// Default if pids.max == "max" is 0 -- which represents "no limit".
-	var max uint64
-	if maxString != "max" {
-		max, err = fscommon.ParseUint(maxString, 10, 64)
-		if err != nil {
-			return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
-		}
+	// If no limit is set, read from pids.max returns "max", which is
+	// converted to MaxUint64 by GetCgroupParamUint. Historically, we
+	// represent "no limit" for pids as 0, thus this conversion.
+	if max == math.MaxUint64 {
+		max = 0
 	}

 	stats.PidsStats.Current = current
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
@@ -0,0 +1,25 @@
+package fs
+
+import (
+	"github.com/opencontainers/runc/libcontainer/cgroups"
+	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type RdmaGroup struct{}
+
+func (s *RdmaGroup) Name() string {
+	return "rdma"
+}
+
+func (s *RdmaGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
+}
+
+func (s *RdmaGroup) Set(path string, r *configs.Resources) error {
+	return fscommon.RdmaSet(path, r)
+}
+
+func (s *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error {
+	return fscommon.RdmaGetStats(path, stats)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/unsupported.go
@@ -1,3 +0,0 @@
-// +build !linux
-
-package fs
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package fs2

 import (
@@ -49,7 +47,8 @@ func setCpu(dirPath string, r *configs.Resources) error {
 }

 func statCpu(dirPath string, stats *cgroups.Stats) error {
-	f, err := cgroups.OpenFile(dirPath, "cpu.stat", os.O_RDONLY)
+	const file = "cpu.stat"
+	f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
 	if err != nil {
 		return err
 	}
@@ -59,7 +58,7 @@ func statCpu(dirPath string, stats *cgroups.Stats) error {
 	for sc.Scan() {
 		t, v, err := fscommon.ParseKeyValue(sc.Text())
 		if err != nil {
-			return err
+			return &parseError{Path: dirPath, File: file, Err: err}
 		}
 		switch t {
 		case "usage_usec":
@@ -81,5 +80,8 @@ func statCpu(dirPath string, stats *cgroups.Stats) error {
 			stats.CpuStats.ThrottlingData.ThrottledTime = v * 1000
 		}
 	}
+	if err := sc.Err(); err != nil {
+		return &parseError{Path: dirPath, File: file, Err: err}
+	}
 	return nil
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package fs2

 import (
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
@@ -18,41 +18,37 @@ package fs2

 import (
 	"bufio"
+	"errors"
+	"fmt"
 	"io"
 	"os"
 	"path/filepath"
 	"strings"

 	"github.com/opencontainers/runc/libcontainer/configs"
-	libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
-	"github.com/pkg/errors"
+	"github.com/opencontainers/runc/libcontainer/utils"
 )

 const UnifiedMountpoint = "/sys/fs/cgroup"

 func defaultDirPath(c *configs.Cgroup) (string, error) {
 	if (c.Name != "" || c.Parent != "") && c.Path != "" {
-		return "", errors.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
-	}
-	if len(c.Paths) != 0 {
-		// never set by specconv
-		return "", errors.Errorf("cgroup: Paths is unsupported, use Path, got %+v", c)
+		return "", fmt.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
 	}

-	// XXX: Do not remove this code. Path safety is important! -- cyphar
-	cgPath := libcontainerUtils.CleanPath(c.Path)
-	cgParent := libcontainerUtils.CleanPath(c.Parent)
-	cgName := libcontainerUtils.CleanPath(c.Name)
-
-	return _defaultDirPath(UnifiedMountpoint, cgPath, cgParent, cgName)
+	return _defaultDirPath(UnifiedMountpoint, c.Path, c.Parent, c.Name)
 }

 func _defaultDirPath(root, cgPath, cgParent, cgName string) (string, error) {
 	if (cgName != "" || cgParent != "") && cgPath != "" {
 		return "", errors.New("cgroup: either Path or Name and Parent should be used")
 	}
-	innerPath := cgPath
+
+	// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
+	innerPath := utils.CleanPath(cgPath)
 	if innerPath == "" {
+		cgParent := utils.CleanPath(cgParent)
+		cgName := utils.CleanPath(cgName)
 		innerPath = filepath.Join(cgParent, cgName)
 	}
 	if filepath.IsAbs(innerPath) {
@@ -89,7 +85,7 @@ func parseCgroupFromReader(r io.Reader) (string, error) {
 			parts = strings.SplitN(text, ":", 3)
 		)
 		if len(parts) < 3 {
-			return "", errors.Errorf("invalid cgroup entry: %q", text)
+			return "", fmt.Errorf("invalid cgroup entry: %q", text)
 		}
 		// text is like "0::/user.slice/user-1001.slice/session-1.scope"
 		if parts[0] == "0" && parts[1] == "" {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
@@ -1,16 +1,15 @@
-// +build linux
-
 package fs2

 import (
+	"fmt"
+
+	"golang.org/x/sys/unix"
+
 	"github.com/opencontainers/runc/libcontainer/cgroups/ebpf"
 	"github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/devices"
 	"github.com/opencontainers/runc/libcontainer/userns"
-
-	"github.com/pkg/errors"
-	"golang.org/x/sys/unix"
 )

 func isRWM(perms devices.Permissions) bool {
@@ -64,7 +63,7 @@ func setDevices(dirPath string, r *configs.Resources) error {
 	}
 	dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0o600)
 	if err != nil {
-		return errors.Errorf("cannot get dir FD for %s", dirPath)
+		return fmt.Errorf("cannot get dir FD for %s", dirPath)
 	}
 	defer unix.Close(dirFD)
 	if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
@@ -1,19 +1,17 @@
-// +build linux
-
 package fs2

 import (
 	"bufio"
-	stdErrors "errors"
+	"errors"
 	"fmt"
 	"os"
 	"strings"
 	"time"

+	"golang.org/x/sys/unix"
+
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/pkg/errors"
-	"golang.org/x/sys/unix"
 )

 func setFreezer(dirPath string, state configs.FreezerState) error {
@@ -26,7 +24,7 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
 	case configs.Thawed:
 		stateStr = "0"
 	default:
-		return errors.Errorf("invalid freezer state %q requested", state)
+		return fmt.Errorf("invalid freezer state %q requested", state)
 	}

 	fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR)
@@ -37,7 +35,7 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
 		if state != configs.Frozen {
 			return nil
 		}
-		return errors.Wrap(err, "freezer not supported")
+		return fmt.Errorf("freezer not supported: %w", err)
 	}
 	defer fd.Close()

@@ -48,7 +46,7 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
 	if actualState, err := readFreezer(dirPath, fd); err != nil {
 		return err
 	} else if actualState != state {
-		return errors.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
+		return fmt.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
 	}
 	return nil
 }
@@ -58,7 +56,7 @@ func getFreezer(dirPath string) (configs.FreezerState, error) {
 	if err != nil {
 		// If the kernel is too old, then we just treat the freezer as being in
 		// an "undefined" state.
-		if os.IsNotExist(err) || stdErrors.Is(err, unix.ENODEV) {
+		if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
 			err = nil
 		}
 		return configs.Undefined, err
@@ -82,7 +80,7 @@ func readFreezer(dirPath string, fd *os.File) (configs.FreezerState, error) {
 	case "1\n":
 		return waitFrozen(dirPath)
 	default:
-		return configs.Undefined, errors.Errorf(`unknown "cgroup.freeze" state: %q`, state)
+		return configs.Undefined, fmt.Errorf(`unknown "cgroup.freeze" state: %q`, state)
 	}
 }

--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
@@ -1,8 +1,7 @@
-// +build linux
-
 package fs2

 import (
+	"errors"
 	"fmt"
 	"os"
 	"strings"
@@ -10,9 +9,10 @@ import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/pkg/errors"
 )

+type parseError = fscommon.ParseError // parseError is a package-local alias, so &parseError{...} builds an fscommon.ParseError.
+
 type manager struct {
 	config *configs.Cgroup
 	// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
@@ -20,16 +20,12 @@ type manager struct {
 	// controllers is content of "cgroup.controllers" file.
 	// excludes pseudo-controllers ("devices" and "freezer").
 	controllers map[string]struct{}
-	rootless    bool
 }

 // NewManager creates a manager for cgroup v2 unified hierarchy.
 // dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope".
 // If dirPath is empty, it is automatically set using config.
-func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups.Manager, error) {
-	if config == nil {
-		config = &configs.Cgroup{}
-	}
+func NewManager(config *configs.Cgroup, dirPath string) (cgroups.Manager, error) {
 	if dirPath == "" {
 		var err error
 		dirPath, err = defaultDirPath(config)
@@ -39,9 +35,8 @@ func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups.
 	}

 	m := &manager{
-		config:   config,
-		dirPath:  dirPath,
-		rootless: rootless,
+		config:  config,
+		dirPath: dirPath,
 	}
 	return m, nil
 }
@@ -53,7 +48,7 @@ func (m *manager) getControllers() error {

 	data, err := cgroups.ReadFile(m.dirPath, "cgroup.controllers")
 	if err != nil {
-		if m.rootless && m.config.Path == "" {
+		if m.config.Rootless && m.config.Path == "" {
 			return nil
 		}
 		return err
@@ -73,12 +68,12 @@ func (m *manager) Apply(pid int) error {
 		// - "runc create (no limits + no cgrouppath + no permission) succeeds"
 		// - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error"
 		// - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
-		if m.rootless {
+		if m.config.Rootless {
 			if m.config.Path == "" {
 				if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed {
 					return nil
 				}
-				return errors.Wrap(err, "rootless needs no limits + no cgrouppath when no permission is granted for cgroups")
+				return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err)
 			}
 		}
 		return err
@@ -123,13 +118,20 @@ func (m *manager) GetStats() (*cgroups.Stats, error) {
 	if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) {
 		errs = append(errs, err)
 	}
-	if len(errs) > 0 && !m.rootless {
-		return st, errors.Errorf("error while statting cgroup v2: %+v", errs)
+	// rdma (since kernel 4.11)
+	if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) {
+		errs = append(errs, err)
+	}
+	if len(errs) > 0 && !m.config.Rootless {
+		return st, fmt.Errorf("error while statting cgroup v2: %+v", errs)
 	}
 	return st, nil
 }

 func (m *manager) Freeze(state configs.FreezerState) error {
+	if m.config.Resources == nil {
+		return errors.New("cannot toggle freezer: cgroups not configured for container")
+	}
 	if err := setFreezer(m.dirPath, state); err != nil {
 		return err
 	}
@@ -146,6 +148,9 @@ func (m *manager) Path(_ string) string {
 }

 func (m *manager) Set(r *configs.Resources) error {
+	if r == nil {
+		return nil
+	}
 	if err := m.getControllers(); err != nil {
 		return err
 	}
@@ -167,10 +172,10 @@ func (m *manager) Set(r *configs.Resources) error {
 	}
 	// devices (since kernel 4.15, pseudo-controller)
 	//
-	// When m.rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
+	// When rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
 	// However, errors from other subsystems are not ignored.
 	// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
-	if err := setDevices(m.dirPath, r); err != nil && !m.rootless {
+	if err := setDevices(m.dirPath, r); err != nil && !m.config.Rootless {
 		return err
 	}
 	// cpuset (since kernel 5.0)
@@ -181,6 +186,10 @@ func (m *manager) Set(r *configs.Resources) error {
 	if err := setHugeTlb(m.dirPath, r); err != nil {
 		return err
 	}
+	// rdma (since kernel 4.11)
+	if err := fscommon.RdmaSet(m.dirPath, r); err != nil {
+		return err
+	}
 	// freezer (since kernel 5.2, pseudo-controller)
 	if err := setFreezer(m.dirPath, r.Freezer); err != nil {
 		return err
@@ -198,9 +207,8 @@ func (m *manager) setUnified(res map[string]string) error {
 			return fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
 		}
 		if err := cgroups.WriteFile(m.dirPath, k, v); err != nil {
-			errC := errors.Cause(err)
 			// Check for both EPERM and ENOENT since O_CREAT is used by WriteFile.
-			if errors.Is(errC, os.ErrPermission) || errors.Is(errC, os.ErrNotExist) {
+			if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) {
 				// Check if a controller is available,
 				// to give more specific error if not.
 				sk := strings.SplitN(k, ".", 2)
@@ -212,7 +220,7 @@ func (m *manager) setUnified(res map[string]string) error {
 					return fmt.Errorf("unified resource %q can't be set: controller %q not available", k, c)
 				}
 			}
-			return errors.Wrapf(err, "can't set unified resource %q", k)
+			return fmt.Errorf("unable to set unified resource %q: %w", k, err)
 		}
 	}

@@ -243,7 +251,7 @@ func OOMKillCount(path string) (uint64, error) {

 func (m *manager) OOMKillCount() (uint64, error) {
 	c, err := OOMKillCount(m.dirPath)
-	if err != nil && m.rootless && os.IsNotExist(err) {
+	if err != nil && m.config.Rootless && os.IsNotExist(err) {
 		err = nil
 	}

--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
@@ -1,12 +1,8 @@
-// +build linux
-
 package fs2

 import (
 	"strconv"

-	"github.com/pkg/errors"
-
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
@@ -30,10 +26,8 @@ func setHugeTlb(dirPath string, r *configs.Resources) error {
 }

 func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
-	hugePageSizes, _ := cgroups.GetHugePageSize()
 	hugetlbStats := cgroups.HugetlbStats{}
-
-	for _, pagesize := range hugePageSizes {
+	for _, pagesize := range cgroups.HugePageSizes() {
 		value, err := fscommon.GetCgroupParamUint(dirPath, "hugetlb."+pagesize+".current")
 		if err != nil {
 			return err
@@ -43,7 +37,7 @@ func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
 		fileName := "hugetlb." + pagesize + ".events"
 		value, err = fscommon.GetValueByKey(dirPath, fileName, "max")
 		if err != nil {
-			return errors.Wrap(err, "failed to read stats")
+			return err
 		}
 		hugetlbStats.Failcnt = value

--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package fs2

 import (
@@ -117,13 +115,14 @@ func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error
 		ret[parts[0]] = parts[1:]
 	}
 	if err := scanner.Err(); err != nil {
-		return nil, err
+		return nil, &parseError{Path: dirPath, File: name, Err: err}
 	}
 	return ret, nil
 }

 func statIo(dirPath string, stats *cgroups.Stats) error {
-	values, err := readCgroup2MapFile(dirPath, "io.stat")
+	const file = "io.stat"
+	values, err := readCgroup2MapFile(dirPath, file)
 	if err != nil {
 		return err
 	}
@@ -136,11 +135,11 @@ func statIo(dirPath string, stats *cgroups.Stats) error {
 		}
 		major, err := strconv.ParseUint(d[0], 10, 64)
 		if err != nil {
-			return err
+			return &parseError{Path: dirPath, File: file, Err: err}
 		}
 		minor, err := strconv.ParseUint(d[1], 10, 64)
 		if err != nil {
-			return err
+			return &parseError{Path: dirPath, File: file, Err: err}
 		}

 		for _, item := range v {
@@ -177,7 +176,7 @@ func statIo(dirPath string, stats *cgroups.Stats) error {

 			value, err := strconv.ParseUint(d[1], 10, 64)
 			if err != nil {
-				return err
+				return &parseError{Path: dirPath, File: file, Err: err}
 			}

 			entry := cgroups.BlkioStatEntry{
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
@@ -1,19 +1,18 @@
-// +build linux
-
 package fs2

 import (
 	"bufio"
+	"errors"
 	"math"
 	"os"
 	"strconv"
 	"strings"

+	"golang.org/x/sys/unix"
+
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/pkg/errors"
-	"golang.org/x/sys/unix"
 )

 // numToStr converts an int64 value to a string for writing to a
@@ -75,8 +74,8 @@ func setMemory(dirPath string, r *configs.Resources) error {
 }

 func statMemory(dirPath string, stats *cgroups.Stats) error {
-	// Set stats from memory.stat.
-	statsFile, err := cgroups.OpenFile(dirPath, "memory.stat", os.O_RDONLY)
+	const file = "memory.stat"
+	statsFile, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
 	if err != nil {
 		return err
 	}
@@ -86,10 +85,13 @@ func statMemory(dirPath string, stats *cgroups.Stats) error {
 	for sc.Scan() {
 		t, v, err := fscommon.ParseKeyValue(sc.Text())
 		if err != nil {
-			return errors.Wrapf(err, "failed to parse memory.stat (%q)", sc.Text())
+			return &parseError{Path: dirPath, File: file, Err: err}
 		}
 		stats.MemoryStats.Stats[t] = v
 	}
+	if err := sc.Err(); err != nil {
+		return &parseError{Path: dirPath, File: file, Err: err}
+	}
 	stats.MemoryStats.Cache = stats.MemoryStats.Stats["file"]
 	// Unlike cgroup v1 which has memory.use_hierarchy binary knob,
 	// cgroup v2 is always hierarchical.
@@ -139,13 +141,13 @@ func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
 			// swapaccount=0 kernel boot parameter is given.
 			return cgroups.MemoryData{}, nil
 		}
-		return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", usage)
+		return cgroups.MemoryData{}, err
 	}
 	memoryData.Usage = value

 	value, err = fscommon.GetCgroupParamUint(path, limit)
 	if err != nil {
-		return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", limit)
+		return cgroups.MemoryData{}, err
 	}
 	memoryData.Limit = value

@@ -153,7 +155,8 @@ func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
 }

 func statsFromMeminfo(stats *cgroups.Stats) error {
-	f, err := os.Open("/proc/meminfo")
+	const file = "/proc/meminfo"
+	f, err := os.Open(file)
 	if err != nil {
 		return err
 	}
@@ -190,7 +193,7 @@ func statsFromMeminfo(stats *cgroups.Stats) error {
 		vStr := strings.TrimSpace(strings.TrimSuffix(parts[1], " kB"))
 		*p, err = strconv.ParseUint(vStr, 10, 64)
 		if err != nil {
-			return errors.Wrap(err, "parsing /proc/meminfo "+k)
+			return &parseError{File: file, Err: errors.New("bad value for " + k)}
 		}

 		found++
@@ -199,8 +202,8 @@ func statsFromMeminfo(stats *cgroups.Stats) error {
 			break
 		}
 	}
-	if sc.Err() != nil {
-		return sc.Err()
+	if err := sc.Err(); err != nil {
+		return &parseError{Path: "", File: file, Err: err}
 	}

 	stats.MemoryStats.SwapUsage.Usage = (swap_total - swap_free) * 1024
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
@@ -1,17 +1,16 @@
-// +build linux
-
 package fs2

 import (
+	"errors"
+	"math"
 	"os"
-	"path/filepath"
 	"strings"

+	"golang.org/x/sys/unix"
+
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/pkg/errors"
-	"golang.org/x/sys/unix"
 )

 func isPidsSet(r *configs.Resources) bool {
@@ -53,22 +52,18 @@ func statPids(dirPath string, stats *cgroups.Stats) error {
 		if os.IsNotExist(err) {
 			return statPidsFromCgroupProcs(dirPath, stats)
 		}
-		return errors.Wrap(err, "failed to parse pids.current")
+		return err
 	}

-	maxString, err := fscommon.GetCgroupParamString(dirPath, "pids.max")
+	max, err := fscommon.GetCgroupParamUint(dirPath, "pids.max")
 	if err != nil {
-		return errors.Wrap(err, "failed to parse pids.max")
+		return err
 	}
-
-	// Default if pids.max == "max" is 0 -- which represents "no limit".
-	var max uint64
-	if maxString != "max" {
-		max, err = fscommon.ParseUint(maxString, 10, 64)
-		if err != nil {
-			return errors.Wrapf(err, "failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q",
-				maxString, filepath.Join(dirPath, "pids.max"))
-		}
+	// If no limit is set, read from pids.max returns "max", which is
+	// converted to MaxUint64 by GetCgroupParamUint. Historically, we
+	// represent "no limit" for pids as 0, thus this conversion.
+	if max == math.MaxUint64 {
+		max = 0
 	}

 	stats.PidsStats.Current = current
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
@@ -0,0 +1,121 @@
+package fscommon
+
+import (
+	"bufio"
+	"errors"
+	"math"
+	"os"
+	"strconv"
+	"strings"
+
+	"github.com/opencontainers/runc/libcontainer/cgroups"
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"golang.org/x/sys/unix"
+)
+
+// parseRdmaKV parses one "key=value" token (raw) into entry: hca_handle sets HcaHandles, hca_object sets HcaObjects.
+func parseRdmaKV(raw string, entry *cgroups.RdmaEntry) error {
+	var value uint32
+
+	parts := strings.SplitN(raw, "=", 3) // limit 3 so a token with a second "=" yields three parts and is rejected below
+
+	if len(parts) != 2 {
+		return errors.New("Unable to parse RDMA entry")
+	}
+
+	k, v := parts[0], parts[1]
+
+	if v == "max" {
+		value = math.MaxUint32 // the literal "max" is stored as MaxUint32
+	} else {
+		val64, err := strconv.ParseUint(v, 10, 32) // bitSize 32: values above MaxUint32 are reported as an error
+		if err != nil {
+			return err
+		}
+		value = uint32(val64)
+	}
+	if k == "hca_handle" {
+		entry.HcaHandles = value
+	} else if k == "hca_object" {
+		entry.HcaObjects = value
+	} // any other key is accepted and its value is discarded
+
+	return nil
+}
+
+// readRdmaEntries opens file under dir and converts each line to an RdmaEntry; malformed lines are skipped, not reported.
+// Example line: mlx4_0 hca_handle=2 hca_object=2000
+func readRdmaEntries(dir, file string) ([]cgroups.RdmaEntry, error) {
+	rdmaEntries := make([]cgroups.RdmaEntry, 0)
+	fd, err := cgroups.OpenFile(dir, file, unix.O_RDONLY)
+	if err != nil {
+		return nil, err
+	}
+	defer fd.Close() //nolint:errorlint
+	scanner := bufio.NewScanner(fd)
+	for scanner.Scan() {
+		parts := strings.SplitN(scanner.Text(), " ", 4) // limit 4 so a line with extra fields yields four parts and is skipped
+		if len(parts) == 3 {
+			entry := new(cgroups.RdmaEntry)
+			entry.Device = parts[0]
+			err = parseRdmaKV(parts[1], entry)
+			if err != nil {
+				continue // one bad key=value pair drops the whole line
+			}
+			err = parseRdmaKV(parts[2], entry)
+			if err != nil {
+				continue // one bad key=value pair drops the whole line
+			}
+
+			rdmaEntries = append(rdmaEntries, *entry)
+		}
+	}
+	return rdmaEntries, scanner.Err() // on a scan error the entries collected so far are still returned
+}
+
+// RdmaGetStats fills stats.RdmaStats from rdma.current and rdma.max under path; a missing rdma.current is not an error.
+func RdmaGetStats(path string, stats *cgroups.Stats) error {
+	currentEntries, err := readRdmaEntries(path, "rdma.current")
+	if err != nil {
+		if errors.Is(err, os.ErrNotExist) {
+			err = nil // no rdma.current file: return success and leave stats untouched
+		}
+		return err
+	}
+	maxEntries, err := readRdmaEntries(path, "rdma.max") // unlike rdma.current, a missing rdma.max is returned as an error
+	if err != nil {
+		return err
+	}
+	// If a device got removed between reading the two files, skip filling in the stats.
+	if len(currentEntries) != len(maxEntries) {
+		return nil
+	}
+
+	stats.RdmaStats = cgroups.RdmaStats{
+		RdmaLimit:   maxEntries,
+		RdmaCurrent: currentEntries,
+	}
+
+	return nil
+}
+
+func createCmdString(device string, limits configs.LinuxRdma) string { // createCmdString builds the line RdmaSet writes to rdma.max.
+	cmdString := device // the line starts with the device name
+	if limits.HcaHandles != nil { // a nil limit is omitted from the line
+		cmdString += " hca_handle=" + strconv.FormatUint(uint64(*limits.HcaHandles), 10)
+	}
+	if limits.HcaObjects != nil { // a nil limit is omitted from the line
+		cmdString += " hca_object=" + strconv.FormatUint(uint64(*limits.HcaObjects), 10)
+	}
+	return cmdString
+}
+
+// RdmaSet writes one limit line per device in r.Rdma to rdma.max under path and returns the first write error.
+func RdmaSet(path string, r *configs.Resources) error {
+	for device, limits := range r.Rdma { // r is dereferenced unconditionally, so callers must pass a non-nil r
+		if err := cgroups.WriteFile(path, "rdma.max", createCmdString(device, limits)); err != nil {
+			return err
+		}
+	}
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
@@ -1,11 +1,10 @@
-// +build linux
-
 package fscommon

 import (
 	"errors"
 	"fmt"
 	"math"
+	"path"
 	"strconv"
 	"strings"

@@ -13,8 +12,6 @@ import (
 )

 var (
-	ErrNotValidFormat = errors.New("line is not a valid key value format")
-
 	// Deprecated: use cgroups.OpenFile instead.
 	OpenFile = cgroups.OpenFile
 	// Deprecated: use cgroups.ReadFile instead.
@@ -23,6 +20,19 @@ var (
 	WriteFile = cgroups.WriteFile
 )

+// ParseError records the details of a parse failure: the directory (Path), the file name (File), and the cause (Err).
+type ParseError struct {
+	Path string
+	File string
+	Err  error
+}
+
+func (e *ParseError) Error() string { // Error renders "unable to parse <Path joined with File>: <Err>"; Err must be non-nil.
+	return "unable to parse " + path.Join(e.Path, e.File) + ": " + e.Err.Error()
+}
+
+func (e *ParseError) Unwrap() error { return e.Err } // Unwrap exposes Err so errors.Is and errors.As can see the cause.
+
 // ParseUint converts a string to an uint64 integer.
 // Negative values are returned at zero as, due to kernel bugs,
 // some of the memory cgroup stats can be negative.
@@ -34,7 +44,7 @@ func ParseUint(s string, base, bitSize int) (uint64, error) {
 		// 2. Handle negative values lesser than MinInt64
 		if intErr == nil && intValue < 0 {
 			return 0, nil
-		} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
+		} else if errors.Is(intErr, strconv.ErrRange) && intValue < 0 {
 			return 0, nil
 		}

@@ -56,7 +66,7 @@ func ParseKeyValue(t string) (string, uint64, error) {

 	value, err := ParseUint(parts[1], 10, 64)
 	if err != nil {
-		return "", 0, fmt.Errorf("unable to convert to uint64: %v", err)
+		return "", 0, err
 	}

 	return parts[0], value, nil
@@ -71,11 +81,15 @@ func GetValueByKey(path, file, key string) (uint64, error) {
 		return 0, err
 	}

-	lines := strings.Split(string(content), "\n")
+	lines := strings.Split(content, "\n")
 	for _, line := range lines {
 		arr := strings.Split(line, " ")
 		if len(arr) == 2 && arr[0] == key {
-			return ParseUint(arr[1], 10, 64)
+			val, err := ParseUint(arr[1], 10, 64)
+			if err != nil {
+				err = &ParseError{Path: path, File: file, Err: err}
+			}
+			return val, err
 		}
 	}

@@ -96,7 +110,7 @@ func GetCgroupParamUint(path, file string) (uint64, error) {

 	res, err := ParseUint(contents, 10, 64)
 	if err != nil {
-		return res, fmt.Errorf("unable to parse file %q", path+"/"+file)
+		return res, &ParseError{Path: path, File: file, Err: err}
 	}
 	return res, nil
 }
@@ -115,7 +129,7 @@ func GetCgroupParamInt(path, file string) (int64, error) {

 	res, err := strconv.ParseInt(contents, 10, 64)
 	if err != nil {
-		return res, fmt.Errorf("unable to parse %q as a int from Cgroup file %q", contents, path+"/"+file)
+		return res, &ParseError{Path: path, File: file, Err: err}
 	}
 	return res, nil
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go
@@ -0,0 +1,27 @@
+package cgroups
+
+import (
+	"io/fs"
+	"path/filepath"
+)
+
+// GetAllPids returns all pids from the cgroup identified by path, and all its
+// sub-cgroups. The walk stops at the first error, which is returned together with the pids collected so far.
+func GetAllPids(path string) ([]int, error) {
+	var pids []int
+	err := filepath.WalkDir(path, func(p string, d fs.DirEntry, iErr error) error {
+		if iErr != nil {
+			return iErr // an error reported by WalkDir for this entry aborts the walk
+		}
+		if !d.IsDir() {
+			return nil // non-directory entries are skipped
+		}
+		cPids, err := readProcsFile(p) // defined elsewhere in the package; presumably reads the pids listed for directory p
+		if err != nil {
+			return err
+		}
+		pids = append(pids, cPids...)
+		return nil
+	})
+	return pids, err
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/manager/new.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/manager/new.go
@@ -0,0 +1,78 @@
+package manager
+
+import (
+	"errors"
+	"fmt"
+	"path/filepath"
+
+	"github.com/opencontainers/runc/libcontainer/cgroups"
+	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
+	"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
+	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// New returns the instance of a cgroup manager, which is chosen
+// based on the local environment (whether cgroup v1 or v2 is used)
+// and the config (whether config.Systemd is set or not). It is equivalent to NewWithPaths(config, nil).
+func New(config *configs.Cgroup) (cgroups.Manager, error) {
+	return NewWithPaths(config, nil)
+}
+
+// NewWithPaths is similar to New, and can be used in case cgroup paths
+// are already well known, which can save some resources.
+//
+// For cgroup v1, the keys are controller/subsystem name, and the values
+// are absolute filesystem paths to the appropriate cgroups.
+//
+// For cgroup v2, the only key allowed is "" (empty string), and the value
+// is the unified cgroup path. A nil config, or config.Systemd set while systemd is not running, is an error.
+func NewWithPaths(config *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
+	if config == nil {
+		return nil, errors.New("cgroups/manager.New: config must not be nil")
+	}
+	if config.Systemd && !systemd.IsRunningSystemd() {
+		return nil, errors.New("systemd not running on this host, cannot use systemd cgroups manager")
+	}
+
+	// Cgroup v2 aka unified hierarchy.
+	if cgroups.IsCgroup2UnifiedMode() {
+		path, err := getUnifiedPath(paths) // reduce the paths map to the single unified path, validating it
+		if err != nil {
+			return nil, fmt.Errorf("manager.NewWithPaths: inconsistent paths: %w", err)
+		}
+		if config.Systemd {
+			return systemd.NewUnifiedManager(config, path)
+		}
+		return fs2.NewManager(config, path)
+	}
+
+	// Cgroup v1.
+	if config.Systemd {
+		return systemd.NewLegacyManager(config, paths)
+	}
+
+	return fs.NewManager(config, paths)
+}
+
+// getUnifiedPath is an implementation detail of libcontainer factory.
+// Historically, it saves cgroup paths as per-subsystem path map (as returned
+// by cm.GetPaths("")), but with v2 we only have one single unified path
+// (with "" as a key).
+//
+// This function converts from that map to string (using "" as a key),
+// and also checks that the map itself is sane.
+func getUnifiedPath(paths map[string]string) (string, error) {
+	if len(paths) > 1 {
+		return "", fmt.Errorf("expected a single path, got %+v", paths)
+	}
+	path := paths[""] // NOTE(review): a single entry stored under a non-empty key passes the check above and yields "" here
+	// can be empty (nil map or no "" key); "" is then returned without error
+	if path != "" {
+		if filepath.Clean(path) != path || !filepath.IsAbs(path) { // the path must be absolute and already in cleaned form
+			return "", fmt.Errorf("invalid path: %q", path)
+		}
+	}
+
+	return path, nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package cgroups

 type ThrottlingData struct {
@@ -126,7 +124,7 @@ type BlkioStatEntry struct {
 }

 type BlkioStats struct {
-	// number of bytes tranferred to and from the block device
+	// number of bytes transferred to and from the block device
 	IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
 	IoServicedRecursive     []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
 	IoQueuedRecursive       []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
@@ -146,6 +144,17 @@ type HugetlbStats struct {
 	Failcnt uint64 `json:"failcnt"`
 }

+type RdmaEntry struct { // RdmaEntry holds the hca_handle and hca_object values recorded for one RDMA device.
+	Device     string `json:"device,omitempty"`
+	HcaHandles uint32 `json:"hca_handles,omitempty"`
+	HcaObjects uint32 `json:"hca_objects,omitempty"`
+}
+
+type RdmaStats struct { // RdmaStats pairs the per-device limits (RdmaLimit) with the per-device current values (RdmaCurrent).
+	RdmaLimit   []RdmaEntry `json:"rdma_limit,omitempty"`
+	RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"`
+}
+
 type Stats struct {
 	CpuStats    CpuStats    `json:"cpu_stats,omitempty"`
 	CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"`
@@ -154,6 +163,7 @@ type Stats struct {
 	BlkioStats  BlkioStats  `json:"blkio_stats,omitempty"`
 	// the map is in the format "size of hugepage: stats of the hugepage"
 	HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
+	RdmaStats    RdmaStats               `json:"rdma_stats,omitempty"`
 }

 func NewStats() *Stats {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
@@ -3,6 +3,7 @@ package systemd
 import (
 	"bufio"
 	"context"
+	"errors"
 	"fmt"
 	"math"
 	"os"
@@ -14,11 +15,11 @@ import (

 	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
 	dbus "github.com/godbus/dbus/v5"
+	"github.com/sirupsen/logrus"
+
 	cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/devices"
-	"github.com/pkg/errors"
-	"github.com/sirupsen/logrus"
 )

 const (
@@ -92,7 +93,7 @@ func groupPrefix(ruleType devices.Type) (string, error) {
 	case devices.CharDevice:
 		return "char-", nil
 	default:
-		return "", errors.Errorf("device type %v has no group prefix", ruleType)
+		return "", fmt.Errorf("device type %v has no group prefix", ruleType)
 	}
 }

@@ -142,9 +143,9 @@ func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) {
 		)
 		if n, err := fmt.Sscanf(line, "%d %s", &currMajor, &currName); err != nil || n != 2 {
 			if err == nil {
-				err = errors.Errorf("wrong number of fields")
+				err = errors.New("wrong number of fields")
 			}
-			return "", errors.Wrapf(err, "scan /proc/devices line %q", line)
+			return "", fmt.Errorf("scan /proc/devices line %q: %w", line, err)
 		}

 		if currMajor == ruleMajor {
@@ -152,7 +153,7 @@ func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) {
 		}
 	}
 	if err := scanner.Err(); err != nil {
-		return "", errors.Wrap(err, "reading /proc/devices")
+		return "", fmt.Errorf("reading /proc/devices: %w", err)
 	}
 	// Couldn't find the device group.
 	return "", nil
@@ -192,12 +193,12 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
 	configEmu := &cgroupdevices.Emulator{}
 	for _, rule := range r.Devices {
 		if err := configEmu.Apply(*rule); err != nil {
-			return nil, errors.Wrap(err, "apply rule for systemd")
+			return nil, fmt.Errorf("unable to apply rule for systemd: %w", err)
 		}
 	}
 	// systemd doesn't support blacklists. So we log a warning, and tell
 	// systemd to act as a deny-all whitelist. This ruleset will be replaced
-	// with our normal fallback code. This may result in spurrious errors, but
+	// with our normal fallback code. This may result in spurious errors, but
 	// the only other option is to error out here.
 	if configEmu.IsBlacklist() {
 		// However, if we're dealing with an allow-all rule then we can do it.
@@ -213,19 +214,19 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
 	// whitelist which is the default for devices.Emulator.
 	finalRules, err := configEmu.Rules()
 	if err != nil {
-		return nil, errors.Wrap(err, "get simplified rules for systemd")
+		return nil, fmt.Errorf("unable to get simplified rules for systemd: %w", err)
 	}
 	var deviceAllowList []deviceAllowEntry
 	for _, rule := range finalRules {
 		if !rule.Allow {
 			// Should never happen.
-			return nil, errors.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule)
+			return nil, fmt.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule)
 		}
 		switch rule.Type {
 		case devices.BlockDevice, devices.CharDevice:
 		default:
 			// Should never happen.
-			return nil, errors.Errorf("invalid device type for DeviceAllow: %v", rule.Type)
+			return nil, fmt.Errorf("invalid device type for DeviceAllow: %v", rule.Type)
 		}

 		entry := deviceAllowEntry{
@@ -271,7 +272,7 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
 			// "_ n:* _" rules require a device group from /proc/devices.
 			group, err := findDeviceGroup(rule.Type, rule.Major)
 			if err != nil {
-				return nil, errors.Wrapf(err, "find device '%v/%d'", rule.Type, rule.Major)
+				return nil, fmt.Errorf("unable to find device '%v/%d': %w", rule.Type, rule.Major, err)
 			}
 			if group == "" {
 				// Couldn't find a group.
@@ -350,7 +351,7 @@ func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Pr
 			// Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
 			if s != "done" {
 				resetFailedUnit(cm, unitName)
-				return errors.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
+				return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
 			}
 		case <-timeout.C:
 			resetFailedUnit(cm, unitName)
@@ -449,10 +450,13 @@ func systemdVersionAtoi(verStr string) (int, error) {
 	re := regexp.MustCompile(`v?([0-9]+)`)
 	matches := re.FindStringSubmatch(verStr)
 	if len(matches) < 2 {
-		return 0, errors.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches)
+		return 0, fmt.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches)
 	}
 	ver, err := strconv.Atoi(matches[1])
-	return ver, errors.Wrapf(err, "can't parse version %s", verStr)
+	if err != nil {
+		return -1, fmt.Errorf("can't parse version: %w", err)
+	}
+	return ver, nil
 }

 func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go
@@ -1,12 +1,10 @@
 package systemd

 import (
-	"encoding/binary"
+	"errors"
+	"math/big"
 	"strconv"
 	"strings"
-
-	"github.com/bits-and-blooms/bitset"
-	"github.com/pkg/errors"
 )

 // RangeToBits converts a text representation of a CPU mask (as written to
@@ -14,7 +12,7 @@ import (
 // with the corresponding bits set (as consumed by systemd over dbus as
 // AllowedCPUs/AllowedMemoryNodes unit property value).
 func RangeToBits(str string) ([]byte, error) {
-	bits := &bitset.BitSet{}
+	bits := new(big.Int)

 	for _, r := range strings.Split(str, ",") {
 		// allow extra spaces around
@@ -36,32 +34,22 @@ func RangeToBits(str string) ([]byte, error) {
 			if start > end {
 				return nil, errors.New("invalid range: " + r)
 			}
-			for i := uint(start); i <= uint(end); i++ {
-				bits.Set(i)
+			for i := start; i <= end; i++ {
+				bits.SetBit(bits, int(i), 1)
 			}
 		} else {
 			val, err := strconv.ParseUint(ranges[0], 10, 32)
 			if err != nil {
 				return nil, err
 			}
-			bits.Set(uint(val))
+			bits.SetBit(bits, int(val), 1)
 		}
 	}

-	val := bits.Bytes()
-	if len(val) == 0 {
+	ret := bits.Bytes()
+	if len(ret) == 0 {
 		// do not allow empty values
 		return nil, errors.New("empty value")
 	}
-	ret := make([]byte, len(val)*8)
-	for i := range val {
-		// bitset uses BigEndian internally
-		binary.BigEndian.PutUint64(ret[i*8:], val[len(val)-1-i])
-	}
-	// remove upper all-zero bytes
-	for ret[0] == 0 {
-		ret = ret[1:]
-	}
-
 	return ret, nil
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package systemd

 import (
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unsupported.go
@@ -1,71 +0,0 @@
-// +build !linux
-
-package systemd
-
-import (
-	"errors"
-
-	"github.com/opencontainers/runc/libcontainer/cgroups"
-	"github.com/opencontainers/runc/libcontainer/configs"
-)
-
-type Manager struct {
-	Cgroups *configs.Cgroup
-	Paths   map[string]string
-}
-
-func IsRunningSystemd() bool {
-	return false
-}
-
-func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
-	return nil, errors.New("Systemd not supported")
-}
-
-func (m *Manager) Apply(pid int) error {
-	return errors.New("Systemd not supported")
-}
-
-func (m *Manager) GetPids() ([]int, error) {
-	return nil, errors.New("Systemd not supported")
-}
-
-func (m *Manager) GetAllPids() ([]int, error) {
-	return nil, errors.New("Systemd not supported")
-}
-
-func (m *Manager) Destroy() error {
-	return errors.New("Systemd not supported")
-}
-
-func (m *Manager) GetPaths() map[string]string {
-	return nil
-}
-
-func (m *Manager) Path(_ string) string {
-	return ""
-}
-
-func (m *Manager) GetStats() (*cgroups.Stats, error) {
-	return nil, errors.New("Systemd not supported")
-}
-
-func (m *Manager) Set(container *configs.Config) error {
-	return errors.New("Systemd not supported")
-}
-
-func (m *Manager) Freeze(state configs.FreezerState) error {
-	return errors.New("Systemd not supported")
-}
-
-func Freeze(c *configs.Cgroup, state configs.FreezerState) error {
-	return errors.New("Systemd not supported")
-}
-
-func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
-	return nil, errors.New("Systemd not supported")
-}
-
-func (m *Manager) Exists() bool {
-	return false
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
@@ -1,10 +1,10 @@
-// +build linux
-
 package systemd

 import (
 	"bufio"
 	"bytes"
+	"errors"
+	"fmt"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -13,8 +13,8 @@ import (

 	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
 	dbus "github.com/godbus/dbus/v5"
+
 	"github.com/opencontainers/runc/libcontainer/userns"
-	"github.com/pkg/errors"
 )

 // newUserSystemdDbus creates a connection for systemd user-instance.
@@ -31,17 +31,17 @@ func newUserSystemdDbus() (*systemdDbus.Conn, error) {
 	return systemdDbus.NewConnection(func() (*dbus.Conn, error) {
 		conn, err := dbus.Dial(addr)
 		if err != nil {
-			return nil, errors.Wrapf(err, "error while dialing %q", addr)
+			return nil, fmt.Errorf("error while dialing %q: %w", addr, err)
 		}
 		methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(uid))}
 		err = conn.Auth(methods)
 		if err != nil {
 			conn.Close()
-			return nil, errors.Wrapf(err, "error while authenticating connection, address=%q, UID=%d", addr, uid)
+			return nil, fmt.Errorf("error while authenticating connection (address=%q, UID=%d): %w", addr, uid, err)
 		}
 		if err = conn.Hello(); err != nil {
 			conn.Close()
-			return nil, errors.Wrapf(err, "error while sending Hello message, address=%q, UID=%d", addr, uid)
+			return nil, fmt.Errorf("error while sending Hello message (address=%q, UID=%d): %w", addr, uid, err)
 		}
 		return conn, nil
 	})
@@ -57,7 +57,7 @@ func DetectUID() (int, error) {
 	}
 	b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
 	if err != nil {
-		return -1, errors.Wrapf(err, "could not execute `busctl --user --no-pager status`: %q", string(b))
+		return -1, fmt.Errorf("could not execute `busctl --user --no-pager status` (output: %q): %w", string(b), err)
 	}
 	scanner := bufio.NewScanner(bytes.NewReader(b))
 	for scanner.Scan() {
@@ -66,7 +66,7 @@ func DetectUID() (int, error) {
 			uidStr := strings.TrimPrefix(s, "OwnerUID=")
 			i, err := strconv.Atoi(uidStr)
 			if err != nil {
-				return -1, errors.Wrapf(err, "could not detect the OwnerUID: %s", s)
+				return -1, fmt.Errorf("could not detect the OwnerUID: %w", err)
 			}
 			return i, nil
 		}
@@ -93,7 +93,7 @@ func DetectUserDbusSessionBusAddress() (string, error) {
 	}
 	b, err := exec.Command("systemctl", "--user", "--no-pager", "show-environment").CombinedOutput()
 	if err != nil {
-		return "", errors.Wrapf(err, "could not execute `systemctl --user --no-pager show-environment`, output=%q", string(b))
+		return "", fmt.Errorf("could not execute `systemctl --user --no-pager show-environment` (output=%q): %w", string(b), err)
 	}
 	scanner := bufio.NewScanner(bytes.NewReader(b))
 	for scanner.Scan() {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package systemd

 import (
@@ -26,12 +24,25 @@ type legacyManager struct {
 	dbus    *dbusConnManager
 }

-func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) cgroups.Manager {
+func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
+	if cg.Rootless {
+		return nil, errors.New("cannot use rootless systemd cgroups manager on cgroup v1")
+	}
+	if cg.Resources != nil && cg.Resources.Unified != nil {
+		return nil, cgroups.ErrV1NoUnified
+	}
+	if paths == nil {
+		var err error
+		paths, err = initPaths(cg)
+		if err != nil {
+			return nil, err
+		}
+	}
 	return &legacyManager{
 		cgroups: cg,
 		paths:   paths,
 		dbus:    newDbusConnManager(false),
-	}
+	}, nil
 }

 type subsystem interface {
@@ -59,6 +70,7 @@ var legacySubsystems = []subsystem{
 	&fs.NetPrioGroup{},
 	&fs.NetClsGroup{},
 	&fs.NameGroup{GroupName: "name=systemd"},
+	&fs.RdmaGroup{},
 }

 func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
@@ -100,6 +112,53 @@ func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]syst
 	return properties, nil
 }

+// initPaths computes the cgroup v1 path of the container's unit for every
+// legacy subsystem (plus the hybrid cgroup v2 path, keyed by "", if mounted).
+func initPaths(c *configs.Cgroup) (map[string]string, error) {
+	slice := "system.slice"
+	if c.Parent != "" {
+		var err error
+		slice, err = ExpandSlice(c.Parent)
+		if err != nil {
+			return nil, err
+		}
+	}
+	unit := getUnitName(c)
+
+	paths := make(map[string]string)
+	for _, s := range legacySubsystems {
+		subsystemPath, err := getSubsystemPath(slice, unit, s.Name())
+		if err != nil {
+			// Even if it's a `not found` error, we'll return err
+			// because the devices cgroup is a hard requirement for
+			// container security.
+			if s.Name() == "devices" {
+				return nil, err
+			}
+			// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
+			if cgroups.IsNotFound(err) {
+				continue
+			}
+			return nil, err
+		}
+		paths[s.Name()] = subsystemPath
+	}
+
+	// If systemd is using cgroups-hybrid mode then add the slice path of
+	// this container to the paths so the following process executed with
+	// "runc exec" joins that cgroup as well.
+	if cgroups.IsCgroup2HybridMode() {
+		// "" means cgroup-hybrid path; never record an empty path on error.
+		cgroupsHybridPath, err := getSubsystemPath(slice, unit, "")
+		if err != nil {
+			return nil, err
+		}
+		paths[""] = cgroupsHybridPath
+	}
+
+	return paths, nil
+}
+
 func (m *legacyManager) Apply(pid int) error {
 	var (
 		c          = m.cgroups
@@ -108,27 +167,8 @@ func (m *legacyManager) Apply(pid int) error {
 		properties []systemdDbus.Property
 	)

-	if c.Resources.Unified != nil {
-		return cgroups.ErrV1NoUnified
-	}
-
 	m.mu.Lock()
 	defer m.mu.Unlock()
-	if c.Paths != nil {
-		paths := make(map[string]string)
-		cgMap, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
-		if err != nil {
-			return err
-		}
-		// XXX(kolyshkin@): why this check is needed?
-		for name, path := range c.Paths {
-			if _, ok := cgMap[name]; ok {
-				paths[name] = path
-			}
-		}
-		m.paths = paths
-		return cgroups.EnterPid(m.paths, pid)
-	}

 	if c.Parent != "" {
 		slice = c.Parent
@@ -136,12 +176,14 @@ func (m *legacyManager) Apply(pid int) error {

 	properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))

-	// if we create a slice, the parent is defined via a Wants=
 	if strings.HasSuffix(unitName, ".slice") {
+		// If we create a slice, the parent is defined via a Wants=.
 		properties = append(properties, systemdDbus.PropWants(slice))
 	} else {
-		// otherwise, we use Slice=
+		// Otherwise it's a scope, which we put into a Slice=.
 		properties = append(properties, systemdDbus.PropSlice(slice))
+		// Assume scopes always support delegation (supported since systemd v218).
+		properties = append(properties, newProp("Delegate", true))
 	}

 	// only add pid if its valid, -1 is used w/ general slice creation.
@@ -149,12 +191,6 @@ func (m *legacyManager) Apply(pid int) error {
 		properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
 	}

-	// Check if we can delegate. This is only supported on systemd versions 218 and above.
-	if !strings.HasSuffix(unitName, ".slice") {
-		// Assume scopes always support delegation.
-		properties = append(properties, newProp("Delegate", true))
-	}
-
 	// Always enable accounting, this gets us the same behaviour as the fs implementation,
 	// plus the kernel has some problems with joining the memory cgroup at a later time.
 	properties = append(properties,
@@ -174,26 +210,6 @@ func (m *legacyManager) Apply(pid int) error {
 		return err
 	}

-	paths := make(map[string]string)
-	for _, s := range legacySubsystems {
-		subsystemPath, err := getSubsystemPath(m.cgroups, s.Name())
-		if err != nil {
-			// Even if it's `not found` error, we'll return err
-			// because devices cgroup is hard requirement for
-			// container security.
-			if s.Name() == "devices" {
-				return err
-			}
-			// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
-			if cgroups.IsNotFound(err) {
-				continue
-			}
-			return err
-		}
-		paths[s.Name()] = subsystemPath
-	}
-	m.paths = paths
-
 	if err := m.joinCgroups(pid); err != nil {
 		return err
 	}
@@ -202,9 +218,6 @@ func (m *legacyManager) Apply(pid int) error {
 }

 func (m *legacyManager) Destroy() error {
-	if m.cgroups.Paths != nil {
-		return nil
-	}
 	m.mu.Lock()
 	defer m.mu.Unlock()

@@ -254,7 +267,7 @@ func (m *legacyManager) joinCgroups(pid int) error {
 	return nil
 }

-func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
+func getSubsystemPath(slice, unit, subsystem string) (string, error) {
 	mountpoint, err := cgroups.FindCgroupMountpoint("", subsystem)
 	if err != nil {
 		return "", err
@@ -267,17 +280,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
 	// if pid 1 is systemd 226 or later, it will be in init.scope, not the root
 	initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope")

-	slice := "system.slice"
-	if c.Parent != "" {
-		slice = c.Parent
-	}
-
-	slice, err = ExpandSlice(slice)
-	if err != nil {
-		return "", err
-	}
-
-	return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
+	return filepath.Join(mountpoint, initPath, slice, unit), nil
 }

 func (m *legacyManager) Freeze(state configs.FreezerState) error {
@@ -399,9 +402,7 @@ func (m *legacyManager) freezeBeforeSet(unitName string, r *configs.Resources) (
 }

 func (m *legacyManager) Set(r *configs.Resources) error {
-	// If Paths are set, then we are just joining cgroups paths
-	// and there is no need to set any values.
-	if m.cgroups.Paths != nil {
+	if r == nil {
 		return nil
 	}
 	if r.Unified != nil {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
@@ -1,10 +1,10 @@
-// +build linux
-
 package systemd

 import (
+	"bufio"
 	"fmt"
 	"math"
+	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
@@ -12,29 +12,39 @@ import (

 	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
 	securejoin "github.com/cyphar/filepath-securejoin"
+	"github.com/sirupsen/logrus"
+
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
 	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/pkg/errors"
-	"github.com/sirupsen/logrus"
 )

 type unifiedManager struct {
 	mu      sync.Mutex
 	cgroups *configs.Cgroup
 	// path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
-	path     string
-	rootless bool
-	dbus     *dbusConnManager
+	path  string
+	dbus  *dbusConnManager
+	fsMgr cgroups.Manager
 }

-func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgroups.Manager {
-	return &unifiedManager{
-		cgroups:  config,
-		path:     path,
-		rootless: rootless,
-		dbus:     newDbusConnManager(rootless),
+func NewUnifiedManager(config *configs.Cgroup, path string) (cgroups.Manager, error) {
+	m := &unifiedManager{
+		cgroups: config,
+		path:    path,
+		dbus:    newDbusConnManager(config.Rootless),
 	}
+	if err := m.initPath(); err != nil {
+		return nil, err
+	}
+
+	fsMgr, err := fs2.NewManager(config, m.path)
+	if err != nil {
+		return nil, err
+	}
+	m.fsMgr = fsMgr
+
+	return m, nil
 }

 // unifiedResToSystemdProps tries to convert from Cgroup.Resources.Unified
@@ -233,12 +243,8 @@ func (m *unifiedManager) Apply(pid int) error {
 		properties []systemdDbus.Property
 	)

-	if c.Paths != nil {
-		return cgroups.WriteCgroupProc(m.path, pid)
-	}
-
 	slice := "system.slice"
-	if m.rootless {
+	if m.cgroups.Rootless {
 		slice = "user.slice"
 	}
 	if c.Parent != "" {
@@ -247,12 +253,14 @@ func (m *unifiedManager) Apply(pid int) error {

 	properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))

-	// if we create a slice, the parent is defined via a Wants=
 	if strings.HasSuffix(unitName, ".slice") {
+		// If we create a slice, the parent is defined via a Wants=.
 		properties = append(properties, systemdDbus.PropWants(slice))
 	} else {
-		// otherwise, we use Slice=
+		// Otherwise it's a scope, which we put into a Slice=.
 		properties = append(properties, systemdDbus.PropSlice(slice))
+		// Assume scopes always support delegation (supported since systemd v218).
+		properties = append(properties, newProp("Delegate", true))
 	}

 	// only add pid if its valid, -1 is used w/ general slice creation.
@@ -260,12 +268,6 @@ func (m *unifiedManager) Apply(pid int) error {
 		properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
 	}

-	// Check if we can delegate. This is only supported on systemd versions 218 and above.
-	if !strings.HasSuffix(unitName, ".slice") {
-		// Assume scopes always support delegation.
-		properties = append(properties, newProp("Delegate", true))
-	}
-
 	// Always enable accounting, this gets us the same behaviour as the fs implementation,
 	// plus the kernel has some problems with joining the memory cgroup at a later time.
 	properties = append(properties,
@@ -282,22 +284,53 @@ func (m *unifiedManager) Apply(pid int) error {
 	properties = append(properties, c.SystemdProps...)

 	if err := startUnit(m.dbus, unitName, properties); err != nil {
-		return errors.Wrapf(err, "error while starting unit %q with properties %+v", unitName, properties)
+		return fmt.Errorf("unable to start unit %q (properties %+v): %w", unitName, properties, err)
 	}

-	if err := m.initPath(); err != nil {
-		return err
-	}
 	if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
 		return err
 	}
+
+	if c.OwnerUID != nil {
+		filesToChown, err := cgroupFilesToChown()
+		if err != nil {
+			return err
+		}
+
+		for _, v := range filesToChown {
+			err := os.Chown(m.path+"/"+v, *c.OwnerUID, -1)
+			if err != nil {
+				return err
+			}
+		}
+	}
+
 	return nil
 }

-func (m *unifiedManager) Destroy() error {
-	if m.cgroups.Paths != nil {
-		return nil
+// cgroupFilesToChown returns the entries (relative to a cgroup directory)
+// to chown to the delegate uid. The list is read from the kernel's
+// /sys/kernel/cgroup/delegate; if that file cannot be opened (it is absent
+// on Linux < 4.15), the initial values mentioned in cgroups(7) are used.
+func cgroupFilesToChown() ([]string, error) {
+	const delegateList = "/sys/kernel/cgroup/delegate"
+	names := []string{"."} // the directory itself must be chowned
+	fd, openErr := os.Open(delegateList)
+	if openErr != nil {
+		return append(names, "cgroup.procs", "cgroup.subtree_control", "cgroup.threads"), nil
+	}
+	defer fd.Close()
+	sc := bufio.NewScanner(fd)
+	for sc.Scan() {
+		names = append(names, sc.Text())
+	}
+	if scanErr := sc.Err(); scanErr != nil {
+		return nil, fmt.Errorf("error reading %s: %w", delegateList, scanErr)
+	}
+	return names, nil
+}
+
+func (m *unifiedManager) Destroy() error {
 	m.mu.Lock()
 	defer m.mu.Unlock()

@@ -307,8 +340,8 @@ func (m *unifiedManager) Destroy() error {
 	}

 	// systemd 239 do not remove sub-cgroups.
-	err := cgroups.RemovePath(m.path)
-	// cgroups.RemovePath has handled ErrNotExist
+	err := m.fsMgr.Destroy()
+	// fsMgr.Destroy has handled ErrNotExist
 	if err != nil {
 		return err
 	}
@@ -317,7 +350,6 @@ func (m *unifiedManager) Destroy() error {
 }

 func (m *unifiedManager) Path(_ string) string {
-	_ = m.initPath()
 	return m.path
 }

@@ -326,7 +358,7 @@ func (m *unifiedManager) Path(_ string) string {
 func (m *unifiedManager) getSliceFull() (string, error) {
 	c := m.cgroups
 	slice := "system.slice"
-	if m.rootless {
+	if c.Rootless {
 		slice = "user.slice"
 	}
 	if c.Parent != "" {
@@ -337,7 +369,7 @@ func (m *unifiedManager) getSliceFull() (string, error) {
 		}
 	}

-	if m.rootless {
+	if c.Rootless {
 		// managerCG is typically "/user.slice/user-${uid}.slice/user@${uid}.service".
 		managerCG, err := getManagerProperty(m.dbus, "ControlGroup")
 		if err != nil {
@@ -375,58 +407,36 @@ func (m *unifiedManager) initPath() error {
 	return nil
 }

-func (m *unifiedManager) fsManager() (cgroups.Manager, error) {
-	if err := m.initPath(); err != nil {
-		return nil, err
-	}
-	return fs2.NewManager(m.cgroups, m.path, m.rootless)
-}
-
 func (m *unifiedManager) Freeze(state configs.FreezerState) error {
-	fsMgr, err := m.fsManager()
-	if err != nil {
-		return err
-	}
-	return fsMgr.Freeze(state)
+	return m.fsMgr.Freeze(state)
 }

 func (m *unifiedManager) GetPids() ([]int, error) {
-	if err := m.initPath(); err != nil {
-		return nil, err
-	}
 	return cgroups.GetPids(m.path)
 }

 func (m *unifiedManager) GetAllPids() ([]int, error) {
-	if err := m.initPath(); err != nil {
-		return nil, err
-	}
 	return cgroups.GetAllPids(m.path)
 }

 func (m *unifiedManager) GetStats() (*cgroups.Stats, error) {
-	fsMgr, err := m.fsManager()
-	if err != nil {
-		return nil, err
-	}
-	return fsMgr.GetStats()
+	return m.fsMgr.GetStats()
 }

 func (m *unifiedManager) Set(r *configs.Resources) error {
+	if r == nil {
+		return nil
+	}
 	properties, err := genV2ResourcesProperties(r, m.dbus)
 	if err != nil {
 		return err
 	}

 	if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {
-		return errors.Wrap(err, "error while setting unit properties")
+		return fmt.Errorf("unable to set unit properties: %w", err)
 	}

-	fsMgr, err := m.fsManager()
-	if err != nil {
-		return err
-	}
-	return fsMgr.Set(r)
+	return m.fsMgr.Set(r)
 }

 func (m *unifiedManager) GetPaths() map[string]string {
@@ -440,11 +450,7 @@ func (m *unifiedManager) GetCgroups() (*configs.Cgroup, error) {
 }

 func (m *unifiedManager) GetFreezerState() (configs.FreezerState, error) {
-	fsMgr, err := m.fsManager()
-	if err != nil {
-		return configs.Undefined, err
-	}
-	return fsMgr.GetFreezerState()
+	return m.fsMgr.GetFreezerState()
 }

 func (m *unifiedManager) Exists() bool {
@@ -452,9 +458,5 @@ func (m *unifiedManager) Exists() bool {
 }

 func (m *unifiedManager) OOMKillCount() (uint64, error) {
-	fsMgr, err := m.fsManager()
-	if err != nil {
-		return 0, err
-	}
-	return fsMgr.OOMKillCount()
+	return m.fsMgr.OOMKillCount()
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package cgroups

 import (
@@ -7,7 +5,6 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strconv"
@@ -23,11 +20,14 @@ import (
 const (
 	CgroupProcesses   = "cgroup.procs"
 	unifiedMountpoint = "/sys/fs/cgroup"
+	hybridMountpoint  = "/sys/fs/cgroup/unified"
 )

 var (
 	isUnifiedOnce sync.Once
 	isUnified     bool
+	isHybridOnce  sync.Once
+	isHybrid      bool
 )

 // IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
@@ -49,6 +49,24 @@ func IsCgroup2UnifiedMode() bool {
 	return isUnified
 }

+// IsCgroup2HybridMode reports whether hybridMountpoint is a cgroup v2
+// filesystem, i.e. cgroup v2 hybrid mode. The check runs once and is cached.
+func IsCgroup2HybridMode() bool {
+	isHybridOnce.Do(func() {
+		var fsInfo unix.Statfs_t
+		switch statErr := unix.Statfs(hybridMountpoint, &fsInfo); {
+		case statErr == nil:
+			isHybrid = fsInfo.Type == unix.CGROUP2_SUPER_MAGIC
+		case os.IsNotExist(statErr):
+			// ignore the "not found" error
+			isHybrid = false
+		default:
+			panic(fmt.Sprintf("cannot statfs cgroup root: %s", statErr))
+		}
+	})
+	return isHybrid
+}
+
 type Mount struct {
 	Mountpoint string
 	Root       string
@@ -118,8 +136,8 @@ func GetAllSubsystems() ([]string, error) {
 	return subsystems, nil
 }

-func readProcsFile(file string) ([]int, error) {
-	f, err := os.Open(file)
+func readProcsFile(dir string) ([]int, error) {
+	f, err := OpenFile(dir, CgroupProcesses, os.O_RDONLY)
 	if err != nil {
 		return nil, err
 	}
@@ -210,7 +228,7 @@ func EnterPid(cgroupPaths map[string]string, pid int) error {

 func rmdir(path string) error {
 	err := unix.Rmdir(path)
-	if err == nil || err == unix.ENOENT {
+	if err == nil || err == unix.ENOENT { //nolint:errorlint // unix errors are bare
 		return nil
 	}
 	return &os.PathError{Op: "rmdir", Path: path, Err: err}
@@ -224,7 +242,7 @@ func RemovePath(path string) error {
 		return nil
 	}

-	infos, err := ioutil.ReadDir(path)
+	infos, err := os.ReadDir(path)
 	if err != nil {
 		if os.IsNotExist(err) {
 			err = nil
@@ -284,40 +302,61 @@ func RemovePaths(paths map[string]string) (err error) {
 	return fmt.Errorf("Failed to remove paths: %v", paths)
 }

-func GetHugePageSize() ([]string, error) {
-	dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
-	if err != nil {
-		return nil, err
-	}
-	files, err := dir.Readdirnames(0)
-	dir.Close()
-	if err != nil {
-		return nil, err
-	}
+var (
+	hugePageSizes []string
+	initHPSOnce   sync.Once
+)

-	return getHugePageSizeFromFilenames(files)
+func HugePageSizes() []string {
+	initHPSOnce.Do(func() {
+		dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
+		if err != nil {
+			return
+		}
+		files, err := dir.Readdirnames(0)
+		dir.Close()
+		if err != nil {
+			return
+		}
+
+		hugePageSizes, err = getHugePageSizeFromFilenames(files)
+		if err != nil {
+			logrus.Warn("HugePageSizes: ", err)
+		}
+	})
+
+	return hugePageSizes
 }

 func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
 	pageSizes := make([]string, 0, len(fileNames))
+	var warn error

 	for _, file := range fileNames {
 		// example: hugepages-1048576kB
 		val := strings.TrimPrefix(file, "hugepages-")
 		if len(val) == len(file) {
-			// unexpected file name: no prefix found
+			// Unexpected file name: no prefix found, ignore it.
 			continue
 		}
-		// The suffix is always "kB" (as of Linux 5.9)
+		// The suffix is always "kB" (as of Linux 5.13). If we find
+		// something else, produce an error but keep going.
 		eLen := len(val) - 2
 		val = strings.TrimSuffix(val, "kB")
 		if len(val) != eLen {
-			logrus.Warnf("GetHugePageSize: %s: invalid filename suffix (expected \"kB\")", file)
+			// Highly unlikely.
+			if warn == nil {
+				warn = errors.New(file + `: invalid suffix (expected "kB")`)
+			}
 			continue
 		}
 		size, err := strconv.Atoi(val)
 		if err != nil {
-			return nil, err
+			// Highly unlikely.
+			if warn == nil {
+				warn = fmt.Errorf("%s: %w", file, err)
+			}
+			continue
 		}
 		// Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574
 		// but in our case the size is in KB already.
@@ -331,34 +370,12 @@ func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
 		pageSizes = append(pageSizes, val)
 	}

-	return pageSizes, nil
+	return pageSizes, warn
 }

 // GetPids returns all pids, that were added to cgroup at path.
 func GetPids(dir string) ([]int, error) {
-	return readProcsFile(filepath.Join(dir, CgroupProcesses))
-}
-
-// GetAllPids returns all pids, that were added to cgroup at path and to all its
-// subcgroups.
-func GetAllPids(path string) ([]int, error) {
-	var pids []int
-	// collect pids from all sub-cgroups
-	err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
-		if iErr != nil {
-			return iErr
-		}
-		if info.IsDir() || info.Name() != CgroupProcesses {
-			return nil
-		}
-		cPids, err := readProcsFile(p)
-		if err != nil {
-			return err
-		}
-		pids = append(pids, cPids...)
-		return nil
-	})
-	return pids, err
+	return readProcsFile(dir)
 }

 // WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
@@ -376,7 +393,7 @@ func WriteCgroupProc(dir string, pid int) error {

 	file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY)
 	if err != nil {
-		return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
+		return fmt.Errorf("failed to write %v: %w", pid, err)
 	}
 	defer file.Close()

@@ -393,7 +410,7 @@ func WriteCgroupProc(dir string, pid int) error {
 			continue
 		}

-		return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
+		return fmt.Errorf("failed to write %v: %w", pid, err)
 	}
 	return err
 }
@@ -446,5 +463,5 @@ func ConvertBlkIOToIOWeightValue(blkIoWeight uint16) uint64 {
 	if blkIoWeight == 0 {
 		return 0
 	}
-	return uint64(1 + (uint64(blkIoWeight)-10)*9999/990)
+	return 1 + (uint64(blkIoWeight)-10)*9999/990
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
@@ -46,11 +46,8 @@ func NewNotFoundError(sub string) error {
 }

 func IsNotFound(err error) bool {
-	if err == nil {
-		return false
-	}
-	_, ok := err.(*NotFoundError)
-	return ok
+	var nfErr *NotFoundError
+	return errors.As(err, &nfErr)
 }

 func tryDefaultPath(cgroupPath, subsystem string) string {
@@ -116,6 +113,11 @@ func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
 		return "", errUnified
 	}

+	// If subsystem is empty, we look for the cgroupv2 hybrid path.
+	if len(subsystem) == 0 {
+		return hybridMountpoint, nil
+	}
+
 	// Avoid parsing mountinfo by trying the default path first, if possible.
 	if path := tryDefaultPath(cgroupPath, subsystem); path != "" {
 		return path, nil
@@ -154,7 +156,7 @@ func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, sub

 func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
 	if len(m.Subsystems) == 0 {
-		return "", fmt.Errorf("no subsystem for mount")
+		return "", errors.New("no subsystem for mount")
 	}

 	return getControllerPath(m.Subsystems[0], cgroups)
@@ -226,6 +228,11 @@ func GetOwnCgroupPath(subsystem string) (string, error) {
 		return "", err
 	}

+	// If subsystem is empty, we look for the cgroupv2 hybrid path.
+	if len(subsystem) == 0 {
+		return hybridMountpoint, nil
+	}
+
 	return getCgroupPathHelper(subsystem, cgroup)
 }

--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
@@ -28,17 +28,26 @@ type Cgroup struct {
 	// ScopePrefix describes prefix for the scope name
 	ScopePrefix string `json:"scope_prefix"`

-	// Paths represent the absolute cgroups paths to join.
-	// This takes precedence over Path.
-	Paths map[string]string
-
 	// Resources contains various cgroups settings to apply
 	*Resources

+	// Systemd tells if systemd should be used to manage cgroups.
+	Systemd bool
+
 	// SystemdProps are any additional properties for systemd,
 	// derived from org.systemd.property.xxx annotations.
 	// Ignored unless systemd is used for managing cgroups.
 	SystemdProps []systemdDbus.Property `json:"-"`
+
+	// Rootless tells if rootless cgroups should be used.
+	Rootless bool
+
+	// The host UID that should own the cgroup, or nil to accept
+	// the default ownership.  This should only be set when the
+	// cgroupfs is to be mounted read/write.
+	// Not all cgroup manager implementations support changing
+	// the ownership.
+	OwnerUID *int `json:"owner_uid,omitempty"`
 }

 type Resources struct {
@@ -117,6 +126,9 @@ type Resources struct {
 	// Set class identifier for container's network packets
 	NetClsClassid uint32 `json:"net_cls_classid_u"`

+	// Rdma resource restriction configuration
+	Rdma map[string]LinuxRdma `json:"rdma"`
+
 	// Used on cgroups v2:

 	// CpuWeight sets a proportional bandwidth limit.
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
@@ -1,3 +1,4 @@
+//go:build !linux
 // +build !linux

 package configs
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
@@ -7,10 +7,10 @@ import (
 	"os/exec"
 	"time"

+	"github.com/sirupsen/logrus"
+
 	"github.com/opencontainers/runc/libcontainer/devices"
 	"github.com/opencontainers/runtime-spec/specs-go"
-	"github.com/pkg/errors"
-	"github.com/sirupsen/logrus"
 )

 type Rlimit struct {
@@ -31,10 +31,12 @@ type IDMap struct {
 // for syscalls. Additional architectures can be added by specifying them in
 // Architectures.
 type Seccomp struct {
-	DefaultAction   Action     `json:"default_action"`
-	Architectures   []string   `json:"architectures"`
-	Syscalls        []*Syscall `json:"syscalls"`
-	DefaultErrnoRet *uint      `json:"default_errno_ret"`
+	DefaultAction    Action     `json:"default_action"`
+	Architectures    []string   `json:"architectures"`
+	Syscalls         []*Syscall `json:"syscalls"`
+	DefaultErrnoRet  *uint      `json:"default_errno_ret"`
+	ListenerPath     string     `json:"listener_path,omitempty"`
+	ListenerMetadata string     `json:"listener_metadata,omitempty"`
 }

 // Action is taken upon rule match in Seccomp
@@ -47,6 +49,9 @@ const (
 	Allow
 	Trace
 	Log
+	Notify
+	KillThread
+	KillProcess
 )

 // Operator is a comparison operator to be used when matching syscall arguments in Seccomp
@@ -246,6 +251,19 @@ const (
 	Poststop HookName = "poststop"
 )

+// KnownHookNames returns the known hook names as plain strings; Prestart is
+// included although it is deprecated. Used by `runc features`.
+func KnownHookNames() []string {
+	return []string{
+		string(Prestart), // deprecated
+		string(CreateRuntime),
+		string(CreateContainer),
+		string(StartContainer),
+		string(Poststart),
+		string(Poststop),
+	}
+}
+
 type Capabilities struct {
 	// Bounding is the set of capabilities checked by the kernel.
 	Bounding []string
@@ -262,7 +280,7 @@ type Capabilities struct {
 func (hooks HookList) RunHooks(state *specs.State) error {
 	for i, h := range hooks {
 		if err := h.Run(state); err != nil {
-			return errors.Wrapf(err, "Running hook #%d:", i)
+			return fmt.Errorf("error running hook #%d: %w", i, err)
 		}
 	}

@@ -375,7 +393,7 @@ func (c Command) Run(s *specs.State) error {
 	go func() {
 		err := cmd.Wait()
 		if err != nil {
-			err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
+			err = fmt.Errorf("error running hook: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
 		}
 		errC <- err
 	}()
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
@@ -1,17 +1,24 @@
 package configs

-import "fmt"
+import "errors"
+
+var (
+	errNoUIDMap   = errors.New("User namespaces enabled, but no uid mappings found.")
+	errNoUserMap  = errors.New("User namespaces enabled, but no user mapping found.")
+	errNoGIDMap   = errors.New("User namespaces enabled, but no gid mappings found.")
+	errNoGroupMap = errors.New("User namespaces enabled, but no group mapping found.")
+)

 // HostUID gets the translated uid for the process on host which could be
 // different when user namespaces are enabled.
 func (c Config) HostUID(containerId int) (int, error) {
 	if c.Namespaces.Contains(NEWUSER) {
 		if c.UidMappings == nil {
-			return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
+			return -1, errNoUIDMap
 		}
 		id, found := c.hostIDFromMapping(containerId, c.UidMappings)
 		if !found {
-			return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
+			return -1, errNoUserMap
 		}
 		return id, nil
 	}
@@ -30,11 +37,11 @@ func (c Config) HostRootUID() (int, error) {
 func (c Config) HostGID(containerId int) (int, error) {
 	if c.Namespaces.Contains(NEWUSER) {
 		if c.GidMappings == nil {
-			return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
+			return -1, errNoGIDMap
 		}
 		id, found := c.hostIDFromMapping(containerId, c.GidMappings)
 		if !found {
-			return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
+			return -1, errNoGroupMap
 		}
 		return id, nil
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go
@@ -1,3 +1,4 @@
+//go:build gofuzz
 // +build gofuzz

 package configs
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
@@ -1,6 +1,9 @@
 package configs

 type IntelRdt struct {
+	// The identity for RDT Class of Service
+	ClosID string `json:"closID,omitempty"`
+
 	// The schema for L3 cache id and capacity bitmask (CBM)
 	// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
 	L3CacheSchema string `json:"l3_cache_schema,omitempty"`
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
@@ -1,5 +1,7 @@
 package configs

+import "golang.org/x/sys/unix"
+
 const (
 	// EXT_COPYUP is a directive to copy up the contents of a directory when
 	// a tmpfs is mounted over it.
@@ -28,6 +30,9 @@ type Mount struct {
 	// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
 	Relabel string `json:"relabel"`

+	// RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2).
+	RecAttr *unix.MountAttr `json:"rec_attr"`
+
 	// Extensions are additional flags that are specific to runc.
 	Extensions int `json:"extensions"`

@@ -37,3 +42,7 @@ type Mount struct {
 	// Optional Command to be run after Source is mounted.
 	PostmountCmds []Command `json:"postmount_cmds"`
 }
+
+func (m *Mount) IsBind() bool {
+	return m.Flags&unix.MS_BIND != 0
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
@@ -1,3 +1,4 @@
+//go:build linux
 // +build linux

 package configs
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go
@@ -1,3 +1,4 @@
+//go:build !linux && !windows
 // +build !linux,!windows

 package configs
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
@@ -1,3 +1,4 @@
+//go:build !linux
 // +build !linux

 package configs
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/rdma.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/rdma.go
@@ -0,0 +1,9 @@
+package configs
+
+// LinuxRdma describes limits for the Linux cgroup 'rdma' controller (Linux 4.11).
+type LinuxRdma struct {
+	// Maximum number of HCA handles that can be opened. Default is "no limit".
+	HcaHandles *uint32 `json:"hca_handles,omitempty"`
+	// Maximum number of HCA objects that can be created. Default is "no limit".
+	HcaObjects *uint32 `json:"hca_objects,omitempty"`
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
@@ -52,7 +52,7 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
 	}
 	for _, c := range warns {
 		if err := c(config); err != nil {
-			logrus.WithError(err).Warnf("invalid configuration")
+			logrus.WithError(err).Warn("invalid configuration")
 		}
 	}
 	return nil
@@ -62,20 +62,17 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
 // to the container's root filesystem.
 func (v *ConfigValidator) rootfs(config *configs.Config) error {
 	if _, err := os.Stat(config.Rootfs); err != nil {
-		if os.IsNotExist(err) {
-			return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs)
-		}
-		return err
+		return fmt.Errorf("invalid rootfs: %w", err)
 	}
 	cleaned, err := filepath.Abs(config.Rootfs)
 	if err != nil {
-		return err
+		return fmt.Errorf("invalid rootfs: %w", err)
 	}
 	if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
-		return err
+		return fmt.Errorf("invalid rootfs: %w", err)
 	}
 	if filepath.Clean(config.Rootfs) != cleaned {
-		return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
+		return errors.New("invalid rootfs: not an absolute path, or a symlink")
 	}
 	return nil
 }
@@ -131,6 +128,36 @@ func (v *ConfigValidator) cgroupnamespace(config *configs.Config) error {
 	return nil
 }

+// convertSysctlVariableToDotsSeparator returns val in dot-separated form.
+// sysctl accepts '/' in place of '.' as the key separator; when the first
+// separator in val is '/', every '/' becomes '.' and every '.' becomes '/'.
+// More info:
+//   https://man7.org/linux/man-pages/man8/sysctl.8.html
+//   https://man7.org/linux/man-pages/man5/sysctl.d.5.html
+// For example, the input "net/ipv4/conf/eno2.100.rp_filter" is converted to
+// "net.ipv4.conf.eno2/100.rp_filter". A value that is empty, has no
+// separator, or whose first separator is '.' is returned unchanged.
+func convertSysctlVariableToDotsSeparator(val string) string {
+	if val == "" {
+		return val
+	}
+	firstSepIndex := strings.IndexAny(val, "./")
+	if firstSepIndex == -1 || val[firstSepIndex] == '.' {
+		return val
+	}
+
+	f := func(r rune) rune {
+		switch r {
+		case '.':
+			return '/'
+		case '/':
+			return '.'
+		}
+		return r
+	}
+	return strings.Map(f, val)
+}
+
 // sysctl validates that the specified sysctl keys are valid or not.
 // /proc/sys isn't completely namespaced and depending on which namespaces
 // are specified, a subset of sysctls are permitted.
@@ -153,6 +180,7 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
 	)

 	for s := range config.Sysctl {
+		s := convertSysctlVariableToDotsSeparator(s)
 		if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") {
 			if config.Namespaces.Contains(configs.NEWIPC) {
 				continue
@@ -176,7 +204,7 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
 				hostnet, hostnetErr = isHostNetNS(path)
 			})
 			if hostnetErr != nil {
-				return hostnetErr
+				return fmt.Errorf("invalid netns path: %w", hostnetErr)
 			}
 			if hostnet {
 				return fmt.Errorf("sysctl %q not allowed in host network namespace", s)
@@ -205,19 +233,16 @@ func (v *ConfigValidator) intelrdt(config *configs.Config) error {
 			return errors.New("intelRdt is specified in config, but Intel RDT is not supported or enabled")
 		}

+		if config.IntelRdt.ClosID == "." || config.IntelRdt.ClosID == ".." || strings.Contains(config.IntelRdt.ClosID, "/") {
+			return fmt.Errorf("invalid intelRdt.ClosID %q", config.IntelRdt.ClosID)
+		}
+
 		if !intelrdt.IsCATEnabled() && config.IntelRdt.L3CacheSchema != "" {
 			return errors.New("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled")
 		}
 		if !intelrdt.IsMBAEnabled() && config.IntelRdt.MemBwSchema != "" {
 			return errors.New("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled")
 		}
-
-		if intelrdt.IsCATEnabled() && config.IntelRdt.L3CacheSchema == "" {
-			return errors.New("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
-		}
-		if intelrdt.IsMBAEnabled() && config.IntelRdt.MemBwSchema == "" {
-			return errors.New("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty")
-		}
 	}

 	return nil
@@ -268,10 +293,10 @@ func isHostNetNS(path string) (bool, error) {
 	var st1, st2 unix.Stat_t

 	if err := unix.Stat(currentProcessNetns, &st1); err != nil {
-		return false, fmt.Errorf("unable to stat %q: %s", currentProcessNetns, err)
+		return false, &os.PathError{Op: "stat", Path: currentProcessNetns, Err: err}
 	}
 	if err := unix.Stat(path, &st2); err != nil {
-		return false, fmt.Errorf("unable to stat %q: %s", path, err)
+		return false, &os.PathError{Op: "stat", Path: path, Err: err}
 	}

 	return (st1.Dev == st2.Dev) && (st1.Ino == st2.Ino), nil
--- a/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go
@@ -18,7 +18,7 @@ func mountConsole(slavePath string) error {
 	if f != nil {
 		f.Close()
 	}
-	return unix.Mount(slavePath, "/dev/console", "bind", unix.MS_BIND, "")
+	return mount(slavePath, "/dev/console", "", "bind", unix.MS_BIND, "")
 }

 // dupStdio opens the slavePath for the console and dups the fds to the current
--- a/vendor/github.com/opencontainers/runc/libcontainer/container.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/container.go
@@ -74,22 +74,12 @@ type BaseContainer interface {
 	ID() string

 	// Returns the current status of the container.
-	//
-	// errors:
-	// ContainerNotExists - Container no longer exists,
-	// Systemerror - System error.
 	Status() (Status, error)

 	// State returns the current container's state information.
-	//
-	// errors:
-	// SystemError - System error.
 	State() (*State, error)

 	// OCIState returns the current container's state information.
-	//
-	// errors:
-	// SystemError - System error.
 	OCIState() (*specs.State, error)

 	// Returns the current config of the container.
@@ -97,48 +87,26 @@ type BaseContainer interface {

 	// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
 	//
-	// errors:
-	// ContainerNotExists - Container no longer exists,
-	// Systemerror - System error.
-	//
 	// Some of the returned PIDs may no longer refer to processes in the Container, unless
 	// the Container state is PAUSED in which case every PID in the slice is valid.
 	Processes() ([]int, error)

 	// Returns statistics for the container.
-	//
-	// errors:
-	// ContainerNotExists - Container no longer exists,
-	// Systemerror - System error.
 	Stats() (*Stats, error)

 	// Set resources of container as configured
 	//
 	// We can use this to change resources when containers are running.
 	//
-	// errors:
-	// SystemError - System error.
 	Set(config configs.Config) error

 	// Start a process inside the container. Returns error if process fails to
 	// start. You can track process lifecycle with passed Process structure.
-	//
-	// errors:
-	// ContainerNotExists - Container no longer exists,
-	// ConfigInvalid - config is invalid,
-	// ContainerPaused - Container is paused,
-	// SystemError - System error.
 	Start(process *Process) (err error)

 	// Run immediately starts the process inside the container.  Returns error if process
 	// fails to start.  It does not block waiting for the exec fifo  after start returns but
 	// opens the fifo after start returns.
-	//
-	// errors:
-	// ContainerNotExists - Container no longer exists,
-	// ConfigInvalid - config is invalid,
-	// ContainerPaused - Container is paused,
-	// SystemError - System error.
 	Run(process *Process) (err error)

 	// Destroys the container, if its in a valid state, after killing any
@@ -149,25 +117,14 @@ type BaseContainer interface {
 	//
 	// Running containers must first be stopped using Signal(..).
 	// Paused containers must first be resumed using Resume(..).
-	//
-	// errors:
-	// ContainerNotStopped - Container is still running,
-	// ContainerPaused - Container is paused,
-	// SystemError - System error.
 	Destroy() error

 	// Signal sends the provided signal code to the container's initial process.
 	//
 	// If all is specified the signal is sent to all processes in the container
 	// including the initial process.
-	//
-	// errors:
-	// SystemError - System error.
 	Signal(s os.Signal, all bool) error

 	// Exec signals the container to exec the users process at the end of the init.
-	//
-	// errors:
-	// SystemError - System error.
 	Exec() error
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package libcontainer

 import (
@@ -8,10 +6,10 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"io/ioutil"
 	"net"
 	"os"
 	"os/exec"
+	"path"
 	"path/filepath"
 	"reflect"
 	"strconv"
@@ -19,21 +17,20 @@ import (
 	"sync"
 	"time"

+	"github.com/checkpoint-restore/go-criu/v5"
+	criurpc "github.com/checkpoint-restore/go-criu/v5/rpc"
 	securejoin "github.com/cyphar/filepath-securejoin"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/sirupsen/logrus"
+	"github.com/vishvananda/netlink/nl"
+	"golang.org/x/sys/unix"
+	"google.golang.org/protobuf/proto"
+
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/intelrdt"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/utils"
-	"github.com/opencontainers/runtime-spec/specs-go"
-
-	"github.com/checkpoint-restore/go-criu/v5"
-	criurpc "github.com/checkpoint-restore/go-criu/v5/rpc"
-	errorsf "github.com/pkg/errors"
-	"github.com/sirupsen/logrus"
-	"github.com/vishvananda/netlink/nl"
-	"golang.org/x/sys/unix"
-	"google.golang.org/protobuf/proto"
 )

 const stdioFdCount = 3
@@ -98,48 +95,26 @@ type Container interface {
 	// Methods below here are platform specific

 	// Checkpoint checkpoints the running container's state to disk using the criu(8) utility.
-	//
-	// errors:
-	// Systemerror - System error.
 	Checkpoint(criuOpts *CriuOpts) error

 	// Restore restores the checkpointed container to a running state using the criu(8) utility.
-	//
-	// errors:
-	// Systemerror - System error.
 	Restore(process *Process, criuOpts *CriuOpts) error

 	// If the Container state is RUNNING or CREATED, sets the Container state to PAUSING and pauses
 	// the execution of any user processes. Asynchronously, when the container finished being paused the
 	// state is changed to PAUSED.
 	// If the Container state is PAUSED, do nothing.
-	//
-	// errors:
-	// ContainerNotExists - Container no longer exists,
-	// ContainerNotRunning - Container not running or created,
-	// Systemerror - System error.
 	Pause() error

 	// If the Container state is PAUSED, resumes the execution of any user processes in the
 	// Container before setting the Container state to RUNNING.
 	// If the Container state is RUNNING, do nothing.
-	//
-	// errors:
-	// ContainerNotExists - Container no longer exists,
-	// ContainerNotPaused - Container is not paused,
-	// Systemerror - System error.
 	Resume() error

 	// NotifyOOM returns a read-only channel signaling when the container receives an OOM notification.
-	//
-	// errors:
-	// Systemerror - System error.
 	NotifyOOM() (<-chan struct{}, error)

 	// NotifyMemoryPressure returns a read-only channel signaling when the container reaches a given pressure level
-	//
-	// errors:
-	// Systemerror - System error.
 	NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error)
 }

@@ -184,7 +159,7 @@ func (c *linuxContainer) Processes() ([]int, error) {

 	pids, err = c.cgroupManager.GetAllPids()
 	if err != nil {
-		return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups")
+		return nil, fmt.Errorf("unable to get all container pids: %w", err)
 	}
 	return pids, nil
 }
@@ -195,11 +170,11 @@ func (c *linuxContainer) Stats() (*Stats, error) {
 		stats = &Stats{}
 	)
 	if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
-		return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
+		return stats, fmt.Errorf("unable to get container cgroup stats: %w", err)
 	}
 	if c.intelRdtManager != nil {
 		if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil {
-			return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats")
+			return stats, fmt.Errorf("unable to get container Intel RDT stats: %w", err)
 		}
 	}
 	for _, iface := range c.config.Networks {
@@ -207,7 +182,7 @@ func (c *linuxContainer) Stats() (*Stats, error) {
 		case "veth":
 			istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
 			if err != nil {
-				return stats, newSystemErrorWithCausef(err, "getting network stats for interface %q", iface.HostInterfaceName)
+				return stats, fmt.Errorf("unable to get network stats for interface %q: %w", iface.HostInterfaceName, err)
 			}
 			stats.Interfaces = append(stats.Interfaces, istats)
 		}
@@ -223,7 +198,7 @@ func (c *linuxContainer) Set(config configs.Config) error {
 		return err
 	}
 	if status == Stopped {
-		return newGenericError(errors.New("container not running"), ContainerNotRunning)
+		return ErrNotRunning
 	}
 	if err := c.cgroupManager.Set(config.Cgroups.Resources); err != nil {
 		// Set configs back
@@ -254,7 +229,7 @@ func (c *linuxContainer) Start(process *Process) error {
 	c.m.Lock()
 	defer c.m.Unlock()
 	if c.config.Cgroups.Resources.SkipDevices {
-		return newGenericError(errors.New("can't start container with SkipDevices set"), ConfigInvalid)
+		return errors.New("can't start container with SkipDevices set")
 	}
 	if process.Init {
 		if err := c.createExecFifo(); err != nil {
@@ -310,7 +285,7 @@ func (c *linuxContainer) exec() error {
 }

 func readFromExecFifo(execFifo io.Reader) error {
-	data, err := ioutil.ReadAll(execFifo)
+	data, err := io.ReadAll(execFifo)
 	if err != nil {
 		return err
 	}
@@ -336,7 +311,7 @@ func fifoOpen(path string, block bool) openResult {
 	}
 	f, err := os.OpenFile(path, flags, 0)
 	if err != nil {
-		return openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
+		return openResult{err: fmt.Errorf("exec fifo: %w", err)}
 	}
 	return openResult{file: f}
 }
@@ -361,7 +336,7 @@ type openResult struct {
 func (c *linuxContainer) start(process *Process) (retErr error) {
 	parent, err := c.newParentProcess(process)
 	if err != nil {
-		return newSystemErrorWithCause(err, "creating new parent process")
+		return fmt.Errorf("unable to create new parent process: %w", err)
 	}

 	logsDone := parent.forwardChildLogs()
@@ -371,13 +346,13 @@ func (c *linuxContainer) start(process *Process) (retErr error) {
 			// runc init closing the _LIBCONTAINER_LOGPIPE log fd.
 			err := <-logsDone
 			if err != nil && retErr == nil {
-				retErr = newSystemErrorWithCause(err, "forwarding init logs")
+				retErr = fmt.Errorf("unable to forward init logs: %w", err)
 			}
 		}()
 	}

 	if err := parent.start(); err != nil {
-		return newSystemErrorWithCause(err, "starting container process")
+		return fmt.Errorf("unable to start container process: %w", err)
 	}

 	if process.Init {
@@ -390,7 +365,7 @@ func (c *linuxContainer) start(process *Process) (retErr error) {

 			if err := c.config.Hooks[configs.Poststart].RunHooks(s); err != nil {
 				if err := ignoreTerminateErrors(parent.terminate()); err != nil {
-					logrus.Warn(errorsf.Wrapf(err, "Running Poststart hook"))
+					logrus.Warn(fmt.Errorf("error running poststart hook: %w", err))
 				}
 				return err
 			}
@@ -416,11 +391,19 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
 	// to avoid a PID reuse attack
 	if status == Running || status == Created || status == Paused {
 		if err := c.initProcess.signal(s); err != nil {
-			return newSystemErrorWithCause(err, "signaling init process")
+			return fmt.Errorf("unable to signal init: %w", err)
+		}
+		if status == Paused {
+			// For cgroup v1, killing a process in a frozen cgroup
+			// does nothing until it's thawed. Only thaw the cgroup
+			// for SIGKILL.
+			if s, ok := s.(unix.Signal); ok && s == unix.SIGKILL {
+				_ = c.cgroupManager.Freeze(configs.Thawed)
+			}
 		}
 		return nil
 	}
-	return newGenericError(errors.New("container not running"), ContainerNotRunning)
+	return ErrNotRunning
 }

 func (c *linuxContainer) createExecFifo() error {
@@ -472,13 +455,13 @@ func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error {
 func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
 	parentInitPipe, childInitPipe, err := utils.NewSockPair("init")
 	if err != nil {
-		return nil, newSystemErrorWithCause(err, "creating new init pipe")
+		return nil, fmt.Errorf("unable to create init pipe: %w", err)
 	}
 	messageSockPair := filePair{parentInitPipe, childInitPipe}

 	parentLogPipe, childLogPipe, err := os.Pipe()
 	if err != nil {
-		return nil, fmt.Errorf("Unable to create the log pipe:  %s", err)
+		return nil, fmt.Errorf("unable to create log pipe: %w", err)
 	}
 	logFilePair := filePair{parentLogPipe, childLogPipe}

@@ -493,7 +476,7 @@ func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
 	// that problem), but we no longer do that. However, there's no need to do
 	// this for `runc exec` so we just keep it this way to be safe.
 	if err := c.includeExecFifo(cmd); err != nil {
-		return nil, newSystemErrorWithCause(err, "including execfifo in cmd.Exec setup")
+		return nil, fmt.Errorf("unable to setup exec fifo: %w", err)
 	}
 	return c.newInitProcess(p, cmd, messageSockPair, logFilePair)
 }
@@ -537,6 +520,33 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi
 	return cmd
 }

+// shouldSendMountSources reports whether the child process must set up bind mounts
+// with the source pre-opened (O_PATH) in the host user namespace.
+// See https://github.com/opencontainers/runc/issues/2484
+func (c *linuxContainer) shouldSendMountSources() bool {
+	// Passing the mount sources via SCM_RIGHTS is only necessary when
+	// both userns and mntns are active.
+	if !c.config.Namespaces.Contains(configs.NEWUSER) ||
+		!c.config.Namespaces.Contains(configs.NEWNS) {
+		return false
+	}
+
+	// nsexec.c send_mountsources() requires setns(mntns) capabilities
+	// CAP_SYS_CHROOT and CAP_SYS_ADMIN.
+	if c.config.RootlessEUID {
+		return false
+	}
+
+	// We need to send sources if there are bind-mounts.
+	for _, m := range c.config.Mounts {
+		if m.IsBind() {
+			return true
+		}
+	}
+
+	return false
+}
+
 func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
 	cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
 	nsMaps := make(map[configs.NamespaceType]string)
@@ -546,10 +556,40 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPa
 		}
 	}
 	_, sharePidns := nsMaps[configs.NEWPID]
-	data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps)
+	data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard)
 	if err != nil {
 		return nil, err
 	}
+
+	if c.shouldSendMountSources() {
+		// Elements of this slice will be paired with mounts (see StartInitialization() and
+		// prepareRootfs()). This slice MUST have the same size as c.config.Mounts.
+		mountFds := make([]int, len(c.config.Mounts))
+		for i, m := range c.config.Mounts {
+			if !m.IsBind() {
+				// Non bind-mounts do not use an fd.
+				mountFds[i] = -1
+				continue
+			}
+
+			// The fd passed here will not be used: nsexec.c will overwrite it with dup3(). We just need
+			// to allocate a fd so that we know the number to pass in the environment variable. The fd
+			// must not be closed before cmd.Start(), so we reuse messageSockPair.child because the
+			// lifecycle of that fd is already taken care of.
+			cmd.ExtraFiles = append(cmd.ExtraFiles, messageSockPair.child)
+			mountFds[i] = stdioFdCount + len(cmd.ExtraFiles) - 1
+		}
+
+		mountFdsJson, err := json.Marshal(mountFds)
+		if err != nil {
+			return nil, fmt.Errorf("Error creating _LIBCONTAINER_MOUNT_FDS: %w", err)
+		}
+
+		cmd.Env = append(cmd.Env,
+			"_LIBCONTAINER_MOUNT_FDS="+string(mountFdsJson),
+		)
+	}
+
 	init := &initProcess{
 		cmd:             cmd,
 		messageSockPair: messageSockPair,
@@ -570,15 +610,15 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
 	cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
 	state, err := c.currentState()
 	if err != nil {
-		return nil, newSystemErrorWithCause(err, "getting container's current state")
+		return nil, fmt.Errorf("unable to get container state: %w", err)
 	}
 	// for setns process, we don't have to set cloneflags as the process namespaces
 	// will only be set via setns syscall
-	data, err := c.bootstrapData(0, state.NamespacePaths)
+	data, err := c.bootstrapData(0, state.NamespacePaths, initSetns)
 	if err != nil {
 		return nil, err
 	}
-	return &setnsProcess{
+	proc := &setnsProcess{
 		cmd:             cmd,
 		cgroupPaths:     state.CgroupPaths,
 		rootlessCgroups: c.config.RootlessCgroups,
@@ -590,7 +630,29 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
 		process:         p,
 		bootstrapData:   data,
 		initProcessPid:  state.InitProcessPid,
-	}, nil
+	}
+	if len(p.SubCgroupPaths) > 0 {
+		if add, ok := p.SubCgroupPaths[""]; ok {
+			// cgroup v1: using the same path for all controllers.
+			// cgroup v2: the only possible way.
+			for k := range proc.cgroupPaths {
+				proc.cgroupPaths[k] = path.Join(proc.cgroupPaths[k], add)
+			}
+			// cgroup v2: do not try to join init process's cgroup
+			// as a fallback (see (*setnsProcess).start).
+			proc.initProcessPid = 0
+		} else {
+			// Per-controller paths.
+			for ctrl, add := range p.SubCgroupPaths {
+				if val, ok := proc.cgroupPaths[ctrl]; ok {
+					proc.cgroupPaths[ctrl] = path.Join(val, add)
+				} else {
+					return nil, fmt.Errorf("unknown controller %s in SubCgroupPaths", ctrl)
+				}
+			}
+		}
+	}
+	return proc, nil
 }

 func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
@@ -655,7 +717,7 @@ func (c *linuxContainer) Pause() error {
 			c: c,
 		})
 	}
-	return newGenericError(fmt.Errorf("container not running or created: %s", status), ContainerNotRunning)
+	return ErrNotRunning
 }

 func (c *linuxContainer) Resume() error {
@@ -666,7 +728,7 @@ func (c *linuxContainer) Resume() error {
 		return err
 	}
 	if status != Paused {
-		return newGenericError(fmt.Errorf("container not paused"), ContainerNotPaused)
+		return ErrNotPaused
 	}
 	if err := c.cgroupManager.Freeze(configs.Thawed); err != nil {
 		return err
@@ -771,7 +833,7 @@ func (c *linuxContainer) checkCriuVersion(minVersion int) error {
 	var err error
 	c.criuVersion, err = criu.GetCriuVersion()
 	if err != nil {
-		return fmt.Errorf("CRIU version check failed: %s", err)
+		return fmt.Errorf("CRIU version check failed: %w", err)
 	}

 	return compareCriuVersion(c.criuVersion, minVersion)
@@ -781,6 +843,9 @@ const descriptorsFilename = "descriptors.json"

 func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) {
 	mountDest := strings.TrimPrefix(m.Destination, c.config.Rootfs)
+	if dest, err := securejoin.SecureJoin(c.config.Rootfs, mountDest); err == nil {
+		mountDest = dest[len(c.config.Rootfs):]
+	}
 	extMnt := &criurpc.ExtMountMap{
 		Key: proto.String(mountDest),
 		Val: proto.String(mountDest),
@@ -972,20 +1037,6 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
 		return err
 	}

-	if criuOpts.WorkDirectory == "" {
-		criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
-	}
-
-	if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) {
-		return err
-	}
-
-	workDir, err := os.Open(criuOpts.WorkDirectory)
-	if err != nil {
-		return err
-	}
-	defer workDir.Close()
-
 	imageDir, err := os.Open(criuOpts.ImagesDirectory)
 	if err != nil {
 		return err
@@ -994,7 +1045,6 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {

 	rpcOpts := criurpc.CriuOpts{
 		ImagesDirFd:     proto.Int32(int32(imageDir.Fd())),
-		WorkDirFd:       proto.Int32(int32(workDir.Fd())),
 		LogLevel:        proto.Int32(4),
 		LogFile:         proto.String("dump.log"),
 		Root:            proto.String(c.config.Rootfs),
@@ -1012,6 +1062,19 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
 		LazyPages:       proto.Bool(criuOpts.LazyPages),
 	}

+	// if criuOpts.WorkDirectory is not set, criu default is used.
+	if criuOpts.WorkDirectory != "" {
+		if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) {
+			return err
+		}
+		workDir, err := os.Open(criuOpts.WorkDirectory)
+		if err != nil {
+			return err
+		}
+		defer workDir.Close()
+		rpcOpts.WorkDirFd = proto.Int32(int32(workDir.Fd()))
+	}
+
 	c.handleCriuConfigurationFile(&rpcOpts)

 	// If the container is running in a network namespace and has
@@ -1054,7 +1117,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {

 	// append optional manage cgroups mode
 	if criuOpts.ManageCgroupsMode != 0 {
-		mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
+		mode := criuOpts.ManageCgroupsMode
 		rpcOpts.ManageCgroupsMode = &mode
 	}

@@ -1144,7 +1207,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
 			return err
 		}

-		err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0o600)
+		err = os.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0o600)
 		if err != nil {
 			return err
 		}
@@ -1159,6 +1222,9 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {

 func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mount) {
 	mountDest := strings.TrimPrefix(m.Destination, c.config.Rootfs)
+	if dest, err := securejoin.SecureJoin(c.config.Rootfs, mountDest); err == nil {
+		mountDest = dest[len(c.config.Rootfs):]
+	}
 	extMnt := &criurpc.ExtMountMap{
 		Key: proto.String(mountDest),
 		Val: proto.String(m.Source),
@@ -1203,7 +1269,9 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
 	case "bind":
 		// The prepareBindMount() function checks if source
 		// exists. So it cannot be used for other filesystem types.
-		if err := prepareBindMount(m, c.config.Rootfs); err != nil {
+		// TODO: pass something other than nil? Not sure if criu is
+		// impacted by issue #2484.
+		if err := prepareBindMount(m, c.config.Rootfs, nil); err != nil {
 			return err
 		}
 	default:
@@ -1256,7 +1324,7 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error
 		for _, u := range umounts {
 			_ = utils.WithProcfd(c.config.Rootfs, u, func(procfd string) error {
 				if e := unix.Unmount(procfd, unix.MNT_DETACH); e != nil {
-					if e != unix.EINVAL {
+					if e != unix.EINVAL { //nolint:errorlint // unix errors are bare
 						// Ignore EINVAL as it means 'target is not a mount point.'
 						// It probably has already been unmounted.
 						logrus.Warnf("Error during cleanup unmounting of %s (%s): %v", procfd, u, e)
@@ -1282,8 +1350,8 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error
 			// set up in the order they are configured.
 			if m.Device == "bind" {
 				if err := utils.WithProcfd(c.config.Rootfs, m.Destination, func(procfd string) error {
-					if err := unix.Mount(m.Source, procfd, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
-						return errorsf.Wrapf(err, "unable to bind mount %q to %q (through %q)", m.Source, m.Destination, procfd)
+					if err := mount(m.Source, m.Destination, procfd, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
+						return err
 					}
 					return nil
 				}); err != nil {
@@ -1311,19 +1379,6 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 	if err := c.checkCriuVersion(30000); err != nil {
 		return err
 	}
-	if criuOpts.WorkDirectory == "" {
-		criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
-	}
-	// Since a container can be C/R'ed multiple times,
-	// the work directory may already exist.
-	if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) {
-		return err
-	}
-	workDir, err := os.Open(criuOpts.WorkDirectory)
-	if err != nil {
-		return err
-	}
-	defer workDir.Close()
 	if criuOpts.ImagesDirectory == "" {
 		return errors.New("invalid directory to restore checkpoint")
 	}
@@ -1346,7 +1401,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 	if err != nil {
 		return err
 	}
-	err = unix.Mount(c.config.Rootfs, root, "", unix.MS_BIND|unix.MS_REC, "")
+	err = mount(c.config.Rootfs, root, "", "", unix.MS_BIND|unix.MS_REC, "")
 	if err != nil {
 		return err
 	}
@@ -1356,7 +1411,6 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 		Type: &t,
 		Opts: &criurpc.CriuOpts{
 			ImagesDirFd:     proto.Int32(int32(imageDir.Fd())),
-			WorkDirFd:       proto.Int32(int32(workDir.Fd())),
 			EvasiveDevices:  proto.Bool(true),
 			LogLevel:        proto.Int32(4),
 			LogFile:         proto.String("restore.log"),
@@ -1383,7 +1437,26 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 		}
 		req.Opts.LsmProfile = proto.String(criuOpts.LsmProfile)
 	}
+	if criuOpts.LsmMountContext != "" {
+		if err := c.checkCriuVersion(31600); err != nil {
+			return errors.New("--lsm-mount-context requires at least CRIU 3.16")
+		}
+		req.Opts.LsmMountContext = proto.String(criuOpts.LsmMountContext)
+	}

+	if criuOpts.WorkDirectory != "" {
+		// Since a container can be C/R'ed multiple times,
+		// the work directory may already exist.
+		if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) {
+			return err
+		}
+		workDir, err := os.Open(criuOpts.WorkDirectory)
+		if err != nil {
+			return err
+		}
+		defer workDir.Close()
+		req.Opts.WorkDirFd = proto.Int32(int32(workDir.Fd()))
+	}
 	c.handleCriuConfigurationFile(req.Opts)

 	if err := c.handleRestoringNamespaces(req.Opts, &extraFiles); err != nil {
@@ -1432,7 +1505,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {

 	// append optional manage cgroups mode
 	if criuOpts.ManageCgroupsMode != 0 {
-		mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
+		mode := criuOpts.ManageCgroupsMode
 		req.Opts.ManageCgroupsMode = &mode
 	}

@@ -1440,7 +1513,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 		fds    []string
 		fdJSON []byte
 	)
-	if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil {
+	if fdJSON, err = os.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil {
 		return err
 	}

@@ -1477,7 +1550,7 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
 	}

 	if err := c.cgroupManager.Set(c.config.Cgroups.Resources); err != nil {
-		return newSystemError(err)
+		return err
 	}

 	if cgroups.IsCgroup2UnifiedMode() {
@@ -1835,7 +1908,7 @@ func (c *linuxContainer) updateState(process parentProcess) (*State, error) {
 }

 func (c *linuxContainer) saveState(s *State) (retErr error) {
-	tmpFile, err := ioutil.TempFile(c.root, "state-")
+	tmpFile, err := os.CreateTemp(c.root, "state-")
 	if err != nil {
 		return err
 	}
@@ -1928,9 +2001,10 @@ func (c *linuxContainer) currentState() (*State, error) {
 		startTime, _ = c.initProcess.startTime()
 		externalDescriptors = c.initProcess.externalDescriptors()
 	}
-	intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
-	if err != nil {
-		intelRdtPath = ""
+
+	intelRdtPath := ""
+	if c.intelRdtManager != nil {
+		intelRdtPath = c.intelRdtManager.GetPath()
 	}
 	state := &State{
 		BaseState: BaseState{
@@ -1998,16 +2072,16 @@ func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceTyp
 		if p, ok := namespaces[ns]; ok && p != "" {
 			// check if the requested namespace is supported
 			if !configs.IsNamespaceSupported(ns) {
-				return nil, newSystemError(fmt.Errorf("namespace %s is not supported", ns))
+				return nil, fmt.Errorf("namespace %s is not supported", ns)
 			}
 			// only set to join this namespace if it exists
 			if _, err := os.Lstat(p); err != nil {
-				return nil, newSystemErrorWithCausef(err, "running lstat on namespace path %q", p)
+				return nil, fmt.Errorf("namespace path: %w", err)
 			}
 			// do not allow namespace path with comma as we use it to separate
 			// the namespace paths
 			if strings.ContainsRune(p, ',') {
-				return nil, newSystemError(fmt.Errorf("invalid path %s", p))
+				return nil, fmt.Errorf("invalid namespace path %s", p)
 			}
 			paths = append(paths, fmt.Sprintf("%s:%s", configs.NsName(ns), p))
 		}
@@ -2039,7 +2113,7 @@ type netlinkError struct{ error }
 // such as one that uses nsenter package to bootstrap the container's
 // init process correctly, i.e. with correct namespaces, uid/gid
 // mapping etc.
-func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (_ io.Reader, Err error) {
+func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, it initType) (_ io.Reader, Err error) {
 	// create the netlink message
 	r := nl.NewNetlinkRequest(int(InitMsg), 0)

@@ -2134,6 +2208,25 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
 		Value: c.config.RootlessEUID,
 	})

+	// Bind mount source to open.
+	if it == initStandard && c.shouldSendMountSources() {
+		var mounts []byte
+		for _, m := range c.config.Mounts {
+			if m.IsBind() {
+				if strings.IndexByte(m.Source, 0) >= 0 {
+					return nil, fmt.Errorf("mount source string contains null byte: %q", m.Source)
+				}
+				mounts = append(mounts, []byte(m.Source)...)
+			}
+			mounts = append(mounts, byte(0))
+		}
+
+		r.AddData(&Bytemsg{
+			Type:  MountSourcesAttr,
+			Value: mounts,
+		})
+	}
+
 	return bytes.NewReader(r.Serialize()), nil
 }

@@ -2144,7 +2237,7 @@ func ignoreTerminateErrors(err error) error {
 	if err == nil {
 		return nil
 	}
-	// terminate() might return an error from ether Kill or Wait.
+	// terminate() might return an error from either Kill or Wait.
 	// The (*Cmd).Wait documentation says: "If the command fails to run
 	// or doesn't complete successfully, the error is of type *ExitError".
 	// Filter out such errors (like "exit status 1" or "signal: killed").
@@ -2152,13 +2245,11 @@ func ignoreTerminateErrors(err error) error {
 	if errors.As(err, &exitErr) {
 		return nil
 	}
-	// TODO: use errors.Is(err, os.ErrProcessDone) here and
-	// remove "process already finished" string comparison below
-	// once go 1.16 is minimally supported version.
-
+	if errors.Is(err, os.ErrProcessDone) {
+		return nil
+	}
 	s := err.Error()
-	if strings.Contains(s, "process already finished") ||
-		strings.Contains(s, "Wait was already called") {
+	if strings.Contains(s, "Wait was already called") {
 		return nil
 	}
 	return err
--- a/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
@@ -30,4 +30,5 @@ type CriuOpts struct {
 	LazyPages               bool               // restore memory pages lazily using userfaultfd
 	StatusFd                int                // fd for feedback when lazy server is ready
 	LsmProfile              string             // LSM profile used to restore the container
+	LsmMountContext         string             // LSM mount context value to use during restore
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go
@@ -1,10 +1,10 @@
+//go:build !windows
 // +build !windows

 package devices

 import (
 	"errors"
-	"io/ioutil"
 	"os"
 	"path/filepath"

@@ -16,8 +16,8 @@ var ErrNotADevice = errors.New("not a device node")

 // Testing dependencies
 var (
-	unixLstat     = unix.Lstat
-	ioutilReadDir = ioutil.ReadDir
+	unixLstat = unix.Lstat
+	osReadDir = os.ReadDir
 )

 func mkDev(d *Rule) (uint64, error) {
@@ -40,7 +40,7 @@ func DeviceFromPath(path, permissions string) (*Device, error) {
 	var (
 		devType   Type
 		mode      = stat.Mode
-		devNumber = uint64(stat.Rdev)
+		devNumber = uint64(stat.Rdev) //nolint:unconvert // Rdev is uint32 on e.g. MIPS.
 		major     = unix.Major(devNumber)
 		minor     = unix.Minor(devNumber)
 	)
@@ -76,7 +76,7 @@ func HostDevices() ([]*Device, error) {
 // GetDevices recursively traverses a directory specified by path
 // and returns all devices found there.
 func GetDevices(path string) ([]*Device, error) {
-	files, err := ioutilReadDir(path)
+	files, err := osReadDir(path)
 	if err != nil {
 		return nil, err
 	}
@@ -103,7 +103,7 @@ func GetDevices(path string) ([]*Device, error) {
 		}
 		device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm")
 		if err != nil {
-			if err == ErrNotADevice {
+			if errors.Is(err, ErrNotADevice) {
 				continue
 			}
 			if os.IsNotExist(err) {
--- a/vendor/github.com/opencontainers/runc/libcontainer/error.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/error.go
@@ -1,70 +1,13 @@
 package libcontainer

-import "io"
+import "errors"

-// ErrorCode is the API error code type.
-type ErrorCode int
-
-// API error codes.
-const (
-	// Factory errors
-	IdInUse ErrorCode = iota
-	InvalidIdFormat
-
-	// Container errors
-	ContainerNotExists
-	ContainerPaused
-	ContainerNotStopped
-	ContainerNotRunning
-	ContainerNotPaused
-
-	// Process errors
-	NoProcessOps
-
-	// Common errors
-	ConfigInvalid
-	ConsoleExists
-	SystemError
+var (
+	ErrExist      = errors.New("container with given ID already exists") // e.g. returned by (*LinuxFactory).Create when the container root directory already exists
+	ErrInvalidID  = errors.New("invalid container ID format")
+	ErrNotExist   = errors.New("container does not exist")
+	ErrPaused     = errors.New("container paused")
+	ErrRunning    = errors.New("container still running")
+	ErrNotRunning = errors.New("container not running") // e.g. returned by (*linuxContainer).Pause
+	ErrNotPaused  = errors.New("container not paused")  // e.g. returned by (*linuxContainer).Resume when the status is not Paused
 )
-
-func (c ErrorCode) String() string {
-	switch c {
-	case IdInUse:
-		return "Id already in use"
-	case InvalidIdFormat:
-		return "Invalid format"
-	case ContainerPaused:
-		return "Container paused"
-	case ConfigInvalid:
-		return "Invalid configuration"
-	case SystemError:
-		return "System error"
-	case ContainerNotExists:
-		return "Container does not exist"
-	case ContainerNotStopped:
-		return "Container is not stopped"
-	case ContainerNotRunning:
-		return "Container is not running"
-	case ConsoleExists:
-		return "Console exists for process"
-	case ContainerNotPaused:
-		return "Container is not paused"
-	case NoProcessOps:
-		return "No process operations"
-	default:
-		return "Unknown error"
-	}
-}
-
-// Error is the API error type.
-type Error interface {
-	error
-
-	// Returns an error if it failed to write the detail of the Error to w.
-	// The detail of the Error may include the error message and a
-	// representation of the stack trace.
-	Detail(w io.Writer) error
-
-	// Returns the error code for this error.
-	Code() ErrorCode
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/factory.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/factory.go
@@ -14,29 +14,15 @@ type Factory interface {
 	//
 	// Returns the new container with a running process.
 	//
-	// errors:
-	// IdInUse - id is already in use by a container
-	// InvalidIdFormat - id has incorrect format
-	// ConfigInvalid - config is invalid
-	// Systemerror - System error
-	//
 	// On error, any partially created container parts are cleaned up (the operation is atomic).
 	Create(id string, config *configs.Config) (Container, error)

 	// Load takes an ID for an existing container and returns the container information
 	// from the state.  This presents a read only view of the container.
-	//
-	// errors:
-	// Path does not exist
-	// System error
 	Load(id string) (Container, error)

 	// StartInitialization is an internal API to libcontainer used during the reexec of the
 	// container.
-	//
-	// Errors:
-	// Pipe connection error
-	// System error
 	StartInitialization() error

 	// Type returns info string about factory type (e.g. lxc, libcontainer...)
--- a/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
@@ -1,9 +1,8 @@
-// +build linux
-
 package libcontainer

 import (
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -13,17 +12,14 @@ import (

 	securejoin "github.com/cyphar/filepath-securejoin"
 	"github.com/moby/sys/mountinfo"
-	"github.com/opencontainers/runc/libcontainer/cgroups"
-	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
-	"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
-	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
+	"golang.org/x/sys/unix"
+
+	"github.com/opencontainers/runc/libcontainer/cgroups/manager"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/configs/validate"
 	"github.com/opencontainers/runc/libcontainer/intelrdt"
 	"github.com/opencontainers/runc/libcontainer/utils"
-	"github.com/pkg/errors"
-
-	"golang.org/x/sys/unix"
+	"github.com/sirupsen/logrus"
 )

 const (
@@ -41,7 +37,9 @@ func InitArgs(args ...string) func(*LinuxFactory) error {
 			// Resolve relative paths to ensure that its available
 			// after directory changes.
 			if args[0], err = filepath.Abs(args[0]); err != nil {
-				return newGenericError(err, ConfigInvalid)
+				// The only error returned from filepath.Abs is
+				// the one from os.Getwd, i.e. a system error.
+				return err
 			}
 		}

@@ -50,100 +48,6 @@ func InitArgs(args ...string) func(*LinuxFactory) error {
 	}
 }

-func getUnifiedPath(paths map[string]string) string {
-	path := ""
-	for k, v := range paths {
-		if path == "" {
-			path = v
-		} else if v != path {
-			panic(errors.Errorf("expected %q path to be unified path %q, got %q", k, path, v))
-		}
-	}
-	// can be empty
-	if path != "" {
-		if filepath.Clean(path) != path || !filepath.IsAbs(path) {
-			panic(errors.Errorf("invalid dir path %q", path))
-		}
-	}
-
-	return path
-}
-
-func systemdCgroupV2(l *LinuxFactory, rootless bool) error {
-	l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
-		return systemd.NewUnifiedManager(config, getUnifiedPath(paths), rootless)
-	}
-	return nil
-}
-
-// SystemdCgroups is an options func to configure a LinuxFactory to return
-// containers that use systemd to create and manage cgroups.
-func SystemdCgroups(l *LinuxFactory) error {
-	if !systemd.IsRunningSystemd() {
-		return fmt.Errorf("systemd not running on this host, can't use systemd as cgroups manager")
-	}
-
-	if cgroups.IsCgroup2UnifiedMode() {
-		return systemdCgroupV2(l, false)
-	}
-
-	l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
-		return systemd.NewLegacyManager(config, paths)
-	}
-
-	return nil
-}
-
-// RootlessSystemdCgroups is rootless version of SystemdCgroups.
-func RootlessSystemdCgroups(l *LinuxFactory) error {
-	if !systemd.IsRunningSystemd() {
-		return fmt.Errorf("systemd not running on this host, can't use systemd as cgroups manager")
-	}
-
-	if !cgroups.IsCgroup2UnifiedMode() {
-		return fmt.Errorf("cgroup v2 not enabled on this host, can't use systemd (rootless) as cgroups manager")
-	}
-	return systemdCgroupV2(l, true)
-}
-
-func cgroupfs2(l *LinuxFactory, rootless bool) error {
-	l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
-		m, err := fs2.NewManager(config, getUnifiedPath(paths), rootless)
-		if err != nil {
-			panic(err)
-		}
-		return m
-	}
-	return nil
-}
-
-func cgroupfs(l *LinuxFactory, rootless bool) error {
-	if cgroups.IsCgroup2UnifiedMode() {
-		return cgroupfs2(l, rootless)
-	}
-	l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
-		return fs.NewManager(config, paths, rootless)
-	}
-	return nil
-}
-
-// Cgroupfs is an options func to configure a LinuxFactory to return containers
-// that use the native cgroups filesystem implementation to create and manage
-// cgroups.
-func Cgroupfs(l *LinuxFactory) error {
-	return cgroupfs(l, false)
-}
-
-// RootlessCgroupfs is an options func to configure a LinuxFactory to return
-// containers that use the native cgroups filesystem implementation to create
-// and manage cgroups. The difference between RootlessCgroupfs and Cgroupfs is
-// that RootlessCgroupfs can transparently handle permission errors that occur
-// during rootless container (including euid=0 in userns) setup (while still allowing cgroup usage if
-// they've been set up properly).
-func RootlessCgroupfs(l *LinuxFactory) error {
-	return cgroupfs(l, true)
-}
-
 // IntelRdtfs is an options func to configure a LinuxFactory to return
 // containers that use the Intel RDT "resource control" filesystem to
 // create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth).
@@ -165,7 +69,7 @@ func TmpfsRoot(l *LinuxFactory) error {
 		return err
 	}
 	if !mounted {
-		if err := unix.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
+		if err := mount("tmpfs", l.Root, "", "tmpfs", 0, ""); err != nil {
 			return err
 		}
 	}
@@ -186,7 +90,7 @@ func CriuPath(criupath string) func(*LinuxFactory) error {
 func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
 	if root != "" {
 		if err := os.MkdirAll(root, 0o700); err != nil {
-			return nil, newGenericError(err, SystemError)
+			return nil, err
 		}
 	}
 	l := &LinuxFactory{
@@ -197,10 +101,6 @@ func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
 		CriuPath:  "criu",
 	}

-	if err := Cgroupfs(l); err != nil {
-		return nil, err
-	}
-
 	for _, opt := range options {
 		if opt == nil {
 			continue
@@ -237,37 +137,69 @@ type LinuxFactory struct {
 	// Validator provides validation to container configurations.
 	Validator validate.Validator

-	// NewCgroupsManager returns an initialized cgroups manager for a single container.
-	NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
-
 	// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
 	NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager
 }

 func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
 	if l.Root == "" {
-		return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
+		return nil, errors.New("root not set")
 	}
 	if err := l.validateID(id); err != nil {
 		return nil, err
 	}
 	if err := l.Validator.Validate(config); err != nil {
-		return nil, newGenericError(err, ConfigInvalid)
+		return nil, err
 	}
 	containerRoot, err := securejoin.SecureJoin(l.Root, id)
 	if err != nil {
 		return nil, err
 	}
 	if _, err := os.Stat(containerRoot); err == nil {
-		return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
+		return nil, ErrExist
 	} else if !os.IsNotExist(err) {
-		return nil, newGenericError(err, SystemError)
+		return nil, err
 	}
+
+	cm, err := manager.New(config.Cgroups)
+	if err != nil {
+		return nil, err
+	}
+
+	// Check that the cgroup either does not exist or is empty (no processes).
+	// Note for cgroup v1 this check is not thorough, as there are multiple
+	// separate hierarchies, while both Exists() and GetAllPids() only use
+	// one for "devices" controller (assuming others are the same, which is
+	// probably true in almost all scenarios). Checking all the hierarchies
+	// would be too expensive.
+	if cm.Exists() {
+		pids, err := cm.GetAllPids()
+		// Reading PIDs can race with cgroups removal, so ignore ENOENT and ENODEV.
+		if err != nil && !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.ENODEV) {
+			return nil, fmt.Errorf("unable to get cgroup PIDs: %w", err)
+		}
+		if len(pids) != 0 {
+			// TODO: return an error.
+			logrus.Warnf("container's cgroup is not empty: %d process(es) found", len(pids))
+			logrus.Warn("DEPRECATED: running container in a non-empty cgroup won't be supported in runc 1.2; https://github.com/opencontainers/runc/issues/3132")
+		}
+	}
+
+	// Check that cgroup is not frozen. Do not use Exists() here
+	// since in cgroup v1 it only checks "devices" controller.
+	st, err := cm.GetFreezerState()
+	if err != nil {
+		return nil, fmt.Errorf("unable to get cgroup freezer state: %w", err)
+	}
+	if st == configs.Frozen {
+		return nil, errors.New("container's cgroup unexpectedly frozen")
+	}
+
 	if err := os.MkdirAll(containerRoot, 0o711); err != nil {
-		return nil, newGenericError(err, SystemError)
+		return nil, err
 	}
 	if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
-		return nil, newGenericError(err, SystemError)
+		return nil, err
 	}
 	c := &linuxContainer{
 		id:            id,
@@ -278,7 +210,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
 		criuPath:      l.CriuPath,
 		newuidmapPath: l.NewuidmapPath,
 		newgidmapPath: l.NewgidmapPath,
-		cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
+		cgroupManager: cm,
 	}
 	if l.NewIntelRdtManager != nil {
 		c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
@@ -289,7 +221,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err

 func (l *LinuxFactory) Load(id string) (Container, error) {
 	if l.Root == "" {
-		return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
+		return nil, errors.New("root not set")
 	}
 	// when load, we need to check id is valid or not.
 	if err := l.validateID(id); err != nil {
@@ -299,7 +231,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
 	if err != nil {
 		return nil, err
 	}
-	state, err := l.loadState(containerRoot, id)
+	state, err := l.loadState(containerRoot)
 	if err != nil {
 		return nil, err
 	}
@@ -308,6 +240,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
 		processStartTime: state.InitProcessStartTime,
 		fds:              state.ExternalDescriptors,
 	}
+	cm, err := manager.NewWithPaths(state.Config.Cgroups, state.CgroupPaths)
+	if err != nil {
+		return nil, err
+	}
 	c := &linuxContainer{
 		initProcess:          r,
 		initProcessStartTime: state.InitProcessStartTime,
@@ -318,7 +254,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
 		criuPath:             l.CriuPath,
 		newuidmapPath:        l.NewuidmapPath,
 		newgidmapPath:        l.NewgidmapPath,
-		cgroupManager:        l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
+		cgroupManager:        cm,
 		root:                 containerRoot,
 		created:              state.Created,
 	}
@@ -343,11 +279,26 @@ func (l *LinuxFactory) StartInitialization() (err error) {
 	envInitPipe := os.Getenv("_LIBCONTAINER_INITPIPE")
 	pipefd, err := strconv.Atoi(envInitPipe)
 	if err != nil {
-		return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
+		err = fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE: %w", err)
+		logrus.Error(err)
+		return err
 	}
 	pipe := os.NewFile(uintptr(pipefd), "pipe")
 	defer pipe.Close()

+	defer func() {
+		// We have an error during the initialization of the container's init,
+		// send it back to the parent process in the form of an initError.
+		if werr := writeSync(pipe, procError); werr != nil {
+			fmt.Fprintln(os.Stderr, err)
+			return
+		}
+		if werr := utils.WriteJSON(pipe, &initError{Message: err.Error()}); werr != nil {
+			fmt.Fprintln(os.Stderr, err)
+			return
+		}
+	}()
+
 	// Only init processes have FIFOFD.
 	fifofd := -1
 	envInitType := os.Getenv("_LIBCONTAINER_INITTYPE")
@@ -355,7 +306,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
 	if it == initStandard {
 		envFifoFd := os.Getenv("_LIBCONTAINER_FIFOFD")
 		if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
-			return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD=%s to int: %s", envFifoFd, err)
+			return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD: %w", err)
 		}
 	}

@@ -363,7 +314,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
 	if envConsole := os.Getenv("_LIBCONTAINER_CONSOLE"); envConsole != "" {
 		console, err := strconv.Atoi(envConsole)
 		if err != nil {
-			return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err)
+			return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE: %w", err)
 		}
 		consoleSocket = os.NewFile(uintptr(console), "console-socket")
 		defer consoleSocket.Close()
@@ -372,32 +323,26 @@ func (l *LinuxFactory) StartInitialization() (err error) {
 	logPipeFdStr := os.Getenv("_LIBCONTAINER_LOGPIPE")
 	logPipeFd, err := strconv.Atoi(logPipeFdStr)
 	if err != nil {
-		return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE=%s to int: %s", logPipeFdStr, err)
+		return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE: %w", err)
+	}
+
+	// Get mount files (O_PATH).
+	mountFds, err := parseMountFds()
+	if err != nil {
+		return err
 	}

 	// clear the current process's environment to clean any libcontainer
 	// specific env vars.
 	os.Clearenv()

-	defer func() {
-		// We have an error during the initialization of the container's init,
-		// send it back to the parent process in the form of an initError.
-		if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
-			fmt.Fprintln(os.Stderr, err)
-			return
-		}
-		if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
-			fmt.Fprintln(os.Stderr, err)
-			return
-		}
-	}()
 	defer func() {
 		if e := recover(); e != nil {
 			err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
 		}
 	}()

-	i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd)
+	i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd, mountFds)
 	if err != nil {
 		return err
 	}
@@ -406,7 +351,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
 	return i.Init()
 }

-func (l *LinuxFactory) loadState(root, id string) (*State, error) {
+func (l *LinuxFactory) loadState(root string) (*State, error) {
 	stateFilePath, err := securejoin.SecureJoin(root, stateFilename)
 	if err != nil {
 		return nil, err
@@ -414,21 +359,21 @@ func (l *LinuxFactory) loadState(root, id string) (*State, error) {
 	f, err := os.Open(stateFilePath)
 	if err != nil {
 		if os.IsNotExist(err) {
-			return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists)
+			return nil, ErrNotExist
 		}
-		return nil, newGenericError(err, SystemError)
+		return nil, err
 	}
 	defer f.Close()
 	var state *State
 	if err := json.NewDecoder(f).Decode(&state); err != nil {
-		return nil, newGenericError(err, SystemError)
+		return nil, err
 	}
 	return state, nil
 }

 func (l *LinuxFactory) validateID(id string) error {
 	if !idRegex.MatchString(id) || string(os.PathSeparator)+id != utils.CleanPath(string(os.PathSeparator)+id) {
-		return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
+		return ErrInvalidID
 	}

 	return nil
@@ -451,3 +396,18 @@ func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
 		return nil
 	}
 }
+
+func parseMountFds() ([]int, error) {
+	fdsJson := os.Getenv("_LIBCONTAINER_MOUNT_FDS")
+	if fdsJson == "" {
+		// Always return the nil slice if no fd is present.
+		return nil, nil
+	}
+
+	var mountFds []int
+	if err := json.Unmarshal([]byte(fdsJson), &mountFds); err != nil {
+		return nil, fmt.Errorf("Error unmarshalling _LIBCONTAINER_MOUNT_FDS: %w", err)
+	}
+
+	return mountFds, nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/generic_error.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/generic_error.go
@@ -1,92 +0,0 @@
-package libcontainer
-
-import (
-	"fmt"
-	"io"
-	"text/template"
-	"time"
-
-	"github.com/opencontainers/runc/libcontainer/stacktrace"
-)
-
-var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
-Code: {{.ECode}}
-{{if .Message }}
-Message: {{.Message}}
-{{end}}
-Frames:{{range $i, $frame := .Stack.Frames}}
---
-{{$i}}: {{$frame.Function}}
-Package: {{$frame.Package}}
-File: {{$frame.File}}@{{$frame.Line}}{{end}}
-`))
-
-func newGenericError(err error, c ErrorCode) Error {
-	if le, ok := err.(Error); ok {
-		return le
-	}
-	gerr := &genericError{
-		Timestamp: time.Now(),
-		Err:       err,
-		ECode:     c,
-		Stack:     stacktrace.Capture(1),
-	}
-	if err != nil {
-		gerr.Message = err.Error()
-	}
-	return gerr
-}
-
-func newSystemError(err error) Error {
-	return createSystemError(err, "")
-}
-
-func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error {
-	return createSystemError(err, fmt.Sprintf(cause, v...))
-}
-
-func newSystemErrorWithCause(err error, cause string) Error {
-	return createSystemError(err, cause)
-}
-
-// createSystemError creates the specified error with the correct number of
-// stack frames skipped. This is only to be called by the other functions for
-// formatting the error.
-func createSystemError(err error, cause string) Error {
-	gerr := &genericError{
-		Timestamp: time.Now(),
-		Err:       err,
-		ECode:     SystemError,
-		Cause:     cause,
-		Stack:     stacktrace.Capture(2),
-	}
-	if err != nil {
-		gerr.Message = err.Error()
-	}
-	return gerr
-}
-
-type genericError struct {
-	Timestamp time.Time
-	ECode     ErrorCode
-	Err       error `json:"-"`
-	Cause     string
-	Message   string
-	Stack     stacktrace.Stacktrace
-}
-
-func (e *genericError) Error() string {
-	if e.Cause == "" {
-		return e.Message
-	}
-	frame := e.Stack.Frames[0]
-	return fmt.Sprintf("%s:%d: %s caused: %s", frame.File, frame.Line, e.Cause, e.Message)
-}
-
-func (e *genericError) Code() ErrorCode {
-	return e.ECode
-}
-
-func (e *genericError) Detail(w io.Writer) error {
-	return errorTemplate.Execute(w, e)
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
@@ -1,30 +1,29 @@
-// +build linux
-
 package libcontainer

 import (
 	"bytes"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
-	"io/ioutil"
 	"net"
 	"os"
+	"strconv"
 	"strings"
 	"unsafe"

 	"github.com/containerd/console"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/sirupsen/logrus"
+	"github.com/vishvananda/netlink"
+	"golang.org/x/sys/unix"
+
 	"github.com/opencontainers/runc/libcontainer/capabilities"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/user"
 	"github.com/opencontainers/runc/libcontainer/utils"
-	"github.com/opencontainers/runtime-spec/specs-go"
-	"github.com/pkg/errors"
-	"github.com/sirupsen/logrus"
-	"github.com/vishvananda/netlink"
-	"golang.org/x/sys/unix"
 )

 type initType string
@@ -77,7 +76,7 @@ type initer interface {
 	Init() error
 }

-func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int) (initer, error) {
+func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, mountFds []int) (initer, error) {
 	var config *initConfig
 	if err := json.NewDecoder(pipe).Decode(&config); err != nil {
 		return nil, err
@@ -87,6 +86,11 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd,
 	}
 	switch t {
 	case initSetns:
+		// mountFds must be nil in this case. We don't mount while doing runc exec.
+		if mountFds != nil {
+			return nil, errors.New("mountFds must be nil. Can't mount while doing runc exec.")
+		}
+
 		return &linuxSetnsInit{
 			pipe:          pipe,
 			consoleSocket: consoleSocket,
@@ -101,6 +105,7 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd,
 			config:        config,
 			fifoFd:        fifoFd,
 			logFd:         logFd,
+			mountFds:      mountFds,
 		}, nil
 	}
 	return nil, fmt.Errorf("unknown init type %q", t)
@@ -139,7 +144,7 @@ func finalizeNamespace(config *initConfig) error {
 	// inherited are marked close-on-exec so they stay out of the
 	// container
 	if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
-		return errors.Wrap(err, "close exec fds")
+		return fmt.Errorf("error closing exec fds: %w", err)
 	}

 	// we only do chdir if it's specified
@@ -158,7 +163,7 @@ func finalizeNamespace(config *initConfig) error {
 			// to the directory, but the user running runc does not.
 			// This is useful in cases where the cwd is also a volume that's been chowned to the container user.
 		default:
-			return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
+			return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err)
 		}
 	}

@@ -174,26 +179,26 @@ func finalizeNamespace(config *initConfig) error {
 	}
 	// drop capabilities in bounding set before changing user
 	if err := w.ApplyBoundingSet(); err != nil {
-		return errors.Wrap(err, "apply bounding set")
+		return fmt.Errorf("unable to apply bounding set: %w", err)
 	}
 	// preserve existing capabilities while we change users
 	if err := system.SetKeepCaps(); err != nil {
-		return errors.Wrap(err, "set keep caps")
+		return fmt.Errorf("unable to set keep caps: %w", err)
 	}
 	if err := setupUser(config); err != nil {
-		return errors.Wrap(err, "setup user")
+		return fmt.Errorf("unable to setup user: %w", err)
 	}
 	// Change working directory AFTER the user has been set up, if we haven't done it yet.
 	if doChdir {
 		if err := unix.Chdir(config.Cwd); err != nil {
-			return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
+			return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err)
 		}
 	}
 	if err := system.ClearKeepCaps(); err != nil {
-		return errors.Wrap(err, "clear keep caps")
+		return fmt.Errorf("unable to clear keep caps: %w", err)
 	}
 	if err := w.ApplyCaps(); err != nil {
-		return errors.Wrap(err, "apply caps")
+		return fmt.Errorf("unable to apply caps: %w", err)
 	}
 	return nil
 }
@@ -272,6 +277,36 @@ func syncParentHooks(pipe io.ReadWriter) error {
 	return readSync(pipe, procResume)
 }

+// syncParentSeccomp sends to the given pipe a JSON payload which
+// indicates that the parent should pick up the seccomp fd with pidfd_getfd()
+// and send it to the seccomp agent over a unix socket. It then waits for
+// the parent to indicate that it is cleared to resume and closes the seccompFd.
+// If the seccompFd is -1, there isn't anything to sync with the parent, so it
+// returns no error.
+func syncParentSeccomp(pipe io.ReadWriter, seccompFd int) error {
+	if seccompFd == -1 {
+		return nil
+	}
+
+	// Tell parent.
+	if err := writeSyncWithFd(pipe, procSeccomp, seccompFd); err != nil {
+		unix.Close(seccompFd)
+		return err
+	}
+
+	// Wait for parent to give the all-clear.
+	if err := readSync(pipe, procSeccompDone); err != nil {
+		unix.Close(seccompFd)
+		return fmt.Errorf("sync parent seccomp: %w", err)
+	}
+
+	if err := unix.Close(seccompFd); err != nil {
+		return fmt.Errorf("close seccomp fd: %w", err)
+	}
+
+	return nil
+}
+
 // setupUser changes the groups, gid, and uid for the user inside the container
 func setupUser(config *initConfig) error {
 	// Set up defaults.
@@ -325,11 +360,11 @@ func setupUser(config *initConfig) error {

 	// Before we change to the container's user make sure that the processes
 	// STDIO is correctly owned by the user that we are switching to.
-	if err := fixStdioPermissions(config, execUser); err != nil {
+	if err := fixStdioPermissions(execUser); err != nil {
 		return err
 	}

-	setgroups, err := ioutil.ReadFile("/proc/self/setgroups")
+	setgroups, err := os.ReadFile("/proc/self/setgroups")
 	if err != nil && !os.IsNotExist(err) {
 		return err
 	}
@@ -343,7 +378,7 @@ func setupUser(config *initConfig) error {
 	if allowSupGroups {
 		suppGroups := append(execUser.Sgids, addGroups...)
 		if err := unix.Setgroups(suppGroups); err != nil {
-			return err
+			return &os.SyscallError{Syscall: "setgroups", Err: err}
 		}
 	}

@@ -366,10 +401,10 @@ func setupUser(config *initConfig) error {
 // fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
 // The ownership needs to match because it is created outside of the container and needs to be
 // localized.
-func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
+func fixStdioPermissions(u *user.ExecUser) error {
 	var null unix.Stat_t
 	if err := unix.Stat("/dev/null", &null); err != nil {
-		return err
+		return &os.PathError{Op: "stat", Path: "/dev/null", Err: err}
 	}
 	for _, fd := range []uintptr{
 		os.Stdin.Fd(),
@@ -378,7 +413,7 @@ func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
 	} {
 		var s unix.Stat_t
 		if err := unix.Fstat(int(fd), &s); err != nil {
-			return err
+			return &os.PathError{Op: "fstat", Path: "fd " + strconv.Itoa(int(fd)), Err: err}
 		}

 		// Skip chown of /dev/null if it was used as one of the STDIO fds.
@@ -399,10 +434,12 @@ func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
 			// privileged_wrt_inode_uidgid() has failed). In either case, we
 			// are in a configuration where it's better for us to just not
 			// touch the stdio rather than bail at this point.
+
+			// nolint:errorlint // unix errors are bare
 			if err == unix.EINVAL || err == unix.EPERM {
 				continue
 			}
-			return err
+			return &os.PathError{Op: "fchown", Path: "fd " + strconv.Itoa(int(fd)), Err: err}
 		}
 	}
 	return nil
@@ -456,8 +493,8 @@ func setupRoute(config *configs.Config) error {

 func setupRlimits(limits []configs.Rlimit, pid int) error {
 	for _, rlimit := range limits {
-		if err := system.Prlimit(pid, rlimit.Type, unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
-			return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
+		if err := unix.Prlimit(pid, rlimit.Type, &unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}, nil); err != nil {
+			return fmt.Errorf("error setting rlimit type %v: %w", rlimit.Type, err)
 		}
 	}
 	return nil
@@ -482,27 +519,12 @@ func isWaitable(pid int) (bool, error) {
 	si := &siginfo{}
 	_, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0)
 	if e != 0 {
-		return false, os.NewSyscallError("waitid", e)
+		return false, &os.SyscallError{Syscall: "waitid", Err: e}
 	}

 	return si.si_pid != 0, nil
 }

-// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD) false otherwise
-func isNoChildren(err error) bool {
-	switch err := err.(type) {
-	case unix.Errno:
-		if err == unix.ECHILD {
-			return true
-		}
-	case *os.SyscallError:
-		if err.Err == unix.ECHILD {
-			return true
-		}
-	}
-	return false
-}
-
 // signalAllProcesses freezes then iterates over all the processes inside the
 // manager's cgroups sending the signal s to them.
 // If s is SIGKILL then it will wait for each process to exit.
@@ -548,7 +570,7 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
 	for _, p := range procs {
 		if s != unix.SIGKILL {
 			if ok, err := isWaitable(p.Pid); err != nil {
-				if !isNoChildren(err) {
+				if !errors.Is(err, unix.ECHILD) {
 					logrus.Warn("signalAllProcesses: ", p.Pid, err)
 				}
 				continue
@@ -565,7 +587,7 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
 		// to retrieve its exit code.
 		if subreaper == 0 {
 			if _, err := p.Wait(); err != nil {
-				if !isNoChildren(err) {
+				if !errors.Is(err, unix.ECHILD) {
 					logrus.Warn("wait: ", err)
 				}
 			}
--- a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
@@ -1,13 +1,11 @@
-// +build linux
-
 package intelrdt

 import (
 	"bufio"
 	"bytes"
+	"errors"
 	"fmt"
 	"io"
-	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strconv"
@@ -15,6 +13,7 @@ import (
 	"sync"

 	"github.com/moby/sys/mountinfo"
+	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )

@@ -70,7 +69,7 @@ import (
 * |-- ...
 * |-- schemata
 * |-- tasks
- * |-- <container_id>
+ * |-- <clos>
 *     |-- ...
 *     |-- schemata
 *     |-- tasks
@@ -153,7 +152,7 @@ type Manager interface {
 	// Returns statistics for Intel RDT
 	GetStats() (*Stats, error)

-	// Destroys the Intel RDT 'container_id' group
+	// Destroys the Intel RDT container-specific 'container_id' group
 	Destroy() error

 	// Returns Intel RDT path to save in a state file and to be able to
@@ -181,14 +180,10 @@ func NewManager(config *configs.Config, id string, path string) Manager {
 }

 const (
-	IntelRdtTasks = "tasks"
+	intelRdtTasks = "tasks"
 )

 var (
-	// The absolute root path of the Intel RDT "resource control" filesystem
-	intelRdtRoot     string
-	intelRdtRootLock sync.Mutex
-
 	// The flag to indicate if Intel RDT/CAT is enabled
 	catEnabled bool
 	// The flag to indicate if Intel RDT/MBA is enabled
@@ -198,13 +193,9 @@ var (

 	// For Intel RDT initialization
 	initOnce sync.Once
-)

-type intelRdtData struct {
-	root   string
-	config *configs.Config
-	pid    int
-}
+	errNotFound = errors.New("Intel RDT resctrl mount point not found")
+)

 // Check if Intel RDT sub-features are enabled in featuresInit()
 func featuresInit() {
@@ -215,9 +206,10 @@ func featuresInit() {
 			return
 		}

-		// 2. Check if Intel RDT "resource control" filesystem is mounted
-		// The user guarantees to mount the filesystem
-		if !isIntelRdtMounted() {
+		// 2. Check if Intel RDT "resource control" filesystem is available.
+		// The user guarantees to mount the filesystem.
+		root, err := Root()
+		if err != nil {
 			return
 		}

@@ -226,7 +218,7 @@ func featuresInit() {
 		// selectively disabled or enabled by kernel command line
 		// (e.g., rdt=!l3cat,mba) in 4.14 and newer kernel
 		if flagsSet.CAT {
-			if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "L3")); err == nil {
+			if _, err := os.Stat(filepath.Join(root, "info", "L3")); err == nil {
 				catEnabled = true
 			}
 		}
@@ -236,15 +228,15 @@ func featuresInit() {
 			// depends on MBA
 			mbaEnabled = true
 		} else if flagsSet.MBA {
-			if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "MB")); err == nil {
+			if _, err := os.Stat(filepath.Join(root, "info", "MB")); err == nil {
 				mbaEnabled = true
 			}
 		}
 		if flagsSet.MBMTotal || flagsSet.MBMLocal || flagsSet.CMT {
-			if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "L3_MON")); err != nil {
+			if _, err := os.Stat(filepath.Join(root, "info", "L3_MON")); err != nil {
 				return
 			}
-			enabledMonFeatures, err = getMonFeatures(intelRdtRoot)
+			enabledMonFeatures, err = getMonFeatures(root)
 			if err != nil {
 				return
 			}
@@ -271,7 +263,7 @@ func findIntelRdtMountpointDir(f io.Reader) (string, error) {
 		return "", err
 	}
 	if len(mi) < 1 {
-		return "", NewNotFoundError("Intel RDT")
+		return "", errNotFound
 	}

 	// Check if MBA Software Controller is enabled through mount option "-o mba_MBps"
@@ -282,10 +274,16 @@ func findIntelRdtMountpointDir(f io.Reader) (string, error) {
 	return mi[0].Mountpoint, nil
 }

-// Gets the root path of Intel RDT "resource control" filesystem
-func getIntelRdtRoot() (string, error) {
-	intelRdtRootLock.Lock()
-	defer intelRdtRootLock.Unlock()
+// For Root() use only.
+var (
+	intelRdtRoot string
+	rootMu       sync.Mutex
+)
+
+// Root returns the Intel RDT "resource control" filesystem mount point.
+func Root() (string, error) {
+	rootMu.Lock()
+	defer rootMu.Unlock()

 	if intelRdtRoot != "" {
 		return intelRdtRoot, nil
@@ -309,11 +307,6 @@ func getIntelRdtRoot() (string, error) {
 	return intelRdtRoot, nil
 }

-func isIntelRdtMounted() bool {
-	_, err := getIntelRdtRoot()
-	return err == nil
-}
-
 type cpuInfoFlags struct {
 	CAT bool // Cache Allocation Technology
 	MBA bool // Memory Bandwidth Allocation
@@ -366,33 +359,15 @@ func parseCpuInfoFile(path string) (cpuInfoFlags, error) {
 	return infoFlags, nil
 }

-func parseUint(s string, base, bitSize int) (uint64, error) {
-	value, err := strconv.ParseUint(s, base, bitSize)
-	if err != nil {
-		intValue, intErr := strconv.ParseInt(s, base, bitSize)
-		// 1. Handle negative values greater than MinInt64 (and)
-		// 2. Handle negative values lesser than MinInt64
-		if intErr == nil && intValue < 0 {
-			return 0, nil
-		} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
-			return 0, nil
-		}
-
-		return value, err
-	}
-
-	return value, nil
-}
-
 // Gets a single uint64 value from the specified file.
 func getIntelRdtParamUint(path, file string) (uint64, error) {
 	fileName := filepath.Join(path, file)
-	contents, err := ioutil.ReadFile(fileName)
+	contents, err := os.ReadFile(fileName)
 	if err != nil {
 		return 0, err
 	}

-	res, err := parseUint(string(bytes.TrimSpace(contents)), 10, 64)
+	res, err := fscommon.ParseUint(string(bytes.TrimSpace(contents)), 10, 64)
 	if err != nil {
 		return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName)
 	}
@@ -401,7 +376,7 @@ func getIntelRdtParamUint(path, file string) (uint64, error) {

 // Gets a string value from the specified file
 func getIntelRdtParamString(path, file string) (string, error) {
-	contents, err := ioutil.ReadFile(filepath.Join(path, file))
+	contents, err := os.ReadFile(filepath.Join(path, file))
 	if err != nil {
 		return "", err
 	}
@@ -413,29 +388,17 @@ func writeFile(dir, file, data string) error {
 	if dir == "" {
 		return fmt.Errorf("no such directory for %s", file)
 	}
-	if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0o600); err != nil {
-		return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
+	if err := os.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0o600); err != nil {
+		return newLastCmdError(fmt.Errorf("intelrdt: unable to write %v: %w", data, err))
 	}
 	return nil
 }

-func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) {
-	rootPath, err := getIntelRdtRoot()
-	if err != nil {
-		return nil, err
-	}
-	return &intelRdtData{
-		root:   rootPath,
-		config: c,
-		pid:    pid,
-	}, nil
-}
-
 // Get the read-only L3 cache information
 func getL3CacheInfo() (*L3CacheInfo, error) {
 	l3CacheInfo := &L3CacheInfo{}

-	rootPath, err := getIntelRdtRoot()
+	rootPath, err := Root()
 	if err != nil {
 		return l3CacheInfo, err
 	}
@@ -465,7 +428,7 @@ func getL3CacheInfo() (*L3CacheInfo, error) {
 func getMemBwInfo() (*MemBwInfo, error) {
 	memBwInfo := &MemBwInfo{}

-	rootPath, err := getIntelRdtRoot()
+	rootPath, err := Root()
 	if err != nil {
 		return memBwInfo, err
 	}
@@ -498,7 +461,7 @@ func getMemBwInfo() (*MemBwInfo, error) {

 // Get diagnostics for last filesystem operation error from file info/last_cmd_status
 func getLastCmdStatus() (string, error) {
-	rootPath, err := getIntelRdtRoot()
+	rootPath, err := Root()
 	if err != nil {
 		return "", err
 	}
@@ -515,13 +478,13 @@ func getLastCmdStatus() (string, error) {
 // WriteIntelRdtTasks writes the specified pid into the "tasks" file
 func WriteIntelRdtTasks(dir string, pid int) error {
 	if dir == "" {
-		return fmt.Errorf("no such directory for %s", IntelRdtTasks)
+		return fmt.Errorf("no such directory for %s", intelRdtTasks)
 	}

 	// Don't attach any pid if -1 is specified as a pid
 	if pid != -1 {
-		if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0o600); err != nil {
-			return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
+		if err := os.WriteFile(filepath.Join(dir, intelRdtTasks), []byte(strconv.Itoa(pid)), 0o600); err != nil {
+			return newLastCmdError(fmt.Errorf("intelrdt: unable to add pid %d: %w", pid, err))
 		}
 	}
 	return nil
@@ -545,15 +508,19 @@ func IsMBAScEnabled() bool {
 	return mbaScEnabled
 }

-// Get the 'container_id' path in Intel RDT "resource control" filesystem
-func GetIntelRdtPath(id string) (string, error) {
-	rootPath, err := getIntelRdtRoot()
+// Get the path of the clos group in "resource control" filesystem that the container belongs to
+func (m *intelRdtManager) getIntelRdtPath() (string, error) {
+	rootPath, err := Root()
 	if err != nil {
 		return "", err
 	}

-	path := filepath.Join(rootPath, id)
-	return path, nil
+	clos := m.id
+	if m.config.IntelRdt != nil && m.config.IntelRdt.ClosID != "" {
+		clos = m.config.IntelRdt.ClosID
+	}
+
+	return filepath.Join(rootPath, clos), nil
 }

 // Applies Intel RDT configuration to the process with the specified pid
@@ -562,30 +529,48 @@ func (m *intelRdtManager) Apply(pid int) (err error) {
 	if m.config.IntelRdt == nil {
 		return nil
 	}
-	d, err := getIntelRdtData(m.config, pid)
-	if err != nil && !IsNotFound(err) {
+
+	path, err := m.getIntelRdtPath()
+	if err != nil {
 		return err
 	}

 	m.mu.Lock()
 	defer m.mu.Unlock()
-	path, err := d.join(m.id)
-	if err != nil {
-		return err
+
+	if m.config.IntelRdt.ClosID != "" && m.config.IntelRdt.L3CacheSchema == "" && m.config.IntelRdt.MemBwSchema == "" {
+		// Check that the CLOS exists, i.e. it has been pre-configured to
+		// conform with the runtime spec
+		if _, err := os.Stat(path); err != nil {
+			return fmt.Errorf("clos dir not accessible (must be pre-created when l3CacheSchema and memBwSchema are empty): %w", err)
+		}
+	}
+
+	if err := os.MkdirAll(path, 0o755); err != nil {
+		return newLastCmdError(err)
+	}
+
+	if err := WriteIntelRdtTasks(path, pid); err != nil {
+		return err
+	}

 	m.path = path
 	return nil
 }

-// Destroys the Intel RDT 'container_id' group
+// Destroys the Intel RDT container-specific 'container_id' group
 func (m *intelRdtManager) Destroy() error {
-	m.mu.Lock()
-	defer m.mu.Unlock()
-	if err := os.RemoveAll(m.GetPath()); err != nil {
-		return err
+	// Don't remove resctrl group if closid has been explicitly specified. The
+	// group is likely externally managed, i.e. by some other entity than us.
+	// There are probably other containers/tasks sharing the same group.
+	if m.config.IntelRdt == nil || m.config.IntelRdt.ClosID == "" {
+		m.mu.Lock()
+		defer m.mu.Unlock()
+		if err := os.RemoveAll(m.GetPath()); err != nil {
+			return err
+		}
+		m.path = ""
 	}
-	m.path = ""
 	return nil
 }

@@ -593,7 +578,7 @@ func (m *intelRdtManager) Destroy() error {
 // restore the object later
 func (m *intelRdtManager) GetPath() string {
 	if m.path == "" {
-		m.path, _ = GetIntelRdtPath(m.id)
+		m.path, _ = m.getIntelRdtPath()
 	}
 	return m.path
 }
@@ -607,9 +592,9 @@ func (m *intelRdtManager) GetStats() (*Stats, error) {

 	m.mu.Lock()
 	defer m.mu.Unlock()
-	stats := NewStats()
+	stats := newStats()

-	rootPath, err := getIntelRdtRoot()
+	rootPath, err := Root()
 	if err != nil {
 		return nil, err
 	}
@@ -620,7 +605,7 @@ func (m *intelRdtManager) GetStats() (*Stats, error) {
 	}
 	schemaRootStrings := strings.Split(tmpRootStrings, "\n")

-	// The L3 cache and memory bandwidth schemata in 'container_id' group
+	// The L3 cache and memory bandwidth schemata in container's clos group
 	containerPath := m.GetPath()
 	tmpStrings, err := getIntelRdtParamString(containerPath, "schemata")
 	if err != nil {
@@ -643,7 +628,7 @@ func (m *intelRdtManager) GetStats() (*Stats, error) {
 			}
 		}

-		// The L3 cache schema in 'container_id' group
+		// The L3 cache schema in container's clos group
 		for _, schema := range schemaStrings {
 			if strings.Contains(schema, "L3") {
 				stats.L3CacheSchema = strings.TrimSpace(schema)
@@ -666,7 +651,7 @@ func (m *intelRdtManager) GetStats() (*Stats, error) {
 			}
 		}

-		// The memory bandwidth schema in 'container_id' group
+		// The memory bandwidth schema in container's clos group
 		for _, schema := range schemaStrings {
 			if strings.Contains(schema, "MB") {
 				stats.MemBwSchema = strings.TrimSpace(schema)
@@ -736,24 +721,30 @@ func (m *intelRdtManager) Set(container *configs.Config) error {
 		l3CacheSchema := container.IntelRdt.L3CacheSchema
 		memBwSchema := container.IntelRdt.MemBwSchema

+		// TODO: verify that l3CacheSchema and/or memBwSchema match the
+		// existing schemata if ClosID has been specified. This is more
+		// involved than reading the file and doing plain string comparison as
+		// the value written in does not necessarily match what gets read out
+		// (leading zeros, cache id ordering etc).
+
 		// Write a single joint schema string to schemata file
 		if l3CacheSchema != "" && memBwSchema != "" {
 			if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil {
-				return NewLastCmdError(err)
+				return err
 			}
 		}

 		// Write only L3 cache schema string to schemata file
 		if l3CacheSchema != "" && memBwSchema == "" {
 			if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
-				return NewLastCmdError(err)
+				return err
 			}
 		}

 		// Write only memory bandwidth schema string to schemata file
 		if l3CacheSchema == "" && memBwSchema != "" {
 			if err := writeFile(path, "schemata", memBwSchema); err != nil {
-				return NewLastCmdError(err)
+				return err
 			}
 		}
 	}
@@ -761,56 +752,10 @@ func (m *intelRdtManager) Set(container *configs.Config) error {
 	return nil
 }

-func (raw *intelRdtData) join(id string) (string, error) {
-	path := filepath.Join(raw.root, id)
-	if err := os.MkdirAll(path, 0o755); err != nil {
-		return "", NewLastCmdError(err)
-	}
-
-	if err := WriteIntelRdtTasks(path, raw.pid); err != nil {
-		return "", NewLastCmdError(err)
-	}
-	return path, nil
-}
-
-type NotFoundError struct {
-	ResourceControl string
-}
-
-func (e *NotFoundError) Error() string {
-	return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl)
-}
-
-func NewNotFoundError(res string) error {
-	return &NotFoundError{
-		ResourceControl: res,
-	}
-}
-
-func IsNotFound(err error) bool {
-	if err == nil {
-		return false
-	}
-	_, ok := err.(*NotFoundError)
-	return ok
-}
-
-type LastCmdError struct {
-	LastCmdStatus string
-	Err           error
-}
-
-func (e *LastCmdError) Error() string {
-	return e.Err.Error() + ", last_cmd_status: " + e.LastCmdStatus
-}
-
-func NewLastCmdError(err error) error {
-	lastCmdStatus, err1 := getLastCmdStatus()
+func newLastCmdError(err error) error {
+	status, err1 := getLastCmdStatus()
 	if err1 == nil {
-		return &LastCmdError{
-			LastCmdStatus: lastCmdStatus,
-			Err:           err,
-		}
+		return fmt.Errorf("%w, last_cmd_status: %s", err, status)
 	}
 	return err
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package intelrdt

 // The flag to indicate if Intel RDT/MBM is enabled
--- a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go
@@ -3,7 +3,6 @@ package intelrdt
 import (
 	"bufio"
 	"io"
-	"io/ioutil"
 	"os"
 	"path/filepath"

@@ -49,7 +48,7 @@ func parseMonFeatures(reader io.Reader) (monFeatures, error) {
 }

 func getMonitoringStats(containerPath string, stats *Stats) error {
-	numaFiles, err := ioutil.ReadDir(filepath.Join(containerPath, "mon_data"))
+	numaFiles, err := os.ReadDir(filepath.Join(containerPath, "mon_data"))
 	if err != nil {
 		return err
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package intelrdt

 type L3CacheInfo struct {
@@ -54,6 +52,6 @@ type Stats struct {
 	CMTStats *[]CMTNumaNodeStats `json:"cmt_stats,omitempty"`
 }

-func NewStats() *Stats {
+func newStats() *Stats {
 	return &Stats{}
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
@@ -1,13 +1,11 @@
-// +build linux
-
 package keys

 import (
+	"errors"
+	"fmt"
 	"strconv"
 	"strings"

-	"github.com/pkg/errors"
-
 	"golang.org/x/sys/unix"
 )

@@ -16,7 +14,7 @@ type KeySerial uint32
 func JoinSessionKeyring(name string) (KeySerial, error) {
 	sessKeyID, err := unix.KeyctlJoinSessionKeyring(name)
 	if err != nil {
-		return 0, errors.Wrap(err, "create session key")
+		return 0, fmt.Errorf("unable to create session key: %w", err)
 	}
 	return KeySerial(sessKeyID), nil
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go
@@ -3,37 +3,28 @@ package logs
 import (
 	"bufio"
 	"encoding/json"
-	"fmt"
 	"io"
-	"os"
-	"sync"

-	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 )

-var (
-	configureMutex sync.Mutex
-	// loggingConfigured will be set once logging has been configured via invoking `ConfigureLogging`.
-	// Subsequent invocations of `ConfigureLogging` would be no-op
-	loggingConfigured = false
-)
-
-type Config struct {
-	LogLevel    logrus.Level
-	LogFormat   string
-	LogFilePath string
-	LogPipeFd   int
-	LogCaller   bool
-}
-
 func ForwardLogs(logPipe io.ReadCloser) chan error {
 	done := make(chan error, 1)
 	s := bufio.NewScanner(logPipe)

+	logger := logrus.StandardLogger()
+	if logger.ReportCaller {
+		// Need a copy of the standard logger, but with ReportCaller
+		// turned off, as the logs are merely forwarded and their
+		// true source is not this file/line/function.
+		logNoCaller := *logrus.StandardLogger()
+		logNoCaller.ReportCaller = false
+		logger = &logNoCaller
+	}
+
 	go func() {
 		for s.Scan() {
-			processEntry(s.Bytes())
+			processEntry(s.Bytes(), logger)
 		}
 		if err := logPipe.Close(); err != nil {
 			logrus.Errorf("error closing log source: %v", err)
@@ -47,60 +38,19 @@ func ForwardLogs(logPipe io.ReadCloser) chan error {
 	return done
 }

-func processEntry(text []byte) {
+func processEntry(text []byte, logger *logrus.Logger) {
 	if len(text) == 0 {
 		return
 	}

 	var jl struct {
-		Level string `json:"level"`
-		Msg   string `json:"msg"`
+		Level logrus.Level `json:"level"`
+		Msg   string       `json:"msg"`
 	}
 	if err := json.Unmarshal(text, &jl); err != nil {
 		logrus.Errorf("failed to decode %q to json: %v", text, err)
 		return
 	}

-	lvl, err := logrus.ParseLevel(jl.Level)
-	if err != nil {
-		logrus.Errorf("failed to parse log level %q: %v", jl.Level, err)
-		return
-	}
-	logrus.StandardLogger().Logf(lvl, jl.Msg)
-}
-
-func ConfigureLogging(config Config) error {
-	configureMutex.Lock()
-	defer configureMutex.Unlock()
-
-	if loggingConfigured {
-		return errors.New("logging has already been configured")
-	}
-
-	logrus.SetLevel(config.LogLevel)
-	logrus.SetReportCaller(config.LogCaller)
-
-	// XXX: while 0 is a valid fd (usually stdin), here we assume
-	// that we never deliberately set LogPipeFd to 0.
-	if config.LogPipeFd > 0 {
-		logrus.SetOutput(os.NewFile(uintptr(config.LogPipeFd), "logpipe"))
-	} else if config.LogFilePath != "" {
-		f, err := os.OpenFile(config.LogFilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0o644)
-		if err != nil {
-			return err
-		}
-		logrus.SetOutput(f)
-	}
-
-	switch config.LogFormat {
-	case "text":
-		// retain logrus's default.
-	case "json":
-		logrus.SetFormatter(new(logrus.JSONFormatter))
-	default:
-		return fmt.Errorf("unknown log-format %q", config.LogFormat)
-	}
-
-	loggingConfigured = true
-	return nil
+	logger.Log(jl.Level, jl.Msg)
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package libcontainer

 import (
@@ -23,6 +21,7 @@ const (
 	RootlessEUIDAttr uint16 = 27287
 	UidmapPathAttr   uint16 = 27288
 	GidmapPathAttr   uint16 = 27289
+	MountSourcesAttr uint16 = 27290
 )

 type Int32msg struct {
--- a/vendor/github.com/opencontainers/runc/libcontainer/mount_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/mount_linux.go
@@ -0,0 +1,83 @@
+package libcontainer
+
+import (
+	"strconv"
+
+	"golang.org/x/sys/unix"
+)
+
+// mountError holds an error from a failed mount or unmount operation.
+type mountError struct {
+	op     string
+	source string
+	target string
+	procfd string
+	flags  uintptr
+	data   string
+	err    error
+}
+
+// Error provides a string error representation.
+func (e *mountError) Error() string {
+	out := e.op + " "
+
+	if e.source != "" {
+		out += e.source + ":" + e.target
+	} else {
+		out += e.target
+	}
+	if e.procfd != "" {
+		out += " (via " + e.procfd + ")"
+	}
+
+	if e.flags != uintptr(0) {
+		out += ", flags: 0x" + strconv.FormatUint(uint64(e.flags), 16)
+	}
+	if e.data != "" {
+		out += ", data: " + e.data
+	}
+
+	out += ": " + e.err.Error()
+	return out
+}
+
+// Unwrap returns the underlying error.
+// This is a convention used by Go 1.13+ standard library.
+func (e *mountError) Unwrap() error {
+	return e.err
+}
+
+// mount is a simple unix.Mount wrapper. If procfd is not empty, it is used
+// instead of target (and the target is only used to add context to an error).
+func mount(source, target, procfd, fstype string, flags uintptr, data string) error {
+	dst := target
+	if procfd != "" {
+		dst = procfd
+	}
+	if err := unix.Mount(source, dst, fstype, flags, data); err != nil {
+		return &mountError{
+			op:     "mount",
+			source: source,
+			target: target,
+			procfd: procfd,
+			flags:  flags,
+			data:   data,
+			err:    err,
+		}
+	}
+	return nil
+}
+
+// unmount is a simple unix.Unmount wrapper.
+func unmount(target string, flags int) error {
+	err := unix.Unmount(target, flags)
+	if err != nil {
+		return &mountError{
+			op:     "unmount",
+			target: target,
+			flags:  uintptr(flags),
+			err:    err,
+		}
+	}
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go
@@ -1,11 +1,9 @@
-// +build linux
-
 package libcontainer

 import (
 	"bytes"
 	"fmt"
-	"io/ioutil"
+	"os"
 	"path/filepath"
 	"strconv"

@@ -75,7 +73,7 @@ func getNetworkInterfaceStats(interfaceName string) (*types.NetworkInterface, er

 // Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics
 func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
-	data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
+	data, err := os.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
 	if err != nil {
 		return 0, err
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go
@@ -1,11 +1,8 @@
-// +build linux
-
 package libcontainer

 import (
 	"errors"
 	"fmt"
-	"io/ioutil"
 	"os"
 	"path/filepath"

@@ -35,7 +32,7 @@ func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct

 	eventControlPath := filepath.Join(cgDir, "cgroup.event_control")
 	data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg)
-	if err := ioutil.WriteFile(eventControlPath, []byte(data), 0o700); err != nil {
+	if err := os.WriteFile(eventControlPath, []byte(data), 0o700); err != nil {
 		eventfd.Close()
 		evFile.Close()
 		return nil, err
--- a/vendor/github.com/opencontainers/runc/libcontainer/notify_v2_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/notify_v2_linux.go
@@ -1,13 +1,11 @@
-// +build linux
-
 package libcontainer

 import (
+	"fmt"
 	"path/filepath"
 	"unsafe"

 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
-	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )
@@ -15,19 +13,19 @@ import (
 func registerMemoryEventV2(cgDir, evName, cgEvName string) (<-chan struct{}, error) {
 	fd, err := unix.InotifyInit()
 	if err != nil {
-		return nil, errors.Wrap(err, "unable to init inotify")
+		return nil, fmt.Errorf("unable to init inotify: %w", err)
 	}
 	// watching oom kill
 	evFd, err := unix.InotifyAddWatch(fd, filepath.Join(cgDir, evName), unix.IN_MODIFY)
 	if err != nil {
 		unix.Close(fd)
-		return nil, errors.Wrap(err, "unable to add inotify watch")
+		return nil, fmt.Errorf("unable to add inotify watch: %w", err)
 	}
 	// Because no `unix.IN_DELETE|unix.IN_DELETE_SELF` event for cgroup file system, so watching all process exited
 	cgFd, err := unix.InotifyAddWatch(fd, filepath.Join(cgDir, cgEvName), unix.IN_MODIFY)
 	if err != nil {
 		unix.Close(fd)
-		return nil, errors.Wrap(err, "unable to add inotify watch")
+		return nil, fmt.Errorf("unable to add inotify watch: %w", err)
 	}
 	ch := make(chan struct{})
 	go func() {
@@ -53,7 +51,7 @@ func registerMemoryEventV2(cgDir, evName, cgEvName string) (<-chan struct{}, err
 			offset = 0
 			for offset <= uint32(n-unix.SizeofInotifyEvent) {
 				rawEvent := (*unix.InotifyEvent)(unsafe.Pointer(&buffer[offset]))
-				offset += unix.SizeofInotifyEvent + uint32(rawEvent.Len)
+				offset += unix.SizeofInotifyEvent + rawEvent.Len
 				if rawEvent.Mask&unix.IN_MODIFY != unix.IN_MODIFY {
 					continue
 				}
--- a/vendor/github.com/opencontainers/runc/libcontainer/process.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/process.go
@@ -1,7 +1,7 @@
 package libcontainer

 import (
-	"fmt"
+	"errors"
 	"io"
 	"math"
 	"os"
@@ -9,6 +9,8 @@ import (
 	"github.com/opencontainers/runc/libcontainer/configs"
 )

+var errInvalidProcess = errors.New("invalid process")
+
 type processOperations interface {
 	wait() (*os.ProcessState, error)
 	signal(sig os.Signal) error
@@ -78,13 +80,22 @@ type Process struct {
 	ops processOperations

 	LogLevel string
+
+	// SubCgroupPaths specifies sub-cgroups to run the process in.
+	// Map keys are controller names, map values are paths (relative to
+	// container's top-level cgroup).
+	//
+	// If empty, the default top-level container's cgroup is used.
+	//
+	// For cgroup v2, the only key allowed is "".
+	SubCgroupPaths map[string]string
 }

 // Wait waits for the process to exit.
 // Wait releases any resources associated with the Process
 func (p Process) Wait() (*os.ProcessState, error) {
 	if p.ops == nil {
-		return nil, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
+		return nil, errInvalidProcess
 	}
 	return p.ops.wait()
 }
@@ -94,7 +105,7 @@ func (p Process) Pid() (int, error) {
 	// math.MinInt32 is returned here, because it's invalid value
 	// for the kill() system call.
 	if p.ops == nil {
-		return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
+		return math.MinInt32, errInvalidProcess
 	}
 	return p.ops.pid(), nil
 }
@@ -102,7 +113,7 @@ func (p Process) Pid() (int, error) {
 // Signal sends a signal to the Process.
 func (p Process) Signal(sig os.Signal) error {
 	if p.ops == nil {
-		return newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
+		return errInvalidProcess
 	}
 	return p.ops.signal(sig)
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package libcontainer

 import (
@@ -7,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"net"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -25,10 +24,6 @@ import (
 	"golang.org/x/sys/unix"
 )

-// Synchronisation value for cgroup namespace setup.
-// The same constant is defined in nsexec.c as "CREATECGROUPNS".
-const createCgroupns = 0x80
-
 type parentProcess interface {
 	// pid returns the pid for the running process.
 	pid() int
@@ -96,7 +91,7 @@ func (p *setnsProcess) start() (retErr error) {
 	p.messageSockPair.child.Close()
 	p.logFilePair.child.Close()
 	if err != nil {
-		return newSystemErrorWithCause(err, "starting setns process")
+		return fmt.Errorf("error starting setns process: %w", err)
 	}

 	waitInit := initWaiter(p.messageSockPair.parent)
@@ -104,7 +99,7 @@ func (p *setnsProcess) start() (retErr error) {
 		if retErr != nil {
 			if newOom, err := p.manager.OOMKillCount(); err == nil && newOom != oom {
 				// Someone in this cgroup was killed, this _might_ be us.
-				retErr = newSystemErrorWithCause(retErr, "possibly OOM-killed")
+				retErr = fmt.Errorf("%w (possibly OOM-killed)", retErr)
 			}
 			werr := <-waitInit
 			if werr != nil {
@@ -119,7 +114,7 @@ func (p *setnsProcess) start() (retErr error) {

 	if p.bootstrapData != nil {
 		if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
-			return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
+			return fmt.Errorf("error copying bootstrap data to pipe: %w", err)
 		}
 	}
 	err = <-waitInit
@@ -127,14 +122,14 @@ func (p *setnsProcess) start() (retErr error) {
 		return err
 	}
 	if err := p.execSetns(); err != nil {
-		return newSystemErrorWithCause(err, "executing setns process")
+		return fmt.Errorf("error executing setns process: %w", err)
 	}
-	if len(p.cgroupPaths) > 0 {
-		if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil && !p.rootlessCgroups {
-			// On cgroup v2 + nesting + domain controllers, EnterPid may fail with EBUSY.
+	for _, path := range p.cgroupPaths {
+		if err := cgroups.WriteCgroupProc(path, p.pid()); err != nil && !p.rootlessCgroups {
+			// On cgroup v2 + nesting + domain controllers, WriteCgroupProc may fail with EBUSY.
 			// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
 			// Try to join the cgroup of InitProcessPid.
-			if cgroups.IsCgroup2UnifiedMode() {
+			if cgroups.IsCgroup2UnifiedMode() && p.initProcessPid != 0 {
 				initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
 				initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
 				if initCgErr == nil {
@@ -148,7 +143,7 @@ func (p *setnsProcess) start() (retErr error) {
 				}
 			}
 			if err != nil {
-				return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
+				return fmt.Errorf("error adding pid %d to cgroups: %w", p.pid(), err)
 			}
 		}
 	}
@@ -157,17 +152,17 @@ func (p *setnsProcess) start() (retErr error) {
 		_, err := os.Stat(p.intelRdtPath)
 		if err == nil {
 			if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil {
-				return newSystemErrorWithCausef(err, "adding pid %d to Intel RDT resource control filesystem", p.pid())
+				return fmt.Errorf("error adding pid %d to Intel RDT: %w", p.pid(), err)
 			}
 		}
 	}
 	// set rlimits, this has to be done here because we lose permissions
 	// to raise the limits once we enter a user-namespace
 	if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
-		return newSystemErrorWithCause(err, "setting rlimits for process")
+		return fmt.Errorf("error setting rlimits for process: %w", err)
 	}
 	if err := utils.WriteJSON(p.messageSockPair.parent, p.config); err != nil {
-		return newSystemErrorWithCause(err, "writing config to pipe")
+		return fmt.Errorf("error writing config to pipe: %w", err)
 	}

 	ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error {
@@ -178,13 +173,49 @@ func (p *setnsProcess) start() (retErr error) {
 		case procHooks:
 			// This shouldn't happen.
 			panic("unexpected procHooks in setns")
+		case procSeccomp:
+			if p.config.Config.Seccomp.ListenerPath == "" {
+				return errors.New("listenerPath is not set")
+			}
+
+			seccompFd, err := recvSeccompFd(uintptr(p.pid()), uintptr(sync.Fd))
+			if err != nil {
+				return err
+			}
+			defer unix.Close(seccompFd)
+
+			bundle, annotations := utils.Annotations(p.config.Config.Labels)
+			containerProcessState := &specs.ContainerProcessState{
+				Version:  specs.Version,
+				Fds:      []string{specs.SeccompFdName},
+				Pid:      p.cmd.Process.Pid,
+				Metadata: p.config.Config.Seccomp.ListenerMetadata,
+				State: specs.State{
+					Version:     specs.Version,
+					ID:          p.config.ContainerId,
+					Status:      specs.StateRunning,
+					Pid:         p.initProcessPid,
+					Bundle:      bundle,
+					Annotations: annotations,
+				},
+			}
+			if err := sendContainerProcessState(p.config.Config.Seccomp.ListenerPath,
+				containerProcessState, seccompFd); err != nil {
+				return err
+			}
+
+			// Sync with child.
+			if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
+				return err
+			}
+			return nil
 		default:
-			return newSystemError(errors.New("invalid JSON payload from child"))
+			return errors.New("invalid JSON payload from child")
 		}
 	})

 	if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
-		return newSystemErrorWithCause(err, "calling shutdown on init pipe")
+		return &os.PathError{Op: "shutdown", Path: "(init pipe)", Err: err}
 	}
 	// Must be done after Shutdown so the child will exit and we can wait for it.
 	if ierr != nil {
@@ -202,16 +233,16 @@ func (p *setnsProcess) execSetns() error {
 	status, err := p.cmd.Process.Wait()
 	if err != nil {
 		_ = p.cmd.Wait()
-		return newSystemErrorWithCause(err, "waiting on setns process to finish")
+		return fmt.Errorf("error waiting on setns process to finish: %w", err)
 	}
 	if !status.Success() {
 		_ = p.cmd.Wait()
-		return newSystemError(&exec.ExitError{ProcessState: status})
+		return &exec.ExitError{ProcessState: status}
 	}
 	var pid *pid
 	if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil {
 		_ = p.cmd.Wait()
-		return newSystemErrorWithCause(err, "reading pid from init pipe")
+		return fmt.Errorf("error reading pid from init pipe: %w", err)
 	}

 	// Clean up the zombie parent process
@@ -335,7 +366,7 @@ func (p *initProcess) start() (retErr error) {
 	_ = p.logFilePair.child.Close()
 	if err != nil {
 		p.process.ops = nil
-		return newSystemErrorWithCause(err, "starting init process command")
+		return fmt.Errorf("unable to start init: %w", err)
 	}

 	waitInit := initWaiter(p.messageSockPair.parent)
@@ -355,9 +386,9 @@ func (p *initProcess) start() (retErr error) {
 				if logrus.GetLevel() >= logrus.DebugLevel {
 					// Only show the original error if debug is set,
 					// as it is not generally very useful.
-					retErr = newSystemErrorWithCause(retErr, oomError)
+					retErr = fmt.Errorf(oomError+": %w", retErr)
 				} else {
-					retErr = newSystemError(errors.New(oomError))
+					retErr = errors.New(oomError)
 				}
 			}

@@ -382,15 +413,15 @@ func (p *initProcess) start() (retErr error) {
 	// cgroup. We don't need to worry about not doing this and not being root
 	// because we'd be using the rootless cgroup manager in that case.
 	if err := p.manager.Apply(p.pid()); err != nil {
-		return newSystemErrorWithCause(err, "applying cgroup configuration for process")
+		return fmt.Errorf("unable to apply cgroup configuration: %w", err)
 	}
 	if p.intelRdtManager != nil {
 		if err := p.intelRdtManager.Apply(p.pid()); err != nil {
-			return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
+			return fmt.Errorf("unable to apply Intel RDT configuration: %w", err)
 		}
 	}
 	if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
-		return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
+		return fmt.Errorf("can't copy bootstrap data to pipe: %w", err)
 	}
 	err = <-waitInit
 	if err != nil {
@@ -399,7 +430,7 @@ func (p *initProcess) start() (retErr error) {

 	childPid, err := p.getChildPid()
 	if err != nil {
-		return newSystemErrorWithCause(err, "getting the final child's pid from pipe")
+		return fmt.Errorf("can't get final child's PID from pipe: %w", err)
 	}

 	// Save the standard descriptor names before the container process
@@ -407,30 +438,23 @@ func (p *initProcess) start() (retErr error) {
 	// we won't know at checkpoint time which file descriptor to look up.
 	fds, err := getPipeFds(childPid)
 	if err != nil {
-		return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", childPid)
+		return fmt.Errorf("error getting pipe fds for pid %d: %w", childPid, err)
 	}
 	p.setExternalDescriptors(fds)

-	// Now it's time to setup cgroup namesapce
-	if p.config.Config.Namespaces.Contains(configs.NEWCGROUP) && p.config.Config.Namespaces.PathOf(configs.NEWCGROUP) == "" {
-		if _, err := p.messageSockPair.parent.Write([]byte{createCgroupns}); err != nil {
-			return newSystemErrorWithCause(err, "sending synchronization value to init process")
-		}
-	}
-
 	// Wait for our first child to exit
 	if err := p.waitForChildExit(childPid); err != nil {
-		return newSystemErrorWithCause(err, "waiting for our first child to exit")
+		return fmt.Errorf("error waiting for our first child to exit: %w", err)
 	}

 	if err := p.createNetworkInterfaces(); err != nil {
-		return newSystemErrorWithCause(err, "creating network interfaces")
+		return fmt.Errorf("error creating network interfaces: %w", err)
 	}
 	if err := p.updateSpecState(); err != nil {
-		return newSystemErrorWithCause(err, "updating the spec state")
+		return fmt.Errorf("error updating spec state: %w", err)
 	}
 	if err := p.sendConfig(); err != nil {
-		return newSystemErrorWithCause(err, "sending config to init process")
+		return fmt.Errorf("error sending config to init process: %w", err)
 	}
 	var (
 		sentRun    bool
@@ -439,25 +463,60 @@ func (p *initProcess) start() (retErr error) {

 	ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error {
 		switch sync.Type {
+		case procSeccomp:
+			if p.config.Config.Seccomp.ListenerPath == "" {
+				return errors.New("listenerPath is not set")
+			}
+
+			seccompFd, err := recvSeccompFd(uintptr(childPid), uintptr(sync.Fd))
+			if err != nil {
+				return err
+			}
+			defer unix.Close(seccompFd)
+
+			s, err := p.container.currentOCIState()
+			if err != nil {
+				return err
+			}
+
+			// initProcessStartTime hasn't been set yet.
+			s.Pid = p.cmd.Process.Pid
+			s.Status = specs.StateCreating
+			containerProcessState := &specs.ContainerProcessState{
+				Version:  specs.Version,
+				Fds:      []string{specs.SeccompFdName},
+				Pid:      s.Pid,
+				Metadata: p.config.Config.Seccomp.ListenerMetadata,
+				State:    *s,
+			}
+			if err := sendContainerProcessState(p.config.Config.Seccomp.ListenerPath,
+				containerProcessState, seccompFd); err != nil {
+				return err
+			}
+
+			// Sync with child.
+			if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
+				return err
+			}
 		case procReady:
 			// set rlimits, this has to be done here because we lose permissions
 			// to raise the limits once we enter a user-namespace
 			if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
-				return newSystemErrorWithCause(err, "setting rlimits for ready process")
+				return fmt.Errorf("error setting rlimits for ready process: %w", err)
 			}
 			// call prestart and CreateRuntime hooks
 			if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
 				// Setup cgroup before the hook, so that the prestart and CreateRuntime hook could apply cgroup permissions.
 				if err := p.manager.Set(p.config.Config.Cgroups.Resources); err != nil {
-					return newSystemErrorWithCause(err, "setting cgroup config for ready process")
+					return fmt.Errorf("error setting cgroup config for ready process: %w", err)
 				}
 				if p.intelRdtManager != nil {
 					if err := p.intelRdtManager.Set(p.config.Config); err != nil {
-						return newSystemErrorWithCause(err, "setting Intel RDT config for ready process")
+						return fmt.Errorf("error setting Intel RDT config for ready process: %w", err)
 					}
 				}

-				if p.config.Config.Hooks != nil {
+				if len(p.config.Config.Hooks) != 0 {
 					s, err := p.container.currentOCIState()
 					if err != nil {
 						return err
@@ -493,26 +552,26 @@ func (p *initProcess) start() (retErr error) {
 			// procRun sync.
 			state, uerr := p.container.updateState(p)
 			if uerr != nil {
-				return newSystemErrorWithCause(err, "store init state")
+				return fmt.Errorf("unable to store init state: %w", err)
 			}
 			p.container.initProcessStartTime = state.InitProcessStartTime

 			// Sync with child.
 			if err := writeSync(p.messageSockPair.parent, procRun); err != nil {
-				return newSystemErrorWithCause(err, "writing syncT 'run'")
+				return err
 			}
 			sentRun = true
 		case procHooks:
 			// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
 			if err := p.manager.Set(p.config.Config.Cgroups.Resources); err != nil {
-				return newSystemErrorWithCause(err, "setting cgroup config for procHooks process")
+				return fmt.Errorf("error setting cgroup config for procHooks process: %w", err)
 			}
 			if p.intelRdtManager != nil {
 				if err := p.intelRdtManager.Set(p.config.Config); err != nil {
-					return newSystemErrorWithCause(err, "setting Intel RDT config for procHooks process")
+					return fmt.Errorf("error setting Intel RDT config for procHooks process: %w", err)
 				}
 			}
-			if p.config.Config.Hooks != nil {
+			if len(p.config.Config.Hooks) != 0 {
 				s, err := p.container.currentOCIState()
 				if err != nil {
 					return err
@@ -531,24 +590,24 @@ func (p *initProcess) start() (retErr error) {
 			}
 			// Sync with child.
 			if err := writeSync(p.messageSockPair.parent, procResume); err != nil {
-				return newSystemErrorWithCause(err, "writing syncT 'resume'")
+				return err
 			}
 			sentResume = true
 		default:
-			return newSystemError(errors.New("invalid JSON payload from child"))
+			return errors.New("invalid JSON payload from child")
 		}

 		return nil
 	})

 	if !sentRun {
-		return newSystemErrorWithCause(ierr, "container init")
+		return fmt.Errorf("error during container init: %w", ierr)
 	}
 	if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
-		return newSystemError(errors.New("could not synchronise after executing prestart and CreateRuntime hooks with container process"))
+		return errors.New("could not synchronise after executing prestart and CreateRuntime hooks with container process")
 	}
 	if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
-		return newSystemErrorWithCause(err, "shutting down init pipe")
+		return &os.PathError{Op: "shutdown", Path: "(init pipe)", Err: err}
 	}

 	// Must be done after Shutdown so the child will exit and we can wait for it.
@@ -634,6 +693,46 @@ func (p *initProcess) forwardChildLogs() chan error {
 	return logs.ForwardLogs(p.logFilePair.parent)
 }

+func recvSeccompFd(childPid, childFd uintptr) (int, error) {
+	pidfd, _, errno := unix.Syscall(unix.SYS_PIDFD_OPEN, childPid, 0, 0)
+	if errno != 0 {
+		return -1, fmt.Errorf("performing SYS_PIDFD_OPEN syscall: %w", errno)
+	}
+	defer unix.Close(int(pidfd))
+
+	seccompFd, _, errno := unix.Syscall(unix.SYS_PIDFD_GETFD, pidfd, childFd, 0)
+	if errno != 0 {
+		return -1, fmt.Errorf("performing SYS_PIDFD_GETFD syscall: %w", errno)
+	}
+
+	return int(seccompFd), nil
+}
+
+func sendContainerProcessState(listenerPath string, state *specs.ContainerProcessState, fd int) error {
+	conn, err := net.Dial("unix", listenerPath)
+	if err != nil {
+		return fmt.Errorf("failed to connect with seccomp agent specified in the seccomp profile: %w", err)
+	}
+
+	socket, err := conn.(*net.UnixConn).File()
+	if err != nil {
+		return fmt.Errorf("cannot get seccomp socket: %w", err)
+	}
+	defer socket.Close()
+
+	b, err := json.Marshal(state)
+	if err != nil {
+		return fmt.Errorf("cannot marshall seccomp state: %w", err)
+	}
+
+	err = utils.SendFds(socket, b, fd)
+	if err != nil {
+		return fmt.Errorf("cannot send seccomp fd to %s: %w", listenerPath, err)
+	}
+
+	return nil
+}
+
 func getPipeFds(pid int) ([]string, error) {
 	fds := make([]string, 3)

@@ -694,7 +793,7 @@ func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
 	// change ownership of the pipes in case we are in a user namespace
 	for _, fd := range fds {
 		if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil {
-			return nil, err
+			return nil, &os.PathError{Op: "fchown", Path: "fd " + strconv.Itoa(int(fd)), Err: err}
 		}
 	}
 	return i, nil
@@ -719,7 +818,7 @@ func initWaiter(r io.Reader) chan error {
 				return
 			}
 		}
-		ch <- newSystemErrorWithCause(err, "waiting for init preliminary setup")
+		ch <- fmt.Errorf("waiting for init preliminary setup: %w", err)
 	}()

 	return ch
--- a/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
@@ -1,9 +1,7 @@
-// +build linux
-
 package libcontainer

 import (
-	"fmt"
+	"errors"
 	"os"
 	"os/exec"

@@ -31,7 +29,7 @@ type restoredProcess struct {
 }

 func (p *restoredProcess) start() error {
-	return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
+	return errors.New("restored process cannot be started")
 }

 func (p *restoredProcess) pid() int {
@@ -51,7 +49,8 @@ func (p *restoredProcess) wait() (*os.ProcessState, error) {
 	// maybe use --exec-cmd in criu
 	err := p.cmd.Wait()
 	if err != nil {
-		if _, ok := err.(*exec.ExitError); !ok {
+		var exitErr *exec.ExitError
+		if !errors.As(err, &exitErr) {
 			return nil, err
 		}
 	}
@@ -89,7 +88,7 @@ type nonChildProcess struct {
 }

 func (p *nonChildProcess) start() error {
-	return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
+	return errors.New("restored process cannot be started")
 }

 func (p *nonChildProcess) pid() int {
@@ -97,11 +96,11 @@ func (p *nonChildProcess) pid() int {
 }

 func (p *nonChildProcess) terminate() error {
-	return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
+	return errors.New("restored process cannot be terminated")
 }

 func (p *nonChildProcess) wait() (*os.ProcessState, error) {
-	return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
+	return nil, errors.New("restored process cannot be waited on")
 }

 func (p *nonChildProcess) startTime() (uint64, error) {
--- a/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
@@ -1,15 +1,14 @@
-// +build linux
-
 package libcontainer

 import (
+	"errors"
 	"fmt"
 	"io"
-	"io/ioutil"
 	"os"
 	"os/exec"
 	"path"
 	"path/filepath"
+	"strconv"
 	"strings"
 	"time"

@@ -36,6 +35,7 @@ type mountConfig struct {
 	cgroup2Path     string
 	rootlessCgroups bool
 	cgroupns        bool
+	fd              *int
 }

 // needsSetupDev returns true if /dev needs to be set up.
@@ -51,10 +51,14 @@ func needsSetupDev(config *configs.Config) bool {
 // prepareRootfs sets up the devices, mount points, and filesystems for use
 // inside a new mount namespace. It doesn't set anything as ro. You must call
 // finalizeRootfs after this function to finish setting up the rootfs.
-func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
+func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig, mountFds []int) (err error) {
 	config := iConfig.Config
 	if err := prepareRoot(config); err != nil {
-		return newSystemErrorWithCause(err, "preparing rootfs")
+		return fmt.Errorf("error preparing rootfs: %w", err)
+	}
+
+	if mountFds != nil && len(mountFds) != len(config.Mounts) {
+		return fmt.Errorf("malformed mountFds slice. Expected size: %v, got: %v. Slice: %v", len(config.Mounts), len(mountFds), mountFds)
 	}

 	mountConfig := &mountConfig{
@@ -65,32 +69,39 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
 		cgroupns:        config.Namespaces.Contains(configs.NEWCGROUP),
 	}
 	setupDev := needsSetupDev(config)
-	for _, m := range config.Mounts {
+	for i, m := range config.Mounts {
 		for _, precmd := range m.PremountCmds {
 			if err := mountCmd(precmd); err != nil {
-				return newSystemErrorWithCause(err, "running premount command")
+				return fmt.Errorf("error running premount command: %w", err)
 			}
 		}
+
+		// Just before the loop we checked that if not empty, len(mountFds) == len(config.Mounts).
+		// Therefore, we can access mountFds[i] without any concerns.
+		if mountFds != nil && mountFds[i] != -1 {
+			mountConfig.fd = &mountFds[i]
+		}
+
 		if err := mountToRootfs(m, mountConfig); err != nil {
-			return newSystemErrorWithCausef(err, "mounting %q to rootfs at %q", m.Source, m.Destination)
+			return fmt.Errorf("error mounting %q to rootfs at %q: %w", m.Source, m.Destination, err)
 		}

 		for _, postcmd := range m.PostmountCmds {
 			if err := mountCmd(postcmd); err != nil {
-				return newSystemErrorWithCause(err, "running postmount command")
+				return fmt.Errorf("error running postmount command: %w", err)
 			}
 		}
 	}

 	if setupDev {
 		if err := createDevices(config); err != nil {
-			return newSystemErrorWithCause(err, "creating device nodes")
+			return fmt.Errorf("error creating device nodes: %w", err)
 		}
 		if err := setupPtmx(config); err != nil {
-			return newSystemErrorWithCause(err, "setting up ptmx")
+			return fmt.Errorf("error setting up ptmx: %w", err)
 		}
 		if err := setupDevSymlinks(config.Rootfs); err != nil {
-			return newSystemErrorWithCause(err, "setting up /dev symlinks")
+			return fmt.Errorf("error setting up /dev symlinks: %w", err)
 		}
 	}

@@ -112,7 +123,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
 	// operation not being perfectly split).

 	if err := unix.Chdir(config.Rootfs); err != nil {
-		return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
+		return &os.PathError{Op: "chdir", Path: config.Rootfs, Err: err}
 	}

 	s := iConfig.SpecState
@@ -130,12 +141,12 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
 		err = chroot()
 	}
 	if err != nil {
-		return newSystemErrorWithCause(err, "jailing process inside rootfs")
+		return fmt.Errorf("error jailing process inside rootfs: %w", err)
 	}

 	if setupDev {
 		if err := reOpenDevNull(); err != nil {
-			return newSystemErrorWithCause(err, "reopening /dev/null inside container")
+			return fmt.Errorf("error reopening /dev/null inside container: %w", err)
 		}
 	}

@@ -161,7 +172,7 @@ func finalizeRootfs(config *configs.Config) (err error) {
 		}
 		if m.Device == "tmpfs" || utils.CleanPath(m.Destination) == "/dev" {
 			if err := remountReadonly(m); err != nil {
-				return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
+				return err
 			}
 		}
 	}
@@ -169,7 +180,7 @@ func finalizeRootfs(config *configs.Config) (err error) {
 	// set rootfs ( / ) as readonly
 	if config.Readonlyfs {
 		if err := setReadonly(); err != nil {
-			return newSystemErrorWithCause(err, "setting rootfs as readonly")
+			return fmt.Errorf("error setting rootfs as readonly: %w", err)
 		}
 	}

@@ -183,14 +194,14 @@ func finalizeRootfs(config *configs.Config) (err error) {

 // /tmp has to be mounted as private to allow MS_MOVE to work in all situations
 func prepareTmp(topTmpDir string) (string, error) {
-	tmpdir, err := ioutil.TempDir(topTmpDir, "runctop")
+	tmpdir, err := os.MkdirTemp(topTmpDir, "runctop")
 	if err != nil {
 		return "", err
 	}
-	if err := unix.Mount(tmpdir, tmpdir, "bind", unix.MS_BIND, ""); err != nil {
+	if err := mount(tmpdir, tmpdir, "", "bind", unix.MS_BIND, ""); err != nil {
 		return "", err
 	}
-	if err := unix.Mount("", tmpdir, "", uintptr(unix.MS_PRIVATE), ""); err != nil {
+	if err := mount("", tmpdir, "", "", uintptr(unix.MS_PRIVATE), ""); err != nil {
 		return "", err
 	}
 	return tmpdir, nil
@@ -206,13 +217,18 @@ func mountCmd(cmd configs.Command) error {
 	command.Env = cmd.Env
 	command.Dir = cmd.Dir
 	if out, err := command.CombinedOutput(); err != nil {
-		return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err)
+		return fmt.Errorf("%#v failed: %s: %w", cmd, string(out), err)
 	}
 	return nil
 }

-func prepareBindMount(m *configs.Mount, rootfs string) error {
-	stat, err := os.Stat(m.Source)
+func prepareBindMount(m *configs.Mount, rootfs string, mountFd *int) error {
+	source := m.Source
+	if mountFd != nil {
+		source = "/proc/self/fd/" + strconv.Itoa(*mountFd)
+	}
+
+	stat, err := os.Stat(source)
 	if err != nil {
 		// error out if the source of a bind mount does not exist as we will be
 		// unable to bind anything to it.
@@ -226,7 +242,7 @@ func prepareBindMount(m *configs.Mount, rootfs string) error {
 	if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
 		return err
 	}
-	if err := checkProcMount(rootfs, dest, m.Source); err != nil {
+	if err := checkProcMount(rootfs, dest, source); err != nil {
 		return err
 	}
 	if err := createIfNotExists(dest, stat.IsDir()); err != nil {
@@ -256,9 +272,11 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
 		Data:             "mode=755",
 		PropagationFlags: m.PropagationFlags,
 	}
+
 	if err := mountToRootfs(tmpfs, c); err != nil {
 		return err
 	}
+
 	for _, b := range binds {
 		if c.cgroupns {
 			subsystemPath := filepath.Join(c.root, b.Destination)
@@ -278,7 +296,7 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
 					data = cgroups.CgroupNamePrefix + data
 					source = "systemd"
 				}
-				return unix.Mount(source, procfd, "cgroup", uintptr(flags), data)
+				return mount(source, b.Destination, procfd, "cgroup", uintptr(flags), data)
 			}); err != nil {
 				return err
 			}
@@ -310,9 +328,9 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
 		return err
 	}
 	return utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
-		if err := unix.Mount(m.Source, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
+		if err := mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
 			// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
-			if err == unix.EPERM || err == unix.EBUSY {
+			if errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY) {
 				src := fs2.UnifiedMountpoint
 				if c.cgroupns && c.cgroup2Path != "" {
 					// Emulate cgroupns by bind-mounting
@@ -320,8 +338,8 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
 					// the whole /sys/fs/cgroup.
 					src = c.cgroup2Path
 				}
-				err = unix.Mount(src, procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
-				if err == unix.ENOENT && c.rootlessCgroups {
+				err = mount(src, m.Destination, procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
+				if c.rootlessCgroups && errors.Is(err, unix.ENOENT) {
 					err = nil
 				}
 			}
@@ -335,12 +353,12 @@ func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
 	// Set up a scratch dir for the tmpfs on the host.
 	tmpdir, err := prepareTmp("/tmp")
 	if err != nil {
-		return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
+		return fmt.Errorf("tmpcopyup: failed to setup tmpdir: %w", err)
 	}
 	defer cleanupTmp(tmpdir)
-	tmpDir, err := ioutil.TempDir(tmpdir, "runctmpdir")
+	tmpDir, err := os.MkdirTemp(tmpdir, "runctmpdir")
 	if err != nil {
-		return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
+		return fmt.Errorf("tmpcopyup: failed to create tmpdir: %w", err)
 	}
 	defer os.RemoveAll(tmpDir)

@@ -348,15 +366,15 @@ func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
 	// m.Destination since we are going to mount *on the host*.
 	oldDest := m.Destination
 	m.Destination = tmpDir
-	err = mountPropagate(m, "/", mountLabel)
+	err = mountPropagate(m, "/", mountLabel, nil)
 	m.Destination = oldDest
 	if err != nil {
 		return err
 	}
 	defer func() {
 		if Err != nil {
-			if err := unix.Unmount(tmpDir, unix.MNT_DETACH); err != nil {
-				logrus.Warnf("tmpcopyup: failed to unmount tmpdir on error: %v", err)
+			if err := unmount(tmpDir, unix.MNT_DETACH); err != nil {
+				logrus.Warnf("tmpcopyup: %v", err)
 			}
 		}
 	}()
@@ -369,8 +387,8 @@ func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
 			return fmt.Errorf("tmpcopyup: failed to copy %s to %s (%s): %w", m.Destination, procfd, tmpDir, err)
 		}
 		// Now move the mount into the container.
-		if err := unix.Mount(tmpDir, procfd, "", unix.MS_MOVE, ""); err != nil {
-			return fmt.Errorf("tmpcopyup: failed to move mount %s to %s (%s): %w", tmpDir, procfd, m.Destination, err)
+		if err := mount(tmpDir, m.Destination, procfd, "", unix.MS_MOVE, ""); err != nil {
+			return fmt.Errorf("tmpcopyup: failed to move mount: %w", err)
 		}
 		return nil
 	})
@@ -379,6 +397,7 @@ func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
 func mountToRootfs(m *configs.Mount, c *mountConfig) error {
 	rootfs := c.root
 	mountLabel := c.label
+	mountFd := c.fd
 	dest, err := securejoin.SecureJoin(rootfs, m.Destination)
 	if err != nil {
 		return err
@@ -402,12 +421,12 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
 			return err
 		}
 		// Selinux kernels do not support labeling of /proc or /sys
-		return mountPropagate(m, rootfs, "")
+		return mountPropagate(m, rootfs, "", nil)
 	case "mqueue":
 		if err := os.MkdirAll(dest, 0o755); err != nil {
 			return err
 		}
-		if err := mountPropagate(m, rootfs, ""); err != nil {
+		if err := mountPropagate(m, rootfs, "", nil); err != nil {
 			return err
 		}
 		return label.SetFileLabel(dest, mountLabel)
@@ -422,11 +441,13 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
 		if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP {
 			err = doTmpfsCopyUp(m, rootfs, mountLabel)
 		} else {
-			err = mountPropagate(m, rootfs, mountLabel)
+			err = mountPropagate(m, rootfs, mountLabel, nil)
 		}
+
 		if err != nil {
 			return err
 		}
+
 		if stat != nil {
 			if err = os.Chmod(dest, stat.Mode()); err != nil {
 				return err
@@ -434,17 +455,17 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
 		}
 		return nil
 	case "bind":
-		if err := prepareBindMount(m, rootfs); err != nil {
+		if err := prepareBindMount(m, rootfs, mountFd); err != nil {
 			return err
 		}
-		if err := mountPropagate(m, rootfs, mountLabel); err != nil {
+		if err := mountPropagate(m, rootfs, mountLabel, mountFd); err != nil {
 			return err
 		}
 		// bind mount won't change mount options, we need remount to make mount options effective.
 		// first check that we have non-default options required before attempting a remount
 		if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 {
 			// only remount if unique mount options are set
-			if err := remount(m, rootfs); err != nil {
+			if err := remount(m, rootfs, mountFd); err != nil {
 				return err
 			}
 		}
@@ -470,7 +491,10 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
 		if err := os.MkdirAll(dest, 0o755); err != nil {
 			return err
 		}
-		return mountPropagate(m, rootfs, mountLabel)
+		return mountPropagate(m, rootfs, mountLabel, mountFd)
+	}
+	if err := setRecAttr(m, rootfs); err != nil {
+		return err
 	}
 	return nil
 }
@@ -570,7 +594,7 @@ func checkProcMount(rootfs, dest, source string) error {
 func isProc(path string) (bool, error) {
 	var s unix.Statfs_t
 	if err := unix.Statfs(path, &s); err != nil {
-		return false, err
+		return false, &os.PathError{Op: "statfs", Path: path, Err: err}
 	}
 	return s.Type == unix.PROC_SUPER_MAGIC, nil
 }
@@ -593,7 +617,7 @@ func setupDevSymlinks(rootfs string) error {
 			dst = filepath.Join(rootfs, link[1])
 		)
 		if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) {
-			return fmt.Errorf("symlink %s %s %s", src, dst, err)
+			return err
 		}
 	}
 	return nil
@@ -607,20 +631,24 @@ func reOpenDevNull() error {
 	var stat, devNullStat unix.Stat_t
 	file, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
 	if err != nil {
-		return fmt.Errorf("Failed to open /dev/null - %s", err)
+		return err
 	}
 	defer file.Close() //nolint: errcheck
 	if err := unix.Fstat(int(file.Fd()), &devNullStat); err != nil {
-		return err
+		return &os.PathError{Op: "fstat", Path: file.Name(), Err: err}
 	}
 	for fd := 0; fd < 3; fd++ {
 		if err := unix.Fstat(fd, &stat); err != nil {
-			return err
+			return &os.PathError{Op: "fstat", Path: "fd " + strconv.Itoa(fd), Err: err}
 		}
 		if stat.Rdev == devNullStat.Rdev {
 			// Close and re-open the fd.
 			if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil {
-				return err
+				return &os.PathError{
+					Op:   "dup3",
+					Path: "fd " + strconv.Itoa(int(file.Fd())),
+					Err:  err,
+				}
 			}
 		}
 	}
@@ -658,7 +686,7 @@ func bindMountDeviceNode(rootfs, dest string, node *devices.Device) error {
 		_ = f.Close()
 	}
 	return utils.WithProcfd(rootfs, dest, func(procfd string) error {
-		return unix.Mount(node.Path, procfd, "bind", unix.MS_BIND, "")
+		return mount(node.Path, dest, procfd, "bind", unix.MS_BIND, "")
 	})
 }

@@ -679,9 +707,9 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
 		return bindMountDeviceNode(rootfs, dest, node)
 	}
 	if err := mknodDevice(dest, node); err != nil {
-		if os.IsExist(err) {
+		if errors.Is(err, os.ErrExist) {
 			return nil
-		} else if os.IsPermission(err) {
+		} else if errors.Is(err, os.ErrPermission) {
 			return bindMountDeviceNode(rootfs, dest, node)
 		}
 		return err
@@ -706,9 +734,9 @@ func mknodDevice(dest string, node *devices.Device) error {
 		return err
 	}
 	if err := unix.Mknod(dest, uint32(fileMode), int(dev)); err != nil {
-		return err
+		return &os.PathError{Op: "mknod", Path: dest, Err: err}
 	}
-	return unix.Chown(dest, int(node.Uid), int(node.Gid))
+	return os.Chown(dest, int(node.Uid), int(node.Gid))
 }

 // Get the parent mount point of directory passed in as argument. Also return
@@ -755,7 +783,7 @@ func rootfsParentMountPrivate(rootfs string) error {
 	// shared. Secondly when we bind mount rootfs it will propagate to
 	// parent namespace and we don't want that to happen.
 	if sharedMount {
-		return unix.Mount("", parentMount, "", unix.MS_PRIVATE, "")
+		return mount("", parentMount, "", "", unix.MS_PRIVATE, "")
 	}

 	return nil
@@ -766,7 +794,7 @@ func prepareRoot(config *configs.Config) error {
 	if config.RootPropagation != 0 {
 		flag = config.RootPropagation
 	}
-	if err := unix.Mount("", "/", "", uintptr(flag), ""); err != nil {
+	if err := mount("", "/", "", "", uintptr(flag), ""); err != nil {
 		return err
 	}

@@ -777,13 +805,13 @@ func prepareRoot(config *configs.Config) error {
 		return err
 	}

-	return unix.Mount(config.Rootfs, config.Rootfs, "bind", unix.MS_BIND|unix.MS_REC, "")
+	return mount(config.Rootfs, config.Rootfs, "", "bind", unix.MS_BIND|unix.MS_REC, "")
 }

 func setReadonly() error {
 	flags := uintptr(unix.MS_BIND | unix.MS_REMOUNT | unix.MS_RDONLY)

-	err := unix.Mount("", "/", "", flags, "")
+	err := mount("", "/", "", "", flags, "")
 	if err == nil {
 		return nil
 	}
@@ -792,7 +820,7 @@ func setReadonly() error {
 		return &os.PathError{Op: "statfs", Path: "/", Err: err}
 	}
 	flags |= uintptr(s.Flags)
-	return unix.Mount("", "/", "", flags, "")
+	return mount("", "/", "", "", flags, "")
 }

 func setupPtmx(config *configs.Config) error {
@@ -801,7 +829,7 @@ func setupPtmx(config *configs.Config) error {
 		return err
 	}
 	if err := os.Symlink("pts/ptmx", ptmx); err != nil {
-		return fmt.Errorf("symlink dev ptmx %s", err)
+		return err
 	}
 	return nil
 }
@@ -817,23 +845,23 @@ func pivotRoot(rootfs string) error {

 	oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0)
 	if err != nil {
-		return err
+		return &os.PathError{Op: "open", Path: "/", Err: err}
 	}
 	defer unix.Close(oldroot) //nolint: errcheck

 	newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0)
 	if err != nil {
-		return err
+		return &os.PathError{Op: "open", Path: rootfs, Err: err}
 	}
 	defer unix.Close(newroot) //nolint: errcheck

 	// Change to the new root so that the pivot_root actually acts on it.
 	if err := unix.Fchdir(newroot); err != nil {
-		return err
+		return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(newroot), Err: err}
 	}

 	if err := unix.PivotRoot(".", "."); err != nil {
-		return fmt.Errorf("pivot_root %s", err)
+		return &os.PathError{Op: "pivot_root", Path: ".", Err: err}
 	}

 	// Currently our "." is oldroot (according to the current kernel code).
@@ -842,7 +870,7 @@ func pivotRoot(rootfs string) error {
 	// pivot_root(2).

 	if err := unix.Fchdir(oldroot); err != nil {
-		return err
+		return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(oldroot), Err: err}
 	}

 	// Make oldroot rslave to make sure our unmounts don't propagate to the
@@ -850,17 +878,17 @@ func pivotRoot(rootfs string) error {
 	// known to cause issues due to races where we still have a reference to a
 	// mount while a process in the host namespace are trying to operate on
 	// something they think has no mounts (devicemapper in particular).
-	if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
+	if err := mount("", ".", "", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
 		return err
 	}
-	// Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
-	if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
+	// Perform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
+	if err := unmount(".", unix.MNT_DETACH); err != nil {
 		return err
 	}

 	// Switch back to our shiny new root.
 	if err := unix.Chdir("/"); err != nil {
-		return fmt.Errorf("chdir / %s", err)
+		return &os.PathError{Op: "chdir", Path: "/", Err: err}
 	}
 	return nil
 }
@@ -899,8 +927,8 @@ func msMoveRoot(rootfs string) error {
 	for _, info := range mountinfos {
 		p := info.Mountpoint
 		// Be sure umount events are not propagated to the host.
-		if err := unix.Mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
-			if err == unix.ENOENT {
+		if err := mount("", p, "", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
+			if errors.Is(err, unix.ENOENT) {
 				// If the mountpoint doesn't exist that means that we've
 				// already blasted away some parent directory of the mountpoint
 				// and so we don't care about this error.
@@ -908,13 +936,13 @@ func msMoveRoot(rootfs string) error {
 			}
 			return err
 		}
-		if err := unix.Unmount(p, unix.MNT_DETACH); err != nil {
-			if err != unix.EINVAL && err != unix.EPERM {
+		if err := unmount(p, unix.MNT_DETACH); err != nil {
+			if !errors.Is(err, unix.EINVAL) && !errors.Is(err, unix.EPERM) {
 				return err
 			} else {
 				// If we have not privileges for umounting (e.g. rootless), then
 				// cover the path.
-				if err := unix.Mount("tmpfs", p, "tmpfs", 0, ""); err != nil {
+				if err := mount("tmpfs", p, "", "tmpfs", 0, ""); err != nil {
 					return err
 				}
 			}
@@ -922,7 +950,7 @@ func msMoveRoot(rootfs string) error {
 	}

 	// Move the rootfs on top of "/" in our mount namespace.
-	if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
+	if err := mount(rootfs, "/", "", "", unix.MS_MOVE, ""); err != nil {
 		return err
 	}
 	return chroot()
@@ -930,9 +958,12 @@ func msMoveRoot(rootfs string) error {

 func chroot() error {
 	if err := unix.Chroot("."); err != nil {
-		return err
+		return &os.PathError{Op: "chroot", Path: ".", Err: err}
 	}
-	return unix.Chdir("/")
+	if err := unix.Chdir("/"); err != nil {
+		return &os.PathError{Op: "chdir", Path: "/", Err: err}
+	}
+	return nil
 }

 // createIfNotExists creates a file or a directory only if it does not already exist.
@@ -957,11 +988,11 @@ func createIfNotExists(path string, isDir bool) error {

 // readonlyPath will make a path read only.
 func readonlyPath(path string) error {
-	if err := unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
-		if os.IsNotExist(err) {
+	if err := mount(path, path, "", "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
+		if errors.Is(err, os.ErrNotExist) {
 			return nil
 		}
-		return &os.PathError{Op: "bind-mount", Path: path, Err: err}
+		return err
 	}

 	var s unix.Statfs_t
@@ -970,8 +1001,8 @@ func readonlyPath(path string) error {
 	}
 	flags := uintptr(s.Flags) & (unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC)

-	if err := unix.Mount(path, path, "", flags|unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY, ""); err != nil {
-		return &os.PathError{Op: "bind-mount-ro", Path: path, Err: err}
+	if err := mount(path, path, "", "", flags|unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY, ""); err != nil {
+		return err
 	}

 	return nil
@@ -991,14 +1022,12 @@ func remountReadonly(m *configs.Mount) error {
 		// nosuid, etc.). So, let's use that case so that we can do
 		// this re-mount without failing in a userns.
 		flags |= unix.MS_REMOUNT | unix.MS_BIND | unix.MS_RDONLY
-		if err := unix.Mount("", dest, "", uintptr(flags), ""); err != nil {
-			switch err {
-			case unix.EBUSY:
+		if err := mount("", dest, "", "", uintptr(flags), ""); err != nil {
+			if errors.Is(err, unix.EBUSY) {
 				time.Sleep(100 * time.Millisecond)
 				continue
-			default:
-				return err
 			}
+			return err
 		}
 		return nil
 	}
@@ -1011,9 +1040,9 @@ func remountReadonly(m *configs.Mount) error {
 // For files, maskPath bind mounts /dev/null over the top of the specified path.
 // For directories, maskPath mounts read-only tmpfs over the top of the specified path.
 func maskPath(path string, mountLabel string) error {
-	if err := unix.Mount("/dev/null", path, "", unix.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
-		if err == unix.ENOTDIR {
-			return unix.Mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel))
+	if err := mount("/dev/null", path, "", "", unix.MS_BIND, ""); err != nil && !errors.Is(err, os.ErrNotExist) {
+		if errors.Is(err, unix.ENOTDIR) {
+			return mount("tmpfs", path, "", "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel))
 		}
 		return err
 	}
@@ -1024,33 +1053,38 @@ func maskPath(path string, mountLabel string) error {
 // For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward.
 func writeSystemProperty(key, value string) error {
 	keyPath := strings.Replace(key, ".", "/", -1)
-	return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0o644)
+	return os.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0o644)
 }

-func remount(m *configs.Mount, rootfs string) error {
+func remount(m *configs.Mount, rootfs string, mountFd *int) error {
+	source := m.Source
+	if mountFd != nil {
+		source = "/proc/self/fd/" + strconv.Itoa(*mountFd)
+	}
+
 	return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
 		flags := uintptr(m.Flags | unix.MS_REMOUNT)
-		err := unix.Mount(m.Source, procfd, m.Device, flags, "")
+		err := mount(source, m.Destination, procfd, m.Device, flags, "")
 		if err == nil {
 			return nil
 		}
 		// Check if the source has ro flag...
 		var s unix.Statfs_t
-		if err := unix.Statfs(m.Source, &s); err != nil {
-			return &os.PathError{Op: "statfs", Path: m.Source, Err: err}
+		if err := unix.Statfs(source, &s); err != nil {
+			return &os.PathError{Op: "statfs", Path: source, Err: err}
 		}
 		if s.Flags&unix.MS_RDONLY != unix.MS_RDONLY {
 			return err
 		}
 		// ... and retry the mount with ro flag set.
 		flags |= unix.MS_RDONLY
-		return unix.Mount(m.Source, procfd, m.Device, flags, "")
+		return mount(source, m.Destination, procfd, m.Device, flags, "")
 	})
 }

 // Do the mount operation followed by additional mounts required to take care
 // of propagation flags. This will always be scoped inside the container rootfs.
-func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
+func mountPropagate(m *configs.Mount, rootfs string, mountLabel string, mountFd *int) error {
 	var (
 		data  = label.FormatMountLabel(m.Data, mountLabel)
 		flags = m.Flags
@@ -1067,17 +1101,22 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
 	// mutating underneath us, we verify that we are actually going to mount
 	// inside the container with WithProcfd() -- mounting through a procfd
 	// mounts on the target.
+	source := m.Source
+	if mountFd != nil {
+		source = "/proc/self/fd/" + strconv.Itoa(*mountFd)
+	}
+
 	if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
-		return unix.Mount(m.Source, procfd, m.Device, uintptr(flags), data)
+		return mount(source, m.Destination, procfd, m.Device, uintptr(flags), data)
 	}); err != nil {
-		return fmt.Errorf("mount through procfd: %w", err)
+		return err
 	}
 	// We have to apply mount propagation flags in a separate WithProcfd() call
 	// because the previous call invalidates the passed procfd -- the mount
 	// target needs to be re-opened.
 	if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
 		for _, pflag := range m.PropagationFlags {
-			if err := unix.Mount("", procfd, "", uintptr(pflag), ""); err != nil {
+			if err := mount("", m.Destination, procfd, "", uintptr(pflag), ""); err != nil {
 				return err
 			}
 		}
@@ -1087,3 +1126,18 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
 	}
 	return nil
 }
+
+// setRecAttr applies the attributes in m.RecAttr recursively (AT_RECURSIVE) to
+// the mount at m.Destination, resolved inside rootfs through utils.WithProcfd.
+// It is a no-op when m.RecAttr is nil.
+func setRecAttr(m *configs.Mount, rootfs string) error {
+	if m.RecAttr == nil {
+		return nil
+	}
+	return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
+		if err := unix.MountSetattr(-1, procfd, unix.AT_RECURSIVE, m.RecAttr); err != nil {
+			return &os.PathError{Op: "mount_setattr", Path: m.Destination, Err: err}
+		}
+		return nil
+	})
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go
@@ -2,6 +2,7 @@ package seccomp

 import (
 	"fmt"
+	"sort"

 	"github.com/opencontainers/runc/libcontainer/configs"
 )
@@ -16,13 +17,36 @@ var operators = map[string]configs.Operator{
 	"SCMP_CMP_MASKED_EQ": configs.MaskEqualTo,
 }

+// KnownOperators returns the names of all known comparison operators in
+// sorted order. Used by `runc features`.
+func KnownOperators() []string {
+	names := make([]string, 0, len(operators))
+	for name := range operators {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+	return names
+}
+
 var actions = map[string]configs.Action{
-	"SCMP_ACT_KILL":  configs.Kill,
-	"SCMP_ACT_ERRNO": configs.Errno,
-	"SCMP_ACT_TRAP":  configs.Trap,
-	"SCMP_ACT_ALLOW": configs.Allow,
-	"SCMP_ACT_TRACE": configs.Trace,
-	"SCMP_ACT_LOG":   configs.Log,
+	"SCMP_ACT_KILL":   configs.Kill,
+	"SCMP_ACT_ERRNO":  configs.Errno,
+	"SCMP_ACT_TRAP":   configs.Trap,
+	"SCMP_ACT_ALLOW":  configs.Allow,
+	"SCMP_ACT_TRACE":  configs.Trace,
+	"SCMP_ACT_LOG":    configs.Log,
+	"SCMP_ACT_NOTIFY": configs.Notify,
+}
+
+// KnownActions returns the names of all known actions in sorted order.
+// Used by `runc features`.
+func KnownActions() []string {
+	names := make([]string, 0, len(actions))
+	for name := range actions {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+	return names
 }

 var archs = map[string]string{
@@ -44,6 +68,17 @@ var archs = map[string]string{
 	"SCMP_ARCH_S390X":       "s390x",
 }

+// KnownArchs returns the names of all known archs in sorted order.
+// Used by `runc features`.
+func KnownArchs() []string {
+	names := make([]string, 0, len(archs))
+	for name := range archs {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+	return names
+}
+
 // ConvertStringToOperator converts a string into a Seccomp comparison operator.
 // Comparison operators use the names they are assigned by Libseccomp's header.
 // Attempting to convert a string that is not a valid operator results in an
@@ -56,9 +91,7 @@ func ConvertStringToOperator(in string) (configs.Operator, error) {
 }

 // ConvertStringToAction converts a string into a Seccomp rule match action.
-// Actions use the names they are assigned in Libseccomp's header, though some
-// (notable, SCMP_ACT_TRACE) are not available in this implementation and will
-// return errors.
+// Actions use the names they are assigned in Libseccomp's header.
 // Attempting to convert a string that is not a valid action results in an
 // error.
 func ConvertStringToAction(in string) (configs.Action, error) {
--- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go
@@ -1,23 +1,25 @@
-// +build linux,cgo,seccomp
+//go:build cgo && seccomp
+// +build cgo,seccomp

 package patchbpf

 import (
 	"bytes"
 	"encoding/binary"
+	"errors"
+	"fmt"
 	"io"
 	"os"
 	"runtime"
 	"unsafe"

-	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/opencontainers/runc/libcontainer/utils"
-
-	"github.com/pkg/errors"
 	libseccomp "github.com/seccomp/libseccomp-golang"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/net/bpf"
 	"golang.org/x/sys/unix"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/utils"
 )

 // #cgo pkg-config: libseccomp
@@ -41,6 +43,11 @@ const uintptr_t C_SET_MODE_FILTER = SECCOMP_SET_MODE_FILTER;
 #endif
 const uintptr_t C_FILTER_FLAG_LOG = SECCOMP_FILTER_FLAG_LOG;

+#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
+#	define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
+#endif
+const uintptr_t C_FILTER_FLAG_NEW_LISTENER = SECCOMP_FILTER_FLAG_NEW_LISTENER;
+
 // We use the AUDIT_ARCH_* values because those are the ones used by the kernel
 // and SCMP_ARCH_* sometimes has fake values (such as SCMP_ARCH_X32). But we
 // use <seccomp.h> so we get libseccomp's fallback definitions of AUDIT_ARCH_*.
@@ -85,17 +92,16 @@ loop:
 		// seccomp_export_bpf outputs the program in *host* endian-ness.
 		var insn unix.SockFilter
 		if err := binary.Read(rdr, utils.NativeEndian, &insn); err != nil {
-			switch err {
-			case io.EOF:
+			if errors.Is(err, io.EOF) {
 				// Parsing complete.
 				break loop
-			case io.ErrUnexpectedEOF:
-				// Parsing stopped mid-instruction.
-				return nil, errors.Wrap(err, "program parsing halted mid-instruction")
-			default:
-				// All other errors.
-				return nil, errors.Wrap(err, "parsing instructions")
 			}
+			if errors.Is(err, io.ErrUnexpectedEOF) {
+				// Parsing stopped mid-instruction.
+				return nil, fmt.Errorf("program parsing halted mid-instruction: %w", err)
+			}
+			// All other errors.
+			return nil, fmt.Errorf("error parsing instructions: %w", err)
 		}
 		program = append(program, bpf.RawInstruction{
 			Op: insn.Code,
@@ -110,7 +116,7 @@ loop:
 func disassembleFilter(filter *libseccomp.ScmpFilter) ([]bpf.Instruction, error) {
 	rdr, wtr, err := os.Pipe()
 	if err != nil {
-		return nil, errors.Wrap(err, "creating scratch pipe")
+		return nil, fmt.Errorf("error creating scratch pipe: %w", err)
 	}
 	defer wtr.Close()
 	defer rdr.Close()
@@ -124,23 +130,23 @@ func disassembleFilter(filter *libseccomp.ScmpFilter) ([]bpf.Instruction, error)
 	}()

 	if err := filter.ExportBPF(wtr); err != nil {
-		return nil, errors.Wrap(err, "exporting BPF")
+		return nil, fmt.Errorf("error exporting BPF: %w", err)
 	}
 	// Close so that the reader actually gets EOF.
 	_ = wtr.Close()

 	if copyErr := <-errChan; copyErr != nil {
-		return nil, errors.Wrap(copyErr, "reading from ExportBPF pipe")
+		return nil, fmt.Errorf("error reading from ExportBPF pipe: %w", copyErr)
 	}

 	// Parse the instructions.
 	rawProgram, err := parseProgram(readerBuffer)
 	if err != nil {
-		return nil, errors.Wrap(err, "parsing generated BPF filter")
+		return nil, fmt.Errorf("parsing generated BPF filter: %w", err)
 	}
 	program, ok := bpf.Disassemble(rawProgram)
 	if !ok {
-		return nil, errors.Errorf("could not disassemble entire BPF filter")
+		return nil, errors.New("could not disassemble entire BPF filter")
 	}
 	return program, nil
 }
@@ -155,7 +161,7 @@ func archToNative(arch libseccomp.ScmpArch) (nativeArch, error) {
 		// Convert to actual native architecture.
 		arch, err := libseccomp.GetNativeArch()
 		if err != nil {
-			return invalidArch, errors.Wrap(err, "get native arch")
+			return invalidArch, fmt.Errorf("unable to get native arch: %w", err)
 		}
 		return archToNative(arch)
 	case libseccomp.ArchX86:
@@ -192,7 +198,7 @@ func archToNative(arch libseccomp.ScmpArch) (nativeArch, error) {
 	case libseccomp.ArchS390X:
 		return nativeArch(C.C_AUDIT_ARCH_S390X), nil
 	default:
-		return invalidArch, errors.Errorf("unknown architecture: %v", arch)
+		return invalidArch, fmt.Errorf("unknown architecture: %v", arch)
 	}
 }

@@ -209,7 +215,7 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
 	for _, ociArch := range config.Architectures {
 		arch, err := libseccomp.GetArchFromString(ociArch)
 		if err != nil {
-			return nil, errors.Wrap(err, "validating seccomp architecture")
+			return nil, fmt.Errorf("unable to validate seccomp architecture: %w", err)
 		}

 		// Map native architecture to a real architecture value to avoid
@@ -217,7 +223,7 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
 		if arch == libseccomp.ArchNative {
 			nativeArch, err := libseccomp.GetNativeArch()
 			if err != nil {
-				return nil, errors.Wrap(err, "get native arch")
+				return nil, fmt.Errorf("unable to get native architecture: %w", err)
 			}
 			arch = nativeArch
 		}
@@ -225,7 +231,7 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
 		// Figure out native architecture representation of the architecture.
 		nativeArch, err := archToNative(arch)
 		if err != nil {
-			return nil, errors.Wrapf(err, "cannot map architecture %v to AUDIT_ARCH_ constant", arch)
+			return nil, fmt.Errorf("cannot map architecture %v to AUDIT_ARCH_ constant: %w", arch, err)
 		}

 		if _, ok := lastSyscalls[nativeArch]; !ok {
@@ -370,7 +376,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
 						},
 					}, sectionTail...)
 				default:
-					return nil, errors.Errorf("unknown amd64 native architecture %#x", scmpArch)
+					return nil, fmt.Errorf("unknown amd64 native architecture %#x", scmpArch)
 				}
 			}

@@ -378,16 +384,16 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
 		case 2:
 			// x32 and x86_64 are a unique case, we can't handle any others.
 			if uint32(nativeArch) != uint32(C.C_AUDIT_ARCH_X86_64) {
-				return nil, errors.Errorf("unknown architecture overlap on native arch %#x", nativeArch)
+				return nil, fmt.Errorf("unknown architecture overlap on native arch %#x", nativeArch)
 			}

 			x32sysno, ok := maxSyscalls[libseccomp.ArchX32]
 			if !ok {
-				return nil, errors.Errorf("missing %v in overlapping x86_64 arch: %v", libseccomp.ArchX32, maxSyscalls)
+				return nil, fmt.Errorf("missing %v in overlapping x86_64 arch: %v", libseccomp.ArchX32, maxSyscalls)
 			}
 			x86sysno, ok := maxSyscalls[libseccomp.ArchAMD64]
 			if !ok {
-				return nil, errors.Errorf("missing %v in overlapping x86_64 arch: %v", libseccomp.ArchAMD64, maxSyscalls)
+				return nil, fmt.Errorf("missing %v in overlapping x86_64 arch: %v", libseccomp.ArchAMD64, maxSyscalls)
 			}

 			// The x32 ABI indicates that a syscall is being made by an x32
@@ -448,7 +454,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
 				}...)
 			}
 		default:
-			return nil, errors.Errorf("invalid number of architecture overlaps: %v", len(maxSyscalls))
+			return nil, fmt.Errorf("invalid number of architecture overlaps: %v", len(maxSyscalls))
 		}

 		// Prepend this section to the tail.
@@ -517,7 +523,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
 func assemble(program []bpf.Instruction) ([]unix.SockFilter, error) {
 	rawProgram, err := bpf.Assemble(program)
 	if err != nil {
-		return nil, errors.Wrap(err, "assembling program")
+		return nil, fmt.Errorf("error assembling program: %w", err)
 	}

 	// Convert to []unix.SockFilter for unix.SockFilter.
@@ -547,11 +553,11 @@ func generatePatch(config *configs.Seccomp) ([]bpf.Instruction, error) {

 	lastSyscalls, err := findLastSyscalls(config)
 	if err != nil {
-		return nil, errors.Wrap(err, "finding last syscalls for -ENOSYS stub")
+		return nil, fmt.Errorf("error finding last syscalls for -ENOSYS stub: %w", err)
 	}
 	stubProgram, err := generateEnosysStub(lastSyscalls)
 	if err != nil {
-		return nil, errors.Wrap(err, "generating -ENOSYS stub")
+		return nil, fmt.Errorf("error generating -ENOSYS stub: %w", err)
 	}
 	return stubProgram, nil
 }
@@ -559,12 +565,12 @@ func generatePatch(config *configs.Seccomp) ([]bpf.Instruction, error) {
 func enosysPatchFilter(config *configs.Seccomp, filter *libseccomp.ScmpFilter) ([]unix.SockFilter, error) {
 	program, err := disassembleFilter(filter)
 	if err != nil {
-		return nil, errors.Wrap(err, "disassembling original filter")
+		return nil, fmt.Errorf("error disassembling original filter: %w", err)
 	}

 	patch, err := generatePatch(config)
 	if err != nil {
-		return nil, errors.Wrap(err, "generating patch for filter")
+		return nil, fmt.Errorf("error generating patch for filter: %w", err)
 	}
 	fullProgram := append(patch, program...)

@@ -576,49 +582,61 @@ func enosysPatchFilter(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (

 	fprog, err := assemble(fullProgram)
 	if err != nil {
-		return nil, errors.Wrap(err, "assembling modified filter")
+		return nil, fmt.Errorf("error assembling modified filter: %w", err)
 	}
 	return fprog, nil
 }

-func filterFlags(filter *libseccomp.ScmpFilter) (flags uint, noNewPrivs bool, err error) {
+func filterFlags(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (flags uint, noNewPrivs bool, err error) {
 	// Ignore the error since pre-2.4 libseccomp is treated as API level 0.
-	apiLevel, _ := libseccomp.GetApi()
+	apiLevel, _ := libseccomp.GetAPI()

 	noNewPrivs, err = filter.GetNoNewPrivsBit()
 	if err != nil {
-		return 0, false, errors.Wrap(err, "fetch no_new_privs filter bit")
+		return 0, false, fmt.Errorf("unable to fetch no_new_privs filter bit: %w", err)
 	}

 	if apiLevel >= 3 {
 		if logBit, err := filter.GetLogBit(); err != nil {
-			return 0, false, errors.Wrap(err, "fetch SECCOMP_FILTER_FLAG_LOG bit")
+			return 0, false, fmt.Errorf("unable to fetch SECCOMP_FILTER_FLAG_LOG bit: %w", err)
 		} else if logBit {
 			flags |= uint(C.C_FILTER_FLAG_LOG)
 		}
 	}

 	// TODO: Support seccomp flags not yet added to libseccomp-golang...
+
+	for _, call := range config.Syscalls {
+		if call.Action == configs.Notify {
+			flags |= uint(C.C_FILTER_FLAG_NEW_LISTENER)
+			break
+		}
+	}
+
 	return
 }

-func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (err error) {
+func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (fd int, err error) {
 	fprog := unix.SockFprog{
 		Len:    uint16(len(filter)),
 		Filter: &filter[0],
 	}
+	fd = -1 // only return a valid fd when C_FILTER_FLAG_NEW_LISTENER is set
 	// If no seccomp flags were requested we can use the old-school prctl(2).
 	if flags == 0 {
 		err = unix.Prctl(unix.PR_SET_SECCOMP,
 			unix.SECCOMP_MODE_FILTER,
 			uintptr(unsafe.Pointer(&fprog)), 0, 0)
 	} else {
-		_, _, errno := unix.RawSyscall(unix.SYS_SECCOMP,
+		fdptr, _, errno := unix.RawSyscall(unix.SYS_SECCOMP,
 			uintptr(C.C_SET_MODE_FILTER),
 			uintptr(flags), uintptr(unsafe.Pointer(&fprog)))
 		if errno != 0 {
 			err = errno
 		}
+		if flags&uint(C.C_FILTER_FLAG_NEW_LISTENER) != 0 {
+			fd = int(fdptr)
+		}
 	}
 	runtime.KeepAlive(filter)
 	runtime.KeepAlive(fprog)
@@ -630,17 +648,17 @@ func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (err error) {
 // patches said filter to handle -ENOSYS in a much nicer manner than the
 // default libseccomp default action behaviour, and loads the patched filter
 // into the kernel for the current process.
-func PatchAndLoad(config *configs.Seccomp, filter *libseccomp.ScmpFilter) error {
+func PatchAndLoad(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (int, error) {
 	// Generate a patched filter.
 	fprog, err := enosysPatchFilter(config, filter)
 	if err != nil {
-		return errors.Wrap(err, "patching filter")
+		return -1, fmt.Errorf("error patching filter: %w", err)
 	}

 	// Get the set of libseccomp flags set.
-	seccompFlags, noNewPrivs, err := filterFlags(filter)
+	seccompFlags, noNewPrivs, err := filterFlags(config, filter)
 	if err != nil {
-		return errors.Wrap(err, "fetch seccomp filter flags")
+		return -1, fmt.Errorf("unable to fetch seccomp filter flags: %w", err)
 	}

 	// Set no_new_privs if it was requested, though in runc we handle
@@ -648,13 +666,15 @@ func PatchAndLoad(config *configs.Seccomp, filter *libseccomp.ScmpFilter) error
 	if noNewPrivs {
 		logrus.Warnf("potentially misconfigured filter -- setting no_new_privs in seccomp path")
 		if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
-			return errors.Wrap(err, "enable no_new_privs bit")
+			return -1, fmt.Errorf("error enabling no_new_privs bit: %w", err)
 		}
 	}

 	// Finally, load the filter.
-	if err := sysSeccompSetFilter(seccompFlags, fprog); err != nil {
-		return errors.Wrap(err, "loading seccomp filter")
+	fd, err := sysSeccompSetFilter(seccompFlags, fprog)
+	if err != nil {
+		return -1, fmt.Errorf("error loading seccomp filter: %w", err)
 	}
-	return nil
+
+	return fd, nil
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_unsupported.go
@@ -1,3 +1,4 @@
+//go:build !linux || !cgo || !seccomp
 // +build !linux !cgo !seccomp

 package patchbpf
--- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
@@ -1,4 +1,5 @@
-// +build linux,cgo,seccomp
+//go:build cgo && seccomp
+// +build cgo,seccomp

 package seccomp

@@ -6,19 +7,16 @@ import (
 	"errors"
 	"fmt"

+	libseccomp "github.com/seccomp/libseccomp-golang"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/seccomp/patchbpf"
-
-	libseccomp "github.com/seccomp/libseccomp-golang"
-	"golang.org/x/sys/unix"
 )

 var (
-	actAllow = libseccomp.ActAllow
-	actTrap  = libseccomp.ActTrap
-	actKill  = libseccomp.ActKill
 	actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM))
-	actLog   = libseccomp.ActLog
 	actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM))
 )

@@ -27,77 +25,118 @@ const (
 	syscallMaxArguments int = 6
 )

-// Filters given syscalls in a container, preventing them from being used
-// Started in the container init process, and carried over to all child processes
-// Setns calls, however, require a separate invocation, as they are not children
-// of the init until they join the namespace
-func InitSeccomp(config *configs.Seccomp) error {
+// InitSeccomp installs the seccomp filters to be used in the container as
+// specified in config.
+// Returns the seccomp file descriptor if any of the filters include a
+// SCMP_ACT_NOTIFY action, otherwise returns -1.
+func InitSeccomp(config *configs.Seccomp) (int, error) {
 	if config == nil {
-		return errors.New("cannot initialize Seccomp - nil config passed")
+		return -1, errors.New("cannot initialize Seccomp - nil config passed")
 	}

 	defaultAction, err := getAction(config.DefaultAction, config.DefaultErrnoRet)
 	if err != nil {
-		return errors.New("error initializing seccomp - invalid default action")
+		return -1, errors.New("error initializing seccomp - invalid default action")
+	}
+
+	// Ignore the error since pre-2.4 libseccomp is treated as API level 0.
+	apiLevel, _ := libseccomp.GetAPI()
+	for _, call := range config.Syscalls {
+		if call.Action == configs.Notify {
+			if apiLevel < 6 {
+				return -1, fmt.Errorf("seccomp notify unsupported: API level: got %d, want at least 6. Please try with libseccomp >= 2.5.0 and Linux >= 5.7", apiLevel)
+			}
+
+			// We can't allow the write syscall to notify to the seccomp agent.
+			// After InitSeccomp() is called, we need to call syncParentSeccomp() to write the plain seccomp
+			// fd number, so the parent sends it to the seccomp agent. If we use SCMP_ACT_NOTIFY on write, we
+			// can never write the seccomp fd to the parent, and therefore the seccomp agent never receives
+			// the seccomp fd and runc hangs during initialization.
+			//
+			// Note that read()/close(), which are also used in syncParentSeccomp(), _can_ use SCMP_ACT_NOTIFY.
+			// Because we write the seccomp fd on the pipe to the parent, the parent is able to proceed and
+			// send the seccomp fd to the agent (it is another process and not subject to the seccomp
+			// filter). We will be blocked on read()/close() inside syncParentSeccomp() but if the seccomp
+			// agent allows those syscalls to proceed, initialization works just fine and the agent can
+			// handle future read()/close() syscalls as it wants.
+			if call.Name == "write" {
+				return -1, errors.New("SCMP_ACT_NOTIFY cannot be used for the write syscall")
+			}
+		}
+	}
+
+	// See the comment above on why write is not allowed. The same reason applies, as this can mean handling write too.
+	if defaultAction == libseccomp.ActNotify {
+		return -1, errors.New("SCMP_ACT_NOTIFY cannot be used as default action")
 	}

 	filter, err := libseccomp.NewFilter(defaultAction)
 	if err != nil {
-		return fmt.Errorf("error creating filter: %s", err)
+		return -1, fmt.Errorf("error creating filter: %w", err)
 	}

 	// Add extra architectures
 	for _, arch := range config.Architectures {
 		scmpArch, err := libseccomp.GetArchFromString(arch)
 		if err != nil {
-			return fmt.Errorf("error validating Seccomp architecture: %s", err)
+			return -1, fmt.Errorf("error validating Seccomp architecture: %w", err)
 		}
 		if err := filter.AddArch(scmpArch); err != nil {
-			return fmt.Errorf("error adding architecture to seccomp filter: %s", err)
+			return -1, fmt.Errorf("error adding architecture to seccomp filter: %w", err)
 		}
 	}

 	// Unset no new privs bit
 	if err := filter.SetNoNewPrivsBit(false); err != nil {
-		return fmt.Errorf("error setting no new privileges: %s", err)
+		return -1, fmt.Errorf("error setting no new privileges: %w", err)
 	}

 	// Add a rule for each syscall
 	for _, call := range config.Syscalls {
 		if call == nil {
-			return errors.New("encountered nil syscall while initializing Seccomp")
+			return -1, errors.New("encountered nil syscall while initializing Seccomp")
 		}
+
 		if err := matchCall(filter, call, defaultAction); err != nil {
-			return err
+			return -1, err
 		}
 	}
-	if err := patchbpf.PatchAndLoad(config, filter); err != nil {
-		return fmt.Errorf("error loading seccomp filter into kernel: %s", err)
+
+	seccompFd, err := patchbpf.PatchAndLoad(config, filter)
+	if err != nil {
+		return -1, fmt.Errorf("error loading seccomp filter into kernel: %w", err)
 	}
-	return nil
+
+	return seccompFd, nil
 }

 // Convert Libcontainer Action to Libseccomp ScmpAction
 func getAction(act configs.Action, errnoRet *uint) (libseccomp.ScmpAction, error) {
 	switch act {
 	case configs.Kill:
-		return actKill, nil
+		return libseccomp.ActKill, nil
 	case configs.Errno:
 		if errnoRet != nil {
 			return libseccomp.ActErrno.SetReturnCode(int16(*errnoRet)), nil
 		}
 		return actErrno, nil
 	case configs.Trap:
-		return actTrap, nil
+		return libseccomp.ActTrap, nil
 	case configs.Allow:
-		return actAllow, nil
+		return libseccomp.ActAllow, nil
 	case configs.Trace:
 		if errnoRet != nil {
 			return libseccomp.ActTrace.SetReturnCode(int16(*errnoRet)), nil
 		}
 		return actTrace, nil
 	case configs.Log:
-		return actLog, nil
+		return libseccomp.ActLog, nil
+	case configs.Notify:
+		return libseccomp.ActNotify, nil
+	case configs.KillThread:
+		return libseccomp.ActKillThread, nil
+	case configs.KillProcess:
+		return libseccomp.ActKillProcess, nil
 	default:
 		return libseccomp.ActInvalid, errors.New("invalid action, cannot use in rule")
 	}
@@ -162,17 +201,18 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libs
 		return nil
 	}

-	// If we can't resolve the syscall, assume it's not supported on this kernel
-	// Ignore it, don't error out
+	// If we can't resolve the syscall, assume it is not supported
+	// by this kernel. Log it at debug level, don't error out.
 	callNum, err := libseccomp.GetSyscallFromName(call.Name)
 	if err != nil {
+		logrus.Debugf("unknown seccomp syscall %q ignored", call.Name)
 		return nil
 	}

 	// Unconditional match - just add the rule
 	if len(call.Args) == 0 {
 		if err := filter.AddRule(callNum, callAct); err != nil {
-			return fmt.Errorf("error adding seccomp filter rule for syscall %s: %s", call.Name, err)
+			return fmt.Errorf("error adding seccomp filter rule for syscall %s: %w", call.Name, err)
 		}
 	} else {
 		// If two or more arguments have the same condition,
@@ -183,7 +223,7 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libs
 		for _, cond := range call.Args {
 			newCond, err := getCondition(cond)
 			if err != nil {
-				return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %s", call.Name, err)
+				return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %w", call.Name, err)
 			}

 			argCounts[cond.Index] += 1
@@ -206,14 +246,14 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libs
 				condArr := []libseccomp.ScmpCondition{cond}

 				if err := filter.AddRuleConditional(callNum, callAct, condArr); err != nil {
-					return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err)
+					return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err)
 				}
 			}
 		} else {
 			// No conditions share same argument
 			// Use new, proper behavior
 			if err := filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
-				return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err)
+				return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err)
 			}
 		}
 	}
@@ -225,3 +265,6 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libs
 func Version() (uint, uint, uint) {
 	return libseccomp.GetLibraryVersion()
 }
+
+// Enabled is true if seccomp support is compiled in.
+const Enabled = true
--- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go
@@ -1,3 +1,4 @@
+//go:build !linux || !cgo || !seccomp
 // +build !linux !cgo !seccomp

 package seccomp
@@ -11,14 +12,17 @@ import (
 var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported")

 // InitSeccomp does nothing because seccomp is not supported.
-func InitSeccomp(config *configs.Seccomp) error {
+func InitSeccomp(config *configs.Seccomp) (int, error) {
 	if config != nil {
-		return ErrSeccompNotEnabled
+		return -1, ErrSeccompNotEnabled
 	}
-	return nil
+	return -1, nil
 }

 // Version returns major, minor, and micro.
 func Version() (uint, uint, uint) {
 	return 0, 0, 0
 }
+
+// Enabled is true if seccomp support is compiled in.
+const Enabled = false
--- a/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
@@ -1,19 +1,19 @@
-// +build linux
-
 package libcontainer

 import (
+	"errors"
+	"fmt"
 	"os"
-	"runtime"
+	"strconv"
+
+	"github.com/opencontainers/selinux/go-selinux"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"

 	"github.com/opencontainers/runc/libcontainer/apparmor"
 	"github.com/opencontainers/runc/libcontainer/keys"
 	"github.com/opencontainers/runc/libcontainer/seccomp"
 	"github.com/opencontainers/runc/libcontainer/system"
-	"github.com/opencontainers/selinux/go-selinux"
-	"github.com/pkg/errors"
-	"github.com/sirupsen/logrus"
-	"golang.org/x/sys/unix"
 )

 // linuxSetnsInit performs the container's initialization for running a new process
@@ -30,9 +30,6 @@ func (l *linuxSetnsInit) getSessionRingName() string {
 }

 func (l *linuxSetnsInit) Init() error {
-	runtime.LockOSThread()
-	defer runtime.UnlockOSThread()
-
 	if !l.config.Config.NoNewKeyring {
 		if err := selinux.SetKeyLabel(l.config.ProcessLabel); err != nil {
 			return err
@@ -44,8 +41,8 @@ func (l *linuxSetnsInit) Init() error {
 			// don't bail on ENOSYS.
 			//
 			// TODO(cyphar): And we should have logging here too.
-			if errors.Cause(err) != unix.ENOSYS {
-				return errors.Wrap(err, "join session keyring")
+			if !errors.Is(err, unix.ENOSYS) {
+				return fmt.Errorf("unable to join session keyring: %w", err)
 			}
 		}
 	}
@@ -70,7 +67,12 @@ func (l *linuxSetnsInit) Init() error {
 	// do this before dropping capabilities; otherwise do it as late as possible
 	// just before execve so as few syscalls take place after it as possible.
 	if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
-		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
+		seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp)
+		if err != nil {
+			return err
+		}
+
+		if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
 			return err
 		}
 	}
@@ -84,14 +86,19 @@ func (l *linuxSetnsInit) Init() error {
 	// place afterward (reducing the amount of syscalls that users need to
 	// enable in their seccomp profiles).
 	if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
-		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
-			return newSystemErrorWithCause(err, "init seccomp")
+		seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp)
+		if err != nil {
+			return fmt.Errorf("unable to init seccomp: %w", err)
+		}
+
+		if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
+			return err
 		}
 	}
 	logrus.Debugf("setns_init: about to exec")
 	// Close the log pipe fd so the parent's ForwardLogs can exit.
 	if err := unix.Close(l.logFd); err != nil {
-		return newSystemErrorWithCause(err, "closing log pipe fd")
+		return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err}
 	}

 	return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
--- a/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go
@@ -1,27 +0,0 @@
-package stacktrace
-
-import "runtime"
-
-// Capture captures a stacktrace for the current calling go program
-//
-// skip is the number of frames to skip
-func Capture(userSkip int) Stacktrace {
-	var (
-		skip   = userSkip + 1 // add one for our own function
-		frames []Frame
-		prevPc uintptr
-	)
-	for i := skip; ; i++ {
-		pc, file, line, ok := runtime.Caller(i)
-		// detect if caller is repeated to avoid loop, gccgo
-		// currently runs  into a loop without this check
-		if !ok || pc == prevPc {
-			break
-		}
-		frames = append(frames, NewFrame(pc, file, line))
-		prevPc = pc
-	}
-	return Stacktrace{
-		Frames: frames,
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go
@@ -1,38 +0,0 @@
-package stacktrace
-
-import (
-	"path/filepath"
-	"runtime"
-	"strings"
-)
-
-// NewFrame returns a new stack frame for the provided information
-func NewFrame(pc uintptr, file string, line int) Frame {
-	fn := runtime.FuncForPC(pc)
-	if fn == nil {
-		return Frame{}
-	}
-	pack, name := parseFunctionName(fn.Name())
-	return Frame{
-		Line:     line,
-		File:     filepath.Base(file),
-		Package:  pack,
-		Function: name,
-	}
-}
-
-func parseFunctionName(name string) (string, string) {
-	i := strings.LastIndex(name, ".")
-	if i == -1 {
-		return "", name
-	}
-	return name[:i], name[i+1:]
-}
-
-// Frame contains all the information for a stack frame within a go program
-type Frame struct {
-	File     string
-	Function string
-	Package  string
-	Line     int
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go
@@ -1,5 +0,0 @@
-package stacktrace
-
-type Stacktrace struct {
-	Frames []Frame
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
@@ -1,23 +1,22 @@
-// +build linux
-
 package libcontainer

 import (
+	"errors"
+	"fmt"
 	"os"
 	"os/exec"
-	"runtime"
 	"strconv"

+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/opencontainers/selinux/go-selinux"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+
 	"github.com/opencontainers/runc/libcontainer/apparmor"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/keys"
 	"github.com/opencontainers/runc/libcontainer/seccomp"
 	"github.com/opencontainers/runc/libcontainer/system"
-	"github.com/opencontainers/runtime-spec/specs-go"
-	"github.com/opencontainers/selinux/go-selinux"
-	"github.com/pkg/errors"
-	"github.com/sirupsen/logrus"
-	"golang.org/x/sys/unix"
 )

 type linuxStandardInit struct {
@@ -26,6 +25,7 @@ type linuxStandardInit struct {
 	parentPid     int
 	fifoFd        int
 	logFd         int
+	mountFds      []int
 	config        *initConfig
 }

@@ -46,8 +46,6 @@ func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
 }

 func (l *linuxStandardInit) Init() error {
-	runtime.LockOSThread()
-	defer runtime.UnlockOSThread()
 	if !l.config.Config.NoNewKeyring {
 		if err := selinux.SetKeyLabel(l.config.ProcessLabel); err != nil {
 			return err
@@ -65,15 +63,15 @@ func (l *linuxStandardInit) Init() error {
 			//
 			// TODO(cyphar): Log this so people know what's going on, once we
 			//               have proper logging in 'runc init'.
-			if errors.Cause(err) != unix.ENOSYS {
-				return errors.Wrap(err, "join session keyring")
+			if !errors.Is(err, unix.ENOSYS) {
+				return fmt.Errorf("unable to join session keyring: %w", err)
 			}
 		} else {
-			// Make session keyring searcheable. If we've gotten this far we
+			// Make session keyring searchable. If we've gotten this far we
 			// bail on any error -- we don't want to have a keyring with bad
 			// permissions.
 			if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
-				return errors.Wrap(err, "mod keyring permissions")
+				return fmt.Errorf("unable to mod keyring permissions: %w", err)
 			}
 		}
 	}
@@ -87,9 +85,23 @@ func (l *linuxStandardInit) Init() error {

 	// initialises the labeling system
 	selinux.GetEnabled()
-	if err := prepareRootfs(l.pipe, l.config); err != nil {
+
+	// We don't need the mountFds once prepareRootfs() returns, whether or not it fails.
+	err := prepareRootfs(l.pipe, l.config, l.mountFds)
+	for _, m := range l.mountFds {
+		if m == -1 {
+			continue
+		}
+
+		if err := unix.Close(m); err != nil {
+			return fmt.Errorf("Unable to close mountFds fds: %w", err)
+		}
+	}
+
+	if err != nil {
 		return err
 	}
+
 	// Set up the console. This has to be done *before* we finalize the rootfs,
 	// but *after* we've given the user the chance to set up all of the mounts
 	// they wanted.
@@ -98,7 +110,7 @@ func (l *linuxStandardInit) Init() error {
 			return err
 		}
 		if err := system.Setctty(); err != nil {
-			return errors.Wrap(err, "setctty")
+			return &os.SyscallError{Syscall: "ioctl(setctty)", Err: err}
 		}
 	}

@@ -111,52 +123,57 @@ func (l *linuxStandardInit) Init() error {

 	if hostname := l.config.Config.Hostname; hostname != "" {
 		if err := unix.Sethostname([]byte(hostname)); err != nil {
-			return errors.Wrap(err, "sethostname")
+			return &os.SyscallError{Syscall: "sethostname", Err: err}
 		}
 	}
 	if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
-		return errors.Wrap(err, "apply apparmor profile")
+		return fmt.Errorf("unable to apply apparmor profile: %w", err)
 	}

 	for key, value := range l.config.Config.Sysctl {
 		if err := writeSystemProperty(key, value); err != nil {
-			return errors.Wrapf(err, "write sysctl key %s", key)
+			return err
 		}
 	}
 	for _, path := range l.config.Config.ReadonlyPaths {
 		if err := readonlyPath(path); err != nil {
-			return errors.Wrapf(err, "readonly path %s", path)
+			return fmt.Errorf("can't make %q read-only: %w", path, err)
 		}
 	}
 	for _, path := range l.config.Config.MaskPaths {
 		if err := maskPath(path, l.config.Config.MountLabel); err != nil {
-			return errors.Wrapf(err, "mask path %s", path)
+			return fmt.Errorf("can't mask path %s: %w", path, err)
 		}
 	}
 	pdeath, err := system.GetParentDeathSignal()
 	if err != nil {
-		return errors.Wrap(err, "get pdeath signal")
+		return fmt.Errorf("can't get pdeath signal: %w", err)
 	}
 	if l.config.NoNewPrivileges {
 		if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
-			return errors.Wrap(err, "set nonewprivileges")
+			return &os.SyscallError{Syscall: "prctl(SET_NO_NEW_PRIVS)", Err: err}
 		}
 	}
 	// Tell our parent that we're ready to Execv. This must be done before the
 	// Seccomp rules have been applied, because we need to be able to read and
 	// write to a socket.
 	if err := syncParentReady(l.pipe); err != nil {
-		return errors.Wrap(err, "sync ready")
+		return fmt.Errorf("sync ready: %w", err)
 	}
 	if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil {
-		return errors.Wrap(err, "set process label")
+		return fmt.Errorf("can't set process label: %w", err)
 	}
 	defer selinux.SetExecLabel("") //nolint: errcheck
 	// Without NoNewPrivileges seccomp is a privileged operation, so we need to
 	// do this before dropping capabilities; otherwise do it as late as possible
 	// just before execve so as few syscalls take place after it as possible.
 	if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
-		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
+		seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp)
+		if err != nil {
+			return err
+		}
+
+		if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
 			return err
 		}
 	}
@@ -166,7 +183,7 @@ func (l *linuxStandardInit) Init() error {
 	// finalizeNamespace can change user/group which clears the parent death
 	// signal, so we restore it here.
 	if err := pdeath.Restore(); err != nil {
-		return errors.Wrap(err, "restore pdeath signal")
+		return fmt.Errorf("can't restore pdeath signal: %w", err)
 	}
 	// Compare the parent from the initial start of the init process and make
 	// sure that it did not change.  if the parent changes that means it died
@@ -181,26 +198,43 @@ func (l *linuxStandardInit) Init() error {
 	if err != nil {
 		return err
 	}
+	// Set seccomp as close to execve as possible, so as few syscalls take
+	// place afterward (reducing the amount of syscalls that users need to
+	// enable in their seccomp profiles). However, this needs to be done
+	// before closing the pipe since we need it to pass the seccompFd to
+	// the parent.
+	if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
+		seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp)
+		if err != nil {
+			return fmt.Errorf("unable to init seccomp: %w", err)
+		}
+
+		if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
+			return err
+		}
+	}
 	// Close the pipe to signal that we have completed our init.
 	logrus.Debugf("init: closing the pipe to signal completion")
 	_ = l.pipe.Close()

 	// Close the log pipe fd so the parent's ForwardLogs can exit.
 	if err := unix.Close(l.logFd); err != nil {
-		return newSystemErrorWithCause(err, "closing log pipe fd")
+		return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err}
 	}

 	// Wait for the FIFO to be opened on the other side before exec-ing the
 	// user process. We open it through /proc/self/fd/$fd, because the fd that
 	// was given to us was an O_PATH fd to the fifo itself. Linux allows us to
 	// re-open an O_PATH fd through /proc.
-	fd, err := unix.Open("/proc/self/fd/"+strconv.Itoa(l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0)
+	fifoPath := "/proc/self/fd/" + strconv.Itoa(l.fifoFd)
+	fd, err := unix.Open(fifoPath, unix.O_WRONLY|unix.O_CLOEXEC, 0)
 	if err != nil {
-		return newSystemErrorWithCause(err, "open exec fifo")
+		return &os.PathError{Op: "open exec fifo", Path: fifoPath, Err: err}
 	}
 	if _, err := unix.Write(fd, []byte("0")); err != nil {
-		return newSystemErrorWithCause(err, "write 0 exec fifo")
+		return &os.PathError{Op: "write exec fifo", Path: fifoPath, Err: err}
 	}
+
 	// Close the O_PATH fifofd fd before exec because the kernel resets
 	// dumpable in the wrong order. This has been fixed in newer kernels, but
 	// we keep this to ensure CVE-2016-9962 doesn't re-emerge on older kernels.
@@ -208,14 +242,6 @@ func (l *linuxStandardInit) Init() error {
 	// since been resolved.
 	// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
 	_ = unix.Close(l.fifoFd)
-	// Set seccomp as close to execve as possible, so as few syscalls take
-	// place afterward (reducing the amount of syscalls that users need to
-	// enable in their seccomp profiles).
-	if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
-		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
-			return newSystemErrorWithCause(err, "init seccomp")
-		}
-	}

 	s := l.config.SpecState
 	s.Pid = unix.Getpid()
@@ -224,8 +250,5 @@ func (l *linuxStandardInit) Init() error {
 		return err
 	}

-	if err := system.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
-		return newSystemErrorWithCause(err, "exec user process")
-	}
-	return nil
+	return system.Exec(name, l.config.Args[0:], os.Environ())
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
@@ -1,5 +1,3 @@
-// +build linux
-
 package libcontainer

 import (
@@ -117,7 +115,7 @@ func (r *runningState) transition(s containerState) error {
 	switch s.(type) {
 	case *stoppedState:
 		if r.c.runType() == Running {
-			return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped)
+			return ErrRunning
 		}
 		r.c.state = s
 		return nil
@@ -132,7 +130,7 @@ func (r *runningState) transition(s containerState) error {

 func (r *runningState) destroy() error {
 	if r.c.runType() == Running {
-		return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
+		return ErrRunning
 	}
 	return destroy(r.c)
 }
@@ -190,7 +188,7 @@ func (p *pausedState) destroy() error {
 		}
 		return destroy(p.c)
 	}
-	return newGenericError(fmt.Errorf("container is paused"), ContainerPaused)
+	return ErrPaused
 }

 // restoredState is the same as the running state but also has associated checkpoint
--- a/Show More
+++ b/Show More