Bump runc to d5b4a3e

This fixes a race condition in runc/systemd at container creation time (opencontainers/runc#1683). Signed-off-by: vikaschoudhary16 <vichoudh@redhat.com>
2018-01-10 19:20:21 -05:00
parent 81192eafd5
commit 4711bccd05
101 changed files with 3694 additions and 1709 deletions
--- a/vendor/github.com/opencontainers/runc/libcontainer/BUILD
+++ b/vendor/github.com/opencontainers/runc/libcontainer/BUILD
@@ -3,7 +3,6 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
 go_library(
    name = "go_default_library",
    srcs = [
-        "console.go",
        "container.go",
        "error.go",
        "factory.go",
@@ -12,13 +11,8 @@ go_library(
        "stats.go",
        "sync.go",
    ] + select({
-        "@io_bazel_rules_go//go/platform:freebsd": [
-            "console_freebsd.go",
-            "stats_freebsd.go",
-        ],
        "@io_bazel_rules_go//go/platform:linux": [
            "capabilities_linux.go",
-            "compat_1.5_linux.go",
            "console_linux.go",
            "container_linux.go",
            "criu_opts_linux.go",
@@ -30,23 +24,11 @@ go_library(
            "process_linux.go",
            "restored_process.go",
            "rootfs_linux.go",
-            "setgroups_linux.go",
            "setns_init_linux.go",
            "standard_init_linux.go",
            "state_linux.go",
            "stats_linux.go",
        ],
-        "@io_bazel_rules_go//go/platform:solaris": [
-            "console_solaris.go",
-            "container_solaris.go",
-            "stats_solaris.go",
-        ],
-        "@io_bazel_rules_go//go/platform:windows": [
-            "console_windows.go",
-            "container_windows.go",
-            "criu_opts_windows.go",
-            "stats_windows.go",
-        ],
        "//conditions:default": [],
    }),
    importpath = "github.com/opencontainers/runc/libcontainer",
@@ -57,18 +39,19 @@ go_library(
        "//vendor/github.com/opencontainers/runc/libcontainer/utils:go_default_library",
    ] + select({
        "@io_bazel_rules_go//go/platform:linux": [
-            "//vendor/github.com/docker/docker/pkg/mount:go_default_library",
-            "//vendor/github.com/docker/docker/pkg/symlink:go_default_library",
+            "//vendor/github.com/containerd/console:go_default_library",
+            "//vendor/github.com/cyphar/filepath-securejoin:go_default_library",
            "//vendor/github.com/golang/protobuf/proto:go_default_library",
            "//vendor/github.com/mrunalp/fileutils:go_default_library",
            "//vendor/github.com/opencontainers/runc/libcontainer/apparmor:go_default_library",
            "//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
            "//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
-            "//vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless:go_default_library",
            "//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",
            "//vendor/github.com/opencontainers/runc/libcontainer/configs/validate:go_default_library",
            "//vendor/github.com/opencontainers/runc/libcontainer/criurpc:go_default_library",
+            "//vendor/github.com/opencontainers/runc/libcontainer/intelrdt:go_default_library",
            "//vendor/github.com/opencontainers/runc/libcontainer/keys:go_default_library",
+            "//vendor/github.com/opencontainers/runc/libcontainer/mount:go_default_library",
            "//vendor/github.com/opencontainers/runc/libcontainer/seccomp:go_default_library",
            "//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
            "//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
@@ -98,7 +81,9 @@ filegroup(
        "//vendor/github.com/opencontainers/runc/libcontainer/cgroups:all-srcs",
        "//vendor/github.com/opencontainers/runc/libcontainer/configs:all-srcs",
        "//vendor/github.com/opencontainers/runc/libcontainer/criurpc:all-srcs",
+        "//vendor/github.com/opencontainers/runc/libcontainer/intelrdt:all-srcs",
        "//vendor/github.com/opencontainers/runc/libcontainer/keys:all-srcs",
+        "//vendor/github.com/opencontainers/runc/libcontainer/mount:all-srcs",
        "//vendor/github.com/opencontainers/runc/libcontainer/seccomp:all-srcs",
        "//vendor/github.com/opencontainers/runc/libcontainer/stacktrace:all-srcs",
        "//vendor/github.com/opencontainers/runc/libcontainer/system:all-srcs",
--- a/vendor/github.com/opencontainers/runc/libcontainer/SPEC.md
+++ b/vendor/github.com/opencontainers/runc/libcontainer/SPEC.md
@@ -154,6 +154,90 @@ that no processes or threads escape the cgroups.  This sync is
 done via a pipe ( specified in the runtime section below ) that the container's
 init process will block waiting for the parent to finish setup.

+### IntelRdt
+
+Intel platforms with newer Xeon CPUs support Intel Resource Director Technology
+(RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which
+currently supports L3 cache resource allocation.
+
+This feature provides a way for the software to restrict cache allocation to a
+defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
+The different subsets are identified by class of service (CLOS) and each CLOS
+has a capacity bitmask (CBM).
+
+It can be used to handle L3 cache resource allocation for containers if
+hardware and kernel support Intel RDT/CAT.
+
+In Linux kernel 4.10 or newer, the interface is defined and exposed via the
+"resource control" filesystem, which is a "cgroup-like" interface.
+
+Compared with cgroups, it has a similar process management lifecycle and
+interfaces in a container. But unlike cgroups' hierarchy, it has a single-level
+filesystem layout.
+
+Intel RDT "resource control" filesystem hierarchy:
+```
+mount -t resctrl resctrl /sys/fs/resctrl
+tree /sys/fs/resctrl
+/sys/fs/resctrl/
+|-- info
+|   |-- L3
+|       |-- cbm_mask
+|       |-- min_cbm_bits
+|       |-- num_closids
+|-- cpus
+|-- schemata
+|-- tasks
+|-- <container_id>
+    |-- cpus
+    |-- schemata
+    |-- tasks
+
+```
+
+For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
+resource constraints.
+
+The file `tasks` has a list of tasks that belong to this group (e.g., the
+"<container_id>" group). Tasks can be added to a group by writing the task ID
+to the "tasks" file (which will automatically remove them from the previous
+group to which they belonged). New tasks created by fork(2) and clone(2) are
+added to the same group as their parent. If a pid is not in any sub group, it
+is in the root group.
+
+The file `schemata` has allocation masks/values for L3 cache on each socket,
+which contains L3 cache id and capacity bitmask (CBM).
+```
+	Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
+```
+For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`,
+which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
+
+A valid L3 cache CBM is a *contiguous set of bits*, and the number of bits that
+can be set is no more than the max bits. The max bits in the CBM varies among
+supported Intel Xeon platforms. In the Intel RDT "resource control" filesystem
+layout, the CBM in a group should be a subset of the CBM in root. The kernel will
+check if it is valid when writing. E.g., 0xfffff in root indicates the max bits
+of CBM is 20 bits, which maps to the entire L3 cache capacity. Some valid CBM
+values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc.
+
+For more information about Intel RDT/CAT kernel interface:  
+https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
+
+An example for runc:
+```
+Consider a two-socket machine with two L3 caches where the default CBM is
+0xfffff and the max CBM length is 20 bits. With this configuration, tasks
+inside the container only have access to the "upper" 80% of L3 cache id 0 and
+the "lower" 50% of L3 cache id 1:
+
+"linux": {
+	"intelRdt": {
+		"l3CacheSchema": "L3:0=ffff0;1=3ff"
+	}
+}
+```
+
 ### Security 

 The standard set of Linux capabilities that are set in a container
@@ -306,7 +390,7 @@ a container.
 | Exec           | Execute a new process inside of the container  ( requires setns )  |
 | Set            | Setup configs of the container after it's created                  |

-### Execute a new process inside of a running container.
+### Execute a new process inside of a running container

 User can execute a new process inside of a running container. Any binaries to be
 executed must be accessible within the container's rootfs.
--- a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/BUILD
+++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/BUILD
@@ -38,7 +38,6 @@ go_library(
        ],
        "//conditions:default": [],
    }),
-    cgo = True,
    importpath = "github.com/opencontainers/runc/libcontainer/apparmor",
    visibility = ["//visibility:public"],
 )
--- a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go
@@ -2,15 +2,10 @@

 package apparmor

-// #cgo LDFLAGS: -lapparmor
-// #include <sys/apparmor.h>
-// #include <stdlib.h>
-import "C"
 import (
 	"fmt"
 	"io/ioutil"
 	"os"
-	"unsafe"
 )

 // IsEnabled returns true if apparmor is enabled for the host.
@@ -24,16 +19,36 @@ func IsEnabled() bool {
 	return false
 }

+func setprocattr(attr, value string) error {
+	// Under AppArmor you can only change your own attr, so use /proc/self/
+	// instead of /proc/<tid>/ like libapparmor does
+	path := fmt.Sprintf("/proc/self/attr/%s", attr)
+
+	f, err := os.OpenFile(path, os.O_WRONLY, 0)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	_, err = fmt.Fprintf(f, "%s", value)
+	return err
+}
+
+// changeOnExec reimplements aa_change_onexec from libapparmor in Go
+func changeOnExec(name string) error {
+	value := "exec " + name
+	if err := setprocattr("exec", value); err != nil {
+		return fmt.Errorf("apparmor failed to apply profile: %s", err)
+	}
+	return nil
+}
+
 // ApplyProfile will apply the profile with the specified name to the process after
 // the next exec.
 func ApplyProfile(name string) error {
 	if name == "" {
 		return nil
 	}
-	cName := C.CString(name)
-	defer C.free(unsafe.Pointer(cName))
-	if _, err := C.aa_change_onexec(cName); err != nil {
-		return fmt.Errorf("apparmor failed to apply profile: %s", err)
-	}
-	return nil
+
+	return changeOnExec(name)
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/BUILD
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/BUILD
@@ -63,7 +63,6 @@ filegroup(
    srcs = [
        ":package-srcs",
        "//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:all-srcs",
-        "//vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless:all-srcs",
        "//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:all-srcs",
    ],
    tags = ["automanaged"],
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
@@ -145,8 +145,17 @@ func (m *Manager) Apply(pid int) (err error) {
 		m.Paths[sys.Name()] = p

 		if err := sys.Apply(d); err != nil {
+			if os.IsPermission(err) && m.Cgroups.Path == "" {
+				// If we didn't set a cgroup path, then let's defer the error here
+				// until we know whether we have set limits or not.
+				// If we hadn't set limits, then it's ok that we couldn't join this cgroup, because
+				// it will have the same limits as its parent.
+				delete(m.Paths, sys.Name())
+				continue
+			}
 			return err
 		}
+
 	}
 	return nil
 }
@@ -198,6 +207,10 @@ func (m *Manager) Set(container *configs.Config) error {
 	for _, sys := range subsystems {
 		path := paths[sys.Name()]
 		if err := sys.Set(path, container.Cgroups); err != nil {
+			if path == "" {
+				// cgroup never applied
+				return fmt.Errorf("cannot set limits on the %s cgroup, as the container has not joined it", sys.Name())
+			}
 			return err
 		}
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
@@ -29,11 +29,15 @@ func (s *FreezerGroup) Apply(d *cgroupData) error {
 func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
 	switch cgroup.Resources.Freezer {
 	case configs.Frozen, configs.Thawed:
-		if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
-			return err
-		}
-
 		for {
+			// In case this loop does not exit because it doesn't get the expected
+			// state, let's write again this state, hoping it's going to be properly
+			// set this time. Otherwise, this loop could run infinitely, waiting for
+			// a state change that would never happen.
+			if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
+				return err
+			}
+
 			state, err := readFile(path, "freezer.state")
 			if err != nil {
 				return err
@@ -41,6 +45,7 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
 			if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
 				break
 			}
+
 			time.Sleep(1 * time.Millisecond)
 		}
 	case configs.Undefined:
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go
@@ -1,128 +0,0 @@
-// +build linux
-
-package rootless
-
-import (
-	"fmt"
-
-	"github.com/opencontainers/runc/libcontainer/cgroups"
-	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
-	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/opencontainers/runc/libcontainer/configs/validate"
-)
-
-// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
-//       needlessly. We should probably export this list.
-
-var subsystems = []subsystem{
-	&fs.CpusetGroup{},
-	&fs.DevicesGroup{},
-	&fs.MemoryGroup{},
-	&fs.CpuGroup{},
-	&fs.CpuacctGroup{},
-	&fs.PidsGroup{},
-	&fs.BlkioGroup{},
-	&fs.HugetlbGroup{},
-	&fs.NetClsGroup{},
-	&fs.NetPrioGroup{},
-	&fs.PerfEventGroup{},
-	&fs.FreezerGroup{},
-	&fs.NameGroup{GroupName: "name=systemd"},
-}
-
-type subsystem interface {
-	// Name returns the name of the subsystem.
-	Name() string
-
-	// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
-	GetStats(path string, stats *cgroups.Stats) error
-}
-
-// The noop cgroup manager is used for rootless containers, because we currently
-// cannot manage cgroups if we are in a rootless setup. This manager is chosen
-// by factory if we are in rootless mode. We error out if any cgroup options are
-// set in the config -- this may change in the future with upcoming kernel features
-// like the cgroup namespace.
-
-type Manager struct {
-	Cgroups *configs.Cgroup
-	Paths   map[string]string
-}
-
-func (m *Manager) Apply(pid int) error {
-	// If there are no cgroup settings, there's nothing to do.
-	if m.Cgroups == nil {
-		return nil
-	}
-
-	// We can't set paths.
-	// TODO(cyphar): Implement the case where the runner of a rootless container
-	//               owns their own cgroup, which would allow us to set up a
-	//               cgroup for each path.
-	if m.Cgroups.Paths != nil {
-		return fmt.Errorf("cannot change cgroup path in rootless container")
-	}
-
-	// We load the paths into the manager.
-	paths := make(map[string]string)
-	for _, sys := range subsystems {
-		name := sys.Name()
-
-		path, err := cgroups.GetOwnCgroupPath(name)
-		if err != nil {
-			// Ignore paths we couldn't resolve.
-			continue
-		}
-
-		paths[name] = path
-	}
-
-	m.Paths = paths
-	return nil
-}
-
-func (m *Manager) GetPaths() map[string]string {
-	return m.Paths
-}
-
-func (m *Manager) Set(container *configs.Config) error {
-	// We have to re-do the validation here, since someone might decide to
-	// update a rootless container.
-	return validate.New().Validate(container)
-}
-
-func (m *Manager) GetPids() ([]int, error) {
-	dir, err := cgroups.GetOwnCgroupPath("devices")
-	if err != nil {
-		return nil, err
-	}
-	return cgroups.GetPids(dir)
-}
-
-func (m *Manager) GetAllPids() ([]int, error) {
-	dir, err := cgroups.GetOwnCgroupPath("devices")
-	if err != nil {
-		return nil, err
-	}
-	return cgroups.GetAllPids(dir)
-}
-
-func (m *Manager) GetStats() (*cgroups.Stats, error) {
-	// TODO(cyphar): We can make this work if we figure out a way to allow usage
-	//               of cgroups with a rootless container. While this doesn't
-	//               actually require write access to a cgroup directory, the
-	//               statistics are not useful if they can be affected by
-	//               non-container processes.
-	return nil, fmt.Errorf("cannot get cgroup stats in rootless container")
-}
-
-func (m *Manager) Freeze(state configs.FreezerState) error {
-	// TODO(cyphar): We can make this work if we figure out a way to allow usage
-	//               of cgroups with a rootless container.
-	return fmt.Errorf("cannot use freezer cgroup in rootless container")
-}
-
-func (m *Manager) Destroy() error {
-	// We don't have to do anything here because we didn't do any setup.
-	return nil
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go
@@ -1,4 +1,4 @@
-// +build !linux
+// +build !linux static_build

 package systemd

@@ -43,7 +43,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
 }

 func (m *Manager) Set(container *configs.Config) error {
-	return nil, fmt.Errorf("Systemd not supported")
+	return fmt.Errorf("Systemd not supported")
 }

 func (m *Manager) Freeze(state configs.FreezerState) error {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
@@ -1,4 +1,4 @@
-// +build linux
+// +build linux,!static_build

 package systemd

@@ -271,6 +271,13 @@ func (m *Manager) Apply(pid int) error {
 	// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
 	if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
 		cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
+		// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
+		// (integer percentage of CPU) internally.  This means that if a fractional percent of
+		// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
+		// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
+		if cpuQuotaPerSecUSec%10000 != 0 {
+			cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
+		}
 		properties = append(properties,
 			newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
 	}
@@ -288,10 +295,13 @@ func (m *Manager) Apply(pid int) error {
 		}
 	}

-	if _, err := theConn.StartTransientUnit(unitName, "replace", properties, nil); err != nil && !isUnitExists(err) {
+	statusChan := make(chan string)
+	if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err != nil && !isUnitExists(err) {
 		return err
 	}

+	<-statusChan
+
 	if err := joinCgroups(c, pid); err != nil {
 		return err
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go
@@ -1,10 +0,0 @@
-// +build linux,!go1.5
-
-package libcontainer
-
-import "syscall"
-
-// GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building
-// with earlier versions
-func enableSetgroups(sys *syscall.SysProcAttr) {
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/BUILD
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/BUILD
@@ -7,28 +7,25 @@ go_library(
        "config.go",
        "device.go",
        "hugepage_limit.go",
+        "intelrdt.go",
        "interface_priority_map.go",
        "mount.go",
        "namespaces.go",
        "network.go",
    ] + select({
        "@io_bazel_rules_go//go/platform:android": [
-            "cgroup_unsupported.go",
            "namespaces_syscall_unsupported.go",
            "namespaces_unsupported.go",
        ],
        "@io_bazel_rules_go//go/platform:darwin": [
-            "cgroup_unsupported.go",
            "namespaces_syscall_unsupported.go",
            "namespaces_unsupported.go",
        ],
        "@io_bazel_rules_go//go/platform:dragonfly": [
-            "cgroup_unsupported.go",
            "namespaces_syscall_unsupported.go",
            "namespaces_unsupported.go",
        ],
        "@io_bazel_rules_go//go/platform:freebsd": [
-            "device_defaults.go",
            "namespaces_syscall_unsupported.go",
            "namespaces_unsupported.go",
        ],
@@ -40,27 +37,22 @@ go_library(
            "namespaces_syscall.go",
        ],
        "@io_bazel_rules_go//go/platform:nacl": [
-            "cgroup_unsupported.go",
            "namespaces_syscall_unsupported.go",
            "namespaces_unsupported.go",
        ],
        "@io_bazel_rules_go//go/platform:netbsd": [
-            "cgroup_unsupported.go",
            "namespaces_syscall_unsupported.go",
            "namespaces_unsupported.go",
        ],
        "@io_bazel_rules_go//go/platform:openbsd": [
-            "cgroup_unsupported.go",
            "namespaces_syscall_unsupported.go",
            "namespaces_unsupported.go",
        ],
        "@io_bazel_rules_go//go/platform:plan9": [
-            "cgroup_unsupported.go",
            "namespaces_syscall_unsupported.go",
            "namespaces_unsupported.go",
        ],
        "@io_bazel_rules_go//go/platform:solaris": [
-            "cgroup_unsupported.go",
            "namespaces_syscall_unsupported.go",
            "namespaces_unsupported.go",
        ],
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
@@ -1,6 +0,0 @@
-// +build !windows,!linux,!freebsd
-
-package configs
-
-type Cgroup struct {
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
@@ -187,6 +187,10 @@ type Config struct {

 	// Rootless specifies whether the container is a rootless container.
 	Rootless bool `json:"rootless"`
+
+	// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
+	// to limit the resources (e.g., L3 cache) the container has available
+	IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
 }

 type Hooks struct {
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go
@@ -1,4 +1,4 @@
-// +build linux freebsd
+// +build linux

 package configs

--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
@@ -0,0 +1,7 @@
+package configs
+
+type IntelRdt struct {
+	// The schema for L3 cache id and capacity bitmask (CBM)
+	// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
+	L3CacheSchema string `json:"l3_cache_schema,omitempty"`
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/BUILD
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/BUILD
@@ -10,6 +10,7 @@ go_library(
    visibility = ["//visibility:public"],
    deps = [
        "//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
+        "//vendor/github.com/opencontainers/runc/libcontainer/intelrdt:go_default_library",
        "//vendor/github.com/opencontainers/selinux/go-selinux:go_default_library",
    ],
 )
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
@@ -21,13 +21,6 @@ func (v *ConfigValidator) rootless(config *configs.Config) error {
 	if err := rootlessMount(config); err != nil {
 		return err
 	}
-	// Currently, cgroups cannot effectively be used in rootless containers.
-	// The new cgroup namespace doesn't really help us either because it doesn't
-	// have nice interactions with the user namespace (we're working with upstream
-	// to fix this).
-	if err := rootlessCgroup(config); err != nil {
-		return err
-	}

 	// XXX: We currently can't verify the user config at all, because
 	//      configs.Config doesn't store the user-related configs. So this
@@ -36,37 +29,27 @@ func (v *ConfigValidator) rootless(config *configs.Config) error {
 	return nil
 }

-func rootlessMappings(config *configs.Config) error {
-	rootuid, err := config.HostRootUID()
-	if err != nil {
-		return fmt.Errorf("failed to get root uid from uidMappings: %v", err)
+func hasIDMapping(id int, mappings []configs.IDMap) bool {
+	for _, m := range mappings {
+		if id >= m.ContainerID && id < m.ContainerID+m.Size {
+			return true
+		}
 	}
+	return false
+}
+
+func rootlessMappings(config *configs.Config) error {
 	if euid := geteuid(); euid != 0 {
 		if !config.Namespaces.Contains(configs.NEWUSER) {
 			return fmt.Errorf("rootless containers require user namespaces")
 		}
-		if rootuid != euid {
-			return fmt.Errorf("rootless containers cannot map container root to a different host user")
-		}
 	}

-	rootgid, err := config.HostRootGID()
-	if err != nil {
-		return fmt.Errorf("failed to get root gid from gidMappings: %v", err)
+	if len(config.UidMappings) == 0 {
+		return fmt.Errorf("rootless containers requires at least one UID mapping")
 	}
-
-	// Similar to the above test, we need to make sure that we aren't trying to
-	// map to a group ID that we don't have the right to be.
-	if rootgid != getegid() {
-		return fmt.Errorf("rootless containers cannot map container root to a different host group")
-	}
-
-	// We can only map one user and group inside a container (our own).
-	if len(config.UidMappings) != 1 || config.UidMappings[0].Size != 1 {
-		return fmt.Errorf("rootless containers cannot map more than one user")
-	}
-	if len(config.GidMappings) != 1 || config.GidMappings[0].Size != 1 {
-		return fmt.Errorf("rootless containers cannot map more than one group")
+	if len(config.GidMappings) == 0 {
+		return fmt.Errorf("rootless containers requires at least one UID mapping")
 	}

 	return nil
@@ -104,11 +87,28 @@ func rootlessMount(config *configs.Config) error {
 		// Check that the options list doesn't contain any uid= or gid= entries
 		// that don't resolve to root.
 		for _, opt := range strings.Split(mount.Data, ",") {
-			if strings.HasPrefix(opt, "uid=") && opt != "uid=0" {
-				return fmt.Errorf("cannot specify uid= mount options in rootless containers where argument isn't 0")
+			if strings.HasPrefix(opt, "uid=") {
+				var uid int
+				n, err := fmt.Sscanf(opt, "uid=%d", &uid)
+				if n != 1 || err != nil {
+					// Ignore unknown mount options.
+					continue
+				}
+				if !hasIDMapping(uid, config.UidMappings) {
+					return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers")
+				}
 			}
-			if strings.HasPrefix(opt, "gid=") && opt != "gid=0" {
-				return fmt.Errorf("cannot specify gid= mount options in rootless containers where argument isn't 0")
+
+			if strings.HasPrefix(opt, "gid=") {
+				var gid int
+				n, err := fmt.Sscanf(opt, "gid=%d", &gid)
+				if n != 1 || err != nil {
+					// Ignore unknown mount options.
+					continue
+				}
+				if !hasIDMapping(gid, config.GidMappings) {
+					return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers")
+				}
 			}
 		}
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
@@ -7,6 +7,7 @@ import (
 	"strings"

 	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/intelrdt"
 	selinux "github.com/opencontainers/selinux/go-selinux"
 )

@@ -40,6 +41,9 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
 	if err := v.sysctl(config); err != nil {
 		return err
 	}
+	if err := v.intelrdt(config); err != nil {
+		return err
+	}
 	if config.Rootless {
 		if err := v.rootless(config); err != nil {
 			return err
@@ -153,6 +157,19 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
 	return nil
 }

+func (v *ConfigValidator) intelrdt(config *configs.Config) error {
+	if config.IntelRdt != nil {
+		if !intelrdt.IsEnabled() {
+			return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
+		}
+		if config.IntelRdt.L3CacheSchema == "" {
+			return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
+		}
+	}
+
+	return nil
+}
+
 func isSymbolicLink(path string) (bool, error) {
 	fi, err := os.Lstat(path)
 	if err != nil {
--- a/vendor/github.com/opencontainers/runc/libcontainer/console.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/console.go
@@ -1,17 +0,0 @@
-package libcontainer
-
-import (
-	"io"
-	"os"
-)
-
-// Console represents a pseudo TTY.
-type Console interface {
-	io.ReadWriteCloser
-
-	// Path returns the filesystem path to the slave side of the pty.
-	Path() string
-
-	// Fd returns the fd for the master of the pty.
-	File() *os.File
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/console_freebsd.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/console_freebsd.go
@@ -1,13 +0,0 @@
-// +build freebsd
-
-package libcontainer
-
-import (
-	"errors"
-)
-
-// newConsole returns an initialized console that can be used within a container by copying bytes
-// from the master side to the slave that is attached as the tty for the container's init process.
-func newConsole() (Console, error) {
-	return nil, errors.New("libcontainer console is not supported on FreeBSD")
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go
@@ -1,71 +1,14 @@
 package libcontainer

 import (
-	"fmt"
 	"os"
-	"unsafe"

 	"golang.org/x/sys/unix"
 )

-func ConsoleFromFile(f *os.File) Console {
-	return &linuxConsole{
-		master: f,
-	}
-}
-
-// newConsole returns an initialized console that can be used within a container by copying bytes
-// from the master side to the slave that is attached as the tty for the container's init process.
-func newConsole() (Console, error) {
-	master, err := os.OpenFile("/dev/ptmx", unix.O_RDWR|unix.O_NOCTTY|unix.O_CLOEXEC, 0)
-	if err != nil {
-		return nil, err
-	}
-	console, err := ptsname(master)
-	if err != nil {
-		return nil, err
-	}
-	if err := unlockpt(master); err != nil {
-		return nil, err
-	}
-	return &linuxConsole{
-		slavePath: console,
-		master:    master,
-	}, nil
-}
-
-// linuxConsole is a linux pseudo TTY for use within a container.
-type linuxConsole struct {
-	master    *os.File
-	slavePath string
-}
-
-func (c *linuxConsole) File() *os.File {
-	return c.master
-}
-
-func (c *linuxConsole) Path() string {
-	return c.slavePath
-}
-
-func (c *linuxConsole) Read(b []byte) (int, error) {
-	return c.master.Read(b)
-}
-
-func (c *linuxConsole) Write(b []byte) (int, error) {
-	return c.master.Write(b)
-}
-
-func (c *linuxConsole) Close() error {
-	if m := c.master; m != nil {
-		return m.Close()
-	}
-	return nil
-}
-
 // mount initializes the console inside the rootfs mounting with the specified mount label
 // and applying the correct ownership of the console.
-func (c *linuxConsole) mount() error {
+func mountConsole(slavePath string) error {
 	oldMask := unix.Umask(0000)
 	defer unix.Umask(oldMask)
 	f, err := os.Create("/dev/console")
@@ -75,17 +18,20 @@ func (c *linuxConsole) mount() error {
 	if f != nil {
 		f.Close()
 	}
-	return unix.Mount(c.slavePath, "/dev/console", "bind", unix.MS_BIND, "")
+	return unix.Mount(slavePath, "/dev/console", "bind", unix.MS_BIND, "")
 }

 // dupStdio opens the slavePath for the console and dups the fds to the current
 // processes stdio, fd 0,1,2.
-func (c *linuxConsole) dupStdio() error {
-	slave, err := c.open(unix.O_RDWR)
+func dupStdio(slavePath string) error {
+	fd, err := unix.Open(slavePath, unix.O_RDWR, 0)
 	if err != nil {
-		return err
+		return &os.PathError{
+			Op:   "open",
+			Path: slavePath,
+			Err:  err,
+		}
 	}
-	fd := int(slave.Fd())
 	for _, i := range []int{0, 1, 2} {
 		if err := unix.Dup3(fd, i, 0); err != nil {
 			return err
@@ -93,60 +39,3 @@ func (c *linuxConsole) dupStdio() error {
 	}
 	return nil
 }
-
-// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave.
-func (c *linuxConsole) open(flag int) (*os.File, error) {
-	r, e := unix.Open(c.slavePath, flag, 0)
-	if e != nil {
-		return nil, &os.PathError{
-			Op:   "open",
-			Path: c.slavePath,
-			Err:  e,
-		}
-	}
-	return os.NewFile(uintptr(r), c.slavePath), nil
-}
-
-func ioctl(fd uintptr, flag, data uintptr) error {
-	if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, flag, data); err != 0 {
-		return err
-	}
-	return nil
-}
-
-// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
-// unlockpt should be called before opening the slave side of a pty.
-func unlockpt(f *os.File) error {
-	var u int32
-	return ioctl(f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
-}
-
-// ptsname retrieves the name of the first available pts for the given master.
-func ptsname(f *os.File) (string, error) {
-	n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN)
-	if err != nil {
-		return "", err
-	}
-	return fmt.Sprintf("/dev/pts/%d", n), nil
-}
-
-// SaneTerminal sets the necessary tty_ioctl(4)s to ensure that a pty pair
-// created by us acts normally. In particular, a not-very-well-known default of
-// Linux unix98 ptys is that they have +onlcr by default. While this isn't a
-// problem for terminal emulators, because we relay data from the terminal we
-// also relay that funky line discipline.
-func SaneTerminal(terminal *os.File) error {
-	termios, err := unix.IoctlGetTermios(int(terminal.Fd()), unix.TCGETS)
-	if err != nil {
-		return fmt.Errorf("ioctl(tty, tcgets): %s", err.Error())
-	}
-
-	// Set -onlcr so we don't have to deal with \r.
-	termios.Oflag &^= unix.ONLCR
-
-	if err := unix.IoctlSetTermios(int(terminal.Fd()), unix.TCSETS, termios); err != nil {
-		return fmt.Errorf("ioctl(tty, tcsets): %s", err.Error())
-	}
-
-	return nil
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/console_solaris.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/console_solaris.go
@@ -1,11 +0,0 @@
-package libcontainer
-
-import (
-	"errors"
-)
-
-// newConsole returns an initialized console that can be used within a container by copying bytes
-// from the master side to the slave that is attached as the tty for the container's init process.
-func newConsole() (Console, error) {
-	return nil, errors.New("libcontainer console is not supported on Solaris")
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/console_windows.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/console_windows.go
@@ -1,30 +0,0 @@
-package libcontainer
-
-// newConsole returns an initialized console that can be used within a container
-func newConsole() (Console, error) {
-	return &windowsConsole{}, nil
-}
-
-// windowsConsole is a Windows pseudo TTY for use within a container.
-type windowsConsole struct {
-}
-
-func (c *windowsConsole) Fd() uintptr {
-	return 0
-}
-
-func (c *windowsConsole) Path() string {
-	return ""
-}
-
-func (c *windowsConsole) Read(b []byte) (int, error) {
-	return 0, nil
-}
-
-func (c *windowsConsole) Write(b []byte) (int, error) {
-	return 0, nil
-}
-
-func (c *windowsConsole) Close() error {
-	return nil
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
@@ -21,6 +21,7 @@ import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/criurpc"
+	"github.com/opencontainers/runc/libcontainer/intelrdt"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/utils"

@@ -38,10 +39,14 @@ type linuxContainer struct {
 	root                 string
 	config               *configs.Config
 	cgroupManager        cgroups.Manager
+	intelRdtManager      intelrdt.Manager
+	initPath             string
 	initArgs             []string
 	initProcess          parentProcess
 	initProcessStartTime uint64
 	criuPath             string
+	newuidmapPath        string
+	newgidmapPath        string
 	m                    sync.Mutex
 	criuVersion          int
 	state                containerState
@@ -67,6 +72,9 @@ type State struct {

 	// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
 	ExternalDescriptors []string `json:"external_descriptors,omitempty"`
+
+	// Intel RDT "resource control" filesystem path
+	IntelRdtPath string `json:"intel_rdt_path"`
 }

 // Container is a libcontainer container object.
@@ -163,6 +171,11 @@ func (c *linuxContainer) Stats() (*Stats, error) {
 	if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
 		return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
 	}
+	if c.intelRdtManager != nil {
+		if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil {
+			return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats")
+		}
+	}
 	for _, iface := range c.config.Networks {
 		switch iface.Type {
 		case "veth":
@@ -193,6 +206,15 @@ func (c *linuxContainer) Set(config configs.Config) error {
 		}
 		return err
 	}
+	if c.intelRdtManager != nil {
+		if err := c.intelRdtManager.Set(&config); err != nil {
+			// Set configs back
+			if err2 := c.intelRdtManager.Set(c.config); err2 != nil {
+				logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
+			}
+			return err
+		}
+	}
 	// After config setting succeed, update config and states
 	c.config = &config
 	_, err = c.updateState(nil)
@@ -268,7 +290,7 @@ func (c *linuxContainer) start(process *Process, isInit bool) error {
 	}
 	if err := parent.start(); err != nil {
 		// terminate the process to ensure that it properly is reaped.
-		if err := parent.terminate(); err != nil {
+		if err := ignoreTerminateErrors(parent.terminate()); err != nil {
 			logrus.Warn(err)
 		}
 		return newSystemErrorWithCause(err, "starting container process")
@@ -294,7 +316,7 @@ func (c *linuxContainer) start(process *Process, isInit bool) error {
 			}
 			for i, hook := range c.config.Hooks.Poststart {
 				if err := hook.Run(s); err != nil {
-					if err := parent.terminate(); err != nil {
+					if err := ignoreTerminateErrors(parent.terminate()); err != nil {
 						logrus.Warn(err)
 					}
 					return newSystemErrorWithCausef(err, "running poststart hook %d", i)
@@ -392,7 +414,8 @@ func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProces
 }

 func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
-	cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...)
+	cmd := exec.Command(c.initPath, c.initArgs[1:]...)
+	cmd.Args[0] = c.initArgs[0]
 	cmd.Stdin = p.Stdin
 	cmd.Stdout = p.Stdout
 	cmd.Stderr = p.Stderr
@@ -434,15 +457,16 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
 		return nil, err
 	}
 	return &initProcess{
-		cmd:           cmd,
-		childPipe:     childPipe,
-		parentPipe:    parentPipe,
-		manager:       c.cgroupManager,
-		config:        c.newInitConfig(p),
-		container:     c,
-		process:       p,
-		bootstrapData: data,
-		sharePidns:    sharePidns,
+		cmd:             cmd,
+		childPipe:       childPipe,
+		parentPipe:      parentPipe,
+		manager:         c.cgroupManager,
+		intelRdtManager: c.intelRdtManager,
+		config:          c.newInitConfig(p),
+		container:       c,
+		process:         p,
+		bootstrapData:   data,
+		sharePidns:      sharePidns,
 	}, nil
 }

@@ -461,6 +485,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
 	return &setnsProcess{
 		cmd:           cmd,
 		cgroupPaths:   c.cgroupManager.GetPaths(),
+		intelRdtPath:  state.IntelRdtPath,
 		childPipe:     childPipe,
 		parentPipe:    parentPipe,
 		config:        c.newInitConfig(p),
@@ -499,6 +524,8 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
 		cfg.Rlimits = process.Rlimits
 	}
 	cfg.CreateConsole = process.ConsoleSocket != nil
+	cfg.ConsoleWidth = process.ConsoleWidth
+	cfg.ConsoleHeight = process.ConsoleHeight
 	return cfg
 }

@@ -600,9 +627,24 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
 	logrus.Debugf("Feature check says: %s", criuFeatures)
 	missingFeatures := false

-	if *criuFeat.MemTrack && !*criuFeatures.MemTrack {
-		missingFeatures = true
-		logrus.Debugf("CRIU does not support MemTrack")
+	// The outer if checks if the fields actually exist
+	if (criuFeat.MemTrack != nil) &&
+		(criuFeatures.MemTrack != nil) {
+		// The inner if checks if they are set to true
+		if *criuFeat.MemTrack && !*criuFeatures.MemTrack {
+			missingFeatures = true
+			logrus.Debugf("CRIU does not support MemTrack")
+		}
+	}
+
+	// This needs to be repeated for every new feature check.
+	// Is there a way to put this in a function? Reflection?
+	if (criuFeat.LazyPages != nil) &&
+		(criuFeatures.LazyPages != nil) {
+		if *criuFeat.LazyPages && !*criuFeatures.LazyPages {
+			missingFeatures = true
+			logrus.Debugf("CRIU does not support LazyPages")
+		}
 	}

 	if missingFeatures {
@@ -632,9 +674,9 @@ func parseCriuVersion(path string) (int, error) {
 			return 0, fmt.Errorf("Unable to parse the CRIU version: %s", path)
 		}

-		n, err := fmt.Sscanf(string(version), "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2
+		n, err := fmt.Sscanf(version, "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2
 		if err != nil {
-			n, err = fmt.Sscanf(string(version), "GitID: v%d.%d", &x, &y) // 1.6
+			n, err = fmt.Sscanf(version, "GitID: v%d.%d", &x, &y) // 1.6
 			y++
 		} else {
 			z++
@@ -758,6 +800,25 @@ func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error {
 		}
 		req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
 	}
+	return nil
+}
+
+func waitForCriuLazyServer(r *os.File, status string) error {
+
+	data := make([]byte, 1)
+	_, err := r.Read(data)
+	if err != nil {
+		return err
+	}
+	fd, err := os.OpenFile(status, os.O_TRUNC|os.O_WRONLY, os.ModeAppend)
+	if err != nil {
+		return err
+	}
+	_, err = fd.Write(data)
+	if err != nil {
+		return err
+	}
+	fd.Close()

 	return nil
 }
@@ -825,6 +886,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
 		EmptyNs:         proto.Uint32(criuOpts.EmptyNs),
 		OrphanPtsMaster: proto.Bool(true),
 		AutoDedup:       proto.Bool(criuOpts.AutoDedup),
+		LazyPages:       proto.Bool(criuOpts.LazyPages),
 	}

 	fcg := c.cgroupManager.GetPaths()["freezer"]
@@ -875,6 +937,24 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
 		Opts: &rpcOpts,
 	}

+	if criuOpts.LazyPages {
+		// lazy migration requested; check if criu supports it
+		feat := criurpc.CriuFeatures{
+			LazyPages: proto.Bool(true),
+		}
+
+		if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil {
+			return err
+		}
+
+		statusRead, statusWrite, err := os.Pipe()
+		if err != nil {
+			return err
+		}
+		rpcOpts.StatusFd = proto.Int32(int32(statusWrite.Fd()))
+		go waitForCriuLazyServer(statusRead, criuOpts.StatusFd)
+	}
+
 	//no need to dump these information in pre-dump
 	if !criuOpts.PreDump {
 		for _, m := range c.config.Mounts {
@@ -1027,6 +1107,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 			EmptyNs:         proto.Uint32(criuOpts.EmptyNs),
 			OrphanPtsMaster: proto.Bool(true),
 			AutoDedup:       proto.Bool(criuOpts.AutoDedup),
+			LazyPages:       proto.Bool(criuOpts.LazyPages),
 		},
 	}

@@ -1404,7 +1485,7 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
 		defer master.Close()

 		// While we can access console.master, using the API is a good idea.
-		if err := utils.SendFd(process.ConsoleSocket, master); err != nil {
+		if err := utils.SendFd(process.ConsoleSocket, master.Name(), master.Fd()); err != nil {
 			return err
 		}
 	}
@@ -1519,6 +1600,10 @@ func (c *linuxContainer) currentState() (*State, error) {
 		startTime, _ = c.initProcess.startTime()
 		externalDescriptors = c.initProcess.externalDescriptors()
 	}
+	intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
+	if err != nil {
+		intelRdtPath = ""
+	}
 	state := &State{
 		BaseState: BaseState{
 			ID:                   c.ID(),
@@ -1529,6 +1614,7 @@ func (c *linuxContainer) currentState() (*State, error) {
 		},
 		Rootless:            c.config.Rootless,
 		CgroupPaths:         c.cgroupManager.GetPaths(),
+		IntelRdtPath:        intelRdtPath,
 		NamespacePaths:      make(map[configs.NamespaceType]string),
 		ExternalDescriptors: externalDescriptors,
 	}
@@ -1627,6 +1713,12 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
 	if !joinExistingUser {
 		// write uid mappings
 		if len(c.config.UidMappings) > 0 {
+			if c.config.Rootless && c.newuidmapPath != "" {
+				r.AddData(&Bytemsg{
+					Type:  UidmapPathAttr,
+					Value: []byte(c.newuidmapPath),
+				})
+			}
 			b, err := encodeIDMapping(c.config.UidMappings)
 			if err != nil {
 				return nil, err
@@ -1647,6 +1739,12 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
 				Type:  GidmapAttr,
 				Value: b,
 			})
+			if c.config.Rootless && c.newgidmapPath != "" {
+				r.AddData(&Bytemsg{
+					Type:  GidmapPathAttr,
+					Value: []byte(c.newgidmapPath),
+				})
+			}
 			// The following only applies if we are root.
 			if !c.config.Rootless {
 				// check if we have CAP_SETGID to setgroup properly
@@ -1678,3 +1776,18 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na

 	return bytes.NewReader(r.Serialize()), nil
 }
+
+// ignoreTerminateErrors returns nil if the given err matches an error known
+// to indicate that the terminate occurred successfully or err was nil, otherwise
+// err is returned unaltered.
+func ignoreTerminateErrors(err error) error {
+	if err == nil {
+		return nil
+	}
+	s := err.Error()
+	switch {
+	case strings.Contains(s, "process already finished"), strings.Contains(s, "Wait was already called"):
+		return nil
+	}
+	return err
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/container_solaris.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/container_solaris.go
@@ -1,20 +0,0 @@
-package libcontainer
-
-// State represents a running container's state
-type State struct {
-	BaseState
-
-	// Platform specific fields below here
-}
-
-// A libcontainer container object.
-//
-// Each container is thread-safe within the same process. Since a container can
-// be destroyed by a separate process, any function may return that the container
-// was not found.
-type Container interface {
-	BaseContainer
-
-	// Methods below here are platform specific
-
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/container_windows.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/container_windows.go
@@ -1,20 +0,0 @@
-package libcontainer
-
-// State represents a running container's state
-type State struct {
-	BaseState
-
-	// Platform specific fields below here
-}
-
-// A libcontainer container object.
-//
-// Each container is thread-safe within the same process. Since a container can
-// be destroyed by a separate process, any function may return that the container
-// was not found.
-type Container interface {
-	BaseContainer
-
-	// Methods below here are platform specific
-
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
@@ -23,7 +23,7 @@ type VethPairName struct {
 type CriuOpts struct {
 	ImagesDirectory         string             // directory for storing image files
 	WorkDirectory           string             // directory to cd and write logs/pidfiles/stats to
-	ParentImage             string             // direcotry for storing parent image files in pre-dump and dump
+	ParentImage             string             // directory for storing parent image files in pre-dump and dump
 	LeaveRunning            bool               // leave container in running state after checkpoint
 	TcpEstablished          bool               // checkpoint/restore established TCP connections
 	ExternalUnixConnections bool               // allow external unix connections
@@ -35,4 +35,6 @@ type CriuOpts struct {
 	ManageCgroupsMode       cgMode             // dump or restore cgroup mode
 	EmptyNs                 uint32             // don't c/r properties for namespace from this mask
 	AutoDedup               bool               // auto deduplication for incremental dumps
+	LazyPages               bool               // restore memory pages lazily using userfaultfd
+	StatusFd                string             // fd for feedback when lazy server is ready
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_windows.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_windows.go
@@ -1,6 +0,0 @@
-package libcontainer
-
-// TODO Windows: This can ultimately be entirely factored out as criu is
-// a Unix concept not relevant on Windows.
-type CriuOpts struct {
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
@@ -11,13 +11,13 @@ import (
 	"runtime/debug"
 	"strconv"

-	"github.com/docker/docker/pkg/mount"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
-	"github.com/opencontainers/runc/libcontainer/cgroups/rootless"
 	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/configs/validate"
+	"github.com/opencontainers/runc/libcontainer/intelrdt"
+	"github.com/opencontainers/runc/libcontainer/mount"
 	"github.com/opencontainers/runc/libcontainer/utils"

 	"golang.org/x/sys/unix"
@@ -72,15 +72,15 @@ func Cgroupfs(l *LinuxFactory) error {
 	return nil
 }

-// RootlessCgroups is an options func to configure a LinuxFactory to
-// return containers that use the "rootless" cgroup manager, which will
-// fail to do any operations not possible to do with an unprivileged user.
-// It should only be used in conjunction with rootless containers.
-func RootlessCgroups(l *LinuxFactory) error {
-	l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
-		return &rootless.Manager{
-			Cgroups: config,
-			Paths:   paths,
+// IntelRdtFs is an options func to configure a LinuxFactory to return
+// containers that use the Intel RDT "resource control" filesystem to
+// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
+func IntelRdtFs(l *LinuxFactory) error {
+	l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
+		return &intelrdt.IntelRdtManager{
+			Config: config,
+			Id:     id,
+			Path:   path,
 		}
 	}
 	return nil
@@ -119,12 +119,16 @@ func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
 	}
 	l := &LinuxFactory{
 		Root:      root,
-		InitArgs:  []string{"/proc/self/exe", "init"},
+		InitPath:  "/proc/self/exe",
+		InitArgs:  []string{os.Args[0], "init"},
 		Validator: validate.New(),
 		CriuPath:  "criu",
 	}
 	Cgroupfs(l)
 	for _, opt := range options {
+		if opt == nil {
+			continue
+		}
 		if err := opt(l); err != nil {
 			return nil, err
 		}
@@ -137,6 +141,10 @@ type LinuxFactory struct {
 	// Root directory for the factory to store state.
 	Root string

+	// InitPath is the path for calling the init responsibilities for spawning
+	// a container.
+	InitPath string
+
 	// InitArgs are arguments for calling the init responsibilities for spawning
 	// a container.
 	InitArgs []string
@@ -145,11 +153,19 @@ type LinuxFactory struct {
 	// containers.
 	CriuPath string

+	// New{u,g}idmapPath are the paths to the binaries used for mapping with
+	// rootless containers.
+	NewuidmapPath string
+	NewgidmapPath string
+
 	// Validator provides validation to container configurations.
 	Validator validate.Validator

 	// NewCgroupsManager returns an initialized cgroups manager for a single container.
 	NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
+
+	// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
+	NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager
 }

 func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
@@ -174,17 +190,20 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
 	if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
 		return nil, newGenericError(err, SystemError)
 	}
-	if config.Rootless {
-		RootlessCgroups(l)
-	}
 	c := &linuxContainer{
 		id:            id,
 		root:          containerRoot,
 		config:        config,
+		initPath:      l.InitPath,
 		initArgs:      l.InitArgs,
 		criuPath:      l.CriuPath,
+		newuidmapPath: l.NewuidmapPath,
+		newgidmapPath: l.NewgidmapPath,
 		cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
 	}
+	if intelrdt.IsEnabled() {
+		c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
+	}
 	c.state = &stoppedState{c: c}
 	return c, nil
 }
@@ -203,17 +222,16 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
 		processStartTime: state.InitProcessStartTime,
 		fds:              state.ExternalDescriptors,
 	}
-	// We have to use the RootlessManager.
-	if state.Rootless {
-		RootlessCgroups(l)
-	}
 	c := &linuxContainer{
 		initProcess:          r,
 		initProcessStartTime: state.InitProcessStartTime,
 		id:                   id,
 		config:               &state.Config,
+		initPath:             l.InitPath,
 		initArgs:             l.InitArgs,
 		criuPath:             l.CriuPath,
+		newuidmapPath:        l.NewuidmapPath,
+		newgidmapPath:        l.NewgidmapPath,
 		cgroupManager:        l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
 		root:                 containerRoot,
 		created:              state.Created,
@@ -222,6 +240,9 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
 	if err := c.refreshState(); err != nil {
 		return nil, err
 	}
+	if intelrdt.IsEnabled() {
+		c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
+	}
 	return c, nil
 }

@@ -323,3 +344,21 @@ func (l *LinuxFactory) validateID(id string) error {

 	return nil
 }
+
+// NewuidmapPath returns an option func to configure a LinuxFactory with the
+// provided path to the binary used for uid mapping with rootless containers.
+func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error {
+	return func(l *LinuxFactory) error {
+		l.NewuidmapPath = newuidmapPath
+		return nil
+	}
+}
+
+// NewgidmapPath returns an option func to configure a LinuxFactory with the
+// provided path to the binary used for gid mapping with rootless containers.
+func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
+	return func(l *LinuxFactory) error {
+		l.NewgidmapPath = newgidmapPath
+		return nil
+	}
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
@@ -12,15 +12,16 @@ import (
 	"syscall" // only for Errno
 	"unsafe"

+	"golang.org/x/sys/unix"
+
+	"github.com/containerd/console"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/user"
 	"github.com/opencontainers/runc/libcontainer/utils"
-
 	"github.com/sirupsen/logrus"
 	"github.com/vishvananda/netlink"
-	"golang.org/x/sys/unix"
 )

 type initType string
@@ -61,6 +62,8 @@ type initConfig struct {
 	ContainerId      string                `json:"containerid"`
 	Rlimits          []configs.Rlimit      `json:"rlimits"`
 	CreateConsole    bool                  `json:"create_console"`
+	ConsoleWidth     uint16                `json:"console_width"`
+	ConsoleHeight    uint16                `json:"console_height"`
 	Rootless         bool                  `json:"rootless"`
 }

@@ -170,29 +173,38 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error {
 	// however, that setupUser (specifically fixStdioPermissions) *will* change
 	// the UID owner of the console to be the user the process will run as (so
 	// they can actually control their console).
-	console, err := newConsole()
+
+	pty, slavePath, err := console.NewPty()
 	if err != nil {
 		return err
 	}
-	// After we return from here, we don't need the console anymore.
-	defer console.Close()

-	linuxConsole, ok := console.(*linuxConsole)
-	if !ok {
-		return fmt.Errorf("failed to cast console to *linuxConsole")
+	if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 {
+		err = pty.Resize(console.WinSize{
+			Height: config.ConsoleHeight,
+			Width:  config.ConsoleWidth,
+		})
+
+		if err != nil {
+			return err
+		}
 	}
+
+	// After we return from here, we don't need the console anymore.
+	defer pty.Close()
+
 	// Mount the console inside our rootfs.
 	if mount {
-		if err := linuxConsole.mount(); err != nil {
+		if err := mountConsole(slavePath); err != nil {
 			return err
 		}
 	}
 	// While we can access console.master, using the API is a good idea.
-	if err := utils.SendFd(socket, linuxConsole.File()); err != nil {
+	if err := utils.SendFd(socket, pty.Name(), pty.Fd()); err != nil {
 		return err
 	}
 	// Now, dup over all the things.
-	return linuxConsole.dupStdio()
+	return dupStdio(slavePath)
 }

 // syncParentReady sends to the given pipe a JSON payload which indicates that
@@ -261,25 +273,27 @@ func setupUser(config *initConfig) error {
 		}
 	}

+	// Rather than just erroring out later in setuid(2) and setgid(2), check
+	// that the user is mapped here.
+	if _, err := config.Config.HostUID(execUser.Uid); err != nil {
+		return fmt.Errorf("cannot set uid to unmapped user in user namespace")
+	}
+	if _, err := config.Config.HostGID(execUser.Gid); err != nil {
+		return fmt.Errorf("cannot set gid to unmapped user in user namespace")
+	}
+
 	if config.Rootless {
-		if execUser.Uid != 0 {
-			return fmt.Errorf("cannot run as a non-root user in a rootless container")
-		}
-
-		if execUser.Gid != 0 {
-			return fmt.Errorf("cannot run as a non-root group in a rootless container")
-		}
-
-		// We cannot set any additional groups in a rootless container and thus we
-		// bail if the user asked us to do so. TODO: We currently can't do this
-		// earlier, but if libcontainer.Process.User was typesafe this might work.
+		// We cannot set any additional groups in a rootless container and thus
+		// we bail if the user asked us to do so. TODO: We currently can't do
+		// this check earlier, but if libcontainer.Process.User was typesafe
+		// this might work.
 		if len(addGroups) > 0 {
 			return fmt.Errorf("cannot set any additional groups in a rootless container")
 		}
 	}

-	// before we change to the container's user make sure that the processes STDIO
-	// is correctly owned by the user that we are switching to.
+	// Before we change to the container's user make sure that the processes
+	// STDIO is correctly owned by the user that we are switching to.
 	if err := fixStdioPermissions(config, execUser); err != nil {
 		return err
 	}
@@ -298,7 +312,6 @@ func setupUser(config *initConfig) error {
 	if err := system.Setgid(execUser.Gid); err != nil {
 		return err
 	}
-
 	if err := system.Setuid(execUser.Uid); err != nil {
 		return err
 	}
@@ -335,14 +348,6 @@ func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
 			continue
 		}

-		// Skip chown if s.Gid is actually an unmapped gid in the host. While
-		// this is a bit dodgy if it just so happens that the console _is_
-		// owned by overflow_gid, there's no way for us to disambiguate this as
-		// a userspace program.
-		if _, err := config.Config.HostGID(int(s.Gid)); err != nil {
-			continue
-		}
-
 		// We only change the uid owner (as it is possible for the mount to
 		// prefer a different gid, and there's no reason for us to change it).
 		// The reason why we don't just leave the default uid=X mount setup is
@@ -350,6 +355,15 @@ func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
 		// this code, you couldn't effectively run as a non-root user inside a
 		// container and also have a console set up.
 		if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil {
+			// If we've hit an EINVAL then s.Gid isn't mapped in the user
+			// namespace. If we've hit an EPERM then the inode's current owner
+			// is not mapped in our user namespace (in particular,
+			// privileged_wrt_inode_uidgid() has failed). In either case, we
+			// are in a configuration where it's better for us to just not
+			// touch the stdio rather than bail at this point.
+			if err == unix.EINVAL || err == unix.EPERM {
+				continue
+			}
 			return err
 		}
 	}
@@ -480,6 +494,16 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
 		logrus.Warn(err)
 	}

+	subreaper, err := system.GetSubreaper()
+	if err != nil {
+		// The error here means that PR_GET_CHILD_SUBREAPER is not
+		// supported because this code might run on a kernel older
+		// than 3.4. We don't want to throw an error in that case,
+		// and we simplify things, considering there is no subreaper
+		// set.
+		subreaper = 0
+	}
+
 	for _, p := range procs {
 		if s != unix.SIGKILL {
 			if ok, err := isWaitable(p.Pid); err != nil {
@@ -493,9 +517,16 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
 			}
 		}

-		if _, err := p.Wait(); err != nil {
-			if !isNoChildren(err) {
-				logrus.Warn("wait: ", err)
+		// In case a subreaper has been setup, this code must not
+		// wait for the process. Otherwise, we cannot be sure the
+		// current process will be reaped by the subreaper, while
+		// the subreaper might be waiting for this process in order
+		// to retrieve its exit code.
+		if subreaper == 0 {
+			if _, err := p.Wait(); err != nil {
+				if !isNoChildren(err) {
+					logrus.Warn("wait: ", err)
+				}
 			}
 		}
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/BUILD
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/BUILD
@@ -4,18 +4,16 @@ go_library(
    name = "go_default_library",
    srcs = select({
        "@io_bazel_rules_go//go/platform:linux": [
-            "rootless.go",
+            "intelrdt.go",
+            "stats.go",
        ],
        "//conditions:default": [],
    }),
-    importpath = "github.com/opencontainers/runc/libcontainer/cgroups/rootless",
+    importpath = "github.com/opencontainers/runc/libcontainer/intelrdt",
    visibility = ["//visibility:public"],
    deps = select({
        "@io_bazel_rules_go//go/platform:linux": [
-            "//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
-            "//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
            "//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
-            "//vendor/github.com/opencontainers/runc/libcontainer/configs/validate:go_default_library",
        ],
        "//conditions:default": [],
    }),
--- a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
@@ -0,0 +1,553 @@
+// +build linux
+
+package intelrdt
+
+import (
+	"bufio"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+/*
+ * About Intel RDT/CAT feature:
+ * Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
+ * Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
+ * Cache is the only resource that is supported in RDT.
+ *
+ * This feature provides a way for the software to restrict cache allocation to a
+ * defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
+ * The different subsets are identified by class of service (CLOS) and each CLOS
+ * has a capacity bitmask (CBM).
+ *
+ * More information about Intel RDT/CAT can be found in section 17.17
+ * of the Intel Software Developer Manual.
+ *
+ * About Intel RDT/CAT kernel interface:
+ * In Linux 4.10 kernel or newer, the interface is defined and exposed via
+ * "resource control" filesystem, which is a "cgroup-like" interface.
+ *
+ * Comparing with cgroups, it has similar process management lifecycle and
+ * interfaces in a container. But unlike cgroups' hierarchy, it has single level
+ * filesystem layout.
+ *
+ * Intel RDT "resource control" filesystem hierarchy:
+ * mount -t resctrl resctrl /sys/fs/resctrl
+ * tree /sys/fs/resctrl
+ * /sys/fs/resctrl/
+ * |-- info
+ * |   |-- L3
+ * |       |-- cbm_mask
+ * |       |-- min_cbm_bits
+ * |       |-- num_closids
+ * |-- cpus
+ * |-- schemata
+ * |-- tasks
+ * |-- <container_id>
+ *     |-- cpus
+ *     |-- schemata
+ *     |-- tasks
+ *
+ * For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
+ * resource constraints.
+ *
+ * The file `tasks` has a list of tasks that belong to this group (e.g., the
+ * "<container_id>" group). Tasks can be added to a group by writing the task ID
+ * to the "tasks" file (which will automatically remove them from the previous
+ * group to which they belonged). New tasks created by fork(2) and clone(2) are
+ * added to the same group as their parent. If a pid is not in any sub group, it is
+ * in root group.
+ *
+ * The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
+ * which contains L3 cache id and capacity bitmask (CBM).
+ * 	Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
+ * For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
+ * which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
+ *
+ * The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
+ * be set is less than the max bit. The max bits in the CBM is varied among
+ * supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
+ * layout, the CBM in a group should be a subset of the CBM in root. Kernel will
+ * check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
+ * of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
+ * values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
+ *
+ * For more information about Intel RDT/CAT kernel interface:
+ * https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
+ *
+ * An example for runc:
+ * Consider a two-socket machine with two L3 caches where the default CBM is
+ * 0xfffff and the max CBM length is 20 bits. With this configuration, tasks
+ * inside the container only have access to the "upper" 80% of L3 cache id 0 and
+ * the "lower" 50% L3 cache id 1:
+ *
+ * "linux": {
+ * 	"intelRdt": {
+ * 		"l3CacheSchema": "L3:0=ffff0;1=3ff"
+ * 	}
+ * }
+ */
+
+// Manager is the set of operations used to manage the Intel RDT
+// "resource control" group of a container.
+type Manager interface {
+	// Apply applies Intel RDT configuration to the process with the
+	// specified pid.
+	Apply(pid int) error
+
+	// GetStats returns statistics for Intel RDT.
+	GetStats() (*Stats, error)
+
+	// Destroy destroys the Intel RDT 'container_id' group.
+	Destroy() error
+
+	// GetPath returns the Intel RDT path to save in a state file and to be
+	// able to restore the object later.
+	GetPath() string
+
+	// Set sets the Intel RDT "resource control" filesystem as configured.
+	Set(container *configs.Config) error
+}
+
+// IntelRdtManager implements the Manager interface.
+type IntelRdtManager struct {
+	// mu is held by Apply, Destroy and GetStats.
+	mu sync.Mutex
+	// Config is the container configuration; its IntelRdt section is what
+	// Apply and GetStats check before doing any work.
+	Config *configs.Config
+	// Id names the group directory created under the resctrl root.
+	Id string
+	// Path is the group directory. It is set by Apply, cleared by Destroy
+	// and filled in on demand by GetPath.
+	Path string
+}
+
+const (
+	IntelRdtTasks = "tasks"
+)
+
+var (
+	// The absolute root path of the Intel RDT "resource control" filesystem
+	intelRdtRoot     string
+	intelRdtRootLock sync.Mutex
+
+	// The flag to indicate if Intel RDT is supported
+	isEnabled bool
+)
+
+// intelRdtData bundles the resctrl root path, the container configuration
+// and the target pid collected by getIntelRdtData; join uses root and pid
+// to create the group directory and attach the process.
+type intelRdtData struct {
+	root   string
+	config *configs.Config
+	pid    int
+}
+
+// init probes once, at package load, whether Intel RDT/CAT can be used and
+// records the answer in isEnabled.
+func init() {
+	// Step 1: hardware and kernel must advertise the "cat_l3" flag in
+	// /proc/cpuinfo. A read failure is treated the same as "not supported".
+	hasFlag, err := parseCpuInfoFile("/proc/cpuinfo")
+	if err != nil || !hasFlag {
+		isEnabled = false
+		return
+	}
+
+	// Step 2: the "resource control" filesystem must already be mounted;
+	// mounting it is the user's responsibility.
+	isEnabled = isIntelRdtMounted()
+}
+
+// findIntelRdtMountpointDir returns the mount point of the Intel RDT
+// "resource control" (resctrl) filesystem as listed in
+// /proc/self/mountinfo.
+//
+// It returns a *NotFoundError when no resctrl mount is listed, and a plain
+// error when the file cannot be read or a line is malformed.
+func findIntelRdtMountpointDir() (string, error) {
+	f, err := os.Open("/proc/self/mountinfo")
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		text := s.Text()
+		fields := strings.Split(text, " ")
+		// Safe as mountinfo encodes mountpoints with spaces as \040.
+		index := strings.Index(text, " - ")
+		// Without the separator, text[index+3:] would start at an
+		// arbitrary offset (and panic on a line shorter than 2 bytes),
+		// so treat the line as malformed instead.
+		if index < 0 {
+			return "", fmt.Errorf("Found no fields post '-' in %q", text)
+		}
+		postSeparatorFields := strings.Fields(text[index+3:])
+		numPostFields := len(postSeparatorFields)
+
+		// This is an error as we can't detect if the mount is for "Intel RDT"
+		if numPostFields == 0 {
+			return "", fmt.Errorf("Found no fields post '-' in %q", text)
+		}
+
+		if postSeparatorFields[0] == "resctrl" {
+			// Check that the mount is properly formatted.
+			if numPostFields < 3 {
+				return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
+			}
+
+			// The mount point is the fifth space-separated field; guard
+			// the index so a short line cannot panic.
+			if len(fields) < 5 {
+				return "", fmt.Errorf("Error found less than 5 fields in %q", text)
+			}
+
+			return fields[4], nil
+		}
+	}
+	if err := s.Err(); err != nil {
+		return "", err
+	}
+
+	return "", NewNotFoundError("Intel RDT")
+}
+
+// getIntelRdtRoot returns the root path of the Intel RDT "resource control"
+// filesystem. The first successful lookup is cached in intelRdtRoot; a
+// failed lookup is not cached, so the next call tries again.
+func getIntelRdtRoot() (string, error) {
+	intelRdtRootLock.Lock()
+	defer intelRdtRootLock.Unlock()
+
+	if intelRdtRoot == "" {
+		mountpoint, err := findIntelRdtMountpointDir()
+		if err != nil {
+			return "", err
+		}
+
+		// Only cache a mount point that can actually be stat'ed.
+		if _, statErr := os.Stat(mountpoint); statErr != nil {
+			return "", statErr
+		}
+
+		intelRdtRoot = mountpoint
+	}
+	return intelRdtRoot, nil
+}
+
+// isIntelRdtMounted reports whether the root of the Intel RDT
+// "resource control" filesystem could be resolved by getIntelRdtRoot.
+func isIntelRdtMounted() bool {
+	_, err := getIntelRdtRoot()
+	return err == nil
+}
+
+// parseCpuInfoFile reports whether the cpuinfo-style file at path contains
+// the "cat_l3" flag as a space-separated token on any line.
+//
+// It returns an error if the file cannot be opened or if reading it fails
+// part-way through.
+func parseCpuInfoFile(path string) (bool, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return false, err
+	}
+	defer f.Close()
+
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		text := s.Text()
+		flags := strings.Split(text, " ")
+
+		// "cat_l3" flag is set if Intel RDT/CAT is supported
+		for _, flag := range flags {
+			if flag == "cat_l3" {
+				return true, nil
+			}
+		}
+	}
+	// Scan returns false when a read fails, so the error is only visible
+	// after the loop; checking it inside the loop never fires.
+	if err := s.Err(); err != nil {
+		return false, err
+	}
+	return false, nil
+}
+
+// parseUint parses s as an unsigned integer in the given base and bit size.
+// Input that fails unsigned parsing but represents a negative number, even
+// one below the signed range, yields 0 with a nil error. Any other failure
+// returns the value and error produced by strconv.ParseUint.
+func parseUint(s string, base, bitSize int) (uint64, error) {
+	unsigned, unsignedErr := strconv.ParseUint(s, base, bitSize)
+	if unsignedErr == nil {
+		return unsigned, nil
+	}
+
+	signed, signedErr := strconv.ParseInt(s, base, bitSize)
+	if signed < 0 {
+		switch {
+		case signedErr == nil:
+			// Negative value that fits the signed range.
+			return 0, nil
+		case signedErr.(*strconv.NumError).Err == strconv.ErrRange:
+			// Negative value below the signed range; ParseInt
+			// saturates to the minimum and reports ErrRange.
+			return 0, nil
+		}
+	}
+
+	return unsigned, unsignedErr
+}
+
+// getIntelRdtParamUint reads the file named file inside path and parses its
+// whitespace-trimmed contents as a base-10 uint64 via parseUint.
+func getIntelRdtParamUint(path, file string) (uint64, error) {
+	fullPath := filepath.Join(path, file)
+	raw, err := ioutil.ReadFile(fullPath)
+	if err != nil {
+		return 0, err
+	}
+
+	trimmed := strings.TrimSpace(string(raw))
+	value, parseErr := parseUint(trimmed, 10, 64)
+	if parseErr != nil {
+		// The message quotes the untrimmed contents and the full path.
+		return value, fmt.Errorf("unable to parse %q as a uint from file %q", string(raw), fullPath)
+	}
+	return value, nil
+}
+
+// getIntelRdtParamString reads the file named file inside path and returns
+// its contents with leading and trailing whitespace removed.
+func getIntelRdtParamString(path, file string) (string, error) {
+	fullPath := filepath.Join(path, file)
+	raw, err := ioutil.ReadFile(fullPath)
+	if err != nil {
+		return "", err
+	}
+
+	value := strings.TrimSpace(string(raw))
+	return value, nil
+}
+
+// readTasksFile returns the pids listed, one per line, in the "tasks" file
+// inside dir. Empty lines are skipped.
+//
+// It returns an error if the file cannot be opened, if a non-empty line is
+// not an integer, or if reading fails part-way through.
+func readTasksFile(dir string) ([]int, error) {
+	f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	var (
+		s   = bufio.NewScanner(f)
+		out = []int{}
+	)
+
+	for s.Scan() {
+		if t := s.Text(); t != "" {
+			pid, err := strconv.Atoi(t)
+			if err != nil {
+				return nil, err
+			}
+			out = append(out, pid)
+		}
+	}
+	// A read error ends the loop just like EOF does; report it instead of
+	// returning a silently truncated pid list.
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// writeFile writes data followed by a newline to the file named file inside
+// dir. An empty dir is rejected before anything is written.
+func writeFile(dir, file, data string) error {
+	if dir == "" {
+		return fmt.Errorf("no such directory for %s", file)
+	}
+
+	target := filepath.Join(dir, file)
+	payload := []byte(data + "\n")
+	err := ioutil.WriteFile(target, payload, 0700)
+	if err != nil {
+		return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
+	}
+	return nil
+}
+
+// getIntelRdtData resolves the resctrl root and packages it with the given
+// config and pid. When the root cannot be resolved it returns a nil
+// *intelRdtData together with the error.
+func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) {
+	root, err := getIntelRdtRoot()
+	if err != nil {
+		return nil, err
+	}
+
+	data := &intelRdtData{
+		root:   root,
+		config: c,
+		pid:    pid,
+	}
+	return data, nil
+}
+
+// getL3CacheInfo reads the read-only L3 cache information (cbm_mask,
+// min_cbm_bits and num_closids) from <root>/info/L3.
+//
+// On any failure it returns a non-nil, zero-valued *L3CacheInfo together
+// with the error; fields are only populated once every read has succeeded.
+func getL3CacheInfo() (*L3CacheInfo, error) {
+	info := &L3CacheInfo{}
+
+	root, err := getIntelRdtRoot()
+	if err != nil {
+		return info, err
+	}
+	infoDir := filepath.Join(root, "info", "L3")
+
+	mask, err := getIntelRdtParamString(infoDir, "cbm_mask")
+	if err != nil {
+		return info, err
+	}
+
+	minBits, err := getIntelRdtParamUint(infoDir, "min_cbm_bits")
+	if err != nil {
+		return info, err
+	}
+
+	closids, err := getIntelRdtParamUint(infoDir, "num_closids")
+	if err != nil {
+		return info, err
+	}
+
+	// Fill everything in one step so an earlier failure never leaks a
+	// partially populated struct.
+	*info = L3CacheInfo{
+		CbmMask:    mask,
+		MinCbmBits: minBits,
+		NumClosids: closids,
+	}
+	return info, nil
+}
+
+// WriteIntelRdtTasks writes pid into the "tasks" file inside dir. A pid of
+// -1 is a request to attach nothing and returns nil without writing. An
+// empty dir is always an error.
+func WriteIntelRdtTasks(dir string, pid int) error {
+	if dir == "" {
+		return fmt.Errorf("no such directory for %s", IntelRdtTasks)
+	}
+
+	// Dont attach any pid if -1 is specified as a pid
+	if pid == -1 {
+		return nil
+	}
+
+	tasksFile := filepath.Join(dir, IntelRdtTasks)
+	payload := []byte(strconv.Itoa(pid))
+	if err := ioutil.WriteFile(tasksFile, payload, 0700); err != nil {
+		return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
+	}
+	return nil
+}
+
+// IsEnabled reports whether Intel RDT was detected as enabled; it returns
+// the isEnabled flag computed by this package's init function.
+func IsEnabled() bool {
+	return isEnabled
+}
+
+// GetIntelRdtPath returns the path of the 'container_id' group, i.e. id
+// joined onto the root of the Intel RDT "resource control" filesystem.
+// It returns an empty path and the error if the root cannot be resolved.
+func GetIntelRdtPath(id string) (string, error) {
+	root, err := getIntelRdtRoot()
+	if err != nil {
+		return "", err
+	}
+	return filepath.Join(root, id), nil
+}
+
+// Apply applies Intel RDT configuration to the process with the specified
+// pid: it creates the container's group directory under the resctrl root,
+// writes pid into its "tasks" file and records the directory in m.Path.
+//
+// It is a no-op when the config has no IntelRdt section. Any failure to
+// resolve the resctrl root, including a missing mount point, is returned
+// to the caller.
+func (m *IntelRdtManager) Apply(pid int) (err error) {
+	// If intelRdt is not specified in config, we do nothing
+	if m.Config.IntelRdt == nil {
+		return nil
+	}
+	d, err := getIntelRdtData(m.Config, pid)
+	if err != nil {
+		// getIntelRdtData returns a nil *intelRdtData on every error,
+		// "not found" included, so continuing to d.join would
+		// dereference nil.
+		return err
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	path, err := d.join(m.Id)
+	if err != nil {
+		return err
+	}
+
+	m.Path = path
+	return nil
+}
+
+// Destroy removes the Intel RDT 'container_id' group directory at m.Path
+// and, once the removal has succeeded, clears m.Path. On failure m.Path is
+// left untouched and the error is returned.
+func (m *IntelRdtManager) Destroy() error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	err := os.RemoveAll(m.Path)
+	if err == nil {
+		m.Path = ""
+	}
+	return err
+}
+
+// GetPath returns the Intel RDT path to save in a state file and to be able
+// to restore the object later.
+//
+// If m.Path is empty it is first filled in from GetIntelRdtPath(m.Id). An
+// error from that lookup is discarded, in which case the empty string is
+// stored and returned. This method does not take m.mu itself; GetStats
+// calls it while already holding that lock.
+func (m *IntelRdtManager) GetPath() string {
+	if m.Path == "" {
+		m.Path, _ = GetIntelRdtPath(m.Id)
+	}
+	return m.Path
+}
+
+// GetStats returns statistics for Intel RDT: the read-only L3 cache
+// information, the L3 cache schema of the root group and the L3 cache
+// schema of the 'container_id' group.
+//
+// It returns (nil, nil) when the config has no IntelRdt section, and
+// (nil, err) as soon as any of the reads fails.
+func (m *IntelRdtManager) GetStats() (*Stats, error) {
+	// If intelRdt is not specified in config
+	if m.Config.IntelRdt == nil {
+		return nil, nil
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	stats := NewStats()
+
+	// The L3 cache schema is the first line of a "schemata" file.
+	firstLine := func(contents string) string {
+		return strings.Split(contents, "\n")[0]
+	}
+
+	// The read-only L3 cache information
+	cacheInfo, err := getL3CacheInfo()
+	if err != nil {
+		return nil, err
+	}
+
+	// The read-only L3 cache schema in root
+	root, err := getIntelRdtRoot()
+	if err != nil {
+		return nil, err
+	}
+	rootSchemata, err := getIntelRdtParamString(root, "schemata")
+	if err != nil {
+		return nil, err
+	}
+
+	// The L3 cache schema in 'container_id' group
+	groupSchemata, err := getIntelRdtParamString(m.GetPath(), "schemata")
+	if err != nil {
+		return nil, err
+	}
+
+	stats.L3CacheInfo = cacheInfo
+	stats.L3CacheSchemaRoot = firstLine(rootSchemata)
+	stats.L3CacheSchema = firstLine(groupSchemata)
+	return stats, nil
+}
+
+// Set sets the Intel RDT "resource control" filesystem as configured by
+// writing the container's L3 cache schema, when one is given, to the
+// "schemata" file of the container's group.
+//
+// About the L3 cache schema:
+// It holds allocation masks/values for L3 cache on each socket, made of an
+// L3 cache id and a capacity bitmask (CBM).
+//     Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
+// For example, on a two-socket machine the schema line could be:
+//     L3:0=ff;1=c0
+// meaning L3 cache id 0's CBM is 0xff and L3 cache id 1's CBM is 0xc0.
+//
+// About L3 cache CBM validity:
+// A valid CBM is a contiguous set of bits, and the number of bits that can
+// be set is less than the max bit, which varies among supported Intel Xeon
+// platforms. The CBM in a group should be a subset of the CBM in root; the
+// kernel checks validity when the file is written.
+// e.g., 0xfffff in root indicates the max bits of CBM is 20 bits, which
+// maps to the entire L3 cache capacity. Some valid CBM values to set in a
+// group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
+func (m *IntelRdtManager) Set(container *configs.Config) error {
+	// Resolve the group path first, whether or not a schema is configured.
+	path := m.GetPath()
+
+	if container.IntelRdt == nil {
+		return nil
+	}
+
+	schema := container.IntelRdt.L3CacheSchema
+	if schema == "" {
+		return nil
+	}
+
+	return writeFile(path, "schemata", schema)
+}
+
+// join creates the group directory <root>/<id> if needed, writes raw.pid
+// into its "tasks" file through WriteIntelRdtTasks, and returns the
+// directory path. On any failure the returned path is empty.
+func (raw *intelRdtData) join(id string) (string, error) {
+	groupDir := filepath.Join(raw.root, id)
+
+	err := os.MkdirAll(groupDir, 0755)
+	if err == nil {
+		err = WriteIntelRdtTasks(groupDir, raw.pid)
+	}
+	if err != nil {
+		return "", err
+	}
+	return groupDir, nil
+}
+
+// NotFoundError is returned when the mount point of a resource control
+// filesystem cannot be found.
+type NotFoundError struct {
+	// ResourceControl names the resource whose mount point is missing.
+	ResourceControl string
+}
+
+// Error implements the error interface.
+func (e *NotFoundError) Error() string {
+	return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl)
+}
+
+// NewNotFoundError returns a *NotFoundError for the resource named res.
+func NewNotFoundError(res string) error {
+	notFound := new(NotFoundError)
+	notFound.ResourceControl = res
+	return notFound
+}
+
+// IsNotFound reports whether err is a *NotFoundError. A nil error is never
+// "not found".
+func IsNotFound(err error) bool {
+	// The type assertion already yields false for a nil interface value.
+	_, isNotFound := err.(*NotFoundError)
+	return isNotFound
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
@@ -0,0 +1,24 @@
+// +build linux
+
+package intelrdt
+
+// L3CacheInfo holds the read-only L3 cache information; each field is
+// serialized under the JSON key of the same name as the resctrl info file
+// (cbm_mask, min_cbm_bits, num_closids).
+type L3CacheInfo struct {
+	CbmMask    string `json:"cbm_mask,omitempty"`
+	MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
+	NumClosids uint64 `json:"num_closids,omitempty"`
+}
+
+// Stats is the set of Intel RDT statistics reported for a container.
+type Stats struct {
+	// The read-only L3 cache information
+	L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
+
+	// The read-only L3 cache schema in root
+	L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
+
+	// The L3 cache schema in 'container_id' group
+	L3CacheSchema string `json:"l3_cache_schema,omitempty"`
+}
+
+// NewStats returns a pointer to a new, zero-valued Stats.
+func NewStats() *Stats {
+	return &Stats{}
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
@@ -29,7 +29,7 @@ func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
 		return err
 	}

-	res := strings.Split(string(dest), ";")
+	res := strings.Split(dest, ";")
 	if len(res) < 5 {
 		return fmt.Errorf("Destination buffer for key description is too small")
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
@@ -18,6 +18,8 @@ const (
 	SetgroupAttr    uint16 = 27285
 	OomScoreAdjAttr uint16 = 27286
 	RootlessAttr    uint16 = 27287
+	UidmapPathAttr  uint16 = 27288
+	GidmapPathAttr  uint16 = 27289
 )

 type Int32msg struct {
--- a/vendor/github.com/opencontainers/runc/libcontainer/mount/BUILD
+++ b/vendor/github.com/opencontainers/runc/libcontainer/mount/BUILD
@@ -0,0 +1,30 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = [
+        "mount.go",
+        "mountinfo.go",
+    ] + select({
+        "@io_bazel_rules_go//go/platform:linux": [
+            "mount_linux.go",
+        ],
+        "//conditions:default": [],
+    }),
+    importpath = "github.com/opencontainers/runc/libcontainer/mount",
+    visibility = ["//visibility:public"],
+)
+
+filegroup(
+    name = "package-srcs",
+    srcs = glob(["**"]),
+    tags = ["automanaged"],
+    visibility = ["//visibility:private"],
+)
+
+filegroup(
+    name = "all-srcs",
+    srcs = [":package-srcs"],
+    tags = ["automanaged"],
+    visibility = ["//visibility:public"],
+)
--- a/vendor/github.com/opencontainers/runc/libcontainer/mount/mount.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/mount/mount.go
@@ -0,0 +1,23 @@
+package mount
+
+// GetMounts retrieves a list of mounts for the current running process.
+func GetMounts() ([]*Info, error) {
+	return parseMountTable()
+}
+
+// Mounted reports whether the specified mountpoint appears as a mount point
+// in the mount table returned by parseMountTable. The comparison is an
+// exact string match against each entry's Mountpoint.
+func Mounted(mountpoint string) (bool, error) {
+	table, err := parseMountTable()
+	if err != nil {
+		return false, err
+	}
+
+	// Stop scanning at the first entry that matches.
+	found := false
+	for i := 0; i < len(table) && !found; i++ {
+		found = table[i].Mountpoint == mountpoint
+	}
+	return found, nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/mount/mount_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/mount/mount_linux.go
@@ -0,0 +1,82 @@
+// +build linux
+
+package mount
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+)
+
+const (
+	/* 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
+	   (1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)   (10)         (11)
+
+	   (1) mount ID:  unique identifier of the mount (may be reused after umount)
+	   (2) parent ID:  ID of parent (or of self for the top of the mount tree)
+	   (3) major:minor:  value of st_dev for files on filesystem
+	   (4) root:  root of the mount within the filesystem
+	   (5) mount point:  mount point relative to the process's root
+	   (6) mount options:  per mount options
+	   (7) optional fields:  zero or more fields of the form "tag[:value]"
+	   (8) separator:  marks the end of the optional fields
+	   (9) filesystem type:  name of filesystem of the form "type[.subtype]"
+	   (10) mount source:  filesystem specific information or "none"
+	   (11) super options:  per super block options*/
+	mountinfoFormat = "%d %d %d:%d %s %s %s %s"
+)
+
+// parseMountTable opens /proc/self/mountinfo and returns the entries that
+// parseInfoFile extracts from it. The file is closed before returning.
+//
+// Parse /proc/self/mountinfo because comparing Dev and ino does not work from
+// bind mounts
+func parseMountTable() ([]*Info, error) {
+	f, err := os.Open("/proc/self/mountinfo")
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	return parseInfoFile(f)
+}
+
+// parseInfoFile parses mountinfo-formatted text from r and returns one Info
+// per line, in input order.
+//
+// Parsing stops with an error at the first line that does not match
+// mountinfoFormat, lacks the " - " separator, or has fewer than three
+// fields after it. A read error from r is returned as well.
+func parseInfoFile(r io.Reader) ([]*Info, error) {
+	var (
+		s   = bufio.NewScanner(r)
+		out = []*Info{}
+	)
+
+	for s.Scan() {
+		var (
+			p              = &Info{}
+			text           = s.Text()
+			optionalFields string
+		)
+
+		if _, err := fmt.Sscanf(text, mountinfoFormat,
+			&p.ID, &p.Parent, &p.Major, &p.Minor,
+			&p.Root, &p.Mountpoint, &p.Opts, &optionalFields); err != nil {
+			return nil, fmt.Errorf("Scanning '%s' failed: %s", text, err)
+		}
+		// Safe as mountinfo encodes mountpoints with spaces as \040.
+		index := strings.Index(text, " - ")
+		// Without the separator, text[index+3:] would slice from an
+		// arbitrary offset and yield bogus fstype/source values.
+		if index < 0 {
+			return nil, fmt.Errorf("Error found no ' - ' separator in %q", text)
+		}
+		postSeparatorFields := strings.Fields(text[index+3:])
+		if len(postSeparatorFields) < 3 {
+			return nil, fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
+		}
+
+		// With no optional fields, the eighth scanned token is the
+		// separator itself.
+		if optionalFields != "-" {
+			p.Optional = optionalFields
+		}
+
+		p.Fstype = postSeparatorFields[0]
+		p.Source = postSeparatorFields[1]
+		p.VfsOpts = strings.Join(postSeparatorFields[2:], " ")
+		out = append(out, p)
+	}
+	// Scan returns false on a read error, so the error can only be seen
+	// once the loop has ended.
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+	return out, nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/mount/mountinfo.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/mount/mountinfo.go
@@ -0,0 +1,40 @@
+package mount
+
+// Info reveals information about a particular mounted filesystem. This
+// struct is populated from the content in the /proc/<pid>/mountinfo file.
+type Info struct {
+	// ID is a unique identifier of the mount (may be reused after umount).
+	ID int
+
+	// Parent indicates the ID of the mount parent (or of self for the top of the
+	// mount tree).
+	Parent int
+
+	// Major indicates one half of the device ID which identifies the device class.
+	Major int
+
+	// Minor indicates one half of the device ID which identifies a specific
+	// instance of device.
+	Minor int
+
+	// Root of the mount within the filesystem.
+	Root string
+
+	// Mountpoint indicates the mount point relative to the process's root.
+	Mountpoint string
+
+	// Opts represents mount-specific options.
+	Opts string
+
+	// Optional represents optional fields.
+	Optional string
+
+	// Fstype indicates the type of filesystem, such as EXT3.
+	Fstype string
+
+	// Source indicates filesystem specific information or "none".
+	Source string
+
+	// VfsOpts represents per super block options.
+	VfsOpts string
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/process.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/process.go
@@ -47,6 +47,10 @@ type Process struct {
 	// ExtraFiles specifies additional open files to be inherited by the container
 	ExtraFiles []*os.File

+	// Initial sizings for the console
+	ConsoleWidth  uint16
+	ConsoleHeight uint16
+
 	// Capabilities specify the capabilities to keep when executing the process inside the container
 	// All capabilities not specified will be dropped from the processes capability mask
 	Capabilities *configs.Capabilities
--- a/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
@@ -15,6 +15,7 @@ import (

 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/intelrdt"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/utils"

@@ -49,6 +50,7 @@ type setnsProcess struct {
 	parentPipe    *os.File
 	childPipe     *os.File
 	cgroupPaths   map[string]string
+	intelRdtPath  string
 	config        *initConfig
 	fds           []string
 	process       *Process
@@ -83,12 +85,20 @@ func (p *setnsProcess) start() (err error) {
 	if err = p.execSetns(); err != nil {
 		return newSystemErrorWithCause(err, "executing setns process")
 	}
-	// We can't join cgroups if we're in a rootless container.
-	if !p.config.Rootless && len(p.cgroupPaths) > 0 {
+	if len(p.cgroupPaths) > 0 {
 		if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
 			return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
 		}
 	}
+	if p.intelRdtPath != "" {
+		// if Intel RDT "resource control" filesystem path exists
+		_, err := os.Stat(p.intelRdtPath)
+		if err == nil {
+			if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil {
+				return newSystemErrorWithCausef(err, "adding pid %d to Intel RDT resource control filesystem", p.pid())
+			}
+		}
+	}
 	// set rlimits, this has to be done here because we lose permissions
 	// to raise the limits once we enter a user-namespace
 	if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
@@ -193,16 +203,17 @@ func (p *setnsProcess) setExternalDescriptors(newFds []string) {
 }

 type initProcess struct {
-	cmd           *exec.Cmd
-	parentPipe    *os.File
-	childPipe     *os.File
-	config        *initConfig
-	manager       cgroups.Manager
-	container     *linuxContainer
-	fds           []string
-	process       *Process
-	bootstrapData io.Reader
-	sharePidns    bool
+	cmd             *exec.Cmd
+	parentPipe      *os.File
+	childPipe       *os.File
+	config          *initConfig
+	manager         cgroups.Manager
+	intelRdtManager intelrdt.Manager
+	container       *linuxContainer
+	fds             []string
+	process         *Process
+	bootstrapData   io.Reader
+	sharePidns      bool
 }

 func (p *initProcess) pid() int {
@@ -261,12 +272,35 @@ func (p *initProcess) start() error {
 		p.process.ops = nil
 		return newSystemErrorWithCause(err, "starting init process command")
 	}
+	// Do this before syncing with child so that no children can escape the
+	// cgroup. We don't need to worry about not doing this and not being root
+	// because we'd be using the rootless cgroup manager in that case.
+	if err := p.manager.Apply(p.pid()); err != nil {
+		return newSystemErrorWithCause(err, "applying cgroup configuration for process")
+	}
+	if p.intelRdtManager != nil {
+		if err := p.intelRdtManager.Apply(p.pid()); err != nil {
+			return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
+		}
+	}
+	defer func() {
+		if err != nil {
+			// TODO: should not be the responsibility to call here
+			p.manager.Destroy()
+			if p.intelRdtManager != nil {
+				p.intelRdtManager.Destroy()
+			}
+		}
+	}()
+
 	if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
 		return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
 	}
+
 	if err := p.execSetns(); err != nil {
 		return newSystemErrorWithCause(err, "running exec setns process for init")
 	}
+
 	// Save the standard descriptor names before the container process
 	// can potentially move them (e.g., via dup2()).  If we don't do this now,
 	// we won't know at checkpoint time which file descriptor to look up.
@@ -275,18 +309,6 @@ func (p *initProcess) start() error {
 		return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
 	}
 	p.setExternalDescriptors(fds)
-	// Do this before syncing with child so that no children can escape the
-	// cgroup. We don't need to worry about not doing this and not being root
-	// because we'd be using the rootless cgroup manager in that case.
-	if err := p.manager.Apply(p.pid()); err != nil {
-		return newSystemErrorWithCause(err, "applying cgroup configuration for process")
-	}
-	defer func() {
-		if err != nil {
-			// TODO: should not be the responsibility to call here
-			p.manager.Destroy()
-		}
-	}()
 	if err := p.createNetworkInterfaces(); err != nil {
 		return newSystemErrorWithCause(err, "creating network interfaces")
 	}
@@ -312,6 +334,11 @@ func (p *initProcess) start() error {
 				if err := p.manager.Set(p.config.Config); err != nil {
 					return newSystemErrorWithCause(err, "setting cgroup config for ready process")
 				}
+				if p.intelRdtManager != nil {
+					if err := p.intelRdtManager.Set(p.config.Config); err != nil {
+						return newSystemErrorWithCause(err, "setting Intel RDT config for ready process")
+					}
+				}

 				if p.config.Config.Hooks != nil {
 					s := configs.HookState{
@@ -337,6 +364,11 @@ func (p *initProcess) start() error {
 			if err := p.manager.Set(p.config.Config); err != nil {
 				return newSystemErrorWithCause(err, "setting cgroup config for procHooks process")
 			}
+			if p.intelRdtManager != nil {
+				if err := p.intelRdtManager.Set(p.config.Config); err != nil {
+					return newSystemErrorWithCause(err, "setting Intel RDT config for procHooks process")
+				}
+			}
 			if p.config.Config.Hooks != nil {
 				s := configs.HookState{
 					Version: p.container.config.Version,
--- a/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
@@ -13,11 +13,11 @@ import (
 	"strings"
 	"time"

-	"github.com/docker/docker/pkg/mount"
-	"github.com/docker/docker/pkg/symlink"
+	"github.com/cyphar/filepath-securejoin"
 	"github.com/mrunalp/fileutils"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/mount"
 	"github.com/opencontainers/runc/libcontainer/system"
 	libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
 	"github.com/opencontainers/selinux/go-selinux/label"
@@ -40,7 +40,8 @@ func needsSetupDev(config *configs.Config) bool {
 // prepareRootfs sets up the devices, mount points, and filesystems for use
 // inside a new mount namespace. It doesn't set anything as ro. You must call
 // finalizeRootfs after this function to finish setting up the rootfs.
-func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) {
+func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
+	config := iConfig.Config
 	if err := prepareRoot(config); err != nil {
 		return newSystemErrorWithCause(err, "preparing rootfs")
 	}
@@ -80,6 +81,7 @@ func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) {
 	// The hooks are run after the mounts are setup, but before we switch to the new
 	// root, so that the old root is still available in the hooks for any mount
 	// manipulations.
+	// Note that iConfig.Cwd is not guaranteed to exist here.
 	if err := syncParentHooks(pipe); err != nil {
 		return err
 	}
@@ -111,6 +113,14 @@ func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) {
 		}
 	}

+	if cwd := iConfig.Cwd; cwd != "" {
+		// Note that spec.Process.Cwd can contain unclean value like  "../../../../foo/bar...".
+		// However, we are safe to call MkDirAll directly because we are in the jail here.
+		if err := os.MkdirAll(cwd, 0755); err != nil {
+			return err
+		}
+	}
+
 	return nil
 }

@@ -230,7 +240,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 		// any previous mounts can invalidate the next mount's destination.
 		// this can happen when a user specifies mounts within other mounts to cause breakouts or other
 		// evil stuff to try to escape the container's rootfs.
-		if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil {
+		if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
 			return err
 		}
 		if err := checkMountDestination(rootfs, dest); err != nil {
@@ -318,7 +328,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 		// this can happen when a user specifies mounts within other mounts to cause breakouts or other
 		// evil stuff to try to escape the container's rootfs.
 		var err error
-		if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil {
+		if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
 			return err
 		}
 		if err := checkMountDestination(rootfs, dest); err != nil {
@@ -668,9 +678,12 @@ func pivotRoot(rootfs string) error {
 		return err
 	}

-	// Make oldroot rprivate to make sure our unmounts don't propagate to the
-	// host (and thus bork the machine).
-	if err := unix.Mount("", ".", "", unix.MS_PRIVATE|unix.MS_REC, ""); err != nil {
+	// Make oldroot rslave to make sure our unmounts don't propagate to the
+	// host (and thus bork the machine). We don't use rprivate because this is
+	// known to cause issues due to races where we still have a reference to a
+	// mount while a process in the host namespace are trying to operate on
+	// something they think has no mounts (devicemapper in particular).
+	if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
 		return err
 	}
 	// Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
@@ -733,7 +746,14 @@ func remountReadonly(m *configs.Mount) error {
 		flags = m.Flags
 	)
 	for i := 0; i < 5; i++ {
-		if err := unix.Mount("", dest, "", uintptr(flags|unix.MS_REMOUNT|unix.MS_RDONLY), ""); err != nil {
+		// There is a special case in the kernel for
+		// MS_REMOUNT | MS_BIND, which allows us to change only the
+		// flags even as an unprivileged user (i.e. user namespace)
+		// assuming we don't drop any security related flags (nodev,
+		// nosuid, etc.). So, let's use that case so that we can do
+		// this re-mount without failing in a userns.
+		flags |= unix.MS_REMOUNT | unix.MS_BIND | unix.MS_RDONLY
+		if err := unix.Mount("", dest, "", uintptr(flags), ""); err != nil {
 			switch err {
 			case unix.EBUSY:
 				time.Sleep(100 * time.Millisecond)
--- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
@@ -22,6 +22,11 @@ var (
 	actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM))
 )

+const (
+	// Linux system calls can have at most 6 arguments
+	syscallMaxArguments int = 6
+)
+
 // Filters given syscalls in a container, preventing them from being used
 // Started in the container init process, and carried over to all child processes
 // Setns calls, however, require a separate invocation, as they are not children
@@ -45,11 +50,11 @@ func InitSeccomp(config *configs.Seccomp) error {
 	for _, arch := range config.Architectures {
 		scmpArch, err := libseccomp.GetArchFromString(arch)
 		if err != nil {
-			return err
+			return fmt.Errorf("error validating Seccomp architecture: %s", err)
 		}

 		if err := filter.AddArch(scmpArch); err != nil {
-			return err
+			return fmt.Errorf("error adding architecture to seccomp filter: %s", err)
 		}
 	}

@@ -170,29 +175,55 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
 	// Convert the call's action to the libseccomp equivalent
 	callAct, err := getAction(call.Action)
 	if err != nil {
-		return err
+		return fmt.Errorf("action in seccomp profile is invalid: %s", err)
 	}

 	// Unconditional match - just add the rule
 	if len(call.Args) == 0 {
 		if err = filter.AddRule(callNum, callAct); err != nil {
-			return err
+			return fmt.Errorf("error adding seccomp filter rule for syscall %s: %s", call.Name, err)
 		}
 	} else {
-		// Conditional match - convert the per-arg rules into library format
+		// If two or more conditions apply to the same argument index, revert
+		// to the old behavior and add each condition as a separate rule.
+		argCounts := make([]uint, syscallMaxArguments)
 		conditions := []libseccomp.ScmpCondition{}

 		for _, cond := range call.Args {
 			newCond, err := getCondition(cond)
 			if err != nil {
-				return err
+				return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %s", call.Name, err)
 			}

+			argCounts[cond.Index] += 1
+
 			conditions = append(conditions, newCond)
 		}

-		if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
-			return err
+		hasMultipleArgs := false
+		for _, count := range argCounts {
+			if count > 1 {
+				hasMultipleArgs = true
+				break
+			}
+		}
+
+		if hasMultipleArgs {
+			// Revert to old behavior
+			// Add each condition attached to a separate rule
+			for _, cond := range conditions {
+				condArr := []libseccomp.ScmpCondition{cond}
+
+				if err = filter.AddRuleConditional(callNum, callAct, condArr); err != nil {
+					return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err)
+				}
+			}
+		} else {
+			// No conditions share same argument
+			// Use new, proper behavior
+			if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
+				return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err)
+			}
 		}
 	}

--- a/vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go
@@ -1,11 +0,0 @@
-// +build linux,go1.5
-
-package libcontainer
-
-import "syscall"
-
-// Set the GidMappingsEnableSetgroups member to true, so the process's
-// setgroups proc entry wont be set to 'deny' if GidMappings are set
-func enableSetgroups(sys *syscall.SysProcAttr) {
-	sys.GidMappingsEnableSetgroups = true
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
@@ -47,7 +47,10 @@ func (l *linuxSetnsInit) Init() error {
 			return err
 		}
 	}
-	if l.config.Config.Seccomp != nil {
+	// Without NoNewPrivileges seccomp is a privileged operation, so we need to
+	// do this before dropping capabilities; otherwise do it as late as possible
+	// just before execve so as few syscalls take place after it as possible.
+	if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
 		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
 			return err
 		}
@@ -61,5 +64,13 @@ func (l *linuxSetnsInit) Init() error {
 	if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
 		return err
 	}
+	// Set seccomp as close to execve as possible, so as few syscalls take
+	// place afterward (reducing the amount of syscalls that users need to
+	// enable in their seccomp profiles).
+	if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
+		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
+			return newSystemErrorWithCause(err, "init seccomp")
+		}
+	}
 	return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
@@ -30,15 +30,15 @@ func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
 	var newperms uint32

 	if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
-		// with user ns we need 'other' search permissions
+		// With user ns we need 'other' search permissions.
 		newperms = 0x8
 	} else {
-		// without user ns we need 'UID' search permissions
+		// Without user ns we need 'UID' search permissions.
 		newperms = 0x80000
 	}

-	// create a unique per session container name that we can
-	// join in setns; however, other containers can also join it
+	// Create a unique per session container name that we can join in setns;
+	// however, other containers can also join it.
 	return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms
 }

@@ -46,12 +46,12 @@ func (l *linuxStandardInit) Init() error {
 	if !l.config.Config.NoNewKeyring {
 		ringname, keepperms, newperms := l.getSessionRingParams()

-		// do not inherit the parent's session keyring
+		// Do not inherit the parent's session keyring.
 		sessKeyId, err := keys.JoinSessionKeyring(ringname)
 		if err != nil {
 			return err
 		}
-		// make session keyring searcheable
+		// Make session keyring searchable.
 		if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
 			return err
 		}
@@ -68,7 +68,7 @@ func (l *linuxStandardInit) Init() error {

 	// prepareRootfs() can be executed only for a new mount namespace.
 	if l.config.Config.Namespaces.Contains(configs.NEWNS) {
-		if err := prepareRootfs(l.pipe, l.config.Config); err != nil {
+		if err := prepareRootfs(l.pipe, l.config); err != nil {
 			return err
 		}
 	}
@@ -150,19 +150,20 @@ func (l *linuxStandardInit) Init() error {
 	if err := pdeath.Restore(); err != nil {
 		return err
 	}
-	// compare the parent from the initial start of the init process and make sure that it did not change.
-	// if the parent changes that means it died and we were reparented to something else so we should
-	// just kill ourself and not cause problems for someone else.
+	// Compare the parent from the initial start of the init process and make
+	// sure that it did not change. If the parent changes, that means it died
+	// and we were reparented to something else so we should just kill ourselves
+	// and not cause problems for someone else.
 	if unix.Getppid() != l.parentPid {
 		return unix.Kill(unix.Getpid(), unix.SIGKILL)
 	}
-	// check for the arg before waiting to make sure it exists and it is returned
-	// as a create time error.
+	// Check for the arg before waiting to make sure it exists and it is
+	// returned as a create time error.
 	name, err := exec.LookPath(l.config.Args[0])
 	if err != nil {
 		return err
 	}
-	// close the pipe to signal that we have completed our init.
+	// Close the pipe to signal that we have completed our init.
 	l.pipe.Close()
 	// Wait for the FIFO to be opened on the other side before exec-ing the
 	// user process. We open it through /proc/self/fd/$fd, because the fd that
@@ -170,19 +171,26 @@ func (l *linuxStandardInit) Init() error {
 	// re-open an O_PATH fd through /proc.
 	fd, err := unix.Open(fmt.Sprintf("/proc/self/fd/%d", l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0)
 	if err != nil {
-		return newSystemErrorWithCause(err, "openat exec fifo")
+		return newSystemErrorWithCause(err, "open exec fifo")
 	}
 	if _, err := unix.Write(fd, []byte("0")); err != nil {
 		return newSystemErrorWithCause(err, "write 0 exec fifo")
 	}
+	// Close the O_PATH fifofd fd before exec because the kernel resets
+	// dumpable in the wrong order. This has been fixed in newer kernels, but
+	// we keep this to ensure CVE-2016-9962 doesn't re-emerge on older kernels.
+	// N.B. the core issue itself (passing dirfds to the host filesystem) has
+	// since been resolved.
+	// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
+	unix.Close(l.fifoFd)
+	// Set seccomp as close to execve as possible, so as few syscalls take
+	// place afterward (reducing the amount of syscalls that users need to
+	// enable in their seccomp profiles).
 	if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
 		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
 			return newSystemErrorWithCause(err, "init seccomp")
 		}
 	}
-	// close the statedir fd before exec because the kernel resets dumpable in the wrong order
-	// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
-	unix.Close(l.fifoFd)
 	if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
 		return newSystemErrorWithCause(err, "exec user process")
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
@@ -45,6 +45,11 @@ func destroy(c *linuxContainer) error {
 		}
 	}
 	err := c.cgroupManager.Destroy()
+	if c.intelRdtManager != nil {
+		if ierr := c.intelRdtManager.Destroy(); err == nil {
+			err = ierr
+		}
+	}
 	if rerr := os.RemoveAll(c.root); err == nil {
 		err = rerr
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/stats_freebsd.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/stats_freebsd.go
@@ -1,5 +0,0 @@
-package libcontainer
-
-type Stats struct {
-	Interfaces []*NetworkInterface
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go
@@ -1,8 +1,10 @@
 package libcontainer

 import "github.com/opencontainers/runc/libcontainer/cgroups"
+import "github.com/opencontainers/runc/libcontainer/intelrdt"

 type Stats struct {
-	Interfaces  []*NetworkInterface
-	CgroupStats *cgroups.Stats
+	Interfaces    []*NetworkInterface
+	CgroupStats   *cgroups.Stats
+	IntelRdtStats *intelrdt.Stats
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/stats_solaris.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/stats_solaris.go
@@ -1,7 +0,0 @@
-package libcontainer
-
-// Solaris - TODO
-
-type Stats struct {
-	Interfaces []*NetworkInterface
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/stats_windows.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/stats_windows.go
@@ -1,5 +0,0 @@
-package libcontainer
-
-type Stats struct {
-	Interfaces []*NetworkInterface
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/BUILD
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/BUILD
@@ -18,7 +18,6 @@ go_library(
            "unsupported.go",
        ],
        "@io_bazel_rules_go//go/platform:freebsd": [
-            "sysconfig.go",
            "sysconfig_notcgo.go",
            "unsupported.go",
        ],
@@ -55,17 +54,29 @@ go_library(
        "//conditions:default": [],
    }) + select({
        "@io_bazel_rules_go//go/platform:linux_386": [
-            "syscall_linux_386.go",
+            "syscall_linux_32.go",
        ],
        "@io_bazel_rules_go//go/platform:linux_amd64": [
            "syscall_linux_64.go",
        ],
        "@io_bazel_rules_go//go/platform:linux_arm": [
-            "syscall_linux_arm.go",
+            "syscall_linux_32.go",
        ],
        "@io_bazel_rules_go//go/platform:linux_arm64": [
            "syscall_linux_64.go",
        ],
+        "@io_bazel_rules_go//go/platform:linux_mips": [
+            "syscall_linux_64.go",
+        ],
+        "@io_bazel_rules_go//go/platform:linux_mips64": [
+            "syscall_linux_64.go",
+        ],
+        "@io_bazel_rules_go//go/platform:linux_mips64le": [
+            "syscall_linux_64.go",
+        ],
+        "@io_bazel_rules_go//go/platform:linux_mipsle": [
+            "syscall_linux_64.go",
+        ],
        "@io_bazel_rules_go//go/platform:linux_ppc64": [
            "syscall_linux_64.go",
        ],
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
@@ -134,3 +134,14 @@ func RunningInUserNS() bool {
 func SetSubreaper(i int) error {
 	return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
 }
+
+// GetSubreaper returns the calling process's child-subreaper setting, or -1 and an error if prctl fails.
+func GetSubreaper() (int, error) {
+	// prctl(2) stores the result through an (int *), i.e. 4 bytes; a wider
+	// variable would read back wrong on big-endian 64-bit targets.
+	var i int32
+	if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
+		return -1, err
+	}
+	return int(i), nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go
@@ -1,4 +1,5 @@
-// +build linux,arm
+// +build linux
+// +build 386 arm

 package system

--- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go
@@ -1,25 +0,0 @@
-// +build linux,386
-
-package system
-
-import (
-	"golang.org/x/sys/unix"
-)
-
-// Setuid sets the uid of the calling thread to the specified uid.
-func Setuid(uid int) (err error) {
-	_, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0)
-	if e1 != 0 {
-		err = e1
-	}
-	return
-}
-
-// Setgid sets the gid of the calling thread to the specified gid.
-func Setgid(gid int) (err error) {
-	_, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0)
-	if e1 != 0 {
-		err = e1
-	}
-	return
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go
@@ -1,4 +1,5 @@
-// +build linux,arm64 linux,amd64 linux,ppc linux,ppc64 linux,ppc64le linux,s390x
+// +build linux
+// +build arm64 amd64 mips mipsle mips64 mips64le ppc ppc64 ppc64le s390x

 package system

--- a/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go
@@ -1,4 +1,4 @@
-// +build cgo,linux cgo,freebsd
+// +build cgo,linux

 package system

--- a/vendor/github.com/opencontainers/runc/libcontainer/user/BUILD
+++ b/vendor/github.com/opencontainers/runc/libcontainer/user/BUILD
@@ -6,9 +6,6 @@ go_library(
        "lookup.go",
        "user.go",
    ] + select({
-        "@io_bazel_rules_go//go/platform:android": [
-            "lookup_unsupported.go",
-        ],
        "@io_bazel_rules_go//go/platform:darwin": [
            "lookup_unix.go",
        ],
@@ -21,24 +18,15 @@ go_library(
        "@io_bazel_rules_go//go/platform:linux": [
            "lookup_unix.go",
        ],
-        "@io_bazel_rules_go//go/platform:nacl": [
-            "lookup_unsupported.go",
-        ],
        "@io_bazel_rules_go//go/platform:netbsd": [
            "lookup_unix.go",
        ],
        "@io_bazel_rules_go//go/platform:openbsd": [
            "lookup_unix.go",
        ],
-        "@io_bazel_rules_go//go/platform:plan9": [
-            "lookup_unsupported.go",
-        ],
        "@io_bazel_rules_go//go/platform:solaris": [
            "lookup_unix.go",
        ],
-        "@io_bazel_rules_go//go/platform:windows": [
-            "lookup_unsupported.go",
-        ],
        "//conditions:default": [],
    }),
    importpath = "github.com/opencontainers/runc/libcontainer/user",
--- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unsupported.go
@@ -1,38 +0,0 @@
-// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris
-
-package user
-
-import (
-	"io"
-	"syscall"
-)
-
-func GetPasswdPath() (string, error) {
-	return "", ErrUnsupported
-}
-
-func GetPasswd() (io.ReadCloser, error) {
-	return nil, ErrUnsupported
-}
-
-func GetGroupPath() (string, error) {
-	return "", ErrUnsupported
-}
-
-func GetGroup() (io.ReadCloser, error) {
-	return nil, ErrUnsupported
-}
-
-// CurrentUser looks up the current user by their user id in /etc/passwd. If the
-// user cannot be found (or there is no /etc/passwd file on the filesystem),
-// then CurrentUser returns an error.
-func CurrentUser() (User, error) {
-	return LookupUid(syscall.Getuid())
-}
-
-// CurrentGroup looks up the current user's group by their primary group id's
-// entry in /etc/passwd. If the group cannot be found (or there is no
-// /etc/group file on the filesystem), then CurrentGroup returns an error.
-func CurrentGroup() (Group, error) {
-	return LookupGid(syscall.Getgid())
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
@@ -84,12 +84,10 @@ func RecvFd(socket *os.File) (*os.File, error) {
 // addition, the file.Name() of the given file will also be sent as
 // non-auxiliary data in the same payload (allowing to send contextual
 // information for a file descriptor).
-func SendFd(socket, file *os.File) error {
-	name := []byte(file.Name())
+func SendFd(socket *os.File, name string, fd uintptr) error {
 	if len(name) >= MaxNameLen {
-		return fmt.Errorf("sendfd: filename too long: %s", file.Name())
+		return fmt.Errorf("sendfd: filename too long: %s", name)
 	}
-	oob := unix.UnixRights(int(file.Fd()))
-
-	return unix.Sendmsg(int(socket.Fd()), name, oob, nil, 0)
+	oob := unix.UnixRights(int(fd))
+	return unix.Sendmsg(int(socket.Fd()), []byte(name), oob, nil, 0)
 }