oci.WithPrivileged: set the current caps, not the known caps

This change is needed for running the latest containerd inside Docker
that is not aware of the recently added caps (BPF, PERFMON, CHECKPOINT_RESTORE).

Without this change, containerd inside Docker fails to run containers with
"apply caps: operation not permitted" error.

See kubernetes-sigs/kind 2058

NOTE: The caller process of this function is now assumed to be as
privileged as possible.

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
Akihiro Suda
2021-02-06 20:43:01 +09:00
parent ddcc431c11
commit a2d1a8a865
12 changed files with 424 additions and 35 deletions

View File

@@ -354,7 +354,7 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp
}
// WithCapabilities sets the provided capabilities from the security context
func WithCapabilities(sc *runtime.LinuxContainerSecurityContext) oci.SpecOpts {
func WithCapabilities(sc *runtime.LinuxContainerSecurityContext, allCaps []string) oci.SpecOpts {
capabilities := sc.GetCapabilities()
if capabilities == nil {
return nullOpt
@@ -366,7 +366,7 @@ func WithCapabilities(sc *runtime.LinuxContainerSecurityContext) oci.SpecOpts {
// AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"}
// will be all capabilities without `CAP_CHOWN`.
if util.InStringSlice(capabilities.GetAddCapabilities(), "ALL") {
opts = append(opts, oci.WithAllCapabilities)
opts = append(opts, oci.WithCapabilities(allCaps))
}
if util.InStringSlice(capabilities.GetDropCapabilities(), "ALL") {
opts = append(opts, oci.WithCapabilities(nil))

View File

@@ -217,10 +217,12 @@ func (c *criService) containerSpec(
specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed)
} else {
// add requested devices by the config as host devices are not automatically added
specOpts = append(specOpts, customopts.WithDevices(c.os, config), customopts.WithCapabilities(securityContext))
specOpts = append(specOpts, customopts.WithDevices(c.os, config),
customopts.WithCapabilities(securityContext, c.allCaps))
}
} else { // not privileged
specOpts = append(specOpts, customopts.WithDevices(c.os, config), customopts.WithCapabilities(securityContext))
specOpts = append(specOpts, customopts.WithDevices(c.os, config),
customopts.WithCapabilities(securityContext, c.allCaps))
}
// Clear all ambient capabilities. The implication of non-root + caps

View File

@@ -39,6 +39,7 @@ import (
"github.com/stretchr/testify/require"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"github.com/containerd/containerd/pkg/cap"
"github.com/containerd/containerd/pkg/cri/annotations"
"github.com/containerd/containerd/pkg/cri/config"
"github.com/containerd/containerd/pkg/cri/opts"
@@ -191,6 +192,7 @@ func TestContainerCapabilities(t *testing.T) {
testSandboxID := "sandbox-id"
testContainerName := "container-name"
testPid := uint32(1234)
allCaps := cap.Known()
for desc, test := range map[string]struct {
capability *runtime.Capability
includes []string
@@ -208,20 +210,20 @@ func TestContainerCapabilities(t *testing.T) {
capability: &runtime.Capability{
AddCapabilities: []string{"ALL"},
},
includes: oci.GetAllCapabilities(),
includes: allCaps,
},
"should be able to drop all capabilities": {
capability: &runtime.Capability{
DropCapabilities: []string{"ALL"},
},
excludes: oci.GetAllCapabilities(),
excludes: allCaps,
},
"should be able to drop capabilities with add all": {
capability: &runtime.Capability{
AddCapabilities: []string{"ALL"},
DropCapabilities: []string{"CHOWN"},
},
includes: util.SubtractStringSlice(oci.GetAllCapabilities(), "CAP_CHOWN"),
includes: util.SubtractStringSlice(allCaps, "CAP_CHOWN"),
excludes: []string{"CAP_CHOWN"},
},
"should be able to add capabilities with drop all": {
@@ -230,13 +232,14 @@ func TestContainerCapabilities(t *testing.T) {
DropCapabilities: []string{"ALL"},
},
includes: []string{"CAP_SYS_ADMIN"},
excludes: util.SubtractStringSlice(oci.GetAllCapabilities(), "CAP_SYS_ADMIN"),
excludes: util.SubtractStringSlice(allCaps, "CAP_SYS_ADMIN"),
},
} {
t.Logf("TestCase %q", desc)
containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
ociRuntime := config.Runtime{}
c := newTestCRIService()
c.allCaps = allCaps
containerConfig.Linux.SecurityContext.Capabilities = test.capability
spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)

View File

@@ -101,6 +101,9 @@ type criService struct {
cniNetConfMonitor *cniNetConfSyncer
// baseOCISpecs contains cached OCI specs loaded via `Runtime.BaseRuntimeSpec`
baseOCISpecs map[string]*oci.Spec
// allCaps is the list of the capabilities.
// When nil, parsed from CapEff of /proc/self/status.
allCaps []string // nolint
}
// NewCRIService returns a new instance of CRIService

View File

@@ -17,6 +17,7 @@
package server
import (
"github.com/containerd/containerd/pkg/cap"
"github.com/containerd/containerd/sys"
cni "github.com/containerd/go-cni"
"github.com/opencontainers/selinux/go-selinux"
@@ -61,6 +62,13 @@ func (c *criService) initPlatform() error {
return errors.Wrap(err, "failed to initialize cni")
}
if c.allCaps == nil {
c.allCaps, err = cap.Current()
if err != nil {
return errors.Wrap(err, "failed to get caps")
}
}
return nil
}