diff --git a/oci/spec_opts.go b/oci/spec_opts.go index ba5ec76a5..bf95bc6f0 100644 --- a/oci/spec_opts.go +++ b/oci/spec_opts.go @@ -38,7 +38,6 @@ import ( "github.com/opencontainers/runc/libcontainer/user" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" - "github.com/syndtr/gocapability/capability" ) // SpecOpts sets spec specific information to a newly generated OCI spec @@ -776,29 +775,6 @@ func WithCapabilities(caps []string) SpecOpts { } } -// WithAllCapabilities sets all linux capabilities for the process -var WithAllCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error { - return WithCapabilities(GetAllCapabilities())(ctx, client, c, s) -} - -// GetAllCapabilities returns all caps up to CAP_LAST_CAP -// or CAP_BLOCK_SUSPEND on RHEL6 -func GetAllCapabilities() []string { - last := capability.CAP_LAST_CAP - // hack for RHEL6 which has no /proc/sys/kernel/cap_last_cap - if last == capability.Cap(63) { - last = capability.CAP_BLOCK_SUSPEND - } - var caps []string - for _, cap := range capability.List() { - if cap > last { - continue - } - caps = append(caps, "CAP_"+strings.ToUpper(cap.String())) - } - return caps -} - func capsContain(caps []string, s string) bool { for _, c := range caps { if c == s { @@ -1132,7 +1108,7 @@ func WithDefaultUnixDevices(_ context.Context, _ Client, _ *containers.Container // WithPrivileged sets up options for a privileged container var WithPrivileged = Compose( - WithAllCapabilities, + WithAllCurrentCapabilities, WithMaskedPaths(nil), WithReadonlyPaths(nil), WithWriteableSysfs, diff --git a/oci/spec_opts_linux.go b/oci/spec_opts_linux.go index 82219169a..db883af72 100644 --- a/oci/spec_opts_linux.go +++ b/oci/spec_opts_linux.go @@ -25,6 +25,7 @@ import ( "path/filepath" "github.com/containerd/containerd/containers" + "github.com/containerd/containerd/pkg/cap" specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" ) @@ -180,3 +181,19 
@@ func WithCPUCFS(quota int64, period uint64) SpecOpts { return nil } } + +// WithAllCurrentCapabilities propagates the effective capabilities of the caller process to the container process. +// The capability set may differ from WithAllKnownCapabilities when running in a container. +var WithAllCurrentCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error { + caps, err := cap.Current() + if err != nil { + return err + } + return WithCapabilities(caps)(ctx, client, c, s) +} + +// WithAllKnownCapabilities sets all the known linux capabilities for the container process +var WithAllKnownCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error { + caps := cap.Known() + return WithCapabilities(caps)(ctx, client, c, s) +} diff --git a/oci/spec_opts_linux_test.go b/oci/spec_opts_linux_test.go new file mode 100644 index 000000000..c2080d472 --- /dev/null +++ b/oci/spec_opts_linux_test.go @@ -0,0 +1,108 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package oci + +import ( + "context" + "testing" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func TestAddCaps(t *testing.T) { + t.Parallel() + + var s specs.Spec + + if err := WithAddedCapabilities([]string{"CAP_CHOWN"})(context.Background(), nil, nil, &s); err != nil { + t.Fatal(err) + } + for i, cl := range [][]string{ + s.Process.Capabilities.Bounding, + s.Process.Capabilities.Effective, + s.Process.Capabilities.Permitted, + s.Process.Capabilities.Inheritable, + } { + if !capsContain(cl, "CAP_CHOWN") { + t.Errorf("cap list %d does not contain added cap", i) + } + } +} + +func TestDropCaps(t *testing.T) { + t.Parallel() + + var s specs.Spec + + if err := WithAllKnownCapabilities(context.Background(), nil, nil, &s); err != nil { + t.Fatal(err) + } + if err := WithDroppedCapabilities([]string{"CAP_CHOWN"})(context.Background(), nil, nil, &s); err != nil { + t.Fatal(err) + } + + for i, cl := range [][]string{ + s.Process.Capabilities.Bounding, + s.Process.Capabilities.Effective, + s.Process.Capabilities.Permitted, + s.Process.Capabilities.Inheritable, + } { + if capsContain(cl, "CAP_CHOWN") { + t.Errorf("cap list %d contains dropped cap", i) + } + } + + // Add all capabilities back and drop a different cap. + if err := WithAllKnownCapabilities(context.Background(), nil, nil, &s); err != nil { + t.Fatal(err) + } + if err := WithDroppedCapabilities([]string{"CAP_FOWNER"})(context.Background(), nil, nil, &s); err != nil { + t.Fatal(err) + } + + for i, cl := range [][]string{ + s.Process.Capabilities.Bounding, + s.Process.Capabilities.Effective, + s.Process.Capabilities.Permitted, + s.Process.Capabilities.Inheritable, + } { + if capsContain(cl, "CAP_FOWNER") { + t.Errorf("cap list %d contains dropped cap", i) + } + if !capsContain(cl, "CAP_CHOWN") { + t.Errorf("cap list %d doesn't contain non-dropped cap", i) + } + } + + // Drop all duplicated caps. 
+ if err := WithCapabilities([]string{"CAP_CHOWN", "CAP_CHOWN"})(context.Background(), nil, nil, &s); err != nil { + t.Fatal(err) + } + if err := WithDroppedCapabilities([]string{"CAP_CHOWN"})(context.Background(), nil, nil, &s); err != nil { + t.Fatal(err) + } + for i, cl := range [][]string{ + s.Process.Capabilities.Bounding, + s.Process.Capabilities.Effective, + s.Process.Capabilities.Permitted, + s.Process.Capabilities.Inheritable, + } { + if len(cl) != 0 { + t.Errorf("cap list %d is not empty", i) + } + } +} diff --git a/oci/spec_opts_nonlinux.go b/oci/spec_opts_nonlinux.go new file mode 100644 index 000000000..77a163638 --- /dev/null +++ b/oci/spec_opts_nonlinux.go @@ -0,0 +1,38 @@ +// +build !linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package oci + +import ( + "context" + + "github.com/containerd/containerd/containers" +) + +// WithAllCurrentCapabilities propagates the effective capabilities of the caller process to the container process. +// The capability set may differ from WithAllKnownCapabilities when running in a container. 
+//nolint: deadcode, unused +var WithAllCurrentCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error { + return WithCapabilities(nil)(ctx, client, c, s) +} + +// WithAllKnownCapabilities sets all the known linux capabilities for the container process +//nolint: deadcode, unused +var WithAllKnownCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error { + return WithCapabilities(nil)(ctx, client, c, s) +} diff --git a/oci/spec_opts_test.go b/oci/spec_opts_test.go index 12726fed4..d0d585a38 100644 --- a/oci/spec_opts_test.go +++ b/oci/spec_opts_test.go @@ -549,90 +549,6 @@ func TestWithImageConfigArgs(t *testing.T) { } } -func TestAddCaps(t *testing.T) { - t.Parallel() - - var s specs.Spec - - if err := WithAddedCapabilities([]string{"CAP_CHOWN"})(context.Background(), nil, nil, &s); err != nil { - t.Fatal(err) - } - for i, cl := range [][]string{ - s.Process.Capabilities.Bounding, - s.Process.Capabilities.Effective, - s.Process.Capabilities.Permitted, - s.Process.Capabilities.Inheritable, - } { - if !capsContain(cl, "CAP_CHOWN") { - t.Errorf("cap list %d does not contain added cap", i) - } - } -} - -func TestDropCaps(t *testing.T) { - t.Parallel() - - var s specs.Spec - - if err := WithAllCapabilities(context.Background(), nil, nil, &s); err != nil { - t.Fatal(err) - } - if err := WithDroppedCapabilities([]string{"CAP_CHOWN"})(context.Background(), nil, nil, &s); err != nil { - t.Fatal(err) - } - - for i, cl := range [][]string{ - s.Process.Capabilities.Bounding, - s.Process.Capabilities.Effective, - s.Process.Capabilities.Permitted, - s.Process.Capabilities.Inheritable, - } { - if capsContain(cl, "CAP_CHOWN") { - t.Errorf("cap list %d contains dropped cap", i) - } - } - - // Add all capabilities back and drop a different cap. 
- if err := WithAllCapabilities(context.Background(), nil, nil, &s); err != nil { - t.Fatal(err) - } - if err := WithDroppedCapabilities([]string{"CAP_FOWNER"})(context.Background(), nil, nil, &s); err != nil { - t.Fatal(err) - } - - for i, cl := range [][]string{ - s.Process.Capabilities.Bounding, - s.Process.Capabilities.Effective, - s.Process.Capabilities.Permitted, - s.Process.Capabilities.Inheritable, - } { - if capsContain(cl, "CAP_FOWNER") { - t.Errorf("cap list %d contains dropped cap", i) - } - if !capsContain(cl, "CAP_CHOWN") { - t.Errorf("cap list %d doesn't contain non-dropped cap", i) - } - } - - // Drop all duplicated caps. - if err := WithCapabilities([]string{"CAP_CHOWN", "CAP_CHOWN"})(context.Background(), nil, nil, &s); err != nil { - t.Fatal(err) - } - if err := WithDroppedCapabilities([]string{"CAP_CHOWN"})(context.Background(), nil, nil, &s); err != nil { - t.Fatal(err) - } - for i, cl := range [][]string{ - s.Process.Capabilities.Bounding, - s.Process.Capabilities.Effective, - s.Process.Capabilities.Permitted, - s.Process.Capabilities.Inheritable, - } { - if len(cl) != 0 { - t.Errorf("cap list %d is not empty", i) - } - } -} - func TestDevShmSize(t *testing.T) { t.Parallel() var ( diff --git a/oci/spec_test.go b/oci/spec_test.go index e36eac72c..9e9f98134 100644 --- a/oci/spec_test.go +++ b/oci/spec_test.go @@ -23,6 +23,7 @@ import ( "github.com/containerd/containerd/containers" "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/pkg/testutil" specs "github.com/opencontainers/runtime-spec/specs-go" ) @@ -251,6 +252,10 @@ func TestPopulateDefaultUnixSpec(t *testing.T) { func TestWithPrivileged(t *testing.T) { t.Parallel() + if runtime.GOOS == "linux" { + // because WithPrivileged depends on CapEff in /proc/self/status + testutil.RequiresRoot(t) + } ctx := namespaces.WithNamespace(context.Background(), "testing") @@ -272,6 +277,10 @@ func TestWithPrivileged(t *testing.T) { t.Fatal(err) } + if runtime.GOOS != "linux" 
{ + return + } + if len(s.Process.Capabilities.Bounding) == 0 { t.Error("Expected capabilities to be set with privileged") } diff --git a/pkg/cap/cap_linux.go b/pkg/cap/cap_linux.go new file mode 100644 index 000000000..e14acad76 --- /dev/null +++ b/pkg/cap/cap_linux.go @@ -0,0 +1,172 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Package cap provides Linux capability utility +package cap + +import ( + "bufio" + "io" + "os" + "strconv" + "strings" + + "github.com/pkg/errors" + "github.com/syndtr/gocapability/capability" +) + +// FromUint64 parses an integer into string slice like +// []{"CAP_SYS_ADMIN", ...}. +// +// Unknown cap numbers are returned as []int. 
+func FromUint64(v uint64) ([]string, []int) { + var ( + res []string + unknown []int + ) + knownList := capability.List() + known := make(map[string]struct{}, len(knownList)) + for _, f := range knownList { + known[f.String()] = struct{}{} + } + for i := 0; i <= 63; i++ { + if b := (v >> i) & 0x1; b == 0x1 { + c := capability.Cap(i) + sRaw := c.String() + if _, ok := known[sRaw]; ok { + s := "CAP_" + strings.ToUpper(sRaw) + res = append(res, s) + } else { + unknown = append(unknown, i) + } + } + } + return res, unknown +} + +// ParseProcPIDStatus returns uint64 value from /proc/[pid]/status file +func ParseProcPIDStatus(r io.Reader) (map[capability.CapType]uint64, error) { + res := make(map[capability.CapType]uint64) + scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := scanner.Text() + pair := strings.SplitN(line, ":", 2) + if len(pair) != 2 { + continue + } + k := strings.TrimSpace(pair[0]) + v := strings.TrimSpace(pair[1]) + switch k { + case "CapInh", "CapPrm", "CapEff", "CapBnd", "CapAmb": + ui64, err := strconv.ParseUint(v, 16, 64) + if err != nil { + return nil, errors.Errorf("failed to parse line %q", line) + } + switch k { + case "CapInh": + res[capability.INHERITABLE] = ui64 + case "CapPrm": + res[capability.PERMITTED] = ui64 + case "CapEff": + res[capability.EFFECTIVE] = ui64 + case "CapBnd": + res[capability.BOUNDING] = ui64 + case "CapAmb": + res[capability.AMBIENT] = ui64 + } + } + } + if err := scanner.Err(); err != nil { + return nil, err + } + return res, nil +} + +// Current returns the list of the effective and the known caps of +// the current process. +// +// The result is like []string{"CAP_SYS_ADMIN", ...}. +// +// The result does not contain caps that are not recognized by +// the "github.com/syndtr/gocapability" library. 
+func Current() ([]string, error) { + f, err := os.Open("/proc/self/status") + if err != nil { + return nil, err + } + defer f.Close() + caps, err := ParseProcPIDStatus(f) + if err != nil { + return nil, err + } + capEff := caps[capability.EFFECTIVE] + names, _ := FromUint64(capEff) + return names, nil +} + +var ( + // caps35 is the caps of kernel 3.5 (37 entries) + caps35 = []string{ + "CAP_CHOWN", // 2.2 + "CAP_DAC_OVERRIDE", // 2.2 + "CAP_DAC_READ_SEARCH", // 2.2 + "CAP_FOWNER", // 2.2 + "CAP_FSETID", // 2.2 + "CAP_KILL", // 2.2 + "CAP_SETGID", // 2.2 + "CAP_SETUID", // 2.2 + "CAP_SETPCAP", // 2.2 + "CAP_LINUX_IMMUTABLE", // 2.2 + "CAP_NET_BIND_SERVICE", // 2.2 + "CAP_NET_BROADCAST", // 2.2 + "CAP_NET_ADMIN", // 2.2 + "CAP_NET_RAW", // 2.2 + "CAP_IPC_LOCK", // 2.2 + "CAP_IPC_OWNER", // 2.2 + "CAP_SYS_MODULE", // 2.2 + "CAP_SYS_RAWIO", // 2.2 + "CAP_SYS_CHROOT", // 2.2 + "CAP_SYS_PTRACE", // 2.2 + "CAP_SYS_PACCT", // 2.2 + "CAP_SYS_ADMIN", // 2.2 + "CAP_SYS_BOOT", // 2.2 + "CAP_SYS_NICE", // 2.2 + "CAP_SYS_RESOURCE", // 2.2 + "CAP_SYS_TIME", // 2.2 + "CAP_SYS_TTY_CONFIG", // 2.2 + "CAP_MKNOD", // 2.4 + "CAP_LEASE", // 2.4 + "CAP_AUDIT_WRITE", // 2.6.11 + "CAP_AUDIT_CONTROL", // 2.6.11 + "CAP_SETFCAP", // 2.6.24 + "CAP_MAC_OVERRIDE", // 2.6.25 + "CAP_MAC_ADMIN", // 2.6.25 + "CAP_SYSLOG", // 2.6.37 + "CAP_WAKE_ALARM", // 3.0 + "CAP_BLOCK_SUSPEND", // 3.5 + } + // caps316 is the caps of kernel 3.16 (38 entries) + caps316 = append(caps35, "CAP_AUDIT_READ") + // caps58 is the caps of kernel 5.8 (40 entries) + caps58 = append(caps316, []string{"CAP_PERFMON", "CAP_BPF"}...) 
+ // caps59 is the caps of kernel 5.9 (41 entries) + caps59 = append(caps58, "CAP_CHECKPOINT_RESTORE") +) + +// Known returns the known cap strings as of kernel 5.9 +func Known() []string { + return caps59 +} diff --git a/pkg/cap/cap_linux_test.go b/pkg/cap/cap_linux_test.go new file mode 100644 index 000000000..2c474339b --- /dev/null +++ b/pkg/cap/cap_linux_test.go @@ -0,0 +1,161 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cap + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/syndtr/gocapability/capability" +) + +func TestCapsList(t *testing.T) { + assert.Len(t, caps316, 38) + assert.Len(t, caps58, 40) + assert.Len(t, caps59, 41) +} + +func TestFromUint64(t *testing.T) { + type testCase struct { + comment string + v uint64 + knownNames []string + unknown []int + } + testCases := []testCase{ + { + comment: "No cap", + v: 0x0000000000000000, + }, + { + // 3.10 (same caps as 3.5) is the oldest kernel version we want to support + comment: "All caps on kernel 3.5 (last = CAP_BLOCK_SUSPEND)", + v: 0x0000001fffffffff, + knownNames: caps35, + }, + { + comment: "All caps on kernel 3.16 (last = CAP_AUDIT_READ)", + v: 0x0000003fffffffff, + knownNames: caps316, + }, + { + comment: "All caps on kernel 5.8 (last = CAP_BPF)", + v: 0x000000ffffffffff, + knownNames: caps58, + }, + { + comment: "All caps on kernel 5.9 (last = CAP_CHECKPOINT_RESTORE)", + v: 0x000001ffffffffff, + knownNames: 
caps59, + }, + { + comment: "Unknown caps", + v: 0xf00001ffffffffff, + knownNames: caps59, + unknown: []int{60, 61, 62, 63}, + }, + } + + for _, tc := range testCases { + knownNames, unknown := FromUint64(tc.v) + t.Logf("[%s] v=0x%x, got=%+v (%d entries), unknown=%v", + tc.comment, tc.v, knownNames, len(knownNames), unknown) + assert.Equal(t, tc.knownNames, knownNames) + assert.Equal(t, tc.unknown, unknown) + } +} + +func TestParseProcPIDStatus(t *testing.T) { + procPIDStatus := `Name: cat +Umask: 0022 +State: R (running) +Tgid: 170065 +Ngid: 0 +Pid: 170065 +PPid: 170064 +TracerPid: 0 +Uid: 0 0 0 0 +Gid: 0 0 0 0 +FDSize: 64 +Groups: 0 +NStgid: 170065 +NSpid: 170065 +NSpgid: 170064 +NSsid: 3784 +VmPeak: 8216 kB +VmSize: 8216 kB +VmLck: 0 kB +VmPin: 0 kB +VmHWM: 676 kB +VmRSS: 676 kB +RssAnon: 72 kB +RssFile: 604 kB +RssShmem: 0 kB +VmData: 324 kB +VmStk: 132 kB +VmExe: 20 kB +VmLib: 1612 kB +VmPTE: 56 kB +VmSwap: 0 kB +HugetlbPages: 0 kB +CoreDumping: 0 +THP_enabled: 1 +Threads: 1 +SigQ: 0/63692 +SigPnd: 0000000000000000 +ShdPnd: 0000000000000000 +SigBlk: 0000000000000000 +SigIgn: 0000000000000000 +SigCgt: 0000000000000000 +CapInh: 0000000000000000 +CapPrm: 000000ffffffffff +CapEff: 000000ffffffffff +CapBnd: 000000ffffffffff +CapAmb: 0000000000000000 +NoNewPrivs: 0 +Seccomp: 0 +Speculation_Store_Bypass: thread vulnerable +Cpus_allowed: 00000000,00000000,00000000,0000000f +Cpus_allowed_list: 0-3 +Mems_allowed: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 +Mems_allowed_list: 0 +voluntary_ctxt_switches: 0 +nonvoluntary_ctxt_switches: 0 +` + res, err := ParseProcPIDStatus(strings.NewReader(procPIDStatus)) + assert.NoError(t, err) + expected := map[capability.CapType]uint64{ + capability.INHERITABLE: 0, + capability.PERMITTED: 
0xffffffffff, + capability.EFFECTIVE: 0xffffffffff, + capability.BOUNDING: 0xffffffffff, + capability.AMBIENT: 0, + } + assert.EqualValues(t, expected, res) +} + +func TestCurrent(t *testing.T) { + caps, err := Current() + assert.NoError(t, err) + t.Logf("verify the result manually: %+v", caps) +} + +func TestKnown(t *testing.T) { + caps := Known() + assert.EqualValues(t, caps59, caps) +} diff --git a/pkg/cri/opts/spec_linux.go b/pkg/cri/opts/spec_linux.go index 8730d8482..4b4e3ec76 100644 --- a/pkg/cri/opts/spec_linux.go +++ b/pkg/cri/opts/spec_linux.go @@ -354,7 +354,7 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp } // WithCapabilities sets the provided capabilities from the security context -func WithCapabilities(sc *runtime.LinuxContainerSecurityContext) oci.SpecOpts { +func WithCapabilities(sc *runtime.LinuxContainerSecurityContext, allCaps []string) oci.SpecOpts { capabilities := sc.GetCapabilities() if capabilities == nil { return nullOpt @@ -366,7 +366,7 @@ func WithCapabilities(sc *runtime.LinuxContainerSecurityContext) oci.SpecOpts { // AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"} // will be all capabilities without `CAP_CHOWN`. 
if util.InStringSlice(capabilities.GetAddCapabilities(), "ALL") { - opts = append(opts, oci.WithAllCapabilities) + opts = append(opts, oci.WithCapabilities(allCaps)) } if util.InStringSlice(capabilities.GetDropCapabilities(), "ALL") { opts = append(opts, oci.WithCapabilities(nil)) diff --git a/pkg/cri/server/container_create_linux.go b/pkg/cri/server/container_create_linux.go index 992da802c..8c77774ff 100644 --- a/pkg/cri/server/container_create_linux.go +++ b/pkg/cri/server/container_create_linux.go @@ -217,10 +217,12 @@ func (c *criService) containerSpec( specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed) } else { // add requested devices by the config as host devices are not automatically added - specOpts = append(specOpts, customopts.WithDevices(c.os, config), customopts.WithCapabilities(securityContext)) + specOpts = append(specOpts, customopts.WithDevices(c.os, config), + customopts.WithCapabilities(securityContext, c.allCaps)) } } else { // not privileged - specOpts = append(specOpts, customopts.WithDevices(c.os, config), customopts.WithCapabilities(securityContext)) + specOpts = append(specOpts, customopts.WithDevices(c.os, config), + customopts.WithCapabilities(securityContext, c.allCaps)) } // Clear all ambient capabilities. 
The implication of non-root + caps diff --git a/pkg/cri/server/container_create_linux_test.go b/pkg/cri/server/container_create_linux_test.go index b6f07a598..927c54673 100644 --- a/pkg/cri/server/container_create_linux_test.go +++ b/pkg/cri/server/container_create_linux_test.go @@ -39,6 +39,7 @@ import ( "github.com/stretchr/testify/require" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + "github.com/containerd/containerd/pkg/cap" "github.com/containerd/containerd/pkg/cri/annotations" "github.com/containerd/containerd/pkg/cri/config" "github.com/containerd/containerd/pkg/cri/opts" @@ -191,6 +192,7 @@ func TestContainerCapabilities(t *testing.T) { testSandboxID := "sandbox-id" testContainerName := "container-name" testPid := uint32(1234) + allCaps := cap.Known() for desc, test := range map[string]struct { capability *runtime.Capability includes []string @@ -208,20 +210,20 @@ func TestContainerCapabilities(t *testing.T) { capability: &runtime.Capability{ AddCapabilities: []string{"ALL"}, }, - includes: oci.GetAllCapabilities(), + includes: allCaps, }, "should be able to drop all capabilities": { capability: &runtime.Capability{ DropCapabilities: []string{"ALL"}, }, - excludes: oci.GetAllCapabilities(), + excludes: allCaps, }, "should be able to drop capabilities with add all": { capability: &runtime.Capability{ AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"}, }, - includes: util.SubtractStringSlice(oci.GetAllCapabilities(), "CAP_CHOWN"), + includes: util.SubtractStringSlice(allCaps, "CAP_CHOWN"), excludes: []string{"CAP_CHOWN"}, }, "should be able to add capabilities with drop all": { @@ -230,13 +232,14 @@ func TestContainerCapabilities(t *testing.T) { DropCapabilities: []string{"ALL"}, }, includes: []string{"CAP_SYS_ADMIN"}, - excludes: util.SubtractStringSlice(oci.GetAllCapabilities(), "CAP_SYS_ADMIN"), + excludes: util.SubtractStringSlice(allCaps, "CAP_SYS_ADMIN"), }, } { t.Logf("TestCase %q", desc) containerConfig, sandboxConfig, 
imageConfig, specCheck := getCreateContainerTestData() ociRuntime := config.Runtime{} c := newTestCRIService() + c.allCaps = allCaps containerConfig.Linux.SecurityContext.Capabilities = test.capability spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime) diff --git a/pkg/cri/server/service.go b/pkg/cri/server/service.go index a8c30972d..cec15ffd0 100644 --- a/pkg/cri/server/service.go +++ b/pkg/cri/server/service.go @@ -101,6 +101,9 @@ type criService struct { cniNetConfMonitor *cniNetConfSyncer // baseOCISpecs contains cached OCI specs loaded via `Runtime.BaseRuntimeSpec` baseOCISpecs map[string]*oci.Spec + // allCaps is the list of the capabilities. + // When nil, parsed from CapEff of /proc/self/status. + allCaps []string // nolint } // NewCRIService returns a new instance of CRIService diff --git a/pkg/cri/server/service_linux.go b/pkg/cri/server/service_linux.go index 03b28f0ae..93fa8b92a 100644 --- a/pkg/cri/server/service_linux.go +++ b/pkg/cri/server/service_linux.go @@ -17,6 +17,7 @@ package server import ( + "github.com/containerd/containerd/pkg/cap" "github.com/containerd/containerd/sys" cni "github.com/containerd/go-cni" "github.com/opencontainers/selinux/go-selinux" @@ -61,6 +62,13 @@ func (c *criService) initPlatform() error { return errors.Wrap(err, "failed to initialize cni") } + if c.allCaps == nil { + c.allCaps, err = cap.Current() + if err != nil { + return errors.Wrap(err, "failed to get caps") + } + } + return nil }