oci.WithPrivileged: set the current caps, not the known caps
This change is needed for running the latest containerd inside a Docker that is not aware of the recently added caps (BPF, PERFMON, CHECKPOINT_RESTORE). Without this change, containerd inside Docker fails to run containers with an "apply caps: operation not permitted" error. See kubernetes-sigs/kind#2058. NOTE: The caller process of this function is now assumed to be as privileged as possible. Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
172
pkg/cap/cap_linux.go
Normal file
172
pkg/cap/cap_linux.go
Normal file
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Package cap provides Linux capability utilities.
|
||||
package cap
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
)
|
||||
|
||||
// FromUint64 parses an integer into string slice like
|
||||
// []{"CAP_SYS_ADMIN", ...}.
|
||||
//
|
||||
// Unknown cap numbers are returned as []int.
|
||||
func FromUint64(v uint64) ([]string, []int) {
|
||||
var (
|
||||
res []string
|
||||
unknown []int
|
||||
)
|
||||
knownList := capability.List()
|
||||
known := make(map[string]struct{}, len(knownList))
|
||||
for _, f := range knownList {
|
||||
known[f.String()] = struct{}{}
|
||||
}
|
||||
for i := 0; i <= 63; i++ {
|
||||
if b := (v >> i) & 0x1; b == 0x1 {
|
||||
c := capability.Cap(i)
|
||||
sRaw := c.String()
|
||||
if _, ok := known[sRaw]; ok {
|
||||
s := "CAP_" + strings.ToUpper(sRaw)
|
||||
res = append(res, s)
|
||||
} else {
|
||||
unknown = append(unknown, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
return res, unknown
|
||||
}
|
||||
|
||||
// ParseProcPIDStatus returns uint64 value from /proc/<PID>/status file
|
||||
func ParseProcPIDStatus(r io.Reader) (map[capability.CapType]uint64, error) {
|
||||
res := make(map[capability.CapType]uint64)
|
||||
scanner := bufio.NewScanner(r)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
pair := strings.SplitN(line, ":", 2)
|
||||
if len(pair) != 2 {
|
||||
continue
|
||||
}
|
||||
k := strings.TrimSpace(pair[0])
|
||||
v := strings.TrimSpace(pair[1])
|
||||
switch k {
|
||||
case "CapInh", "CapPrm", "CapEff", "CapBnd", "CapAmb":
|
||||
ui64, err := strconv.ParseUint(v, 16, 64)
|
||||
if err != nil {
|
||||
return nil, errors.Errorf("failed to parse line %q", line)
|
||||
}
|
||||
switch k {
|
||||
case "CapInh":
|
||||
res[capability.INHERITABLE] = ui64
|
||||
case "CapPrm":
|
||||
res[capability.PERMITTED] = ui64
|
||||
case "CapEff":
|
||||
res[capability.EFFECTIVE] = ui64
|
||||
case "CapBnd":
|
||||
res[capability.BOUNDING] = ui64
|
||||
case "CapAmb":
|
||||
res[capability.AMBIENT] = ui64
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Current returns the list of the effective and the known caps of
|
||||
// the current process.
|
||||
//
|
||||
// The result is like []string{"CAP_SYS_ADMIN", ...}.
|
||||
//
|
||||
// The result does not contain caps that are not recognized by
|
||||
// the "github.com/syndtr/gocapability" library.
|
||||
func Current() ([]string, error) {
|
||||
f, err := os.Open("/proc/self/status")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
caps, err := ParseProcPIDStatus(f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
capEff := caps[capability.EFFECTIVE]
|
||||
names, _ := FromUint64(capEff)
|
||||
return names, nil
|
||||
}
|
||||
|
||||
var (
	// caps35 is the caps of kernel 3.5 (37 entries)
	caps35 = []string{
		"CAP_CHOWN",            // 2.2
		"CAP_DAC_OVERRIDE",     // 2.2
		"CAP_DAC_READ_SEARCH",  // 2.2
		"CAP_FOWNER",           // 2.2
		"CAP_FSETID",           // 2.2
		"CAP_KILL",             // 2.2
		"CAP_SETGID",           // 2.2
		"CAP_SETUID",           // 2.2
		"CAP_SETPCAP",          // 2.2
		"CAP_LINUX_IMMUTABLE",  // 2.2
		"CAP_NET_BIND_SERVICE", // 2.2
		"CAP_NET_BROADCAST",    // 2.2
		"CAP_NET_ADMIN",        // 2.2
		"CAP_NET_RAW",          // 2.2
		"CAP_IPC_LOCK",         // 2.2
		"CAP_IPC_OWNER",        // 2.2
		"CAP_SYS_MODULE",       // 2.2
		"CAP_SYS_RAWIO",        // 2.2
		"CAP_SYS_CHROOT",       // 2.2
		"CAP_SYS_PTRACE",       // 2.2
		"CAP_SYS_PACCT",        // 2.2
		"CAP_SYS_ADMIN",        // 2.2
		"CAP_SYS_BOOT",         // 2.2
		"CAP_SYS_NICE",         // 2.2
		"CAP_SYS_RESOURCE",     // 2.2
		"CAP_SYS_TIME",         // 2.2
		"CAP_SYS_TTY_CONFIG",   // 2.2
		"CAP_MKNOD",            // 2.4
		"CAP_LEASE",            // 2.4
		"CAP_AUDIT_WRITE",      // 2.6.11
		"CAP_AUDIT_CONTROL",    // 2.6.11
		"CAP_SETFCAP",          // 2.6.24
		"CAP_MAC_OVERRIDE",     // 2.6.25
		"CAP_MAC_ADMIN",        // 2.6.25
		"CAP_SYSLOG",           // 2.6.37
		"CAP_WAKE_ALARM",       // 3.0
		"CAP_BLOCK_SUSPEND",    // 3.5
	}
	// caps316 is the caps of kernel 3.16 (38 entries)
	caps316 = extendCaps(caps35, "CAP_AUDIT_READ")
	// caps58 is the caps of kernel 5.8 (40 entries)
	caps58 = extendCaps(caps316, "CAP_PERFMON", "CAP_BPF")
	// caps59 is the caps of kernel 5.9 (41 entries)
	caps59 = extendCaps(caps58, "CAP_CHECKPOINT_RESTORE")
)

// extendCaps returns a newly allocated slice holding base followed by extra.
//
// The result has no spare capacity and never shares its backing array with
// base, so a later append on one list cannot overwrite an element of another.
func extendCaps(base []string, extra ...string) []string {
	out := make([]string, 0, len(base)+len(extra))
	out = append(out, base...)
	return append(out, extra...)
}
|
||||
|
||||
// Known returns the known cap strings as of kernel 5.9
|
||||
func Known() []string {
|
||||
return caps59
|
||||
}
|
||||
161
pkg/cap/cap_linux_test.go
Normal file
161
pkg/cap/cap_linux_test.go
Normal file
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cap
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
)
|
||||
|
||||
func TestCapsList(t *testing.T) {
|
||||
assert.Len(t, caps316, 38)
|
||||
assert.Len(t, caps58, 40)
|
||||
assert.Len(t, caps59, 41)
|
||||
}
|
||||
|
||||
func TestFromUint64(t *testing.T) {
|
||||
type testCase struct {
|
||||
comment string
|
||||
v uint64
|
||||
knownNames []string
|
||||
unknown []int
|
||||
}
|
||||
testCases := []testCase{
|
||||
{
|
||||
comment: "No cap",
|
||||
v: 0x0000000000000000,
|
||||
},
|
||||
{
|
||||
// 3.10 (same caps as 3.5) is the oldest kernel version we want to support
|
||||
comment: "All caps on kernel 3.5 (last = CAP_BLOCK_SUSPEND)",
|
||||
v: 0x0000001fffffffff,
|
||||
knownNames: caps35,
|
||||
},
|
||||
{
|
||||
comment: "All caps on kernel 3.16 (last = CAP_AUDIT_READ)",
|
||||
v: 0x0000003fffffffff,
|
||||
knownNames: caps316,
|
||||
},
|
||||
{
|
||||
comment: "All caps on kernel 5.8 (last = CAP_BPF)",
|
||||
v: 0x000000ffffffffff,
|
||||
knownNames: caps58,
|
||||
},
|
||||
{
|
||||
comment: "All caps on kernel 5.9 (last = CAP_CHECKPOINT_RESTORE)",
|
||||
v: 0x000001ffffffffff,
|
||||
knownNames: caps59,
|
||||
},
|
||||
{
|
||||
comment: "Unknown caps",
|
||||
v: 0xf00001ffffffffff,
|
||||
knownNames: caps59,
|
||||
unknown: []int{60, 61, 62, 63},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
knownNames, unknown := FromUint64(tc.v)
|
||||
t.Logf("[%s] v=0x%x, got=%+v (%d entries), unknown=%v",
|
||||
tc.comment, tc.v, knownNames, len(knownNames), unknown)
|
||||
assert.Equal(t, tc.knownNames, knownNames)
|
||||
assert.Equal(t, tc.unknown, unknown)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseProcPIDStatus(t *testing.T) {
|
||||
procPIDStatus := `Name: cat
|
||||
Umask: 0022
|
||||
State: R (running)
|
||||
Tgid: 170065
|
||||
Ngid: 0
|
||||
Pid: 170065
|
||||
PPid: 170064
|
||||
TracerPid: 0
|
||||
Uid: 0 0 0 0
|
||||
Gid: 0 0 0 0
|
||||
FDSize: 64
|
||||
Groups: 0
|
||||
NStgid: 170065
|
||||
NSpid: 170065
|
||||
NSpgid: 170064
|
||||
NSsid: 3784
|
||||
VmPeak: 8216 kB
|
||||
VmSize: 8216 kB
|
||||
VmLck: 0 kB
|
||||
VmPin: 0 kB
|
||||
VmHWM: 676 kB
|
||||
VmRSS: 676 kB
|
||||
RssAnon: 72 kB
|
||||
RssFile: 604 kB
|
||||
RssShmem: 0 kB
|
||||
VmData: 324 kB
|
||||
VmStk: 132 kB
|
||||
VmExe: 20 kB
|
||||
VmLib: 1612 kB
|
||||
VmPTE: 56 kB
|
||||
VmSwap: 0 kB
|
||||
HugetlbPages: 0 kB
|
||||
CoreDumping: 0
|
||||
THP_enabled: 1
|
||||
Threads: 1
|
||||
SigQ: 0/63692
|
||||
SigPnd: 0000000000000000
|
||||
ShdPnd: 0000000000000000
|
||||
SigBlk: 0000000000000000
|
||||
SigIgn: 0000000000000000
|
||||
SigCgt: 0000000000000000
|
||||
CapInh: 0000000000000000
|
||||
CapPrm: 000000ffffffffff
|
||||
CapEff: 000000ffffffffff
|
||||
CapBnd: 000000ffffffffff
|
||||
CapAmb: 0000000000000000
|
||||
NoNewPrivs: 0
|
||||
Seccomp: 0
|
||||
Speculation_Store_Bypass: thread vulnerable
|
||||
Cpus_allowed: 00000000,00000000,00000000,0000000f
|
||||
Cpus_allowed_list: 0-3
|
||||
Mems_allowed: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001
|
||||
Mems_allowed_list: 0
|
||||
voluntary_ctxt_switches: 0
|
||||
nonvoluntary_ctxt_switches: 0
|
||||
`
|
||||
res, err := ParseProcPIDStatus(strings.NewReader(procPIDStatus))
|
||||
assert.NoError(t, err)
|
||||
expected := map[capability.CapType]uint64{
|
||||
capability.INHERITABLE: 0,
|
||||
capability.PERMITTED: 0xffffffffff,
|
||||
capability.EFFECTIVE: 0xffffffffff,
|
||||
capability.BOUNDING: 0xffffffffff,
|
||||
capability.AMBIENT: 0,
|
||||
}
|
||||
assert.EqualValues(t, expected, res)
|
||||
}
|
||||
|
||||
func TestCurrent(t *testing.T) {
|
||||
caps, err := Current()
|
||||
assert.NoError(t, err)
|
||||
t.Logf("verify the result manually: %+v", caps)
|
||||
}
|
||||
|
||||
func TestKnown(t *testing.T) {
|
||||
caps := Known()
|
||||
assert.EqualValues(t, caps59, caps)
|
||||
}
|
||||
Reference in New Issue
Block a user