From 00b5c99b1a501881571c37f735348a63186eb575 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 23 Mar 2021 17:57:22 -0700 Subject: [PATCH] pkg/seccomp: simplify IsEnabled, update doc Current implementation of seccomp.IsEnabled (rooted in runc) is not too good. First, it parses the whole /proc/self/status, adding each key: value pair into the map (lots of allocations and future work for garbage collector), when using a single key from that map. Second, the presence of "Seccomp" key in /proc/self/status merely means that kernel option CONFIG_SECCOMP is set, but there is a need to _also_ check for CONFIG_SECCOMP_FILTER (the code for which exists but never executed in case /proc/self/status has Seccomp key). Replace all this with a single call to prctl; see the long comment in the code for details. While at it, improve the IsEnabled documentation. NOTE historically, parsing /proc/self/status was added after a concern was raised in https://github.com/opencontainers/runc/pull/471 that prctl(PR_GET_SECCOMP, ...) can result in the calling process being killed with SIGKILL. This is a valid concern, so the new code here does not use PR_GET_SECCOMP at all. Signed-off-by: Kir Kolyshkin --- pkg/seccomp/fixtures/proc_self_status | 47 ---------------- pkg/seccomp/seccomp.go | 8 +-- pkg/seccomp/seccomp_linux.go | 77 ++++++++++----------------- pkg/seccomp/seccomp_linux_test.go | 48 ----------------- 4 files changed, 33 insertions(+), 147 deletions(-) delete mode 100644 pkg/seccomp/fixtures/proc_self_status delete mode 100644 pkg/seccomp/seccomp_linux_test.go diff --git a/pkg/seccomp/fixtures/proc_self_status b/pkg/seccomp/fixtures/proc_self_status deleted file mode 100644 index 0e0084f6c..000000000 --- a/pkg/seccomp/fixtures/proc_self_status +++ /dev/null @@ -1,47 +0,0 @@ -Name: cat -State: R (running) -Tgid: 19383 -Ngid: 0 -Pid: 19383 -PPid: 19275 -TracerPid: 0 -Uid: 1000 1000 1000 1000 -Gid: 1000 1000 1000 1000 -FDSize: 256 -Groups: 24 25 27 29 30 44 46 102 104 108 111 1000 1001 -NStgid: 19383 -NSpid: 19383 -NSpgid: 19383 -NSsid: 19275 -VmPeak: 5944 kB -VmSize: 5944 kB -VmLck: 0 kB -VmPin: 0 kB -VmHWM: 744 kB -VmRSS: 744 kB -VmData: 324 kB -VmStk: 136 kB -VmExe: 48 kB -VmLib: 1776 kB -VmPTE: 32 kB -VmPMD: 12 kB -VmSwap: 0 kB -Threads: 1 -SigQ: 0/30067 -SigPnd: 0000000000000000 -ShdPnd: 0000000000000000 -SigBlk: 0000000000000000 -SigIgn: 0000000000000080 -SigCgt: 0000000000000000 -CapInh: 0000000000000000 -CapPrm: 0000000000000000 -CapEff: 0000000000000000 -CapBnd: 0000003fffffffff -CapAmb: 0000000000000000 -Seccomp: 0 -Cpus_allowed: f -Cpus_allowed_list: 0-3 -Mems_allowed: 00000000,00000001 -Mems_allowed_list: 0 -voluntary_ctxt_switches: 0 -nonvoluntary_ctxt_switches: 1 diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index a0ad7d0e8..74982358a 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -16,10 +16,10 @@ package seccomp -// IsEnabled returns whether seccomp support is enabled -// On Linux returns if the kernel has been configured to support seccomp. -// From https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux.go#L86-L102 -// On non-Linux returns false +// IsEnabled checks whether seccomp support is enabled. On Linux, it returns +// true if the kernel has been configured to support seccomp (kernel options +// CONFIG_SECCOMP and CONFIG_SECCOMP_FILTER are set). On non-Linux, it always +// returns false. func IsEnabled() bool { return isEnabled() } diff --git a/pkg/seccomp/seccomp_linux.go b/pkg/seccomp/seccomp_linux.go index 9bda3b2d7..f324bde72 100644 --- a/pkg/seccomp/seccomp_linux.go +++ b/pkg/seccomp/seccomp_linux.go @@ -33,56 +33,37 @@ package seccomp import ( - "bufio" - "os" - "strings" - "golang.org/x/sys/unix" ) -// isEnabled returns if the kernel has been configured to support seccomp. -// From https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux.go#L86-L102 +// isEnabled returns whether the kernel has been configured to support seccomp +// (including the check for CONFIG_SECCOMP_FILTER kernel option). func isEnabled() bool { - // Try to read from /proc/self/status for kernels > 3.8 - s, err := parseStatusFile("/proc/self/status") - if err != nil { - // Check if Seccomp is supported, via CONFIG_SECCOMP. - if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL { - // Make sure the kernel has CONFIG_SECCOMP_FILTER. - if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL { - return true - } - } - return false - } - _, ok := s["Seccomp"] - return ok -} - -// parseStatusFile is from https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux.go#L243-L268 -func parseStatusFile(path string) (map[string]string, error) { - f, err := os.Open(path) - if err != nil { - return nil, err - } - defer f.Close() - - s := bufio.NewScanner(f) - status := make(map[string]string) - - for s.Scan() { - text := s.Text() - parts := strings.Split(text, ":") - - if len(parts) <= 1 { - continue - } - - status[parts[0]] = parts[1] - } - if err := s.Err(); err != nil { - return nil, err - } - - return status, nil + // Excerpts from prctl(2), section ERRORS: + // + // EACCES + // option is PR_SET_SECCOMP and arg2 is SECCOMP_MODE_FILTER, but + // the process does not have the CAP_SYS_ADMIN capability or has + // not set the no_new_privs attribute <...>. + // <...> + // EFAULT + // option is PR_SET_SECCOMP, arg2 is SECCOMP_MODE_FILTER, the + // system was built with CONFIG_SECCOMP_FILTER, and arg3 is an + // invalid address. + // <...> + // EINVAL + // option is PR_SET_SECCOMP or PR_GET_SECCOMP, and the kernel + // was not configured with CONFIG_SECCOMP. + // + // EINVAL + // option is PR_SET_SECCOMP, arg2 is SECCOMP_MODE_FILTER, + // and the kernel was not configured with CONFIG_SECCOMP_FILTER. + // + // + // Meaning, in case these kernel options are set (this is what we check + // for here), we will get some other error (most probably EACCES or + // EFAULT). IOW, EINVAL means "seccomp not supported", any other error + // means it is supported. + + return unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0) != unix.EINVAL } diff --git a/pkg/seccomp/seccomp_linux_test.go b/pkg/seccomp/seccomp_linux_test.go deleted file mode 100644 index 850ab97e1..000000000 --- a/pkg/seccomp/seccomp_linux_test.go +++ /dev/null @@ -1,48 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -/* - Copyright The runc Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package seccomp - -import "testing" - -// TestParseStatusFile is from https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux_test.go -func TestParseStatusFile(t *testing.T) { - s, err := parseStatusFile("fixtures/proc_self_status") - if err != nil { - t.Fatal(err) - } - - if _, ok := s["Seccomp"]; !ok { - - t.Fatal("expected to find 'Seccomp' in the map but did not.") - } -}