From 00b5c99b1a501881571c37f735348a63186eb575 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 23 Mar 2021 17:57:22 -0700 Subject: [PATCH 1/2] pkg/seccomp: simplify IsEnabled, update doc Current implementation of seccomp.IsEnabled (rooted in runc) is not too good. First, it parses the whole /proc/self/status, adding each key: value pair into the map (lots of allocations and future work for garbage collector), when using a single key from that map. Second, the presence of "Seccomp" key in /proc/self/status merely means that kernel option CONFIG_SECCOMP is set, but there is a need to _also_ check for CONFIG_SECCOMP_FILTER (the code for which exists but never executed in case /proc/self/status has Seccomp key). Replace all this with a single call to prctl; see the long comment in the code for details. While at it, improve the IsEnabled documentation. NOTE historically, parsing /proc/self/status was added after a concern was raised in https://github.com/opencontainers/runc/pull/471 that prctl(PR_GET_SECCOMP, ...) can result in the calling process being killed with SIGKILL. This is a valid concern, so the new code here does not use PR_GET_SECCOMP at all. Signed-off-by: Kir Kolyshkin --- pkg/seccomp/fixtures/proc_self_status | 47 ---------------- pkg/seccomp/seccomp.go | 8 +-- pkg/seccomp/seccomp_linux.go | 77 ++++++++++----------------- pkg/seccomp/seccomp_linux_test.go | 48 ----------------- 4 files changed, 33 insertions(+), 147 deletions(-) delete mode 100644 pkg/seccomp/fixtures/proc_self_status delete mode 100644 pkg/seccomp/seccomp_linux_test.go diff --git a/pkg/seccomp/fixtures/proc_self_status b/pkg/seccomp/fixtures/proc_self_status deleted file mode 100644 index 0e0084f6c..000000000 --- a/pkg/seccomp/fixtures/proc_self_status +++ /dev/null @@ -1,47 +0,0 @@ -Name: cat -State: R (running) -Tgid: 19383 -Ngid: 0 -Pid: 19383 -PPid: 19275 -TracerPid: 0 -Uid: 1000 1000 1000 1000 -Gid: 1000 1000 1000 1000 -FDSize: 256 -Groups: 24 25 27 29 30 44 46 102 104 108 111 1000 1001 -NStgid: 19383 -NSpid: 19383 -NSpgid: 19383 -NSsid: 19275 -VmPeak: 5944 kB -VmSize: 5944 kB -VmLck: 0 kB -VmPin: 0 kB -VmHWM: 744 kB -VmRSS: 744 kB -VmData: 324 kB -VmStk: 136 kB -VmExe: 48 kB -VmLib: 1776 kB -VmPTE: 32 kB -VmPMD: 12 kB -VmSwap: 0 kB -Threads: 1 -SigQ: 0/30067 -SigPnd: 0000000000000000 -ShdPnd: 0000000000000000 -SigBlk: 0000000000000000 -SigIgn: 0000000000000080 -SigCgt: 0000000000000000 -CapInh: 0000000000000000 -CapPrm: 0000000000000000 -CapEff: 0000000000000000 -CapBnd: 0000003fffffffff -CapAmb: 0000000000000000 -Seccomp: 0 -Cpus_allowed: f -Cpus_allowed_list: 0-3 -Mems_allowed: 00000000,00000001 -Mems_allowed_list: 0 -voluntary_ctxt_switches: 0 -nonvoluntary_ctxt_switches: 1 diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index a0ad7d0e8..74982358a 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -16,10 +16,10 @@ package seccomp -// IsEnabled returns whether seccomp support is enabled -// On Linux returns if the kernel has been configured to support seccomp. -// From https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux.go#L86-L102 -// On non-Linux returns false +// IsEnabled checks whether seccomp support is enabled. On Linux, it returns +// true if the kernel has been configured to support seccomp (kernel options +// CONFIG_SECCOMP and CONFIG_SECCOMP_FILTER are set). On non-Linux, it always +// returns false. func IsEnabled() bool { return isEnabled() } diff --git a/pkg/seccomp/seccomp_linux.go b/pkg/seccomp/seccomp_linux.go index 9bda3b2d7..f324bde72 100644 --- a/pkg/seccomp/seccomp_linux.go +++ b/pkg/seccomp/seccomp_linux.go @@ -33,56 +33,37 @@ package seccomp import ( - "bufio" - "os" - "strings" - "golang.org/x/sys/unix" ) -// isEnabled returns if the kernel has been configured to support seccomp. -// From https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux.go#L86-L102 +// isEnabled returns whether the kernel has been configured to support seccomp +// (including the check for CONFIG_SECCOMP_FILTER kernel option). func isEnabled() bool { - // Try to read from /proc/self/status for kernels > 3.8 - s, err := parseStatusFile("/proc/self/status") - if err != nil { - // Check if Seccomp is supported, via CONFIG_SECCOMP. - if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL { - // Make sure the kernel has CONFIG_SECCOMP_FILTER. - if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL { - return true - } - } - return false - } - _, ok := s["Seccomp"] - return ok -} - -// parseStatusFile is from https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux.go#L243-L268 -func parseStatusFile(path string) (map[string]string, error) { - f, err := os.Open(path) - if err != nil { - return nil, err - } - defer f.Close() - - s := bufio.NewScanner(f) - status := make(map[string]string) - - for s.Scan() { - text := s.Text() - parts := strings.Split(text, ":") - - if len(parts) <= 1 { - continue - } - - status[parts[0]] = parts[1] - } - if err := s.Err(); err != nil { - return nil, err - } - - return status, nil + // Excerpts from prctl(2), section ERRORS: + // + // EACCES + // option is PR_SET_SECCOMP and arg2 is SECCOMP_MODE_FILTER, but + // the process does not have the CAP_SYS_ADMIN capability or has + // not set the no_new_privs attribute <...>. + // <...> + // EFAULT + // option is PR_SET_SECCOMP, arg2 is SECCOMP_MODE_FILTER, the + // system was built with CONFIG_SECCOMP_FILTER, and arg3 is an + // invalid address. + // <...> + // EINVAL + // option is PR_SET_SECCOMP or PR_GET_SECCOMP, and the kernel + // was not configured with CONFIG_SECCOMP. + // + // EINVAL + // option is PR_SET_SECCOMP, arg2 is SECCOMP_MODE_FILTER, + // and the kernel was not configured with CONFIG_SECCOMP_FILTER. + // + // + // Meaning, in case these kernel options are set (this is what we check + // for here), we will get some other error (most probably EACCES or + // EFAULT). IOW, EINVAL means "seccomp not supported", any other error + // means it is supported. + + return unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0) != unix.EINVAL } diff --git a/pkg/seccomp/seccomp_linux_test.go b/pkg/seccomp/seccomp_linux_test.go deleted file mode 100644 index 850ab97e1..000000000 --- a/pkg/seccomp/seccomp_linux_test.go +++ /dev/null @@ -1,48 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -/* - Copyright The runc Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package seccomp - -import "testing" - -// TestParseStatusFile is from https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux_test.go -func TestParseStatusFile(t *testing.T) { - s, err := parseStatusFile("fixtures/proc_self_status") - if err != nil { - t.Fatal(err) - } - - if _, ok := s["Seccomp"]; !ok { - - t.Fatal("expected to find 'Seccomp' in the map but did not.") - } -} From 3292ea586276c08e80e2aa8b940f3e8cedd40f5a Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 24 Mar 2021 13:56:06 -0700 Subject: [PATCH 2/2] pkg/seccomp: use sync.Once to speed up IsEnabled It does not make sense to check if seccomp is supported by the kernel more than once per runtime, so let's use sync.Once to speed it up. A quick benchmark (old implementation, before this commit, after): BenchmarkIsEnabledOld-4 37183 27971 ns/op BenchmarkIsEnabled-4 1252161 947 ns/op BenchmarkIsEnabledOnce-4 666274008 2.14 ns/op Signed-off-by: Kir Kolyshkin --- pkg/seccomp/seccomp_linux.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pkg/seccomp/seccomp_linux.go b/pkg/seccomp/seccomp_linux.go index f324bde72..a23b492c6 100644 --- a/pkg/seccomp/seccomp_linux.go +++ b/pkg/seccomp/seccomp_linux.go @@ -33,9 +33,16 @@ package seccomp import ( + "sync" + "golang.org/x/sys/unix" ) +var ( + enabled bool + enabledOnce sync.Once +) + // isEnabled returns whether the kernel has been configured to support seccomp // (including the check for CONFIG_SECCOMP_FILTER kernel option). func isEnabled() bool { @@ -65,5 +72,9 @@ func isEnabled() bool { // EFAULT). IOW, EINVAL means "seccomp not supported", any other error // means it is supported. - return unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0) != unix.EINVAL + enabledOnce.Do(func() { + enabled = unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0) != unix.EINVAL + }) + + return enabled }