pkg/seccomp: simplify IsEnabled, update doc
Current implementation of seccomp.IsEnabled (rooted in runc) is not too good. First, it parses the whole /proc/self/status, adding each key: value pair into the map (lots of allocations and future work for garbage collector), when using a single key from that map. Second, the presence of "Seccomp" key in /proc/self/status merely means that kernel option CONFIG_SECCOMP is set, but there is a need to _also_ check for CONFIG_SECCOMP_FILTER (the code for which exists but never executed in case /proc/self/status has Seccomp key). Replace all this with a single call to prctl; see the long comment in the code for details. While at it, improve the IsEnabled documentation. NOTE historically, parsing /proc/self/status was added after a concern was raised in https://github.com/opencontainers/runc/pull/471 that prctl(PR_GET_SECCOMP, ...) can result in the calling process being killed with SIGKILL. This is a valid concern, so the new code here does not use PR_GET_SECCOMP at all. Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
		| @@ -1,47 +0,0 @@ | ||||
| Name:   cat | ||||
| State:  R (running) | ||||
| Tgid:   19383 | ||||
| Ngid:   0 | ||||
| Pid:    19383 | ||||
| PPid:   19275 | ||||
| TracerPid:  0 | ||||
| Uid:    1000    1000    1000    1000 | ||||
| Gid:    1000    1000    1000    1000 | ||||
| FDSize: 256 | ||||
| Groups: 24 25 27 29 30 44 46 102 104 108 111 1000 1001 | ||||
| NStgid: 19383 | ||||
| NSpid:  19383 | ||||
| NSpgid: 19383 | ||||
| NSsid:  19275 | ||||
| VmPeak:     5944 kB | ||||
| VmSize:     5944 kB | ||||
| VmLck:         0 kB | ||||
| VmPin:         0 kB | ||||
| VmHWM:       744 kB | ||||
| VmRSS:       744 kB | ||||
| VmData:      324 kB | ||||
| VmStk:       136 kB | ||||
| VmExe:        48 kB | ||||
| VmLib:      1776 kB | ||||
| VmPTE:        32 kB | ||||
| VmPMD:        12 kB | ||||
| VmSwap:        0 kB | ||||
| Threads:    1 | ||||
| SigQ:   0/30067 | ||||
| SigPnd: 0000000000000000 | ||||
| ShdPnd: 0000000000000000 | ||||
| SigBlk: 0000000000000000 | ||||
| SigIgn: 0000000000000080 | ||||
| SigCgt: 0000000000000000 | ||||
| CapInh: 0000000000000000 | ||||
| CapPrm: 0000000000000000 | ||||
| CapEff: 0000000000000000 | ||||
| CapBnd: 0000003fffffffff | ||||
| CapAmb: 0000000000000000 | ||||
| Seccomp:    0 | ||||
| Cpus_allowed:   f | ||||
| Cpus_allowed_list:  0-3 | ||||
| Mems_allowed:   00000000,00000001 | ||||
| Mems_allowed_list:  0 | ||||
| voluntary_ctxt_switches:    0 | ||||
| nonvoluntary_ctxt_switches: 1 | ||||
| @@ -16,10 +16,10 @@ | ||||
|  | ||||
| package seccomp | ||||
|  | ||||
| // IsEnabled returns whether seccomp support is enabled | ||||
| // On Linux returns if the kernel has been configured to support seccomp. | ||||
| //  From https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux.go#L86-L102 | ||||
| // On non-Linux returns false | ||||
| // IsEnabled checks whether seccomp support is enabled. On Linux, it returns | ||||
| // true if the kernel has been configured to support seccomp (kernel options | ||||
| // CONFIG_SECCOMP and CONFIG_SECCOMP_FILTER are set). On non-Linux, it always | ||||
| // returns false. | ||||
| func IsEnabled() bool { | ||||
| 	return isEnabled() | ||||
| } | ||||
|   | ||||
| @@ -33,56 +33,37 @@ | ||||
| package seccomp | ||||
|  | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"os" | ||||
| 	"strings" | ||||
|  | ||||
| 	"golang.org/x/sys/unix" | ||||
| ) | ||||
|  | ||||
| // isEnabled returns if the kernel has been configured to support seccomp. | ||||
| // From https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux.go#L86-L102 | ||||
| // isEnabled returns whether the kernel has been configured to support seccomp | ||||
| // (including the check for CONFIG_SECCOMP_FILTER kernel option). | ||||
| func isEnabled() bool { | ||||
| 	// Try to read from /proc/self/status for kernels > 3.8 | ||||
| 	s, err := parseStatusFile("/proc/self/status") | ||||
| 	if err != nil { | ||||
| 		// Check if Seccomp is supported, via CONFIG_SECCOMP. | ||||
| 		if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL { | ||||
| 			// Make sure the kernel has CONFIG_SECCOMP_FILTER. | ||||
| 			if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL { | ||||
| 				return true | ||||
| 			} | ||||
| 		} | ||||
| 		return false | ||||
| 	} | ||||
| 	_, ok := s["Seccomp"] | ||||
| 	return ok | ||||
| } | ||||
|  | ||||
| // parseStatusFile is from https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux.go#L243-L268 | ||||
| func parseStatusFile(path string) (map[string]string, error) { | ||||
| 	f, err := os.Open(path) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer f.Close() | ||||
|  | ||||
| 	s := bufio.NewScanner(f) | ||||
| 	status := make(map[string]string) | ||||
|  | ||||
| 	for s.Scan() { | ||||
| 		text := s.Text() | ||||
| 		parts := strings.Split(text, ":") | ||||
|  | ||||
| 		if len(parts) <= 1 { | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		status[parts[0]] = parts[1] | ||||
| 	} | ||||
| 	if err := s.Err(); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	return status, nil | ||||
| 	// Excerpts from prctl(2), section ERRORS: | ||||
| 	// | ||||
| 	// EACCES | ||||
| 	//      option is PR_SET_SECCOMP and arg2 is SECCOMP_MODE_FILTER, but | ||||
| 	//      the process does not have the CAP_SYS_ADMIN capability or has | ||||
| 	//      not set the no_new_privs attribute <...>. | ||||
| 	// <...> | ||||
| 	// EFAULT | ||||
| 	//      option is PR_SET_SECCOMP, arg2 is SECCOMP_MODE_FILTER, the | ||||
| 	//      system was built with CONFIG_SECCOMP_FILTER, and arg3 is an | ||||
| 	//      invalid address. | ||||
| 	// <...> | ||||
| 	// EINVAL | ||||
| 	//      option is PR_SET_SECCOMP or PR_GET_SECCOMP, and the kernel | ||||
| 	//      was not configured with CONFIG_SECCOMP. | ||||
| 	// | ||||
| 	// EINVAL | ||||
| 	//      option is PR_SET_SECCOMP, arg2 is SECCOMP_MODE_FILTER, | ||||
| 	//      and the kernel was not configured with CONFIG_SECCOMP_FILTER. | ||||
| 	// <end of quote> | ||||
| 	// | ||||
| 	// Meaning, in case these kernel options are set (this is what we check | ||||
| 	// for here), we will get some other error (most probably EACCES or | ||||
| 	// EFAULT). IOW, EINVAL means "seccomp not supported", any other error | ||||
| 	// means it is supported. | ||||
|  | ||||
| 	return unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0) != unix.EINVAL | ||||
| } | ||||
|   | ||||
| @@ -1,48 +0,0 @@ | ||||
| /* | ||||
|    Copyright The containerd Authors. | ||||
|  | ||||
|    Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|    you may not use this file except in compliance with the License. | ||||
|    You may obtain a copy of the License at | ||||
|  | ||||
|        http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
|    Unless required by applicable law or agreed to in writing, software | ||||
|    distributed under the License is distributed on an "AS IS" BASIS, | ||||
|    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|    See the License for the specific language governing permissions and | ||||
|    limitations under the License. | ||||
| */ | ||||
|  | ||||
| /* | ||||
|    Copyright The runc Authors. | ||||
|  | ||||
|    Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|    you may not use this file except in compliance with the License. | ||||
|    You may obtain a copy of the License at | ||||
|  | ||||
|        http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
|    Unless required by applicable law or agreed to in writing, software | ||||
|    distributed under the License is distributed on an "AS IS" BASIS, | ||||
|    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|    See the License for the specific language governing permissions and | ||||
|    limitations under the License. | ||||
| */ | ||||
|  | ||||
| package seccomp | ||||
|  | ||||
| import "testing" | ||||
|  | ||||
| // TestParseStatusFile is from https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux_test.go | ||||
| func TestParseStatusFile(t *testing.T) { | ||||
| 	s, err := parseStatusFile("fixtures/proc_self_status") | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
|  | ||||
| 	if _, ok := s["Seccomp"]; !ok { | ||||
|  | ||||
| 		t.Fatal("expected to find 'Seccomp' in the map but did not.") | ||||
| 	} | ||||
| } | ||||
		Reference in New Issue
	
	Block a user
	 Kir Kolyshkin
					Kir Kolyshkin