pkg/seccomp: simplify IsEnabled, update doc

Current implementation of seccomp.IsEnabled (rooted in runc) is not
too good.

First, it parses the whole /proc/self/status, adding each key: value
pair into the map (lots of allocations and future work for garbage
collector), when using a single key from that map.

Second, the presence of "Seccomp" key in /proc/self/status merely means
that kernel option CONFIG_SECCOMP is set, but there is a need to _also_
check for CONFIG_SECCOMP_FILTER (the code for which exists but never
executed in case /proc/self/status has Seccomp key).

Replace all this with a single call to prctl; see the long comment in
the code for details.

While at it, improve the IsEnabled documentation.

NOTE historically, parsing /proc/self/status was added after a concern
was raised in https://github.com/opencontainers/runc/pull/471 that
prctl(PR_GET_SECCOMP, ...) can result in the calling process being
killed with SIGKILL. This is a valid concern, so the new code here
does not use PR_GET_SECCOMP at all.

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
Kir Kolyshkin
2021-03-23 17:57:22 -07:00
parent 9efd3e2384
commit 00b5c99b1a
4 changed files with 33 additions and 147 deletions

View File

@@ -33,56 +33,37 @@
package seccomp
import (
"bufio"
"os"
"strings"
"golang.org/x/sys/unix"
)
// isEnabled returns if the kernel has been configured to support seccomp.
// From https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux.go#L86-L102
// isEnabled returns whether the kernel has been configured to support seccomp
// (including the check for CONFIG_SECCOMP_FILTER kernel option).
func isEnabled() bool {
// Try to read from /proc/self/status for kernels > 3.8
s, err := parseStatusFile("/proc/self/status")
if err != nil {
// Check if Seccomp is supported, via CONFIG_SECCOMP.
if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL {
// Make sure the kernel has CONFIG_SECCOMP_FILTER.
if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL {
return true
}
}
return false
}
_, ok := s["Seccomp"]
return ok
}
// parseStatusFile is from https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux.go#L243-L268
func parseStatusFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
s := bufio.NewScanner(f)
status := make(map[string]string)
for s.Scan() {
text := s.Text()
parts := strings.Split(text, ":")
if len(parts) <= 1 {
continue
}
status[parts[0]] = parts[1]
}
if err := s.Err(); err != nil {
return nil, err
}
return status, nil
// Excerpts from prctl(2), section ERRORS:
//
// EACCES
// option is PR_SET_SECCOMP and arg2 is SECCOMP_MODE_FILTER, but
// the process does not have the CAP_SYS_ADMIN capability or has
// not set the no_new_privs attribute <...>.
// <...>
// EFAULT
// option is PR_SET_SECCOMP, arg2 is SECCOMP_MODE_FILTER, the
// system was built with CONFIG_SECCOMP_FILTER, and arg3 is an
// invalid address.
// <...>
// EINVAL
// option is PR_SET_SECCOMP or PR_GET_SECCOMP, and the kernel
// was not configured with CONFIG_SECCOMP.
//
// EINVAL
// option is PR_SET_SECCOMP, arg2 is SECCOMP_MODE_FILTER,
// and the kernel was not configured with CONFIG_SECCOMP_FILTER.
// <end of quote>
//
// Meaning, in case these kernel options are set (this is what we check
// for here), we will get some other error (most probably EACCES or
// EFAULT). IOW, EINVAL means "seccomp not supported", any other error
// means it is supported.
return unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0) != unix.EINVAL
}