 3eda46af12
			
		
	
	3eda46af12
	
	
	
		
			
			Test suite:
```yaml
---
apiVersion: v1
kind: Pod
metadata:
  name: test-no-option
  annotations:
    description: "Equivalent of `docker run` (no option)"
spec:
  restartPolicy: Never
  containers:
    - name: main
      image: ghcr.io/containerd/busybox:1.28
      args: ['sh', '-euxc',
             '[ "$(id)" = "uid=0(root) gid=0(root) groups=0(root),10(wheel)" ]']
---
apiVersion: v1
kind: Pod
metadata:
  name: test-group-add-1-group-add-1234
  annotations:
    description: "Equivalent of `docker run --group-add 1 --group-add 1234`"
spec:
  restartPolicy: Never
  containers:
    - name: main
      image: ghcr.io/containerd/busybox:1.28
      args: ['sh', '-euxc',
             '[ "$(id)" = "uid=0(root) gid=0(root) groups=0(root),1(daemon),10(wheel),1234" ]']
  securityContext:
    supplementalGroups: [1, 1234]
---
apiVersion: v1
kind: Pod
metadata:
  name: test-user-1234
  annotations:
    description: "Equivalent of `docker run --user 1234`"
spec:
  restartPolicy: Never
  containers:
    - name: main
      image: ghcr.io/containerd/busybox:1.28
      args: ['sh', '-euxc',
             '[ "$(id)" = "uid=1234 gid=0(root) groups=0(root)" ]']
  securityContext:
    runAsUser: 1234
---
apiVersion: v1
kind: Pod
metadata:
  name: test-user-1234-1234
  annotations:
    description: "Equivalent of `docker run --user 1234:1234`"
spec:
  restartPolicy: Never
  containers:
    - name: main
      image: ghcr.io/containerd/busybox:1.28
      args: ['sh', '-euxc',
             '[ "$(id)" = "uid=1234 gid=1234 groups=1234" ]']
  securityContext:
    runAsUser: 1234
    runAsGroup: 1234
---
apiVersion: v1
kind: Pod
metadata:
  name: test-user-1234-group-add-1234
  annotations:
    description: "Equivalent of `docker run --user 1234 --group-add 1234`"
spec:
  restartPolicy: Never
  containers:
    - name: main
      image: ghcr.io/containerd/busybox:1.28
      args: ['sh', '-euxc',
             '[ "$(id)" = "uid=1234 gid=0(root) groups=0(root),1234" ]']
  securityContext:
    runAsUser: 1234
    supplementalGroups: [1234]
```
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
		
	
		
			
				
	
	
		
			380 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			380 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| /*
 | |
|    Copyright The containerd Authors.
 | |
| 
 | |
|    Licensed under the Apache License, Version 2.0 (the "License");
 | |
|    you may not use this file except in compliance with the License.
 | |
|    You may obtain a copy of the License at
 | |
| 
 | |
|        http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
|    Unless required by applicable law or agreed to in writing, software
 | |
|    distributed under the License is distributed on an "AS IS" BASIS,
 | |
|    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|    See the License for the specific language governing permissions and
 | |
|    limitations under the License.
 | |
| */
 | |
| 
 | |
| package sbserver
 | |
| 
 | |
| import (
 | |
| 	"bufio"
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"io"
 | |
| 	"os"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 
 | |
| 	imagespec "github.com/opencontainers/image-spec/specs-go/v1"
 | |
| 	runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
 | |
| 
 | |
| 	"github.com/containerd/containerd/contrib/apparmor"
 | |
| 	"github.com/containerd/containerd/contrib/seccomp"
 | |
| 	"github.com/containerd/containerd/oci"
 | |
| 	"github.com/containerd/containerd/snapshots"
 | |
| 
 | |
| 	customopts "github.com/containerd/containerd/pkg/cri/opts"
 | |
| )
 | |
| 
 | |
const (
	// profileNamePrefix marks a profile that is loaded from the local host,
	// e.g. the AppArmor profile "localhost/profileName".
	// TODO (mikebrow): get localhost/ & runtime/default from CRI kubernetes/kubernetes#51747
	profileNamePrefix = "localhost/"
	// runtimeDefault requests that a runtime default profile be used or created.
	runtimeDefault = "runtime/default"
	// dockerDefault requests that a docker default profile be used or created.
	dockerDefault = "docker/default"
	// appArmorDefaultProfileName is the name given to the default AppArmor
	// profile when it is created.
	appArmorDefaultProfileName = "cri-containerd.apparmor.d"
	// unconfinedProfile requests that a pod/container run without a security
	// profile.
	unconfinedProfile = "unconfined"
	// seccompDefaultProfile names the default seccomp profile.
	seccompDefaultProfile = dockerDefault
)
 | |
| 
 | |
| // containerMounts sets up necessary container system file mounts
 | |
| // including /dev/shm, /etc/hosts and /etc/resolv.conf.
 | |
| func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount {
 | |
| 	var mounts []*runtime.Mount
 | |
| 	securityContext := config.GetLinux().GetSecurityContext()
 | |
| 	if !isInCRIMounts(etcHostname, config.GetMounts()) {
 | |
| 		// /etc/hostname is added since 1.1.6, 1.2.4 and 1.3.
 | |
| 		// For in-place upgrade, the old sandbox doesn't have the hostname file,
 | |
| 		// do not mount this in that case.
 | |
| 		// TODO(random-liu): Remove the check and always mount this when
 | |
| 		// containerd 1.1 and 1.2 are deprecated.
 | |
| 		hostpath := c.getSandboxHostname(sandboxID)
 | |
| 		if _, err := c.os.Stat(hostpath); err == nil {
 | |
| 			mounts = append(mounts, &runtime.Mount{
 | |
| 				ContainerPath:  etcHostname,
 | |
| 				HostPath:       hostpath,
 | |
| 				Readonly:       securityContext.GetReadonlyRootfs(),
 | |
| 				SelinuxRelabel: true,
 | |
| 			})
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if !isInCRIMounts(etcHosts, config.GetMounts()) {
 | |
| 		mounts = append(mounts, &runtime.Mount{
 | |
| 			ContainerPath:  etcHosts,
 | |
| 			HostPath:       c.getSandboxHosts(sandboxID),
 | |
| 			Readonly:       securityContext.GetReadonlyRootfs(),
 | |
| 			SelinuxRelabel: true,
 | |
| 		})
 | |
| 	}
 | |
| 
 | |
| 	// Mount sandbox resolv.config.
 | |
| 	// TODO: Need to figure out whether we should always mount it as read-only
 | |
| 	if !isInCRIMounts(resolvConfPath, config.GetMounts()) {
 | |
| 		mounts = append(mounts, &runtime.Mount{
 | |
| 			ContainerPath:  resolvConfPath,
 | |
| 			HostPath:       c.getResolvPath(sandboxID),
 | |
| 			Readonly:       securityContext.GetReadonlyRootfs(),
 | |
| 			SelinuxRelabel: true,
 | |
| 		})
 | |
| 	}
 | |
| 
 | |
| 	if !isInCRIMounts(devShm, config.GetMounts()) {
 | |
| 		sandboxDevShm := c.getSandboxDevShm(sandboxID)
 | |
| 		if securityContext.GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE {
 | |
| 			sandboxDevShm = devShm
 | |
| 		}
 | |
| 		mounts = append(mounts, &runtime.Mount{
 | |
| 			ContainerPath:  devShm,
 | |
| 			HostPath:       sandboxDevShm,
 | |
| 			Readonly:       false,
 | |
| 			SelinuxRelabel: sandboxDevShm != devShm,
 | |
| 		})
 | |
| 	}
 | |
| 	return mounts
 | |
| }
 | |
| 
 | |
| func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
 | |
| 	var specOpts []oci.SpecOpts
 | |
| 	securityContext := config.GetLinux().GetSecurityContext()
 | |
| 	// Set container username. This could only be done by containerd, because it needs
 | |
| 	// access to the container rootfs. Pass user name to containerd, and let it overwrite
 | |
| 	// the spec for us.
 | |
| 	userstr, err := generateUserString(
 | |
| 		securityContext.GetRunAsUsername(),
 | |
| 		securityContext.GetRunAsUser(),
 | |
| 		securityContext.GetRunAsGroup())
 | |
| 	if err != nil {
 | |
| 		return nil, fmt.Errorf("failed to generate user string: %w", err)
 | |
| 	}
 | |
| 	if userstr == "" {
 | |
| 		// Lastly, since no user override was passed via CRI try to set via OCI
 | |
| 		// Image
 | |
| 		userstr = imageConfig.User
 | |
| 	}
 | |
| 	if userstr != "" {
 | |
| 		specOpts = append(specOpts, oci.WithUser(userstr))
 | |
| 	}
 | |
| 
 | |
| 	if securityContext.GetRunAsUsername() != "" {
 | |
| 		userstr = securityContext.GetRunAsUsername()
 | |
| 	} else {
 | |
| 		// Even if RunAsUser is not set, we still call `GetValue` to get uid 0.
 | |
| 		// Because it is still useful to get additional gids for uid 0.
 | |
| 		userstr = strconv.FormatInt(securityContext.GetRunAsUser().GetValue(), 10)
 | |
| 	}
 | |
| 	specOpts = append(specOpts, customopts.WithAdditionalGIDs(userstr),
 | |
| 		customopts.WithSupplementalGroups(securityContext.GetSupplementalGroups()))
 | |
| 
 | |
| 	asp := securityContext.GetApparmor()
 | |
| 	if asp == nil {
 | |
| 		asp, err = generateApparmorSecurityProfile(securityContext.GetApparmorProfile()) //nolint:staticcheck // Deprecated but we don't want to remove yet
 | |
| 		if err != nil {
 | |
| 			return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err)
 | |
| 		}
 | |
| 	}
 | |
| 	apparmorSpecOpts, err := generateApparmorSpecOpts(
 | |
| 		asp,
 | |
| 		securityContext.GetPrivileged(),
 | |
| 		c.apparmorEnabled())
 | |
| 	if err != nil {
 | |
| 		return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err)
 | |
| 	}
 | |
| 	if apparmorSpecOpts != nil {
 | |
| 		specOpts = append(specOpts, apparmorSpecOpts)
 | |
| 	}
 | |
| 
 | |
| 	ssp := securityContext.GetSeccomp()
 | |
| 	if ssp == nil {
 | |
| 		ssp, err = generateSeccompSecurityProfile(
 | |
| 			securityContext.GetSeccompProfilePath(), //nolint:staticcheck // Deprecated but we don't want to remove yet
 | |
| 			c.config.UnsetSeccompProfile)
 | |
| 		if err != nil {
 | |
| 			return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err)
 | |
| 		}
 | |
| 	}
 | |
| 	seccompSpecOpts, err := c.generateSeccompSpecOpts(
 | |
| 		ssp,
 | |
| 		securityContext.GetPrivileged(),
 | |
| 		c.seccompEnabled())
 | |
| 	if err != nil {
 | |
| 		return nil, fmt.Errorf("failed to generate seccomp spec opts: %w", err)
 | |
| 	}
 | |
| 	if seccompSpecOpts != nil {
 | |
| 		specOpts = append(specOpts, seccompSpecOpts)
 | |
| 	}
 | |
| 	if c.config.EnableCDI {
 | |
| 		specOpts = append(specOpts, customopts.WithCDI(config.Annotations))
 | |
| 	}
 | |
| 	return specOpts, nil
 | |
| }
 | |
| 
 | |
| func generateSeccompSecurityProfile(profilePath string, unsetProfilePath string) (*runtime.SecurityProfile, error) {
 | |
| 	if profilePath != "" {
 | |
| 		return generateSecurityProfile(profilePath)
 | |
| 	}
 | |
| 	if unsetProfilePath != "" {
 | |
| 		return generateSecurityProfile(unsetProfilePath)
 | |
| 	}
 | |
| 	return nil, nil
 | |
| }
 | |
| func generateApparmorSecurityProfile(profilePath string) (*runtime.SecurityProfile, error) {
 | |
| 	if profilePath != "" {
 | |
| 		return generateSecurityProfile(profilePath)
 | |
| 	}
 | |
| 	return nil, nil
 | |
| }
 | |
| 
 | |
| func generateSecurityProfile(profilePath string) (*runtime.SecurityProfile, error) {
 | |
| 	switch profilePath {
 | |
| 	case runtimeDefault, dockerDefault, "":
 | |
| 		return &runtime.SecurityProfile{
 | |
| 			ProfileType: runtime.SecurityProfile_RuntimeDefault,
 | |
| 		}, nil
 | |
| 	case unconfinedProfile:
 | |
| 		return &runtime.SecurityProfile{
 | |
| 			ProfileType: runtime.SecurityProfile_Unconfined,
 | |
| 		}, nil
 | |
| 	default:
 | |
| 		// Require and Trim default profile name prefix
 | |
| 		if !strings.HasPrefix(profilePath, profileNamePrefix) {
 | |
| 			return nil, fmt.Errorf("invalid profile %q", profilePath)
 | |
| 		}
 | |
| 		return &runtime.SecurityProfile{
 | |
| 			ProfileType:  runtime.SecurityProfile_Localhost,
 | |
| 			LocalhostRef: strings.TrimPrefix(profilePath, profileNamePrefix),
 | |
| 		}, nil
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // generateSeccompSpecOpts generates containerd SpecOpts for seccomp.
 | |
| func (c *criService) generateSeccompSpecOpts(sp *runtime.SecurityProfile, privileged, seccompEnabled bool) (oci.SpecOpts, error) {
 | |
| 	if privileged {
 | |
| 		// Do not set seccomp profile when container is privileged
 | |
| 		return nil, nil
 | |
| 	}
 | |
| 	if !seccompEnabled {
 | |
| 		if sp != nil {
 | |
| 			if sp.ProfileType != runtime.SecurityProfile_Unconfined {
 | |
| 				return nil, errors.New("seccomp is not supported")
 | |
| 			}
 | |
| 		}
 | |
| 		return nil, nil
 | |
| 	}
 | |
| 
 | |
| 	if sp == nil {
 | |
| 		return nil, nil
 | |
| 	}
 | |
| 
 | |
| 	if sp.ProfileType != runtime.SecurityProfile_Localhost && sp.LocalhostRef != "" {
 | |
| 		return nil, errors.New("seccomp config invalid LocalhostRef must only be set if ProfileType is Localhost")
 | |
| 	}
 | |
| 	switch sp.ProfileType {
 | |
| 	case runtime.SecurityProfile_Unconfined:
 | |
| 		// Do not set seccomp profile.
 | |
| 		return nil, nil
 | |
| 	case runtime.SecurityProfile_RuntimeDefault:
 | |
| 		return seccomp.WithDefaultProfile(), nil
 | |
| 	case runtime.SecurityProfile_Localhost:
 | |
| 		// trimming the localhost/ prefix just in case even though it should not
 | |
| 		// be necessary with the new SecurityProfile struct
 | |
| 		return seccomp.WithProfile(strings.TrimPrefix(sp.LocalhostRef, profileNamePrefix)), nil
 | |
| 	default:
 | |
| 		return nil, errors.New("seccomp unknown ProfileType")
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // generateApparmorSpecOpts generates containerd SpecOpts for apparmor.
 | |
| func generateApparmorSpecOpts(sp *runtime.SecurityProfile, privileged, apparmorEnabled bool) (oci.SpecOpts, error) {
 | |
| 	if !apparmorEnabled {
 | |
| 		// Should fail loudly if user try to specify apparmor profile
 | |
| 		// but we don't support it.
 | |
| 		if sp != nil {
 | |
| 			if sp.ProfileType != runtime.SecurityProfile_Unconfined {
 | |
| 				return nil, errors.New("apparmor is not supported")
 | |
| 			}
 | |
| 		}
 | |
| 		return nil, nil
 | |
| 	}
 | |
| 
 | |
| 	if sp == nil {
 | |
| 		// Based on kubernetes#51746, default apparmor profile should be applied
 | |
| 		// for when apparmor is not specified.
 | |
| 		sp, _ = generateSecurityProfile("")
 | |
| 	}
 | |
| 
 | |
| 	if sp.ProfileType != runtime.SecurityProfile_Localhost && sp.LocalhostRef != "" {
 | |
| 		return nil, errors.New("apparmor config invalid LocalhostRef must only be set if ProfileType is Localhost")
 | |
| 	}
 | |
| 
 | |
| 	switch sp.ProfileType {
 | |
| 	case runtime.SecurityProfile_Unconfined:
 | |
| 		// Do not set apparmor profile.
 | |
| 		return nil, nil
 | |
| 	case runtime.SecurityProfile_RuntimeDefault:
 | |
| 		if privileged {
 | |
| 			// Do not set apparmor profile when container is privileged
 | |
| 			return nil, nil
 | |
| 		}
 | |
| 		// TODO (mikebrow): delete created apparmor default profile
 | |
| 		return apparmor.WithDefaultProfile(appArmorDefaultProfileName), nil
 | |
| 	case runtime.SecurityProfile_Localhost:
 | |
| 		// trimming the localhost/ prefix just in case even through it should not
 | |
| 		// be necessary with the new SecurityProfile struct
 | |
| 		appArmorProfile := strings.TrimPrefix(sp.LocalhostRef, profileNamePrefix)
 | |
| 		if profileExists, err := appArmorProfileExists(appArmorProfile); !profileExists {
 | |
| 			if err != nil {
 | |
| 				return nil, fmt.Errorf("failed to generate apparmor spec opts: %w", err)
 | |
| 			}
 | |
| 			return nil, fmt.Errorf("apparmor profile not found %s", appArmorProfile)
 | |
| 		}
 | |
| 		return apparmor.WithProfile(appArmorProfile), nil
 | |
| 	default:
 | |
| 		return nil, errors.New("apparmor unknown ProfileType")
 | |
| 	}
 | |
| }
 | |
| 
 | |
// appArmorProfileExists reports whether profile appears in
// /sys/kernel/security/apparmor/profiles.
//
// An empty profile name is rejected with an error. Failure to open or read
// the profiles file is returned as an error with a false result.
func appArmorProfileExists(profile string) (bool, error) {
	if profile == "" {
		return false, errors.New("nil apparmor profile is not supported")
	}
	profiles, err := os.Open("/sys/kernel/security/apparmor/profiles")
	if err != nil {
		return false, err
	}
	defer profiles.Close()

	return appArmorProfileListed(profiles, profile)
}

// appArmorProfileListed scans r line by line and reports whether any line
// starts with profile followed by " (".
func appArmorProfileListed(r io.Reader, profile string) (bool, error) {
	wanted := profile + " ("
	rbuff := bufio.NewReader(r)
	for {
		line, err := rbuff.ReadString('\n')
		if err != nil && err != io.EOF {
			return false, err
		}
		// ReadString hands back the data read so far together with io.EOF,
		// so a final line without a trailing newline must be checked too.
		if strings.HasPrefix(line, wanted) {
			return true, nil
		}
		if err == io.EOF {
			return false, nil
		}
	}
}
 | |
| 
 | |
| // generateUserString generates valid user string based on OCI Image Spec
 | |
| // v1.0.0.
 | |
| //
 | |
| // CRI defines that the following combinations are valid:
 | |
| //
 | |
| // (none) -> ""
 | |
| // username -> username
 | |
| // username, uid -> username
 | |
| // username, uid, gid -> username:gid
 | |
| // username, gid -> username:gid
 | |
| // uid -> uid
 | |
| // uid, gid -> uid:gid
 | |
| // gid -> error
 | |
| //
 | |
| // TODO(random-liu): Add group name support in CRI.
 | |
| func generateUserString(username string, uid, gid *runtime.Int64Value) (string, error) {
 | |
| 	var userstr, groupstr string
 | |
| 	if uid != nil {
 | |
| 		userstr = strconv.FormatInt(uid.GetValue(), 10)
 | |
| 	}
 | |
| 	if username != "" {
 | |
| 		userstr = username
 | |
| 	}
 | |
| 	if gid != nil {
 | |
| 		groupstr = strconv.FormatInt(gid.GetValue(), 10)
 | |
| 	}
 | |
| 	if userstr == "" {
 | |
| 		if groupstr != "" {
 | |
| 			return "", fmt.Errorf("user group %q is specified without user", groupstr)
 | |
| 		}
 | |
| 		return "", nil
 | |
| 	}
 | |
| 	if groupstr != "" {
 | |
| 		userstr = userstr + ":" + groupstr
 | |
| 	}
 | |
| 	return userstr, nil
 | |
| }
 | |
| 
 | |
| // snapshotterOpts returns any Linux specific snapshotter options for the rootfs snapshot
 | |
| func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt {
 | |
| 	return []snapshots.Opt{}
 | |
| }
 |