Move unix specific logic into _unix.go

Signed-off-by: Lantao Liu <lantaol@google.com>
This commit is contained in:
Lantao Liu
2019-09-01 00:58:06 -07:00
parent 7b606375ae
commit 50c73e6dc5
55 changed files with 4265 additions and 3442 deletions

View File

@@ -18,23 +18,12 @@ package server
import (
"path/filepath"
"strconv"
"strings"
"time"
"github.com/containerd/containerd"
"github.com/containerd/containerd/containers"
"github.com/containerd/containerd/contrib/apparmor"
"github.com/containerd/containerd/contrib/seccomp"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/oci"
"github.com/containerd/cri/pkg/annotations"
"github.com/containerd/cri/pkg/config"
customopts "github.com/containerd/cri/pkg/containerd/opts"
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
cio "github.com/containerd/cri/pkg/server/io"
containerstore "github.com/containerd/cri/pkg/store/container"
"github.com/containerd/cri/pkg/util"
"github.com/containerd/typeurl"
"github.com/davecgh/go-spew/spew"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
@@ -42,21 +31,12 @@ import (
"github.com/pkg/errors"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)
const (
// profileNamePrefix is the prefix for loading profiles on a localhost. Eg. AppArmor localhost/profileName.
profileNamePrefix = "localhost/" // TODO (mikebrow): get localhost/ & runtime/default from CRI kubernetes/kubernetes#51747
// runtimeDefault indicates that we should use or create a runtime default profile.
runtimeDefault = "runtime/default"
// dockerDefault indicates that we should use or create a docker default profile.
dockerDefault = "docker/default"
// appArmorDefaultProfileName is name to use when creating a default apparmor profile.
appArmorDefaultProfileName = "cri-containerd.apparmor.d"
// unconfinedProfile is a string indicating one should run a pod/containerd without a security profile
unconfinedProfile = "unconfined"
// seccompDefaultProfile is the default seccomp profile.
seccompDefaultProfile = dockerDefault
customopts "github.com/containerd/cri/pkg/containerd/opts"
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
cio "github.com/containerd/cri/pkg/server/io"
containerstore "github.com/containerd/cri/pkg/store/container"
"github.com/containerd/cri/pkg/util"
)
func init() {
@@ -156,10 +136,10 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
}()
// Create container volumes mounts.
volumeMounts := c.generateVolumeMounts(containerRootDir, config.GetMounts(), &image.ImageSpec.Config)
volumeMounts := c.volumeMounts(containerRootDir, config.GetMounts(), &image.ImageSpec.Config)
// Generate container runtime spec.
mounts := c.generateContainerMounts(sandboxID, config)
// Generate container mounts.
mounts := c.containerMounts(sandboxID, config)
ociRuntime, err := c.getSandboxRuntime(sandboxConfig, sandbox.Metadata.RuntimeHandler)
if err != nil {
@@ -167,7 +147,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
}
log.G(ctx).Debugf("Use OCI runtime %+v for sandbox %q and container %q", ociRuntime, sandboxID, id)
spec, err := c.generateContainerSpec(id, sandboxID, sandboxPid, config, sandboxConfig,
spec, err := c.containerSpec(id, sandboxID, sandboxPid, sandbox.NetNSPath, config, sandboxConfig,
&image.ImageSpec.Config, append(mounts, volumeMounts...), ociRuntime)
if err != nil {
return nil, errors.Wrapf(err, "failed to generate container %q spec", id)
@@ -185,7 +165,6 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
// rootfs readonly (requested by spec.Root.Readonly).
customopts.WithNewSnapshot(id, containerdImage),
}
if len(volumeMounts) > 0 {
mountMap := make(map[string]string)
for _, v := range volumeMounts {
@@ -219,58 +198,11 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
}
}()
var specOpts []oci.SpecOpts
securityContext := config.GetLinux().GetSecurityContext()
// Set container username. This could only be done by containerd, because it needs
// access to the container rootfs. Pass user name to containerd, and let it overwrite
// the spec for us.
userstr, err := generateUserString(
securityContext.GetRunAsUsername(),
securityContext.GetRunAsUser(),
securityContext.GetRunAsGroup())
specOpts, err := c.containerSpecOpts(config, &image.ImageSpec.Config)
if err != nil {
return nil, errors.Wrap(err, "failed to generate user string")
}
if userstr == "" {
// Lastly, since no user override was passed via CRI try to set via OCI
// Image
userstr = image.ImageSpec.Config.User
}
if userstr != "" {
specOpts = append(specOpts, oci.WithUser(userstr))
return nil, errors.Wrap(err, "")
}
if securityContext.GetRunAsUsername() != "" {
userstr = securityContext.GetRunAsUsername()
} else {
// Even if RunAsUser is not set, we still call `GetValue` to get uid 0.
// Because it is still useful to get additional gids for uid 0.
userstr = strconv.FormatInt(securityContext.GetRunAsUser().GetValue(), 10)
}
specOpts = append(specOpts, customopts.WithAdditionalGIDs(userstr))
apparmorSpecOpts, err := generateApparmorSpecOpts(
securityContext.GetApparmorProfile(),
securityContext.GetPrivileged(),
c.apparmorEnabled)
if err != nil {
return nil, errors.Wrap(err, "failed to generate apparmor spec opts")
}
if apparmorSpecOpts != nil {
specOpts = append(specOpts, apparmorSpecOpts)
}
seccompSpecOpts, err := generateSeccompSpecOpts(
securityContext.GetSeccompProfilePath(),
securityContext.GetPrivileged(),
c.seccompEnabled)
if err != nil {
return nil, errors.Wrap(err, "failed to generate seccomp spec opts")
}
if seccompSpecOpts != nil {
specOpts = append(specOpts, seccompSpecOpts)
}
containerLabels := buildLabels(config.Labels, containerKindContainer)
runtimeOptions, err := getRuntimeOptions(sandboxInfo)
@@ -322,128 +254,10 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
return &runtime.CreateContainerResponse{ContainerId: id}, nil
}
func (c *criService) generateContainerSpec(id string, sandboxID string, sandboxPid uint32, config *runtime.ContainerConfig,
sandboxConfig *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig, extraMounts []*runtime.Mount,
ociRuntime config.Runtime) (*runtimespec.Spec, error) {
specOpts := []oci.SpecOpts{
customopts.WithoutRunMount,
customopts.WithoutDefaultSecuritySettings,
customopts.WithRelativeRoot(relativeRootfsPath),
customopts.WithProcessArgs(config, imageConfig),
// this will be set based on the security context below
oci.WithNewPrivileges,
}
if config.GetWorkingDir() != "" {
specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir()))
} else if imageConfig.WorkingDir != "" {
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
}
if config.GetTty() {
specOpts = append(specOpts, oci.WithTTY)
}
// Add HOSTNAME env.
var (
err error
hostname = sandboxConfig.GetHostname()
)
if hostname == "" {
if hostname, err = c.os.Hostname(); err != nil {
return nil, err
}
}
specOpts = append(specOpts, oci.WithEnv([]string{hostnameEnv + "=" + hostname}))
// Apply envs from image config first, so that envs from container config
// can override them.
env := imageConfig.Env
for _, e := range config.GetEnvs() {
env = append(env, e.GetKey()+"="+e.GetValue())
}
specOpts = append(specOpts, oci.WithEnv(env))
securityContext := config.GetLinux().GetSecurityContext()
selinuxOpt := securityContext.GetSelinuxOptions()
processLabel, mountLabel, err := initSelinuxOpts(selinuxOpt)
if err != nil {
return nil, errors.Wrapf(err, "failed to init selinux options %+v", securityContext.GetSelinuxOptions())
}
specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel))
if !c.config.DisableProcMount {
// Apply masked paths if specified.
// If the container is privileged, this will be cleared later on.
specOpts = append(specOpts, oci.WithMaskedPaths(securityContext.GetMaskedPaths()))
// Apply readonly paths if specified.
// If the container is privileged, this will be cleared later on.
specOpts = append(specOpts, oci.WithReadonlyPaths(securityContext.GetReadonlyPaths()))
}
if securityContext.GetPrivileged() {
if !sandboxConfig.GetLinux().GetSecurityContext().GetPrivileged() {
return nil, errors.New("no privileged container allowed in sandbox")
}
specOpts = append(specOpts, oci.WithPrivileged)
if !ociRuntime.PrivilegedWithoutHostDevices {
specOpts = append(specOpts, customopts.WithPrivilegedDevices)
}
} else { // not privileged
specOpts = append(specOpts, customopts.WithDevices(c.os, config), customopts.WithCapabilities(securityContext))
}
// Clear all ambient capabilities. The implication of non-root + caps
// is not clearly defined in Kubernetes.
// See https://github.com/kubernetes/kubernetes/issues/56374
// Keep docker's behavior for now.
specOpts = append(specOpts,
customopts.WithoutAmbientCaps,
customopts.WithSelinuxLabels(processLabel, mountLabel),
)
// TODO: Figure out whether we should set no new privilege for sandbox container by default
if securityContext.GetNoNewPrivs() {
specOpts = append(specOpts, oci.WithNoNewPrivileges)
}
// TODO(random-liu): [P1] Set selinux options (privileged or not).
if securityContext.GetReadonlyRootfs() {
specOpts = append(specOpts, oci.WithRootFSReadonly())
}
if c.config.DisableCgroup {
specOpts = append(specOpts, customopts.WithDisabledCgroups)
} else {
specOpts = append(specOpts, customopts.WithResources(config.GetLinux().GetResources()))
if sandboxConfig.GetLinux().GetCgroupParent() != "" {
cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id)
specOpts = append(specOpts, oci.WithCgroup(cgroupsPath))
}
}
supplementalGroups := securityContext.GetSupplementalGroups()
for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations,
ociRuntime.PodAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
}
specOpts = append(specOpts,
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
customopts.WithPodNamespaces(securityContext, sandboxPid),
customopts.WithSupplementalGroups(supplementalGroups),
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
)
return runtimeSpec(id, specOpts...)
}
// generateVolumeMounts sets up image volumes for container. Rely on the removal of container
// volumeMounts sets up image volumes for container. Rely on the removal of container
// root directory to do cleanup. Note that image volume will be skipped, if there is criMounts
// specified with the same destination.
func (c *criService) generateVolumeMounts(containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount {
func (c *criService) volumeMounts(containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount {
if len(config.Volumes) == 0 {
return nil
}
@@ -469,61 +283,9 @@ func (c *criService) generateVolumeMounts(containerRootDir string, criMounts []*
return mounts
}
// generateContainerMounts sets up necessary container mounts including /dev/shm, /etc/hosts
// and /etc/resolv.conf.
func (c *criService) generateContainerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount {
var mounts []*runtime.Mount
securityContext := config.GetLinux().GetSecurityContext()
if !isInCRIMounts(etcHostname, config.GetMounts()) {
// /etc/hostname is added since 1.1.6, 1.2.4 and 1.3.
// For in-place upgrade, the old sandbox doesn't have the hostname file,
// do not mount this in that case.
// TODO(random-liu): Remove the check and always mount this when
// containerd 1.1 and 1.2 are deprecated.
hostpath := c.getSandboxHostname(sandboxID)
if _, err := c.os.Stat(hostpath); err == nil {
mounts = append(mounts, &runtime.Mount{
ContainerPath: etcHostname,
HostPath: hostpath,
Readonly: securityContext.GetReadonlyRootfs(),
})
}
}
if !isInCRIMounts(etcHosts, config.GetMounts()) {
mounts = append(mounts, &runtime.Mount{
ContainerPath: etcHosts,
HostPath: c.getSandboxHosts(sandboxID),
Readonly: securityContext.GetReadonlyRootfs(),
})
}
// Mount sandbox resolv.config.
// TODO: Need to figure out whether we should always mount it as read-only
if !isInCRIMounts(resolvConfPath, config.GetMounts()) {
mounts = append(mounts, &runtime.Mount{
ContainerPath: resolvConfPath,
HostPath: c.getResolvPath(sandboxID),
Readonly: securityContext.GetReadonlyRootfs(),
})
}
if !isInCRIMounts(devShm, config.GetMounts()) {
sandboxDevShm := c.getSandboxDevShm(sandboxID)
if securityContext.GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE {
sandboxDevShm = devShm
}
mounts = append(mounts, &runtime.Mount{
ContainerPath: devShm,
HostPath: sandboxDevShm,
Readonly: false,
})
}
return mounts
}
// runtimeSpec returns a default runtime spec used in cri-containerd.
func runtimeSpec(id string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) {
// TODO(windows): Remove nolint after windows starts using this helper.
func runtimeSpec(id string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) { // nolint: deadcode, unused
// GenerateSpec needs namespace.
ctx := ctrdutil.NamespacedContext()
spec, err := oci.GenerateSpec(ctx, nil, &containers.Container{ID: id}, opts...)
@@ -532,105 +294,3 @@ func runtimeSpec(id string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) {
}
return spec, nil
}
// generateSeccompSpecOpts generates containerd SpecOpts for seccomp.
func generateSeccompSpecOpts(seccompProf string, privileged, seccompEnabled bool) (oci.SpecOpts, error) {
if privileged {
// Do not set seccomp profile when container is privileged
return nil, nil
}
// Set seccomp profile
if seccompProf == runtimeDefault || seccompProf == dockerDefault {
// use correct default profile (Eg. if not configured otherwise, the default is docker/default)
seccompProf = seccompDefaultProfile
}
if !seccompEnabled {
if seccompProf != "" && seccompProf != unconfinedProfile {
return nil, errors.New("seccomp is not supported")
}
return nil, nil
}
switch seccompProf {
case "", unconfinedProfile:
// Do not set seccomp profile.
return nil, nil
case dockerDefault:
// Note: WithDefaultProfile specOpts must be added after capabilities
return seccomp.WithDefaultProfile(), nil
default:
// Require and Trim default profile name prefix
if !strings.HasPrefix(seccompProf, profileNamePrefix) {
return nil, errors.Errorf("invalid seccomp profile %q", seccompProf)
}
return seccomp.WithProfile(strings.TrimPrefix(seccompProf, profileNamePrefix)), nil
}
}
// generateApparmorSpecOpts generates containerd SpecOpts for apparmor.
func generateApparmorSpecOpts(apparmorProf string, privileged, apparmorEnabled bool) (oci.SpecOpts, error) {
if !apparmorEnabled {
// Should fail loudly if user try to specify apparmor profile
// but we don't support it.
if apparmorProf != "" && apparmorProf != unconfinedProfile {
return nil, errors.New("apparmor is not supported")
}
return nil, nil
}
switch apparmorProf {
// Based on kubernetes#51746, default apparmor profile should be applied
// for when apparmor is not specified.
case runtimeDefault, "":
if privileged {
// Do not set apparmor profile when container is privileged
return nil, nil
}
// TODO (mikebrow): delete created apparmor default profile
return apparmor.WithDefaultProfile(appArmorDefaultProfileName), nil
case unconfinedProfile:
return nil, nil
default:
// Require and Trim default profile name prefix
if !strings.HasPrefix(apparmorProf, profileNamePrefix) {
return nil, errors.Errorf("invalid apparmor profile %q", apparmorProf)
}
return apparmor.WithProfile(strings.TrimPrefix(apparmorProf, profileNamePrefix)), nil
}
}
// generateUserString generates valid user string based on OCI Image Spec
// v1.0.0.
//
// CRI defines that the following combinations are valid:
//
// (none) -> ""
// username -> username
// username, uid -> username
// username, uid, gid -> username:gid
// username, gid -> username:gid
// uid -> uid
// uid, gid -> uid:gid
// gid -> error
//
// TODO(random-liu): Add group name support in CRI.
func generateUserString(username string, uid, gid *runtime.Int64Value) (string, error) {
var userstr, groupstr string
if uid != nil {
userstr = strconv.FormatInt(uid.GetValue(), 10)
}
if username != "" {
userstr = username
}
if gid != nil {
groupstr = strconv.FormatInt(gid.GetValue(), 10)
}
if userstr == "" {
if groupstr != "" {
return "", errors.Errorf("user group %q is specified without user", groupstr)
}
return "", nil
}
if groupstr != "" {
userstr = userstr + ":" + groupstr
}
return userstr, nil
}