containerd/pkg/server/container_start.go
Crazykev 9bf7ffd51a generate and maintain resolv.conf for sandbox
Signed-off-by: Crazykev <crazykev@zju.edu.cn>
2017-06-09 19:36:30 +08:00

409 lines
14 KiB
Go

/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"time"
"github.com/containerd/containerd/api/services/execution"
rootfsapi "github.com/containerd/containerd/api/services/rootfs"
"github.com/containerd/containerd/api/types/container"
prototypes "github.com/gogo/protobuf/types"
"github.com/golang/glog"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
"golang.org/x/net/context"
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1"
"github.com/kubernetes-incubator/cri-containerd/pkg/metadata"
"github.com/kubernetes-incubator/cri-containerd/pkg/server/agents"
)
// StartContainer starts the container.
func (c *criContainerdService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) {
glog.V(2).Infof("StartContainer for %q", r.GetContainerId())
defer func() {
if retErr == nil {
glog.V(2).Infof("StartContainer %q returns successfully", r.GetContainerId())
}
}()
container, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, fmt.Errorf("an error occurred when try to find container %q: %v", r.GetContainerId(), err)
}
id := container.ID
var startErr error
// start container in one transaction to avoid race with event monitor.
if err := c.containerStore.Update(id, func(meta metadata.ContainerMetadata) (metadata.ContainerMetadata, error) {
// Always apply metadata change no matter startContainer fails or not. Because startContainer
// may change container state no matter it fails or succeeds.
startErr = c.startContainer(ctx, id, &meta)
return meta, nil
}); startErr != nil {
return nil, startErr
} else if err != nil {
return nil, fmt.Errorf("failed to update container %q metadata: %v", id, err)
}
return &runtime.StartContainerResponse{}, nil
}
// startContainer actually starts the container. The function needs to be run in one transaction. Any updates
// to the metadata passed in will be applied to container store no matter the function returns error or not.
func (c *criContainerdService) startContainer(ctx context.Context, id string, meta *metadata.ContainerMetadata) (retErr error) {
config := meta.Config
// Return error if container is not in created state.
if meta.State() != runtime.ContainerState_CONTAINER_CREATED {
return fmt.Errorf("container %q is in %s state", id, criContainerStateToString(meta.State()))
}
// Do not start the container when there is a removal in progress.
if meta.Removing {
return fmt.Errorf("container %q is in removing state", id)
}
defer func() {
if retErr != nil {
// Set container to exited if fail to start.
meta.Pid = 0
meta.FinishedAt = time.Now().UnixNano()
meta.ExitCode = errorStartExitCode
meta.Reason = errorStartReason
meta.Message = retErr.Error()
}
}()
// Get sandbox config from sandbox store.
sandboxMeta, err := c.getSandbox(meta.SandboxID)
if err != nil {
return fmt.Errorf("sandbox %q not found: %v", meta.SandboxID, err)
}
sandboxConfig := sandboxMeta.Config
sandboxID := meta.SandboxID
// Make sure sandbox is running.
sandboxInfo, err := c.containerService.Info(ctx, &execution.InfoRequest{ID: sandboxID})
if err != nil {
return fmt.Errorf("failed to get sandbox container %q info: %v", sandboxID, err)
}
// This is only a best effort check, sandbox may still exit after this. If sandbox fails
// before starting the container, the start will fail.
if sandboxInfo.Status != container.Status_RUNNING {
return fmt.Errorf("sandbox container %q is not running", sandboxID)
}
sandboxPid := sandboxInfo.Pid
glog.V(2).Infof("Sandbox container %q is running with pid %d", sandboxID, sandboxPid)
// Generate containerd container create options.
imageMeta, err := c.imageMetadataStore.Get(meta.ImageRef)
if err != nil {
return fmt.Errorf("failed to get container image %q: %v", meta.ImageRef, err)
}
mounts := c.generateContainerMounts(getSandboxRootDir(c.rootDir, sandboxID), config)
spec, err := c.generateContainerSpec(id, sandboxPid, config, sandboxConfig, imageMeta.Config, mounts)
if err != nil {
return fmt.Errorf("failed to generate container %q spec: %v", id, err)
}
rawSpec, err := json.Marshal(spec)
if err != nil {
return fmt.Errorf("failed to marshal oci spec %+v: %v", spec, err)
}
glog.V(4).Infof("Container spec: %+v", spec)
containerRootDir := getContainerRootDir(c.rootDir, id)
stdin, stdout, stderr := getStreamingPipes(containerRootDir)
// Set stdin to empty if Stdin == false.
if !config.GetStdin() {
stdin = ""
}
stdinPipe, stdoutPipe, stderrPipe, err := c.prepareStreamingPipes(ctx, stdin, stdout, stderr)
if err != nil {
return fmt.Errorf("failed to prepare streaming pipes: %v", err)
}
defer func() {
if retErr != nil {
if stdinPipe != nil {
stdinPipe.Close()
}
stdoutPipe.Close()
stderrPipe.Close()
}
}()
// Redirect the stream to std for now.
// TODO(random-liu): [P1] Support container logging.
// TODO(random-liu): [P1] Support StdinOnce after container logging is added.
if stdinPipe != nil {
go func(w io.WriteCloser) {
io.Copy(w, os.Stdin) // nolint: errcheck
w.Close()
}(stdinPipe)
}
if config.GetLogPath() != "" {
// Only generate container log when log path is specified.
logPath := filepath.Join(sandboxConfig.GetLogDirectory(), config.GetLogPath())
if err := c.agentFactory.NewContainerLogger(logPath, agents.Stdout, stdoutPipe).Start(); err != nil {
return fmt.Errorf("failed to start container stdout logger: %v", err)
}
// Only redirect stderr when there is no tty.
if !config.GetTty() {
if err := c.agentFactory.NewContainerLogger(logPath, agents.Stderr, stderrPipe).Start(); err != nil {
return fmt.Errorf("failed to start container stderr logger: %v", err)
}
}
}
// Get rootfs mounts.
mountsResp, err := c.rootfsService.Mounts(ctx, &rootfsapi.MountsRequest{Name: id})
if err != nil {
return fmt.Errorf("failed to get rootfs mounts %q: %v", id, err)
}
// Create containerd container.
createOpts := &execution.CreateRequest{
ID: id,
Spec: &prototypes.Any{
TypeUrl: runtimespec.Version,
Value: rawSpec,
},
Rootfs: mountsResp.Mounts,
Runtime: defaultRuntime,
Stdin: stdin,
Stdout: stdout,
Stderr: stderr,
Terminal: config.GetTty(),
}
glog.V(5).Infof("Create containerd container (id=%q, name=%q) with options %+v.",
id, meta.Name, createOpts)
createResp, err := c.containerService.Create(ctx, createOpts)
if err != nil {
return fmt.Errorf("failed to create containerd container: %v", err)
}
defer func() {
if retErr != nil {
// Cleanup the containerd container if an error is returned.
if _, err := c.containerService.Delete(ctx, &execution.DeleteRequest{ID: id}); err != nil {
glog.Errorf("Failed to delete containerd container %q: %v", id, err)
}
}
}()
// Start containerd container.
if _, err := c.containerService.Start(ctx, &execution.StartRequest{ID: id}); err != nil {
return fmt.Errorf("failed to start containerd container %q: %v", id, err)
}
// Update container start timestamp.
meta.Pid = createResp.Pid
meta.StartedAt = time.Now().UnixNano()
return nil
}
func (c *criContainerdService) generateContainerSpec(id string, sandboxPid uint32, config *runtime.ContainerConfig,
sandboxConfig *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig, extraMounts []*runtime.Mount) (*runtimespec.Spec, error) {
// Creates a spec Generator with the default spec.
// TODO(random-liu): [P2] Move container runtime spec generation into a helper function.
g := generate.New()
// Set the relative path to the rootfs of the container from containerd's
// pre-defined directory.
g.SetRootPath(relativeRootfsPath)
if err := setOCIProcessArgs(&g, config, imageConfig); err != nil {
return nil, err
}
if config.GetWorkingDir() != "" {
g.SetProcessCwd(config.GetWorkingDir())
} else if imageConfig.WorkingDir != "" {
g.SetProcessCwd(imageConfig.WorkingDir)
}
// Apply envs from image config first, so that envs from container config
// can override them.
if err := addImageEnvs(&g, imageConfig.Env); err != nil {
return nil, err
}
for _, e := range config.GetEnvs() {
g.AddProcessEnv(e.GetKey(), e.GetValue())
}
// Add extra mounts first so that CRI specified mounts can override.
addOCIBindMounts(&g, append(extraMounts, config.GetMounts()...))
// TODO(random-liu): [P1] Set device mapping.
// Ref https://github.com/moby/moby/blob/master/oci/devices_linux.go.
// TODO(random-liu): [P1] Handle container logging, decorate and redirect to file.
setOCILinuxResource(&g, config.GetLinux().GetResources())
if sandboxConfig.GetLinux().GetCgroupParent() != "" {
cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id)
g.SetLinuxCgroupsPath(cgroupsPath)
}
g.SetProcessTerminal(config.GetTty())
securityContext := config.GetLinux().GetSecurityContext()
if err := setOCICapabilities(&g, securityContext.GetCapabilities()); err != nil {
return nil, fmt.Errorf("failed to set capabilities %+v: %v",
securityContext.GetCapabilities(), err)
}
// TODO(random-liu): [P0] Handle privileged.
// Set namespaces, share namespace with sandbox container.
setOCINamespaces(&g, securityContext.GetNamespaceOptions(), sandboxPid)
// TODO(random-liu): [P1] Set selinux options.
// TODO(random-liu): [P1] Set user/username.
supplementalGroups := securityContext.GetSupplementalGroups()
for _, group := range supplementalGroups {
g.AddProcessAdditionalGid(uint32(group))
}
g.SetRootReadonly(securityContext.GetReadonlyRootfs())
// TODO(random-liu): [P2] Add apparmor and seccomp.
return g.Spec(), nil
}
// generateContainerMounts sets up necessary container mounts including /dev/shm, /etc/hosts
// and /etc/resolv.conf.
func (c *criContainerdService) generateContainerMounts(sandboxRootDir string, config *runtime.ContainerConfig) []*runtime.Mount {
var mounts []*runtime.Mount
securityContext := config.GetLinux().GetSecurityContext()
mounts = append(mounts, &runtime.Mount{
ContainerPath: etcHosts,
HostPath: getSandboxHosts(sandboxRootDir),
Readonly: securityContext.GetReadonlyRootfs(),
})
// Mount sandbox resolv.config.
// TODO: Need to figure out whether we should always mount it as read-only
mounts = append(mounts, &runtime.Mount{
ContainerPath: resolvConfPath,
HostPath: getResolvPath(sandboxRootDir),
Readonly: securityContext.GetReadonlyRootfs(),
})
// TODO(random-liu): [P0] Mount sandbox /dev/shm.
return mounts
}
// setOCIProcessArgs sets process args. It returns error if the final arg list
// is empty.
func setOCIProcessArgs(g *generate.Generator, config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) error {
command, args := config.GetCommand(), config.GetArgs()
// The following logic is migrated from https://github.com/moby/moby/blob/master/daemon/commit.go
// TODO(random-liu): Clearly define the commands overwrite behavior.
if len(command) == 0 {
if len(args) == 0 {
args = imageConfig.Cmd
}
if command == nil {
command = imageConfig.Entrypoint
}
}
if len(command) == 0 && len(args) == 0 {
return fmt.Errorf("no command specified")
}
g.SetProcessArgs(append(command, args...))
return nil
}
// addImageEnvs adds environment variables from image config. It returns error if
// an invalid environment variable is encountered.
func addImageEnvs(g *generate.Generator, imageEnvs []string) error {
for _, e := range imageEnvs {
kv := strings.Split(e, "=")
if len(kv) != 2 {
return fmt.Errorf("invalid environment variable %q", e)
}
g.AddProcessEnv(kv[0], kv[1])
}
return nil
}
// addOCIBindMounts adds bind mounts.
func addOCIBindMounts(g *generate.Generator, mounts []*runtime.Mount) {
for _, mount := range mounts {
dst := mount.GetContainerPath()
src := mount.GetHostPath()
options := []string{"rw"}
if mount.GetReadonly() {
options = []string{"ro"}
}
// TODO(random-liu): [P1] Apply selinux label
g.AddBindMount(src, dst, options)
}
}
// setOCILinuxResource set container resource limit.
func setOCILinuxResource(g *generate.Generator, resources *runtime.LinuxContainerResources) {
if resources == nil {
return
}
g.SetLinuxResourcesCPUPeriod(uint64(resources.GetCpuPeriod()))
g.SetLinuxResourcesCPUQuota(resources.GetCpuQuota())
g.SetLinuxResourcesCPUShares(uint64(resources.GetCpuShares()))
g.SetLinuxResourcesMemoryLimit(uint64(resources.GetMemoryLimitInBytes()))
g.SetLinuxResourcesOOMScoreAdj(int(resources.GetOomScoreAdj()))
}
// setOCICapabilities adds/drops process capabilities.
func setOCICapabilities(g *generate.Generator, capabilities *runtime.Capability) error {
if capabilities == nil {
return nil
}
for _, c := range capabilities.GetAddCapabilities() {
if err := g.AddProcessCapability(c); err != nil {
return err
}
}
for _, c := range capabilities.GetDropCapabilities() {
if err := g.DropProcessCapability(c); err != nil {
return err
}
}
return nil
}
// setOCINamespaces sets namespaces.
func setOCINamespaces(g *generate.Generator, namespaces *runtime.NamespaceOption, sandboxPid uint32) {
g.AddOrReplaceLinuxNamespace(string(runtimespec.NetworkNamespace), getNetworkNamespace(sandboxPid)) // nolint: errcheck
g.AddOrReplaceLinuxNamespace(string(runtimespec.IPCNamespace), getIPCNamespace(sandboxPid)) // nolint: errcheck
g.AddOrReplaceLinuxNamespace(string(runtimespec.UTSNamespace), getUTSNamespace(sandboxPid)) // nolint: errcheck
g.AddOrReplaceLinuxNamespace(string(runtimespec.PIDNamespace), getPIDNamespace(sandboxPid)) // nolint: errcheck
}