Create/delete containerd containerd

Signed-off-by: Lantao Liu <lantaol@google.com>
This commit is contained in:
Lantao Liu
2017-06-15 23:13:48 +00:00
parent 6ca9c65578
commit cb9e104cf1
15 changed files with 1049 additions and 829 deletions

View File

@@ -17,10 +17,18 @@ limitations under the License.
package server
import (
"encoding/json"
"fmt"
"strings"
"time"
"github.com/containerd/containerd/api/services/containers"
prototypes "github.com/gogo/protobuf/types"
"github.com/golang/glog"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/opencontainers/runc/libcontainer/devices"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
"golang.org/x/net/context"
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1"
@@ -43,6 +51,7 @@ func (c *criContainerdService) CreateContainer(ctx context.Context, r *runtime.C
if err != nil {
return nil, fmt.Errorf("failed to find sandbox id %q: %v", r.GetPodSandboxId(), err)
}
sandboxID := sandbox.ID
// Generate unique id and name for the container and reserve the name.
// Reserve the container name to avoid concurrent `CreateContainer` request creating
@@ -63,7 +72,7 @@ func (c *criContainerdService) CreateContainer(ctx context.Context, r *runtime.C
meta := metadata.ContainerMetadata{
ID: id,
Name: name,
SandboxID: sandbox.ID,
SandboxID: sandboxID,
Config: config,
}
@@ -77,6 +86,20 @@ func (c *criContainerdService) CreateContainer(ctx context.Context, r *runtime.C
if imageMeta == nil {
return nil, fmt.Errorf("image %q not found", image)
}
// Generate container runtime spec.
mounts := c.generateContainerMounts(getSandboxRootDir(c.rootDir, sandboxID), config)
spec, err := c.generateContainerSpec(id, sandbox.Pid, config, sandboxConfig, imageMeta.Config, mounts)
if err != nil {
return nil, fmt.Errorf("failed to generate container %q spec: %v", id, err)
}
rawSpec, err := json.Marshal(spec)
if err != nil {
return nil, fmt.Errorf("failed to marshal oci spec %+v: %v", spec, err)
}
glog.V(4).Infof("Container spec: %+v", spec)
// Prepare container rootfs.
if config.GetLinux().GetSecurityContext().GetReadonlyRootfs() {
if _, err := c.snapshotService.View(ctx, id, imageMeta.ChainID); err != nil {
return nil, fmt.Errorf("failed to view container rootfs %q: %v", imageMeta.ChainID, err)
@@ -111,6 +134,30 @@ func (c *criContainerdService) CreateContainer(ctx context.Context, r *runtime.C
}
}()
// Create containerd container.
if _, err = c.containerService.Create(ctx, &containers.CreateContainerRequest{
Container: containers.Container{
ID: id,
// TODO(random-liu): Checkpoint metadata into container labels.
Image: imageMeta.ID,
Runtime: defaultRuntime,
Spec: &prototypes.Any{
TypeUrl: runtimespec.Version,
Value: rawSpec,
},
RootFS: id,
},
}); err != nil {
return nil, fmt.Errorf("failed to create containerd container: %v", err)
}
defer func() {
if retErr != nil {
if _, err := c.containerService.Delete(ctx, &containers.DeleteContainerRequest{ID: id}); err != nil {
glog.Errorf("Failed to delete containerd container %q: %v", id, err)
}
}
}()
// Update container CreatedAt.
meta.CreatedAt = time.Now().UnixNano()
// Add container into container store.
@@ -121,3 +168,283 @@ func (c *criContainerdService) CreateContainer(ctx context.Context, r *runtime.C
return &runtime.CreateContainerResponse{ContainerId: id}, nil
}
func (c *criContainerdService) generateContainerSpec(id string, sandboxPid uint32, config *runtime.ContainerConfig,
sandboxConfig *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig, extraMounts []*runtime.Mount) (*runtimespec.Spec, error) {
// Creates a spec Generator with the default spec.
// TODO(random-liu): [P2] Move container runtime spec generation into a helper function.
g := generate.New()
// Set the relative path to the rootfs of the container from containerd's
// pre-defined directory.
g.SetRootPath(relativeRootfsPath)
if err := setOCIProcessArgs(&g, config, imageConfig); err != nil {
return nil, err
}
if config.GetWorkingDir() != "" {
g.SetProcessCwd(config.GetWorkingDir())
} else if imageConfig.WorkingDir != "" {
g.SetProcessCwd(imageConfig.WorkingDir)
}
// Apply envs from image config first, so that envs from container config
// can override them.
if err := addImageEnvs(&g, imageConfig.Env); err != nil {
return nil, err
}
for _, e := range config.GetEnvs() {
g.AddProcessEnv(e.GetKey(), e.GetValue())
}
// TODO: add setOCIPrivileged group all privileged logic together
securityContext := config.GetLinux().GetSecurityContext()
// Add extra mounts first so that CRI specified mounts can override.
addOCIBindMounts(&g, append(extraMounts, config.GetMounts()...), securityContext.GetPrivileged())
g.SetRootReadonly(securityContext.GetReadonlyRootfs())
if err := addOCIDevices(&g, config.GetDevices(), securityContext.GetPrivileged()); err != nil {
return nil, fmt.Errorf("failed to set devices mapping %+v: %v", config.GetDevices(), err)
}
// TODO(random-liu): [P1] Handle container logging, decorate and redirect to file.
setOCILinuxResource(&g, config.GetLinux().GetResources())
if sandboxConfig.GetLinux().GetCgroupParent() != "" {
cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id)
g.SetLinuxCgroupsPath(cgroupsPath)
}
g.SetProcessTerminal(config.GetTty())
if err := setOCICapabilities(&g, securityContext.GetCapabilities(), securityContext.GetPrivileged()); err != nil {
return nil, fmt.Errorf("failed to set capabilities %+v: %v",
securityContext.GetCapabilities(), err)
}
// Set namespaces, share namespace with sandbox container.
setOCINamespaces(&g, securityContext.GetNamespaceOptions(), sandboxPid)
// TODO(random-liu): [P1] Set selinux options.
// TODO(random-liu): [P1] Set user/username.
supplementalGroups := securityContext.GetSupplementalGroups()
for _, group := range supplementalGroups {
g.AddProcessAdditionalGid(uint32(group))
}
// TODO(random-liu): [P2] Add apparmor and seccomp.
return g.Spec(), nil
}
// generateContainerMounts sets up necessary container mounts including /dev/shm, /etc/hosts
// and /etc/resolv.conf.
func (c *criContainerdService) generateContainerMounts(sandboxRootDir string, config *runtime.ContainerConfig) []*runtime.Mount {
var mounts []*runtime.Mount
securityContext := config.GetLinux().GetSecurityContext()
mounts = append(mounts, &runtime.Mount{
ContainerPath: etcHosts,
HostPath: getSandboxHosts(sandboxRootDir),
Readonly: securityContext.GetReadonlyRootfs(),
})
// Mount sandbox resolv.config.
// TODO: Need to figure out whether we should always mount it as read-only
mounts = append(mounts, &runtime.Mount{
ContainerPath: resolvConfPath,
HostPath: getResolvPath(sandboxRootDir),
Readonly: securityContext.GetReadonlyRootfs(),
})
sandboxDevShm := getSandboxDevShm(sandboxRootDir)
if securityContext.GetNamespaceOptions().GetHostIpc() {
sandboxDevShm = devShm
}
mounts = append(mounts, &runtime.Mount{
ContainerPath: devShm,
HostPath: sandboxDevShm,
Readonly: false,
})
return mounts
}
// setOCIProcessArgs sets process args. It returns error if the final arg list
// is empty.
func setOCIProcessArgs(g *generate.Generator, config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) error {
command, args := config.GetCommand(), config.GetArgs()
// The following logic is migrated from https://github.com/moby/moby/blob/master/daemon/commit.go
// TODO(random-liu): Clearly define the commands overwrite behavior.
if len(command) == 0 {
if len(args) == 0 {
args = imageConfig.Cmd
}
if command == nil {
command = imageConfig.Entrypoint
}
}
if len(command) == 0 && len(args) == 0 {
return fmt.Errorf("no command specified")
}
g.SetProcessArgs(append(command, args...))
return nil
}
// addImageEnvs adds environment variables from image config. It returns error if
// an invalid environment variable is encountered.
func addImageEnvs(g *generate.Generator, imageEnvs []string) error {
for _, e := range imageEnvs {
kv := strings.Split(e, "=")
if len(kv) != 2 {
return fmt.Errorf("invalid environment variable %q", e)
}
g.AddProcessEnv(kv[0], kv[1])
}
return nil
}
func clearReadOnly(m *runtimespec.Mount) {
var opt []string
for _, o := range m.Options {
if o != "ro" {
opt = append(opt, o)
}
}
m.Options = opt
}
// addDevices set device mapping.
func addOCIDevices(g *generate.Generator, devs []*runtime.Device, privileged bool) error {
spec := g.Spec()
if privileged {
hostDevices, err := devices.HostDevices()
if err != nil {
return err
}
for _, hostDevice := range hostDevices {
rd := runtimespec.LinuxDevice{
Path: hostDevice.Path,
Type: string(hostDevice.Type),
Major: hostDevice.Major,
Minor: hostDevice.Minor,
UID: &hostDevice.Uid,
GID: &hostDevice.Gid,
}
g.AddDevice(rd)
}
spec.Linux.Resources.Devices = []runtimespec.LinuxDeviceCgroup{
{
Allow: true,
Access: "rwm",
},
}
return nil
}
for _, device := range devs {
dev, err := devices.DeviceFromPath(device.HostPath, device.Permissions)
if err != nil {
return err
}
rd := runtimespec.LinuxDevice{
Path: device.ContainerPath,
Type: string(dev.Type),
Major: dev.Major,
Minor: dev.Minor,
UID: &dev.Uid,
GID: &dev.Gid,
}
g.AddDevice(rd)
spec.Linux.Resources.Devices = append(spec.Linux.Resources.Devices, runtimespec.LinuxDeviceCgroup{
Allow: true,
Type: string(dev.Type),
Major: &dev.Major,
Minor: &dev.Minor,
Access: dev.Permissions,
})
}
return nil
}
// addOCIBindMounts adds bind mounts.
// TODO(random-liu): Figure out whether we need to change all CRI mounts to readonly when
// rootfs is readonly. (https://github.com/moby/moby/blob/master/daemon/oci_linux.go)
func addOCIBindMounts(g *generate.Generator, mounts []*runtime.Mount, privileged bool) {
// Mount cgroup into the container as readonly, which inherits docker's behavior.
g.AddCgroupsMount("ro") // nolint: errcheck
for _, mount := range mounts {
dst := mount.GetContainerPath()
src := mount.GetHostPath()
options := []string{"rw"}
if mount.GetReadonly() {
options = []string{"ro"}
}
// TODO(random-liu): [P1] Apply selinux label
g.AddBindMount(src, dst, options)
}
if !privileged {
return
}
spec := g.Spec()
// clear readonly for /sys and cgroup
for i, m := range spec.Mounts {
if spec.Mounts[i].Destination == "/sys" && !spec.Root.Readonly {
clearReadOnly(&spec.Mounts[i])
}
if m.Type == "cgroup" {
clearReadOnly(&spec.Mounts[i])
}
}
spec.Linux.ReadonlyPaths = nil
spec.Linux.MaskedPaths = nil
}
// setOCILinuxResource set container resource limit.
func setOCILinuxResource(g *generate.Generator, resources *runtime.LinuxContainerResources) {
if resources == nil {
return
}
g.SetLinuxResourcesCPUPeriod(uint64(resources.GetCpuPeriod()))
g.SetLinuxResourcesCPUQuota(resources.GetCpuQuota())
g.SetLinuxResourcesCPUShares(uint64(resources.GetCpuShares()))
g.SetLinuxResourcesMemoryLimit(uint64(resources.GetMemoryLimitInBytes()))
g.SetLinuxResourcesOOMScoreAdj(int(resources.GetOomScoreAdj()))
}
// setOCICapabilities adds/drops process capabilities.
func setOCICapabilities(g *generate.Generator, capabilities *runtime.Capability, privileged bool) error {
if privileged {
// Add all capabilities in privileged mode.
g.SetupPrivileged(true)
return nil
}
if capabilities == nil {
return nil
}
// Capabilities in CRI doesn't have `CAP_` prefix, so add it.
for _, c := range capabilities.GetAddCapabilities() {
if err := g.AddProcessCapability("CAP_" + c); err != nil {
return err
}
}
for _, c := range capabilities.GetDropCapabilities() {
if err := g.DropProcessCapability("CAP_" + c); err != nil {
return err
}
}
return nil
}
// setOCINamespaces sets namespaces.
func setOCINamespaces(g *generate.Generator, namespaces *runtime.NamespaceOption, sandboxPid uint32) {
g.AddOrReplaceLinuxNamespace(string(runtimespec.NetworkNamespace), getNetworkNamespace(sandboxPid)) // nolint: errcheck
g.AddOrReplaceLinuxNamespace(string(runtimespec.IPCNamespace), getIPCNamespace(sandboxPid)) // nolint: errcheck
g.AddOrReplaceLinuxNamespace(string(runtimespec.UTSNamespace), getUTSNamespace(sandboxPid)) // nolint: errcheck
g.AddOrReplaceLinuxNamespace(string(runtimespec.PIDNamespace), getPIDNamespace(sandboxPid)) // nolint: errcheck
}