diff --git a/pkg/opts/container.go b/pkg/opts/container.go new file mode 100644 index 000000000..7a158fd7e --- /dev/null +++ b/pkg/opts/container.go @@ -0,0 +1,95 @@ +/* +Copyright 2017 The Kubernetes Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package opts + +import ( + "context" + "io/ioutil" + "os" + "path/filepath" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/containers" + "github.com/docker/docker/pkg/system" + "github.com/pkg/errors" + "golang.org/x/sys/unix" +) + +// WithVolumes copies ownership of volume in rootfs to its corresponding host path. +// It doesn't update runtime spec. +// The passed in map is a host path to container path map for all volumes. +// TODO(random-liu): Figure out whether we need to copy volume content. +func WithVolumes(volumeMounts map[string]string) containerd.NewContainerOpts { + return func(ctx context.Context, client *containerd.Client, c *containers.Container) error { + if c.Snapshotter == "" { + return errors.Errorf("no snapshotter set for container") + } + if c.SnapshotKey == "" { + return errors.Errorf("rootfs not created for container") + } + snapshotter := client.SnapshotService(c.Snapshotter) + mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey) + if err != nil { + return err + } + root, err := ioutil.TempDir("", "ctd-volume") + if err != nil { + return err + } + defer os.RemoveAll(root) // nolint: errcheck + for _, m := range mounts { + if err := m.Mount(root); err != nil { + return err + } + } + defer unix.Unmount(root, 0) // nolint: errcheck + + for host, volume := range volumeMounts { + if err := copyOwnership(filepath.Join(root, volume), host); err != nil { + return err + } + } + return nil + } +} + +// copyOwnership copies the permissions and uid:gid of the src file +// to the dst file +func copyOwnership(src, dst string) error { + stat, err := system.Stat(src) + if err != nil { + return err + } + + dstStat, err := system.Stat(dst) + if err != nil { + return err + } + + // In some cases, even though UID/GID match and it would effectively be a no-op, + // this can return a permission denied error... for example if this is an NFS + // mount. + // Since it's not really an error that we can't chown to the same UID/GID, don't + // even bother trying in such cases. + if stat.UID() != dstStat.UID() || stat.GID() != dstStat.GID() { + if err := os.Chown(dst, int(stat.UID()), int(stat.GID())); err != nil { + return err + } + } + + if stat.Mode() != dstStat.Mode() { + return os.Chmod(dst, os.FileMode(stat.Mode())) + } + return nil +} diff --git a/pkg/server/container_create.go b/pkg/server/container_create.go index 315a1a08b..427501339 100644 --- a/pkg/server/container_create.go +++ b/pkg/server/container_create.go @@ -19,6 +19,7 @@ package server import ( "fmt" "os" + "path/filepath" "strings" "time" @@ -36,6 +37,7 @@ import ( "golang.org/x/sys/unix" "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" + customopts "github.com/kubernetes-incubator/cri-containerd/pkg/opts" cio "github.com/kubernetes-incubator/cri-containerd/pkg/server/io" containerstore "github.com/kubernetes-incubator/cri-containerd/pkg/store/container" "github.com/kubernetes-incubator/cri-containerd/pkg/util" @@ -101,26 +103,6 @@ func (c *criContainerdService) CreateContainer(ctx context.Context, r *runtime.C return nil, fmt.Errorf("image %q not found", imageRef) } - // Generate container runtime spec. - mounts := c.generateContainerMounts(getSandboxRootDir(c.rootDir, sandboxID), config) - spec, err := c.generateContainerSpec(id, sandboxPid, config, sandboxConfig, image.Config, mounts) - if err != nil { - return nil, fmt.Errorf("failed to generate container %q spec: %v", id, err) - } - glog.V(4).Infof("Container spec: %+v", spec) - - // Set snapshotter before any other options. - opts := []containerd.NewContainerOpts{ - containerd.WithSnapshotter(c.snapshotter), - } - // Prepare container rootfs. This is always writeable even if - // the container wants a readonly rootfs since we want to give - // the runtime (runc) a chance to modify (e.g. to create mount - // points corresponding to spec.Mounts) before making the - // rootfs readonly (requested by spec.Root.Readonly). - opts = append(opts, containerd.WithNewSnapshot(id, image.Image)) - meta.ImageRef = image.ID - // Create container root directory. containerRootDir := getContainerRootDir(c.rootDir, id) if err = c.os.MkdirAll(containerRootDir, 0755); err != nil { @@ -137,6 +119,39 @@ func (c *criContainerdService) CreateContainer(ctx context.Context, r *runtime.C } }() + // Create container volumes mounts. + // TODO(random-liu): Add cri-containerd integration test for image volume. + volumeMounts := c.generateVolumeMounts(containerRootDir, config.GetMounts(), image.Config) + + // Generate container runtime spec. + mounts := c.generateContainerMounts(getSandboxRootDir(c.rootDir, sandboxID), config) + + spec, err := c.generateContainerSpec(id, sandboxPid, config, sandboxConfig, image.Config, append(mounts, volumeMounts...)) + if err != nil { + return nil, fmt.Errorf("failed to generate container %q spec: %v", id, err) + } + glog.V(4).Infof("Container spec: %+v", spec) + + // Set snapshotter before any other options. + opts := []containerd.NewContainerOpts{ + containerd.WithSnapshotter(c.snapshotter), + // Prepare container rootfs. This is always writeable even if + // the container wants a readonly rootfs since we want to give + // the runtime (runc) a chance to modify (e.g. to create mount + // points corresponding to spec.Mounts) before making the + // rootfs readonly (requested by spec.Root.Readonly). + containerd.WithNewSnapshot(id, image.Image), + } + + if len(volumeMounts) > 0 { + mountMap := make(map[string]string) + for _, v := range volumeMounts { + mountMap[v.HostPath] = v.ContainerPath + } + opts = append(opts, customopts.WithVolumes(mountMap)) + } + meta.ImageRef = image.ID + containerIO, err := cio.NewContainerIO(id, cio.WithStdin(config.GetStdin()), cio.WithTerminal(config.GetTty()), @@ -277,7 +292,7 @@ func (c *criContainerdService) generateContainerSpec(id string, sandboxPid uint3 // Add extra mounts first so that CRI specified mounts can override. mounts := append(extraMounts, config.GetMounts()...) - if err := addOCIBindMounts(&g, mounts, mountLabel); err != nil { + if err := c.addOCIBindMounts(&g, mounts, mountLabel); err != nil { return nil, fmt.Errorf("failed to set OCI bind mounts %+v: %v", mounts, err) } @@ -328,34 +343,69 @@ func (c *criContainerdService) generateContainerSpec(id string, sandboxPid uint3 return g.Spec(), nil } +// generateVolumeMounts sets up image volumes for container. Rely on the removal of container +// root directory to do cleanup. Note that image volume will be skipped, if there is criMounts +// specified with the same destination. +func (c *criContainerdService) generateVolumeMounts(containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount { + if len(config.Volumes) == 0 { + return nil + } + var mounts []*runtime.Mount + for dst := range config.Volumes { + if isInCRIMounts(dst, criMounts) { + // Skip the image volume, if there is CRI defined volume mapping. + // TODO(random-liu): This should be handled by Kubelet in the future. + // Kubelet should decide what to use for image volume, and also de-duplicate + // the image volume and user mounts. + continue + } + volumeID := util.GenerateID() + src := filepath.Join(containerRootDir, "volumes", volumeID) + // addOCIBindMounts will create these volumes. + mounts = append(mounts, &runtime.Mount{ + ContainerPath: dst, + HostPath: src, + // Use default mount propagation. + // TODO(random-liu): What about selinux relabel? + }) + } + return mounts +} + // generateContainerMounts sets up necessary container mounts including /dev/shm, /etc/hosts // and /etc/resolv.conf. func (c *criContainerdService) generateContainerMounts(sandboxRootDir string, config *runtime.ContainerConfig) []*runtime.Mount { var mounts []*runtime.Mount securityContext := config.GetLinux().GetSecurityContext() - mounts = append(mounts, &runtime.Mount{ - ContainerPath: etcHosts, - HostPath: getSandboxHosts(sandboxRootDir), - Readonly: securityContext.GetReadonlyRootfs(), - }) + if !isInCRIMounts(etcHosts, config.GetMounts()) { + mounts = append(mounts, &runtime.Mount{ + ContainerPath: etcHosts, + HostPath: getSandboxHosts(sandboxRootDir), + Readonly: securityContext.GetReadonlyRootfs(), + }) + } // Mount sandbox resolv.config. // TODO: Need to figure out whether we should always mount it as read-only - mounts = append(mounts, &runtime.Mount{ - ContainerPath: resolvConfPath, - HostPath: getResolvPath(sandboxRootDir), - Readonly: securityContext.GetReadonlyRootfs(), - }) - - sandboxDevShm := getSandboxDevShm(sandboxRootDir) - if securityContext.GetNamespaceOptions().GetHostIpc() { - sandboxDevShm = devShm + if !isInCRIMounts(resolvConfPath, config.GetMounts()) { + mounts = append(mounts, &runtime.Mount{ + ContainerPath: resolvConfPath, + HostPath: getResolvPath(sandboxRootDir), + Readonly: securityContext.GetReadonlyRootfs(), + }) + } + + if !isInCRIMounts(devShm, config.GetMounts()) { + sandboxDevShm := getSandboxDevShm(sandboxRootDir) + if securityContext.GetNamespaceOptions().GetHostIpc() { + sandboxDevShm = devShm + } + mounts = append(mounts, &runtime.Mount{ + ContainerPath: devShm, + HostPath: sandboxDevShm, + Readonly: false, + }) } - mounts = append(mounts, &runtime.Mount{ - ContainerPath: devShm, - HostPath: sandboxDevShm, - Readonly: false, - }) return mounts } @@ -479,7 +529,7 @@ func setOCIDevicesPrivileged(g *generate.Generator) error { // addOCIBindMounts adds bind mounts. // TODO(random-liu): Figure out whether we need to change all CRI mounts to readonly when // rootfs is readonly. (https://github.com/moby/moby/blob/master/daemon/oci_linux.go) -func addOCIBindMounts(g *generate.Generator, mounts []*runtime.Mount, mountLabel string) error { +func (c *criContainerdService) addOCIBindMounts(g *generate.Generator, mounts []*runtime.Mount, mountLabel string) error { // Mount cgroup into the container as readonly, which inherits docker's behavior. g.AddCgroupsMount("ro") // nolint: errcheck for _, mount := range mounts { @@ -487,11 +537,11 @@ func addOCIBindMounts(g *generate.Generator, mounts []*runtime.Mount, mountLabel src := mount.GetHostPath() // Create the host path if it doesn't exist. // TODO(random-liu): Add CRI validation test for this case. - if _, err := os.Stat(src); err != nil { + if _, err := c.os.Stat(src); err != nil { if !os.IsNotExist(err) { return fmt.Errorf("failed to stat %q: %v", src, err) } - if err := os.MkdirAll(src, 0755); err != nil { + if err := c.os.MkdirAll(src, 0755); err != nil { return fmt.Errorf("failed to mkdir %q: %v", src, err) } } diff --git a/pkg/server/container_create_test.go b/pkg/server/container_create_test.go index bb0fbd0bb..39b9aade0 100644 --- a/pkg/server/container_create_test.go +++ b/pkg/server/container_create_test.go @@ -17,6 +17,7 @@ limitations under the License. package server import ( + "path/filepath" "testing" imagespec "github.com/opencontainers/image-spec/specs-go/v1" @@ -402,9 +403,66 @@ func TestContainerSpecCommand(t *testing.T) { } } +func TestGenerateVolumeMounts(t *testing.T) { + testContainerRootDir := "test-container-root" + for desc, test := range map[string]struct { + criMounts []*runtime.Mount + imageVolumes map[string]struct{} + expectedMountDest []string + }{ + "should setup rw mount for image volumes": { + imageVolumes: map[string]struct{}{ + "/test-volume-1": {}, + "/test-volume-2": {}, + }, + expectedMountDest: []string{ + "/test-volume-1", + "/test-volume-2", + }, + }, + "should skip image volumes if already mounted by CRI": { + criMounts: []*runtime.Mount{ + { + ContainerPath: "/test-volume-1", + HostPath: "/test-hostpath-1", + }, + }, + imageVolumes: map[string]struct{}{ + "/test-volume-1": {}, + "/test-volume-2": {}, + }, + expectedMountDest: []string{ + "/test-volume-2", + }, + }, + } { + t.Logf("TestCase %q", desc) + config := &imagespec.ImageConfig{ + Volumes: test.imageVolumes, + } + c := newTestCRIContainerdService() + got := c.generateVolumeMounts(testContainerRootDir, test.criMounts, config) + assert.Len(t, got, len(test.expectedMountDest)) + for _, dest := range test.expectedMountDest { + found := false + for _, m := range got { + if m.ContainerPath == dest { + found = true + assert.Equal(t, + filepath.Dir(m.HostPath), + filepath.Join(testContainerRootDir, "volumes")) + break + } + } + assert.True(t, found) + } + } +} + func TestGenerateContainerMounts(t *testing.T) { testSandboxRootDir := "test-sandbox-root" for desc, test := range map[string]struct { + criMounts []*runtime.Mount securityContext *runtime.LinuxContainerSecurityContext expectedMounts []*runtime.Mount }{ @@ -472,12 +530,31 @@ func TestGenerateContainerMounts(t *testing.T) { }, }, }, + "should skip contaner mounts if already mounted by CRI": { + criMounts: []*runtime.Mount{ + { + ContainerPath: "/etc/hosts", + HostPath: "/test-etc-host", + }, + { + ContainerPath: resolvConfPath, + HostPath: "test-resolv-conf", + }, + { + ContainerPath: "/dev/shm", + HostPath: "test-dev-shm", + }, + }, + securityContext: &runtime.LinuxContainerSecurityContext{}, + expectedMounts: nil, + }, } { config := &runtime.ContainerConfig{ Metadata: &runtime.ContainerMetadata{ Name: "test-name", Attempt: 1, }, + Mounts: test.criMounts, Linux: &runtime.LinuxContainerConfig{ SecurityContext: test.securityContext, }, @@ -514,7 +591,8 @@ func TestPrivilegedBindMount(t *testing.T) { t.Logf("TestCase %q", desc) g := generate.New() g.SetRootReadonly(test.readonlyRootFS) - addOCIBindMounts(&g, nil, "") + c := newTestCRIContainerdService() + c.addOCIBindMounts(&g, nil, "") if test.privileged { setOCIBindMountsPrivileged(&g) } diff --git a/pkg/server/helpers.go b/pkg/server/helpers.go index d4a66c16b..30482d702 100644 --- a/pkg/server/helpers.go +++ b/pkg/server/helpers.go @@ -374,3 +374,13 @@ func initSelinuxOpts(selinuxOpt *runtime.SELinuxOption) (string, string, error) selinuxOpt.GetType()) return label.InitLabels(selinux.DupSecOpt(labelOpts)) } + +// isInCRIMounts checks whether a destination is in CRI mount list. +func isInCRIMounts(dst string, mounts []*runtime.Mount) bool { + for _, m := range mounts { + if m.ContainerPath == dst { + return true + } + } + return false +}