Merge pull request #247 from ijc/volumes-support

Add image volume support.
This commit is contained in:
Lantao Liu 2017-09-15 14:33:27 -07:00 committed by GitHub
commit da31647ef8
6 changed files with 326 additions and 78 deletions

95
pkg/opts/container.go Normal file
View File

@ -0,0 +1,95 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"context"
"io/ioutil"
"os"
"path/filepath"
"github.com/containerd/containerd"
"github.com/containerd/containerd/containers"
"github.com/docker/docker/pkg/system"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
// WithVolumes copies ownership of volume in rootfs to its corresponding host path.
// It doesn't update runtime spec.
// The passed in map is a host path to container path map for all volumes.
// TODO(random-liu): Figure out whether we need to copy volume content.
func WithVolumes(volumeMounts map[string]string) containerd.NewContainerOpts {
return func(ctx context.Context, client *containerd.Client, c *containers.Container) error {
if c.Snapshotter == "" {
return errors.Errorf("no snapshotter set for container")
}
if c.SnapshotKey == "" {
return errors.Errorf("rootfs not created for container")
}
snapshotter := client.SnapshotService(c.Snapshotter)
mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
if err != nil {
return err
}
root, err := ioutil.TempDir("", "ctd-volume")
if err != nil {
return err
}
defer os.RemoveAll(root) // nolint: errcheck
for _, m := range mounts {
if err := m.Mount(root); err != nil {
return err
}
}
defer unix.Unmount(root, 0) // nolint: errcheck
for host, volume := range volumeMounts {
if err := copyOwnership(filepath.Join(root, volume), host); err != nil {
return err
}
}
return nil
}
}
// copyOwnership copies the permissions and uid:gid of the src file
// to the dst file
func copyOwnership(src, dst string) error {
stat, err := system.Stat(src)
if err != nil {
return err
}
dstStat, err := system.Stat(dst)
if err != nil {
return err
}
// In some cases, even though UID/GID match and it would effectively be a no-op,
// this can return a permission denied error... for example if this is an NFS
// mount.
// Since it's not really an error that we can't chown to the same UID/GID, don't
// even bother trying in such cases.
if stat.UID() != dstStat.UID() || stat.GID() != dstStat.GID() {
if err := os.Chown(dst, int(stat.UID()), int(stat.GID())); err != nil {
return err
}
}
if stat.Mode() != dstStat.Mode() {
return os.Chmod(dst, os.FileMode(stat.Mode()))
}
return nil
}

View File

@ -20,6 +20,7 @@ import (
"io"
"io/ioutil"
"os"
"path/filepath"
"github.com/containerd/fifo"
"github.com/docker/docker/pkg/mount"
@ -34,6 +35,7 @@ type OS interface {
RemoveAll(path string) error
OpenFifo(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error)
Stat(name string) (os.FileInfo, error)
ResolveSymbolicLink(name string) (string, error)
CopyFile(src, dest string, perm os.FileMode) error
WriteFile(filename string, data []byte, perm os.FileMode) error
Mount(source string, target string, fstype string, flags uintptr, data string) error
@ -63,6 +65,18 @@ func (RealOS) Stat(name string) (os.FileInfo, error) {
return os.Stat(name)
}
// ResolveSymbolicLink will follow any symbolic links
func (RealOS) ResolveSymbolicLink(path string) (string, error) {
info, err := os.Lstat(path)
if err != nil {
return "", err
}
if info.Mode()&os.ModeSymlink != os.ModeSymlink {
return path, nil
}
return filepath.EvalSymlinks(path)
}
// CopyFile copys src file to dest file
func (RealOS) CopyFile(src, dest string, perm os.FileMode) error {
in, err := os.Open(src)

View File

@ -39,16 +39,17 @@ type CalledDetail struct {
// of the real call.
type FakeOS struct {
sync.Mutex
MkdirAllFn func(string, os.FileMode) error
RemoveAllFn func(string) error
OpenFifoFn func(context.Context, string, int, os.FileMode) (io.ReadWriteCloser, error)
StatFn func(string) (os.FileInfo, error)
CopyFileFn func(string, string, os.FileMode) error
WriteFileFn func(string, []byte, os.FileMode) error
MountFn func(source string, target string, fstype string, flags uintptr, data string) error
UnmountFn func(target string, flags int) error
calls []CalledDetail
errors map[string]error
MkdirAllFn func(string, os.FileMode) error
RemoveAllFn func(string) error
OpenFifoFn func(context.Context, string, int, os.FileMode) (io.ReadWriteCloser, error)
StatFn func(string) (os.FileInfo, error)
ResolveSymbolicLinkFn func(string) (string, error)
CopyFileFn func(string, string, os.FileMode) error
WriteFileFn func(string, []byte, os.FileMode) error
MountFn func(source string, target string, fstype string, flags uintptr, data string) error
UnmountFn func(target string, flags int) error
calls []CalledDetail
errors map[string]error
}
var _ osInterface.OS = &FakeOS{}
@ -160,6 +161,19 @@ func (f *FakeOS) Stat(name string) (os.FileInfo, error) {
return nil, nil
}
// ResolveSymbolicLink is a fake call that invokes ResolveSymbolicLinkFn or returns its input
func (f *FakeOS) ResolveSymbolicLink(path string) (string, error) {
f.appendCalls("ResolveSymbolicLink", path)
if err := f.getError("ResolveSymbolicLink"); err != nil {
return "", err
}
if f.ResolveSymbolicLinkFn != nil {
return f.ResolveSymbolicLinkFn(path)
}
return path, nil
}
// CopyFile is a fake call that invokes CopyFileFn or just return nil.
func (f *FakeOS) CopyFile(src, dest string, perm os.FileMode) error {
f.appendCalls("CopyFile", src, dest, perm)

View File

@ -19,6 +19,7 @@ package server
import (
"fmt"
"os"
"path/filepath"
"strings"
"time"
@ -36,6 +37,7 @@ import (
"golang.org/x/sys/unix"
"k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
customopts "github.com/kubernetes-incubator/cri-containerd/pkg/opts"
cio "github.com/kubernetes-incubator/cri-containerd/pkg/server/io"
containerstore "github.com/kubernetes-incubator/cri-containerd/pkg/store/container"
"github.com/kubernetes-incubator/cri-containerd/pkg/util"
@ -101,26 +103,6 @@ func (c *criContainerdService) CreateContainer(ctx context.Context, r *runtime.C
return nil, fmt.Errorf("image %q not found", imageRef)
}
// Generate container runtime spec.
mounts := c.generateContainerMounts(getSandboxRootDir(c.rootDir, sandboxID), config)
spec, err := c.generateContainerSpec(id, sandboxPid, config, sandboxConfig, image.Config, mounts)
if err != nil {
return nil, fmt.Errorf("failed to generate container %q spec: %v", id, err)
}
glog.V(4).Infof("Container spec: %+v", spec)
// Set snapshotter before any other options.
opts := []containerd.NewContainerOpts{
containerd.WithSnapshotter(c.snapshotter),
}
// Prepare container rootfs. This is always writeable even if
// the container wants a readonly rootfs since we want to give
// the runtime (runc) a chance to modify (e.g. to create mount
// points corresponding to spec.Mounts) before making the
// rootfs readonly (requested by spec.Root.Readonly).
opts = append(opts, containerd.WithNewSnapshot(id, image.Image))
meta.ImageRef = image.ID
// Create container root directory.
containerRootDir := getContainerRootDir(c.rootDir, id)
if err = c.os.MkdirAll(containerRootDir, 0755); err != nil {
@ -137,6 +119,39 @@ func (c *criContainerdService) CreateContainer(ctx context.Context, r *runtime.C
}
}()
// Create container volumes mounts.
// TODO(random-liu): Add cri-containerd integration test for image volume.
volumeMounts := c.generateVolumeMounts(containerRootDir, config.GetMounts(), image.Config)
// Generate container runtime spec.
mounts := c.generateContainerMounts(getSandboxRootDir(c.rootDir, sandboxID), config)
spec, err := c.generateContainerSpec(id, sandboxPid, config, sandboxConfig, image.Config, append(mounts, volumeMounts...))
if err != nil {
return nil, fmt.Errorf("failed to generate container %q spec: %v", id, err)
}
glog.V(4).Infof("Container spec: %+v", spec)
// Set snapshotter before any other options.
opts := []containerd.NewContainerOpts{
containerd.WithSnapshotter(c.snapshotter),
// Prepare container rootfs. This is always writeable even if
// the container wants a readonly rootfs since we want to give
// the runtime (runc) a chance to modify (e.g. to create mount
// points corresponding to spec.Mounts) before making the
// rootfs readonly (requested by spec.Root.Readonly).
containerd.WithNewSnapshot(id, image.Image),
}
if len(volumeMounts) > 0 {
mountMap := make(map[string]string)
for _, v := range volumeMounts {
mountMap[v.HostPath] = v.ContainerPath
}
opts = append(opts, customopts.WithVolumes(mountMap))
}
meta.ImageRef = image.ID
containerIO, err := cio.NewContainerIO(id,
cio.WithStdin(config.GetStdin()),
cio.WithTerminal(config.GetTty()),
@ -277,7 +292,7 @@ func (c *criContainerdService) generateContainerSpec(id string, sandboxPid uint3
// Add extra mounts first so that CRI specified mounts can override.
mounts := append(extraMounts, config.GetMounts()...)
if err := addOCIBindMounts(&g, mounts, mountLabel); err != nil {
if err := c.addOCIBindMounts(&g, mounts, mountLabel); err != nil {
return nil, fmt.Errorf("failed to set OCI bind mounts %+v: %v", mounts, err)
}
@ -289,7 +304,7 @@ func (c *criContainerdService) generateContainerSpec(id string, sandboxPid uint3
return nil, err
}
} else {
if err := addOCIDevices(&g, config.GetDevices()); err != nil {
if err := c.addOCIDevices(&g, config.GetDevices()); err != nil {
return nil, fmt.Errorf("failed to set devices mapping %+v: %v", config.GetDevices(), err)
}
@ -328,34 +343,69 @@ func (c *criContainerdService) generateContainerSpec(id string, sandboxPid uint3
return g.Spec(), nil
}
// generateVolumeMounts sets up image volumes for container. Rely on the removal of container
// root directory to do cleanup. Note that image volume will be skipped, if there is criMounts
// specified with the same destination.
func (c *criContainerdService) generateVolumeMounts(containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount {
if len(config.Volumes) == 0 {
return nil
}
var mounts []*runtime.Mount
for dst := range config.Volumes {
if isInCRIMounts(dst, criMounts) {
// Skip the image volume, if there is CRI defined volume mapping.
// TODO(random-liu): This should be handled by Kubelet in the future.
// Kubelet should decide what to use for image volume, and also de-duplicate
// the image volume and user mounts.
continue
}
volumeID := util.GenerateID()
src := filepath.Join(containerRootDir, "volumes", volumeID)
// addOCIBindMounts will create these volumes.
mounts = append(mounts, &runtime.Mount{
ContainerPath: dst,
HostPath: src,
// Use default mount propagation.
// TODO(random-liu): What about selinux relabel?
})
}
return mounts
}
// generateContainerMounts sets up necessary container mounts including /dev/shm, /etc/hosts
// and /etc/resolv.conf.
func (c *criContainerdService) generateContainerMounts(sandboxRootDir string, config *runtime.ContainerConfig) []*runtime.Mount {
var mounts []*runtime.Mount
securityContext := config.GetLinux().GetSecurityContext()
mounts = append(mounts, &runtime.Mount{
ContainerPath: etcHosts,
HostPath: getSandboxHosts(sandboxRootDir),
Readonly: securityContext.GetReadonlyRootfs(),
})
if !isInCRIMounts(etcHosts, config.GetMounts()) {
mounts = append(mounts, &runtime.Mount{
ContainerPath: etcHosts,
HostPath: getSandboxHosts(sandboxRootDir),
Readonly: securityContext.GetReadonlyRootfs(),
})
}
// Mount sandbox resolv.config.
// TODO: Need to figure out whether we should always mount it as read-only
mounts = append(mounts, &runtime.Mount{
ContainerPath: resolvConfPath,
HostPath: getResolvPath(sandboxRootDir),
Readonly: securityContext.GetReadonlyRootfs(),
})
sandboxDevShm := getSandboxDevShm(sandboxRootDir)
if securityContext.GetNamespaceOptions().GetHostIpc() {
sandboxDevShm = devShm
if !isInCRIMounts(resolvConfPath, config.GetMounts()) {
mounts = append(mounts, &runtime.Mount{
ContainerPath: resolvConfPath,
HostPath: getResolvPath(sandboxRootDir),
Readonly: securityContext.GetReadonlyRootfs(),
})
}
if !isInCRIMounts(devShm, config.GetMounts()) {
sandboxDevShm := getSandboxDevShm(sandboxRootDir)
if securityContext.GetNamespaceOptions().GetHostIpc() {
sandboxDevShm = devShm
}
mounts = append(mounts, &runtime.Mount{
ContainerPath: devShm,
HostPath: sandboxDevShm,
Readonly: false,
})
}
mounts = append(mounts, &runtime.Mount{
ContainerPath: devShm,
HostPath: sandboxDevShm,
Readonly: false,
})
return mounts
}
@ -414,10 +464,10 @@ func clearReadOnly(m *runtimespec.Mount) {
}
// addDevices set device mapping without privilege.
func addOCIDevices(g *generate.Generator, devs []*runtime.Device) error {
func (c *criContainerdService) addOCIDevices(g *generate.Generator, devs []*runtime.Device) error {
spec := g.Spec()
for _, device := range devs {
path, err := resolveSymbolicLink(device.HostPath)
path, err := c.os.ResolveSymbolicLink(device.HostPath)
if err != nil {
return err
}
@ -479,7 +529,7 @@ func setOCIDevicesPrivileged(g *generate.Generator) error {
// addOCIBindMounts adds bind mounts.
// TODO(random-liu): Figure out whether we need to change all CRI mounts to readonly when
// rootfs is readonly. (https://github.com/moby/moby/blob/master/daemon/oci_linux.go)
func addOCIBindMounts(g *generate.Generator, mounts []*runtime.Mount, mountLabel string) error {
func (c *criContainerdService) addOCIBindMounts(g *generate.Generator, mounts []*runtime.Mount, mountLabel string) error {
// Mount cgroup into the container as readonly, which inherits docker's behavior.
g.AddCgroupsMount("ro") // nolint: errcheck
for _, mount := range mounts {
@ -487,17 +537,17 @@ func addOCIBindMounts(g *generate.Generator, mounts []*runtime.Mount, mountLabel
src := mount.GetHostPath()
// Create the host path if it doesn't exist.
// TODO(random-liu): Add CRI validation test for this case.
if _, err := os.Stat(src); err != nil {
if _, err := c.os.Stat(src); err != nil {
if !os.IsNotExist(err) {
return fmt.Errorf("failed to stat %q: %v", src, err)
}
if err := os.MkdirAll(src, 0755); err != nil {
if err := c.os.MkdirAll(src, 0755); err != nil {
return fmt.Errorf("failed to mkdir %q: %v", src, err)
}
}
// TODO(random-liu): Add cri-containerd integration test or cri validation test
// for this.
src, err := resolveSymbolicLink(src)
src, err := c.os.ResolveSymbolicLink(src)
if err != nil {
return fmt.Errorf("failed to resolve symlink %q: %v", src, err)
}

View File

@ -17,6 +17,7 @@ limitations under the License.
package server
import (
"path/filepath"
"testing"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
@ -203,7 +204,7 @@ func TestGeneralContainerSpec(t *testing.T) {
config, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
c := newTestCRIContainerdService()
spec, err := c.generateContainerSpec(testID, testPid, config, sandboxConfig, imageConfig, nil)
assert.NoError(t, err)
require.NoError(t, err)
specCheck(t, testID, testPid, spec)
}
@ -257,7 +258,7 @@ func TestContainerCapabilities(t *testing.T) {
t.Logf("TestCase %q", desc)
config.Linux.SecurityContext.Capabilities = test.capability
spec, err := c.generateContainerSpec(testID, testPid, config, sandboxConfig, imageConfig, nil)
assert.NoError(t, err)
require.NoError(t, err)
specCheck(t, testID, testPid, spec)
t.Log(spec.Process.Capabilities.Bounding)
for _, include := range test.includes {
@ -283,7 +284,7 @@ func TestContainerSpecTty(t *testing.T) {
for _, tty := range []bool{true, false} {
config.Tty = tty
spec, err := c.generateContainerSpec(testID, testPid, config, sandboxConfig, imageConfig, nil)
assert.NoError(t, err)
require.NoError(t, err)
specCheck(t, testID, testPid, spec)
assert.Equal(t, tty, spec.Process.Terminal)
if tty {
@ -302,7 +303,7 @@ func TestContainerSpecReadonlyRootfs(t *testing.T) {
for _, readonly := range []bool{true, false} {
config.Linux.SecurityContext.ReadonlyRootfs = readonly
spec, err := c.generateContainerSpec(testID, testPid, config, sandboxConfig, imageConfig, nil)
assert.NoError(t, err)
require.NoError(t, err)
specCheck(t, testID, testPid, spec)
assert.Equal(t, readonly, spec.Root.Readonly)
}
@ -325,7 +326,7 @@ func TestContainerSpecWithExtraMounts(t *testing.T) {
Readonly: true,
}
spec, err := c.generateContainerSpec(testID, testPid, config, sandboxConfig, imageConfig, []*runtime.Mount{extraMount})
assert.NoError(t, err)
require.NoError(t, err)
specCheck(t, testID, testPid, spec)
var mounts []runtimespec.Mount
for _, m := range spec.Mounts {
@ -402,9 +403,66 @@ func TestContainerSpecCommand(t *testing.T) {
}
}
func TestGenerateVolumeMounts(t *testing.T) {
testContainerRootDir := "test-container-root"
for desc, test := range map[string]struct {
criMounts []*runtime.Mount
imageVolumes map[string]struct{}
expectedMountDest []string
}{
"should setup rw mount for image volumes": {
imageVolumes: map[string]struct{}{
"/test-volume-1": {},
"/test-volume-2": {},
},
expectedMountDest: []string{
"/test-volume-1",
"/test-volume-2",
},
},
"should skip image volumes if already mounted by CRI": {
criMounts: []*runtime.Mount{
{
ContainerPath: "/test-volume-1",
HostPath: "/test-hostpath-1",
},
},
imageVolumes: map[string]struct{}{
"/test-volume-1": {},
"/test-volume-2": {},
},
expectedMountDest: []string{
"/test-volume-2",
},
},
} {
t.Logf("TestCase %q", desc)
config := &imagespec.ImageConfig{
Volumes: test.imageVolumes,
}
c := newTestCRIContainerdService()
got := c.generateVolumeMounts(testContainerRootDir, test.criMounts, config)
assert.Len(t, got, len(test.expectedMountDest))
for _, dest := range test.expectedMountDest {
found := false
for _, m := range got {
if m.ContainerPath == dest {
found = true
assert.Equal(t,
filepath.Dir(m.HostPath),
filepath.Join(testContainerRootDir, "volumes"))
break
}
}
assert.True(t, found)
}
}
}
func TestGenerateContainerMounts(t *testing.T) {
testSandboxRootDir := "test-sandbox-root"
for desc, test := range map[string]struct {
criMounts []*runtime.Mount
securityContext *runtime.LinuxContainerSecurityContext
expectedMounts []*runtime.Mount
}{
@ -472,12 +530,31 @@ func TestGenerateContainerMounts(t *testing.T) {
},
},
},
"should skip contaner mounts if already mounted by CRI": {
criMounts: []*runtime.Mount{
{
ContainerPath: "/etc/hosts",
HostPath: "/test-etc-host",
},
{
ContainerPath: resolvConfPath,
HostPath: "test-resolv-conf",
},
{
ContainerPath: "/dev/shm",
HostPath: "test-dev-shm",
},
},
securityContext: &runtime.LinuxContainerSecurityContext{},
expectedMounts: nil,
},
} {
config := &runtime.ContainerConfig{
Metadata: &runtime.ContainerMetadata{
Name: "test-name",
Attempt: 1,
},
Mounts: test.criMounts,
Linux: &runtime.LinuxContainerConfig{
SecurityContext: test.securityContext,
},
@ -514,7 +591,8 @@ func TestPrivilegedBindMount(t *testing.T) {
t.Logf("TestCase %q", desc)
g := generate.New()
g.SetRootReadonly(test.readonlyRootFS)
addOCIBindMounts(&g, nil, "")
c := newTestCRIContainerdService()
c.addOCIBindMounts(&g, nil, "")
if test.privileged {
setOCIBindMountsPrivileged(&g)
}
@ -540,7 +618,7 @@ func TestPidNamespace(t *testing.T) {
t.Logf("should not set pid namespace when host pid is true")
config.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{HostPid: true}
spec, err := c.generateContainerSpec(testID, testPid, config, sandboxConfig, imageConfig, nil)
assert.NoError(t, err)
require.NoError(t, err)
specCheck(t, testID, testPid, spec)
for _, ns := range spec.Linux.Namespaces {
assert.NotEqual(t, ns.Type, runtimespec.PIDNamespace)
@ -549,7 +627,7 @@ func TestPidNamespace(t *testing.T) {
t.Logf("should set pid namespace when host pid is false")
config.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{HostPid: false}
spec, err = c.generateContainerSpec(testID, testPid, config, sandboxConfig, imageConfig, nil)
assert.NoError(t, err)
require.NoError(t, err)
specCheck(t, testID, testPid, spec)
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.PIDNamespace,

View File

@ -297,19 +297,6 @@ func (c *criContainerdService) ensureImageExists(ctx context.Context, ref string
return &newImage, nil
}
// resolveSymbolicLink resolves a possbile symlink path. If the path is a symlink, returns resolved
// path; if not, returns the original path.
func resolveSymbolicLink(path string) (string, error) {
info, err := os.Lstat(path)
if err != nil {
return "", err
}
if info.Mode()&os.ModeSymlink != os.ModeSymlink {
return path, nil
}
return filepath.EvalSymlinks(path)
}
// loadCgroup loads the cgroup associated with path if it exists and moves the current process into the cgroup. If the cgroup
// is not created it is created and returned.
func loadCgroup(cgroupPath string) (cgroups.Cgroup, error) {
@ -387,3 +374,13 @@ func initSelinuxOpts(selinuxOpt *runtime.SELinuxOption) (string, string, error)
selinuxOpt.GetType())
return label.InitLabels(selinux.DupSecOpt(labelOpts))
}
// isInCRIMounts checks whether a destination is in CRI mount list.
func isInCRIMounts(dst string, mounts []*runtime.Mount) bool {
for _, m := range mounts {
if m.ContainerPath == dst {
return true
}
}
return false
}