Add image volume support.
Signed-off-by: Lantao Liu <lantaol@google.com>
This commit is contained in:
parent
e0079125d2
commit
cd27050425
95
pkg/opts/container.go
Normal file
95
pkg/opts/container.go
Normal file
@ -0,0 +1,95 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package opts
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/containerd/containerd"
|
||||
"github.com/containerd/containerd/containers"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// WithVolumes copies ownership of volume in rootfs to its corresponding host path.
|
||||
// It doesn't update runtime spec.
|
||||
// The passed in map is a host path to container path map for all volumes.
|
||||
// TODO(random-liu): Figure out whether we need to copy volume content.
|
||||
func WithVolumes(volumeMounts map[string]string) containerd.NewContainerOpts {
|
||||
return func(ctx context.Context, client *containerd.Client, c *containers.Container) error {
|
||||
if c.Snapshotter == "" {
|
||||
return errors.Errorf("no snapshotter set for container")
|
||||
}
|
||||
if c.SnapshotKey == "" {
|
||||
return errors.Errorf("rootfs not created for container")
|
||||
}
|
||||
snapshotter := client.SnapshotService(c.Snapshotter)
|
||||
mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
root, err := ioutil.TempDir("", "ctd-volume")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer os.RemoveAll(root) // nolint: errcheck
|
||||
for _, m := range mounts {
|
||||
if err := m.Mount(root); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
defer unix.Unmount(root, 0) // nolint: errcheck
|
||||
|
||||
for host, volume := range volumeMounts {
|
||||
if err := copyOwnership(filepath.Join(root, volume), host); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// copyOwnership copies the permissions and uid:gid of the src file
|
||||
// to the dst file
|
||||
func copyOwnership(src, dst string) error {
|
||||
stat, err := system.Stat(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dstStat, err := system.Stat(dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// In some cases, even though UID/GID match and it would effectively be a no-op,
|
||||
// this can return a permission denied error... for example if this is an NFS
|
||||
// mount.
|
||||
// Since it's not really an error that we can't chown to the same UID/GID, don't
|
||||
// even bother trying in such cases.
|
||||
if stat.UID() != dstStat.UID() || stat.GID() != dstStat.GID() {
|
||||
if err := os.Chown(dst, int(stat.UID()), int(stat.GID())); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if stat.Mode() != dstStat.Mode() {
|
||||
return os.Chmod(dst, os.FileMode(stat.Mode()))
|
||||
}
|
||||
return nil
|
||||
}
|
@ -19,6 +19,7 @@ package server
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@ -36,6 +37,7 @@ import (
|
||||
"golang.org/x/sys/unix"
|
||||
"k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
|
||||
|
||||
customopts "github.com/kubernetes-incubator/cri-containerd/pkg/opts"
|
||||
cio "github.com/kubernetes-incubator/cri-containerd/pkg/server/io"
|
||||
containerstore "github.com/kubernetes-incubator/cri-containerd/pkg/store/container"
|
||||
"github.com/kubernetes-incubator/cri-containerd/pkg/util"
|
||||
@ -101,26 +103,6 @@ func (c *criContainerdService) CreateContainer(ctx context.Context, r *runtime.C
|
||||
return nil, fmt.Errorf("image %q not found", imageRef)
|
||||
}
|
||||
|
||||
// Generate container runtime spec.
|
||||
mounts := c.generateContainerMounts(getSandboxRootDir(c.rootDir, sandboxID), config)
|
||||
spec, err := c.generateContainerSpec(id, sandboxPid, config, sandboxConfig, image.Config, mounts)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate container %q spec: %v", id, err)
|
||||
}
|
||||
glog.V(4).Infof("Container spec: %+v", spec)
|
||||
|
||||
// Set snapshotter before any other options.
|
||||
opts := []containerd.NewContainerOpts{
|
||||
containerd.WithSnapshotter(c.snapshotter),
|
||||
}
|
||||
// Prepare container rootfs. This is always writeable even if
|
||||
// the container wants a readonly rootfs since we want to give
|
||||
// the runtime (runc) a chance to modify (e.g. to create mount
|
||||
// points corresponding to spec.Mounts) before making the
|
||||
// rootfs readonly (requested by spec.Root.Readonly).
|
||||
opts = append(opts, containerd.WithNewSnapshot(id, image.Image))
|
||||
meta.ImageRef = image.ID
|
||||
|
||||
// Create container root directory.
|
||||
containerRootDir := getContainerRootDir(c.rootDir, id)
|
||||
if err = c.os.MkdirAll(containerRootDir, 0755); err != nil {
|
||||
@ -137,6 +119,39 @@ func (c *criContainerdService) CreateContainer(ctx context.Context, r *runtime.C
|
||||
}
|
||||
}()
|
||||
|
||||
// Create container volumes mounts.
|
||||
// TODO(random-liu): Add cri-containerd integration test for image volume.
|
||||
volumeMounts := c.generateVolumeMounts(containerRootDir, config.GetMounts(), image.Config)
|
||||
|
||||
// Generate container runtime spec.
|
||||
mounts := c.generateContainerMounts(getSandboxRootDir(c.rootDir, sandboxID), config)
|
||||
|
||||
spec, err := c.generateContainerSpec(id, sandboxPid, config, sandboxConfig, image.Config, append(mounts, volumeMounts...))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate container %q spec: %v", id, err)
|
||||
}
|
||||
glog.V(4).Infof("Container spec: %+v", spec)
|
||||
|
||||
// Set snapshotter before any other options.
|
||||
opts := []containerd.NewContainerOpts{
|
||||
containerd.WithSnapshotter(c.snapshotter),
|
||||
// Prepare container rootfs. This is always writeable even if
|
||||
// the container wants a readonly rootfs since we want to give
|
||||
// the runtime (runc) a chance to modify (e.g. to create mount
|
||||
// points corresponding to spec.Mounts) before making the
|
||||
// rootfs readonly (requested by spec.Root.Readonly).
|
||||
containerd.WithNewSnapshot(id, image.Image),
|
||||
}
|
||||
|
||||
if len(volumeMounts) > 0 {
|
||||
mountMap := make(map[string]string)
|
||||
for _, v := range volumeMounts {
|
||||
mountMap[v.HostPath] = v.ContainerPath
|
||||
}
|
||||
opts = append(opts, customopts.WithVolumes(mountMap))
|
||||
}
|
||||
meta.ImageRef = image.ID
|
||||
|
||||
containerIO, err := cio.NewContainerIO(id,
|
||||
cio.WithStdin(config.GetStdin()),
|
||||
cio.WithTerminal(config.GetTty()),
|
||||
@ -277,7 +292,7 @@ func (c *criContainerdService) generateContainerSpec(id string, sandboxPid uint3
|
||||
|
||||
// Add extra mounts first so that CRI specified mounts can override.
|
||||
mounts := append(extraMounts, config.GetMounts()...)
|
||||
if err := addOCIBindMounts(&g, mounts, mountLabel); err != nil {
|
||||
if err := c.addOCIBindMounts(&g, mounts, mountLabel); err != nil {
|
||||
return nil, fmt.Errorf("failed to set OCI bind mounts %+v: %v", mounts, err)
|
||||
}
|
||||
|
||||
@ -328,25 +343,59 @@ func (c *criContainerdService) generateContainerSpec(id string, sandboxPid uint3
|
||||
return g.Spec(), nil
|
||||
}
|
||||
|
||||
// generateVolumeMounts sets up image volumes for container. Rely on the removal of container
|
||||
// root directory to do cleanup. Note that image volume will be skipped, if there is criMounts
|
||||
// specified with the same destination.
|
||||
func (c *criContainerdService) generateVolumeMounts(containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount {
|
||||
if len(config.Volumes) == 0 {
|
||||
return nil
|
||||
}
|
||||
var mounts []*runtime.Mount
|
||||
for dst := range config.Volumes {
|
||||
if isInCRIMounts(dst, criMounts) {
|
||||
// Skip the image volume, if there is CRI defined volume mapping.
|
||||
// TODO(random-liu): This should be handled by Kubelet in the future.
|
||||
// Kubelet should decide what to use for image volume, and also de-duplicate
|
||||
// the image volume and user mounts.
|
||||
continue
|
||||
}
|
||||
volumeID := util.GenerateID()
|
||||
src := filepath.Join(containerRootDir, "volumes", volumeID)
|
||||
// addOCIBindMounts will create these volumes.
|
||||
mounts = append(mounts, &runtime.Mount{
|
||||
ContainerPath: dst,
|
||||
HostPath: src,
|
||||
// Use default mount propagation.
|
||||
// TODO(random-liu): What about selinux relabel?
|
||||
})
|
||||
}
|
||||
return mounts
|
||||
}
|
||||
|
||||
// generateContainerMounts sets up necessary container mounts including /dev/shm, /etc/hosts
|
||||
// and /etc/resolv.conf.
|
||||
func (c *criContainerdService) generateContainerMounts(sandboxRootDir string, config *runtime.ContainerConfig) []*runtime.Mount {
|
||||
var mounts []*runtime.Mount
|
||||
securityContext := config.GetLinux().GetSecurityContext()
|
||||
if !isInCRIMounts(etcHosts, config.GetMounts()) {
|
||||
mounts = append(mounts, &runtime.Mount{
|
||||
ContainerPath: etcHosts,
|
||||
HostPath: getSandboxHosts(sandboxRootDir),
|
||||
Readonly: securityContext.GetReadonlyRootfs(),
|
||||
})
|
||||
}
|
||||
|
||||
// Mount sandbox resolv.config.
|
||||
// TODO: Need to figure out whether we should always mount it as read-only
|
||||
if !isInCRIMounts(resolvConfPath, config.GetMounts()) {
|
||||
mounts = append(mounts, &runtime.Mount{
|
||||
ContainerPath: resolvConfPath,
|
||||
HostPath: getResolvPath(sandboxRootDir),
|
||||
Readonly: securityContext.GetReadonlyRootfs(),
|
||||
})
|
||||
}
|
||||
|
||||
if !isInCRIMounts(devShm, config.GetMounts()) {
|
||||
sandboxDevShm := getSandboxDevShm(sandboxRootDir)
|
||||
if securityContext.GetNamespaceOptions().GetHostIpc() {
|
||||
sandboxDevShm = devShm
|
||||
@ -356,6 +405,7 @@ func (c *criContainerdService) generateContainerMounts(sandboxRootDir string, co
|
||||
HostPath: sandboxDevShm,
|
||||
Readonly: false,
|
||||
})
|
||||
}
|
||||
return mounts
|
||||
}
|
||||
|
||||
@ -479,7 +529,7 @@ func setOCIDevicesPrivileged(g *generate.Generator) error {
|
||||
// addOCIBindMounts adds bind mounts.
|
||||
// TODO(random-liu): Figure out whether we need to change all CRI mounts to readonly when
|
||||
// rootfs is readonly. (https://github.com/moby/moby/blob/master/daemon/oci_linux.go)
|
||||
func addOCIBindMounts(g *generate.Generator, mounts []*runtime.Mount, mountLabel string) error {
|
||||
func (c *criContainerdService) addOCIBindMounts(g *generate.Generator, mounts []*runtime.Mount, mountLabel string) error {
|
||||
// Mount cgroup into the container as readonly, which inherits docker's behavior.
|
||||
g.AddCgroupsMount("ro") // nolint: errcheck
|
||||
for _, mount := range mounts {
|
||||
@ -487,11 +537,11 @@ func addOCIBindMounts(g *generate.Generator, mounts []*runtime.Mount, mountLabel
|
||||
src := mount.GetHostPath()
|
||||
// Create the host path if it doesn't exist.
|
||||
// TODO(random-liu): Add CRI validation test for this case.
|
||||
if _, err := os.Stat(src); err != nil {
|
||||
if _, err := c.os.Stat(src); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return fmt.Errorf("failed to stat %q: %v", src, err)
|
||||
}
|
||||
if err := os.MkdirAll(src, 0755); err != nil {
|
||||
if err := c.os.MkdirAll(src, 0755); err != nil {
|
||||
return fmt.Errorf("failed to mkdir %q: %v", src, err)
|
||||
}
|
||||
}
|
||||
|
@ -17,6 +17,7 @@ limitations under the License.
|
||||
package server
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||
@ -402,9 +403,66 @@ func TestContainerSpecCommand(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateVolumeMounts(t *testing.T) {
|
||||
testContainerRootDir := "test-container-root"
|
||||
for desc, test := range map[string]struct {
|
||||
criMounts []*runtime.Mount
|
||||
imageVolumes map[string]struct{}
|
||||
expectedMountDest []string
|
||||
}{
|
||||
"should setup rw mount for image volumes": {
|
||||
imageVolumes: map[string]struct{}{
|
||||
"/test-volume-1": {},
|
||||
"/test-volume-2": {},
|
||||
},
|
||||
expectedMountDest: []string{
|
||||
"/test-volume-1",
|
||||
"/test-volume-2",
|
||||
},
|
||||
},
|
||||
"should skip image volumes if already mounted by CRI": {
|
||||
criMounts: []*runtime.Mount{
|
||||
{
|
||||
ContainerPath: "/test-volume-1",
|
||||
HostPath: "/test-hostpath-1",
|
||||
},
|
||||
},
|
||||
imageVolumes: map[string]struct{}{
|
||||
"/test-volume-1": {},
|
||||
"/test-volume-2": {},
|
||||
},
|
||||
expectedMountDest: []string{
|
||||
"/test-volume-2",
|
||||
},
|
||||
},
|
||||
} {
|
||||
t.Logf("TestCase %q", desc)
|
||||
config := &imagespec.ImageConfig{
|
||||
Volumes: test.imageVolumes,
|
||||
}
|
||||
c := newTestCRIContainerdService()
|
||||
got := c.generateVolumeMounts(testContainerRootDir, test.criMounts, config)
|
||||
assert.Len(t, got, len(test.expectedMountDest))
|
||||
for _, dest := range test.expectedMountDest {
|
||||
found := false
|
||||
for _, m := range got {
|
||||
if m.ContainerPath == dest {
|
||||
found = true
|
||||
assert.Equal(t,
|
||||
filepath.Dir(m.HostPath),
|
||||
filepath.Join(testContainerRootDir, "volumes"))
|
||||
break
|
||||
}
|
||||
}
|
||||
assert.True(t, found)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateContainerMounts(t *testing.T) {
|
||||
testSandboxRootDir := "test-sandbox-root"
|
||||
for desc, test := range map[string]struct {
|
||||
criMounts []*runtime.Mount
|
||||
securityContext *runtime.LinuxContainerSecurityContext
|
||||
expectedMounts []*runtime.Mount
|
||||
}{
|
||||
@ -472,12 +530,31 @@ func TestGenerateContainerMounts(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
"should skip contaner mounts if already mounted by CRI": {
|
||||
criMounts: []*runtime.Mount{
|
||||
{
|
||||
ContainerPath: "/etc/hosts",
|
||||
HostPath: "/test-etc-host",
|
||||
},
|
||||
{
|
||||
ContainerPath: resolvConfPath,
|
||||
HostPath: "test-resolv-conf",
|
||||
},
|
||||
{
|
||||
ContainerPath: "/dev/shm",
|
||||
HostPath: "test-dev-shm",
|
||||
},
|
||||
},
|
||||
securityContext: &runtime.LinuxContainerSecurityContext{},
|
||||
expectedMounts: nil,
|
||||
},
|
||||
} {
|
||||
config := &runtime.ContainerConfig{
|
||||
Metadata: &runtime.ContainerMetadata{
|
||||
Name: "test-name",
|
||||
Attempt: 1,
|
||||
},
|
||||
Mounts: test.criMounts,
|
||||
Linux: &runtime.LinuxContainerConfig{
|
||||
SecurityContext: test.securityContext,
|
||||
},
|
||||
@ -514,7 +591,8 @@ func TestPrivilegedBindMount(t *testing.T) {
|
||||
t.Logf("TestCase %q", desc)
|
||||
g := generate.New()
|
||||
g.SetRootReadonly(test.readonlyRootFS)
|
||||
addOCIBindMounts(&g, nil, "")
|
||||
c := newTestCRIContainerdService()
|
||||
c.addOCIBindMounts(&g, nil, "")
|
||||
if test.privileged {
|
||||
setOCIBindMountsPrivileged(&g)
|
||||
}
|
||||
|
@ -374,3 +374,13 @@ func initSelinuxOpts(selinuxOpt *runtime.SELinuxOption) (string, string, error)
|
||||
selinuxOpt.GetType())
|
||||
return label.InitLabels(selinux.DupSecOpt(labelOpts))
|
||||
}
|
||||
|
||||
// isInCRIMounts checks whether a destination is in CRI mount list.
|
||||
func isInCRIMounts(dst string, mounts []*runtime.Mount) bool {
|
||||
for _, m := range mounts {
|
||||
if m.ContainerPath == dst {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user