Add OCI/Image Volume Source support

Signed-off-by: Shiming Zhang <wzshiming@hotmail.com>
This commit is contained in:
Shiming Zhang 2025-02-18 14:06:24 +08:00 committed by Liyi Meng
parent 890953d3c6
commit a3c777afd1
8 changed files with 460 additions and 5 deletions

View File

@ -0,0 +1,161 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package integration
import (
"fmt"
"os"
"path/filepath"
"runtime"
"testing"
"time"
"github.com/containerd/containerd/v2/integration/images"
"github.com/opencontainers/selinux/go-selinux"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
criruntime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// TestImageMount mounts the pause image as an image volume inside an Alpine
// container, lists the mount path with `ls`, and expects the container log to
// contain a full stdout line reading "pause". Skipped on non-Linux hosts.
func TestImageMount(t *testing.T) {
	if runtime.GOOS != "linux" {
		t.Skip("Only running on linux")
	}

	const mountPath = "/image-mount"
	var (
		testImage      = images.Get(images.Alpine)
		testMountImage = images.Get(images.Pause)
	)

	EnsureImageExists(t, testMountImage)
	EnsureImageExists(t, testImage)

	// Command executed in the container and the log lines it must produce.
	listCmd := []string{"ls", mountPath}
	wantLog := []string{
		fmt.Sprintf("%s %s %s", criruntime.Stdout, criruntime.LogTagFull, "pause"),
	}

	testImageMount(t, testImage, testMountImage, mountPath, listCmd, wantLog)
}
// TestImageMountSELinux runs the image-volume mount scenario under two SELinux
// levels and checks the label that `ls -Z` reports for the mounted content.
// Skipped on non-Linux hosts and when SELinux is not enabled.
func TestImageMountSELinux(t *testing.T) {
	switch {
	case runtime.GOOS != "linux":
		t.Skip("Only running on linux")
	case !selinux.GetEnabled():
		t.Skip("SELinux is not enabled")
	}

	const mountPath = "/image-mount"
	testImage := images.Get(images.ResourceConsumer)
	testMountImage := images.Get(images.Pause)

	EnsureImageExists(t, testMountImage)
	EnsureImageExists(t, testImage)

	scenarios := []struct {
		level string // SELinux level requested for the sandbox
		want  string // expected `ls -Z` output line for the mounted content
	}{
		{"s0:c4,c5", "system_u:object_r:container_file_t:s0:c4,c5 pause"},
		// The categories are requested out of order; the expected label lists
		// them in ascending order.
		{"s0:c200,c100", "system_u:object_r:container_file_t:s0:c100,c200 pause"},
	}
	for _, sc := range scenarios {
		testImageMountSELinux(t, testImage, testMountImage, mountPath, sc.level, sc.want)
	}
}
// testImageMountSELinux creates a host-network sandbox with the given SELinux
// level, runs `ls -Z mountPath` in a container built from testImage with
// testMountImage mounted at mountPath, waits for the container to exit, and
// checks that the container log contains `want` as a full stdout line.
func testImageMountSELinux(t *testing.T, testImage, testMountImage, mountPath string, level string, want string) {
	const containerName = "test-image-mount-container"

	testPodLogDir := t.TempDir()

	sb, sbConfig := PodSandboxConfigWithCleanup(t, "sandbox",
		"image-mount",
		WithHostNetwork,
		WithSelinuxLevel(level),
		WithPodLogDirectory(testPodLogDir),
	)

	containerConfig := ContainerConfig(
		containerName,
		testImage,
		WithCommand("ls", "-Z", mountPath),
		WithImageVolumeMount(testMountImage, mountPath),
		WithLogPath(containerName),
	)

	cn, err := runtimeService.CreateContainer(sb, containerConfig, sbConfig)
	require.NoError(t, err)
	defer func() {
		assert.NoError(t, runtimeService.RemoveContainer(cn))
	}()

	require.NoError(t, runtimeService.StartContainer(cn))

	// Poll once per second, for up to 30 seconds, until the container exits.
	require.NoError(t, Eventually(func() (bool, error) {
		s, err := runtimeService.ContainerStatus(cn)
		if err != nil {
			return false, err
		}
		return s.GetState() == criruntime.ContainerState_CONTAINER_EXITED, nil
	}, time.Second, 30*time.Second))

	// The log is written to <pod log dir>/<log path>. Abort if it cannot be
	// read: checking empty content would only add a misleading second failure.
	content, err := os.ReadFile(filepath.Join(testPodLogDir, containerName))
	require.NoError(t, err)

	checkContainerLog(t, string(content), []string{
		fmt.Sprintf("%s %s %s", criruntime.Stdout, criruntime.LogTagFull, want),
	})
}
func testImageMount(t *testing.T, testImage, testMountImage, mountPath string, cmd, want []string) {
var (
containerName = "test-image-mount-container"
)
testPodLogDir := t.TempDir()
sb, sbConfig := PodSandboxConfigWithCleanup(t, "sandbox",
"image-mount",
WithHostNetwork,
WithPodLogDirectory(testPodLogDir),
)
containerConfig := ContainerConfig(
containerName,
testImage,
WithCommand(cmd[0], cmd[1:]...),
WithImageVolumeMount(testMountImage, mountPath),
WithLogPath(containerName),
)
cn, err := runtimeService.CreateContainer(sb, containerConfig, sbConfig)
require.NoError(t, err)
defer func() {
assert.NoError(t, runtimeService.RemoveContainer(cn))
}()
require.NoError(t, runtimeService.StartContainer(cn))
require.NoError(t, Eventually(func() (bool, error) {
s, err := runtimeService.ContainerStatus(cn)
if err != nil {
return false, err
}
if s.GetState() == criruntime.ContainerState_CONTAINER_EXITED {
return true, nil
}
return false, nil
}, time.Second, 30*time.Second))
content, err := os.ReadFile(filepath.Join(testPodLogDir, containerName))
assert.NoError(t, err)
checkContainerLog(t, string(content), want)
}

View File

@ -138,6 +138,22 @@ func WithHostNetwork(p *runtime.PodSandboxConfig) {
p.Linux.SecurityContext.NamespaceOptions.Network = runtime.NamespaceMode_NODE p.Linux.SecurityContext.NamespaceOptions.Network = runtime.NamespaceMode_NODE
} }
// WithSelinuxLevel returns a PodSandboxOpts that sets the SELinux level in the
// sandbox's Linux security context, allocating any of the intermediate config
// structs that are still nil.
func WithSelinuxLevel(level string) PodSandboxOpts {
	return func(p *runtime.PodSandboxConfig) {
		linuxCfg := p.Linux
		if linuxCfg == nil {
			linuxCfg = &runtime.LinuxPodSandboxConfig{}
			p.Linux = linuxCfg
		}

		secCtx := linuxCfg.SecurityContext
		if secCtx == nil {
			secCtx = &runtime.LinuxSandboxSecurityContext{}
			linuxCfg.SecurityContext = secCtx
		}

		if secCtx.SelinuxOptions == nil {
			secCtx.SelinuxOptions = &runtime.SELinuxOption{}
		}
		secCtx.SelinuxOptions.Level = level
	}
}
// Set pod userns. // Set pod userns.
func WithPodUserNs(containerID, hostID, length uint32) PodSandboxOpts { func WithPodUserNs(containerID, hostID, length uint32) PodSandboxOpts {
return func(p *runtime.PodSandboxConfig) { return func(p *runtime.PodSandboxConfig) {
@ -338,6 +354,27 @@ func WithIDMapVolumeMount(hostPath, containerPath string, uidMaps, gidMaps []*ru
} }
} }
// WithImageVolumeMount returns a ContainerOpts that mounts the given image at
// containerPath as an image volume without any UID/GID mappings.
func WithImageVolumeMount(image, containerPath string) ContainerOpts {
	var noIDMaps []*runtime.IDMapping // nil: no ID mapping requested
	return WithIDMapImageVolumeMount(image, containerPath, noIDMaps, noIDMaps)
}
// WithIDMapImageVolumeMount returns a ContainerOpts that appends an image
// volume mount to the container config. The mount references image, targets
// the absolute form of containerPath, carries the given UID/GID mappings, and
// is marked read-only with SELinux relabeling requested.
func WithIDMapImageVolumeMount(image string, containerPath string, uidMaps, gidMaps []*runtime.IDMapping) ContainerOpts {
	return func(c *runtime.ContainerConfig) {
		// The filepath.Abs error is intentionally discarded; on failure the
		// container path is left as whatever Abs returned.
		containerPath, _ = filepath.Abs(containerPath)

		c.Mounts = append(c.Mounts, &runtime.Mount{
			Image:          &runtime.ImageSpec{Image: image},
			ContainerPath:  containerPath,
			Readonly:       true,
			SelinuxRelabel: true,
			UidMappings:    uidMaps,
			GidMappings:    gidMaps,
		})
	}
}
func WithWindowsUsername(username string) ContainerOpts { func WithWindowsUsername(username string) ContainerOpts {
return func(c *runtime.ContainerConfig) { return func(c *runtime.ContainerConfig) {
if c.Windows == nil { if c.Windows == nil {

View File

@ -161,18 +161,25 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
return nil, fmt.Errorf("failed to query sandbox platform: %w", err) return nil, fmt.Errorf("failed to query sandbox platform: %w", err)
} }
ociRuntime, err := c.getPodSandboxRuntime(sandboxID)
if err != nil {
return nil, fmt.Errorf("failed to get sandbox runtime: %w", err)
}
// mutate the extra CRI volume mounts from the runtime spec to properly specify the OCI image volume mount requests as bind mounts for this container
err = c.mutateMounts(ctx, config.GetMounts(), c.RuntimeSnapshotter(ctx, ociRuntime), sandboxID, platform)
if err != nil {
return nil, fmt.Errorf("failed to mount image volume: %w", err)
}
var volumeMounts []*runtime.Mount var volumeMounts []*runtime.Mount
if !c.config.IgnoreImageDefinedVolumes { if !c.config.IgnoreImageDefinedVolumes {
// Create container image volumes mounts. // create a list of image volume mounts from the image spec that are not also already in the runtime config volume list
volumeMounts = c.volumeMounts(platform, containerRootDir, config, &image.ImageSpec.Config) volumeMounts = c.volumeMounts(platform, containerRootDir, config, &image.ImageSpec.Config)
} else if len(image.ImageSpec.Config.Volumes) != 0 { } else if len(image.ImageSpec.Config.Volumes) != 0 {
log.G(ctx).Debugf("Ignoring volumes defined in image %v because IgnoreImageDefinedVolumes is set", image.ID) log.G(ctx).Debugf("Ignoring volumes defined in image %v because IgnoreImageDefinedVolumes is set", image.ID)
} }
ociRuntime, err := c.config.GetSandboxRuntime(sandboxConfig, sandbox.Metadata.RuntimeHandler)
if err != nil {
return nil, fmt.Errorf("failed to get sandbox runtime: %w", err)
}
var runtimeHandler *runtime.RuntimeHandler var runtimeHandler *runtime.RuntimeHandler
for _, f := range c.runtimeHandlers { for _, f := range c.runtimeHandlers {
f := f f := f

View File

@ -0,0 +1,196 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"context"
"fmt"
"os"
"path/filepath"
containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/leases"
"github.com/containerd/containerd/v2/core/mount"
"github.com/containerd/errdefs"
"github.com/containerd/log"
"github.com/containerd/platforms"
"github.com/opencontainers/image-spec/identity"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// mutateMounts processes every CRI mount in extraMounts through
// mutateImageMount, which fills in the host path of mounts that reference an
// image. Before doing so it makes sure a lease whose ID is the sandbox ID
// exists and attaches that lease to the context used for the per-mount work.
//
// It stops at, and returns, the first error.
func (c *criService) mutateMounts(
	ctx context.Context,
	extraMounts []*runtime.Mount,
	snapshotter string,
	sandboxID string,
	platform imagespec.Platform,
) error {
	err := c.ensureLeaseExist(ctx, sandboxID)
	if err != nil {
		return fmt.Errorf("failed to ensure lease %v for sandbox: %w", sandboxID, err)
	}

	leasedCtx := leases.WithLease(ctx, sandboxID)
	for i := range extraMounts {
		if err := c.mutateImageMount(leasedCtx, extraMounts[i], snapshotter, sandboxID, platform); err != nil {
			return err
		}
	}
	return nil
}
// ensureLeaseExist creates a lease whose ID is sandboxID. An "already exists"
// error from the lease service is treated as success; any other error is
// returned unchanged.
func (c *criService) ensureLeaseExist(ctx context.Context, sandboxID string) error {
	_, err := c.client.LeasesService().Create(ctx, leases.WithID(sandboxID))
	if err == nil || errdefs.IsAlreadyExists(err) {
		return nil
	}
	return err
}
// mutateImageMount turns a CRI mount that references an image into a mount
// with a concrete host path by mounting the image's snapshot under the
// sandbox's image volume directory and storing that path in
// extraMount.HostPath.
//
// Mounts without an image spec are left untouched. An image mount must have an
// empty host path, must be read-only, and must name an image that resolves
// locally; otherwise an error is returned.
//
// The host path is derived from the sandbox ID and the image's target digest,
// so a directory that already exists at that path is taken to be the same
// image already mounted for this pod and is reused.
//
// On failure after the snapshot has been prepared, the snapshot is removed;
// if mounting itself fails, the target directory is also unmounted and
// removed so that a later call does not mistake it for a completed mount.
func (c *criService) mutateImageMount(
	ctx context.Context,
	extraMount *runtime.Mount,
	snapshotter string,
	sandboxID string,
	platform imagespec.Platform,
) (retErr error) {
	imageSpec := extraMount.GetImage()
	if imageSpec == nil {
		// Not an image volume mount; nothing to do.
		return nil
	}
	if extraMount.GetHostPath() != "" {
		return fmt.Errorf("hostpath must be empty while mount image: %+v", extraMount)
	}
	if !extraMount.GetReadonly() {
		return fmt.Errorf("readonly must be true while mount image: %+v", extraMount)
	}

	ref := imageSpec.GetImage()
	if ref == "" {
		return fmt.Errorf("image not specified in: %+v", imageSpec)
	}
	image, err := c.LocalResolve(ref)
	if err != nil {
		return fmt.Errorf("failed to resolve image %q: %w", ref, err)
	}
	containerdImage, err := c.toContainerdImage(ctx, image)
	if err != nil {
		return fmt.Errorf("failed to get image from containerd %q: %w", image.ID, err)
	}

	// This is a digest of the manifest
	imageID := containerdImage.Target().Digest.Encoded()

	target := c.getImageVolumeHostPath(sandboxID, imageID)

	// Already mounted in another container on the same pod
	if stat, err := os.Stat(target); err == nil && stat.IsDir() {
		extraMount.HostPath = target
		return nil
	}

	img, err := c.client.ImageService().Get(ctx, ref)
	if err != nil {
		return fmt.Errorf("failed to get image volume ref %q: %w", ref, err)
	}

	// Unpack the image for the requested platform into the snapshotter.
	i := containerd.NewImageWithPlatform(c.client, img, platforms.Only(platform))
	if err := i.Unpack(ctx, snapshotter); err != nil {
		return fmt.Errorf("failed to unpack image volume: %w", err)
	}

	diffIDs, err := i.RootFS(ctx)
	if err != nil {
		return fmt.Errorf("failed to get diff IDs for image volume %q: %w", ref, err)
	}
	chainID := identity.ChainID(diffIDs).String()

	// The target path doubles as the snapshot key, with the image's chain ID
	// as its parent.
	s := c.client.SnapshotService(snapshotter)
	mounts, err := s.Prepare(ctx, target, chainID)
	if err != nil {
		return fmt.Errorf("failed to prepare for image volume %q: %w", ref, err)
	}
	defer func() {
		if retErr != nil {
			// Best-effort removal of the snapshot prepared above; the
			// original error is what the caller needs to see.
			_ = s.Remove(ctx, target)
		}
	}()

	err = os.MkdirAll(target, 0755)
	if err != nil {
		return fmt.Errorf("failed to create directory to image volume target path %q: %w", target, err)
	}

	if err := mount.All(mounts, target); err != nil {
		// A partially applied mount or a leftover empty directory would make
		// the os.Stat shortcut above treat this volume as already mounted on
		// the next attempt, so unwind both before returning. Cleanup failures
		// are only logged so the mount error is still what gets returned.
		if uerr := mount.UnmountAll(target, 0); uerr != nil {
			log.G(ctx).WithError(uerr).Warnf("failed to unmount image volume %q after mount failure", target)
		}
		if rerr := os.Remove(target); rerr != nil && !os.IsNotExist(rerr) {
			log.G(ctx).WithError(rerr).Warnf("failed to remove image volume directory %q after mount failure", target)
		}
		return fmt.Errorf("failed to mount image volume component %q: %w", target, err)
	}

	extraMount.HostPath = target
	return nil
}
// cleanupImageMounts tears down every image volume that was mounted for the
// given sandbox: for each entry under the sandbox's image volume base
// directory it unmounts the path, removes the snapshot keyed by that path, and
// removes the directory; finally it removes the base directory itself.
//
// It returns nil without doing anything when the sandbox runtime cannot be
// determined (the error is logged), when no snapshot service is available, or
// when the base directory does not exist. Snapshots and directories that are
// already gone are not treated as errors.
func (c *criService) cleanupImageMounts(
	ctx context.Context,
	sandboxID string,
) (retErr error) {
	// Some checks to avoid affecting old pods.
	ociRuntime, err := c.getPodSandboxRuntime(sandboxID)
	if err != nil {
		log.G(ctx).WithError(err).Errorf("failed to get sandbox runtime handler %q", sandboxID)
		return nil
	}
	snapshotter := c.RuntimeSnapshotter(ctx, ociRuntime)
	s := c.client.SnapshotService(snapshotter)
	if s == nil {
		return nil
	}

	targetBase := c.getImageVolumeBaseDir(sandboxID)
	entries, err := os.ReadDir(targetBase)
	if err != nil {
		if os.IsNotExist(err) {
			// No image volume was ever mounted for this sandbox.
			return nil
		}
		return fmt.Errorf("failed to read directory: %w", err)
	}

	for _, entry := range entries {
		target := filepath.Join(targetBase, entry.Name())

		err = mount.UnmountAll(target, 0)
		if err != nil {
			return fmt.Errorf("failed to unmount image volume component %q: %w", target, err)
		}

		// The mount path is also the snapshot key.
		err = s.Remove(ctx, target)
		if err != nil && !errdefs.IsNotFound(err) {
			return fmt.Errorf("failed to removing snapshot: %w", err)
		}

		// os.Remove reports a missing path as an *fs.PathError, which
		// errdefs.IsNotFound does not recognize; os.IsNotExist does.
		err = os.Remove(target)
		if err != nil && !os.IsNotExist(err) {
			return fmt.Errorf("failed to removing mounts directory: %w", err)
		}
	}

	err = os.Remove(targetBase)
	if err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("failed to remove directory to cleanup image volume mounts: %w", err)
	}

	return nil
}

View File

@ -33,6 +33,7 @@ import (
containerd "github.com/containerd/containerd/v2/client" containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/containers" "github.com/containerd/containerd/v2/core/containers"
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
containerstore "github.com/containerd/containerd/v2/internal/cri/store/container" containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
imagestore "github.com/containerd/containerd/v2/internal/cri/store/image" imagestore "github.com/containerd/containerd/v2/internal/cri/store/image"
"github.com/containerd/errdefs" "github.com/containerd/errdefs"
@ -61,6 +62,8 @@ const (
sandboxesDir = "sandboxes" sandboxesDir = "sandboxes"
// containersDir contains all container root. // containersDir contains all container root.
containersDir = "containers" containersDir = "containers"
// imageVolumeDir contains all image volume root.
imageVolumeDir = "image-volumes"
// Delimiter used to construct container/sandbox names. // Delimiter used to construct container/sandbox names.
nameDelimiter = "_" nameDelimiter = "_"
@ -139,6 +142,16 @@ func (c *criService) getContainerRootDir(id string) string {
return filepath.Join(c.config.RootDir, containersDir, id) return filepath.Join(c.config.RootDir, containersDir, id)
} }
// getImageVolumeHostPath returns the host directory, under the state
// directory, where the image identified by imageID is mounted for the pod
// identified by podID.
func (c *criService) getImageVolumeHostPath(podID, imageID string) string {
	podBase := c.getImageVolumeBaseDir(podID)
	return filepath.Join(podBase, imageID)
}
// getImageVolumeBaseDir returns the directory, under the state directory,
// that holds all image volume mounts of the pod identified by podID. It is
// the directory walked when cleaning up a pod's image volumes.
func (c *criService) getImageVolumeBaseDir(podID string) string {
	stateDir := c.config.StateDir
	return filepath.Join(stateDir, imageVolumeDir, podID)
}
// getVolatileContainerRootDir returns the root directory for managing volatile container files, // getVolatileContainerRootDir returns the root directory for managing volatile container files,
// e.g. named pipes. // e.g. named pipes.
func (c *criService) getVolatileContainerRootDir(id string) string { func (c *criService) getVolatileContainerRootDir(id string) string {
@ -356,6 +369,18 @@ func (c *criService) generateAndSendContainerEvent(ctx context.Context, containe
c.containerEventsQ.Send(event) c.containerEventsQ.Send(event)
} }
// getPodSandboxRuntime looks up the sandbox with the given ID in the sandbox
// store and returns the runtime configuration selected by its config and
// runtime handler. On any error the zero criconfig.Runtime is returned.
//
// The results are deliberately unnamed: a result called "runtime" would
// shadow the imported runtime package inside this function.
func (c *criService) getPodSandboxRuntime(sandboxID string) (criconfig.Runtime, error) {
	sandbox, err := c.sandboxStore.Get(sandboxID)
	if err != nil {
		return criconfig.Runtime{}, err
	}
	ociRuntime, err := c.config.GetSandboxRuntime(sandbox.Config, sandbox.Metadata.RuntimeHandler)
	if err != nil {
		return criconfig.Runtime{}, err
	}
	return ociRuntime, nil
}
func (c *criService) getPodSandboxStatus(ctx context.Context, podSandboxID string) (*runtime.PodSandboxStatus, error) { func (c *criService) getPodSandboxStatus(ctx context.Context, podSandboxID string) (*runtime.PodSandboxStatus, error) {
request := &runtime.PodSandboxStatusRequest{PodSandboxId: podSandboxID} request := &runtime.PodSandboxStatusRequest{PodSandboxId: podSandboxID}
response, err := c.PodSandboxStatus(ctx, request) response, err := c.PodSandboxStatus(ctx, request)

View File

@ -21,6 +21,7 @@ import (
"fmt" "fmt"
"time" "time"
"github.com/containerd/containerd/v2/core/leases"
"github.com/containerd/containerd/v2/pkg/tracing" "github.com/containerd/containerd/v2/pkg/tracing"
"github.com/containerd/errdefs" "github.com/containerd/errdefs"
"github.com/containerd/log" "github.com/containerd/log"
@ -59,6 +60,12 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS
return nil, fmt.Errorf("failed to forcibly stop sandbox %q: %w", id, err) return nil, fmt.Errorf("failed to forcibly stop sandbox %q: %w", id, err)
} }
if err := c.client.LeasesService().Delete(ctx, leases.Lease{ID: id}); err != nil {
if !errdefs.IsNotFound(err) {
return nil, fmt.Errorf("failed to delete lease for sandbox %q: %w", id, err)
}
}
// Return error if sandbox network namespace is not closed yet. // Return error if sandbox network namespace is not closed yet.
if sandbox.NetNS != nil { if sandbox.NetNS != nil {
nsPath := sandbox.NetNS.GetPath() nsPath := sandbox.NetNS.GetPath()

View File

@ -31,6 +31,7 @@ import (
"github.com/containerd/typeurl/v2" "github.com/containerd/typeurl/v2"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1" runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/v2/core/leases"
sb "github.com/containerd/containerd/v2/core/sandbox" sb "github.com/containerd/containerd/v2/core/sandbox"
"github.com/containerd/containerd/v2/internal/cri/annotations" "github.com/containerd/containerd/v2/internal/cri/annotations"
"github.com/containerd/containerd/v2/internal/cri/bandwidth" "github.com/containerd/containerd/v2/internal/cri/bandwidth"
@ -87,6 +88,22 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
} }
}() }()
leaseSvc := c.client.LeasesService()
ls, lerr := leaseSvc.Create(ctx, leases.WithID(id))
if lerr != nil {
return nil, fmt.Errorf("failed to create lease for sandbox name %q: %w", name, lerr)
}
defer func() {
if retErr != nil {
deferCtx, deferCancel := util.DeferContext()
defer deferCancel()
if derr := leaseSvc.Delete(deferCtx, ls); derr != nil {
log.G(deferCtx).WithError(derr).Error("failed to delete lease during cleanup")
}
}
}()
var ( var (
err error err error
sandboxInfo = sb.Sandbox{ID: id} sandboxInfo = sb.Sandbox{ID: id}

View File

@ -130,6 +130,11 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
} }
log.G(ctx).Infof("TearDown network for sandbox %q successfully", id) log.G(ctx).Infof("TearDown network for sandbox %q successfully", id)
err = c.cleanupImageMounts(ctx, id)
if err != nil {
return fmt.Errorf("failed to cleanup image mounts for sandbox %q: %w", id, err)
}
return nil return nil
} }