Windows snapshotter touch ups and new functionality
This change does a couple things to remove some cruft/unused functionality in the Windows snapshotter, as well as add a way to specify the rootfs size in bytes for a Windows container via a new field added in the CRI api in k8s 1.24. Setting the rootfs/scratch volume size was assumed to be working prior to this but turns out not to be the case. Previously I'd added a change to pass any annotations in the containerd snapshot form (containerd.io/snapshot/*) as labels for the containers rootfs snapshot. This was added as a means for a client to be able to provide containerd.io/snapshot/io.microsoft.container.storage.rootfs.size-gb as an annotation and have that be translated to a label and ultimately set the size for the scratch volume in Windows. However, this actually only worked if interfacing with the CRI api directly (crictl) as Kubernetes itself will fail to validate annotations that if split by "/" end up with > 2 parts, which the snapshot labels will (containerd.io / snapshot / foobarbaz). With this in mind, passing the annotations and filtering to containerd.io/snapshot/* is moot, so I've removed this code in favor of a new `snapshotterOpts()` function that will return platform specific snapshotter options if ones exist. Now on Windows we can just check if RootfsSizeInBytes is set on the WindowsContainerResources struct and then return a snapshotter option that sets the right label. So all in all this change: - Gets rid of code to pass CRI annotations as labels down to snapshotters. - Gets rid of the functionality to create a 1GB sized scratch disk if the client provided a size < 20GB. This code is not used currently and has a few logical shortcomings as it won't be able to create the disk if a container is already running and using the same base layer. WCIFS (driver that handles the unioning of windows container layers together) holds open handles to some files that we need to delete to create the 1GB scratch disk is the underlying problem. - Deprecates the containerd.io/snapshot/io.microsoft.container.storage.rootfs.size-gb label in favor of a new containerd.io/snapshot/windows/rootfs.sizebytes label. The previous label/annotation wasn't being used by us, and from a cursory github search wasn't being used by anyone else either. Now that there is a CRI field to specify the size, this should just be a field that users can set on their pod specs and don't need to concern themselves with what it eventually gets translated to, but non-CRI clients can still use the new label/deprecated label as usual. - Add test to cri integration suite to validate expanding the rootfs size. Signed-off-by: Daniel Canter <dcanter@microsoft.com>
This commit is contained in:
parent
c1bcabb454
commit
44e12dc5d8
@ -231,6 +231,16 @@ func WithResources(r *runtime.LinuxContainerResources) ContainerOpts { //nolint:
|
||||
}
|
||||
}
|
||||
|
||||
// Adds Windows container resource limits.
|
||||
func WithWindowsResources(r *runtime.WindowsContainerResources) ContainerOpts { //nolint:unused
|
||||
return func(c *runtime.ContainerConfig) {
|
||||
if c.Windows == nil {
|
||||
c.Windows = &runtime.WindowsContainerConfig{}
|
||||
}
|
||||
c.Windows.Resources = r
|
||||
}
|
||||
}
|
||||
|
||||
func WithVolumeMount(hostPath, containerPath string) ContainerOpts {
|
||||
return func(c *runtime.ContainerConfig) {
|
||||
hostPath, _ = filepath.Abs(hostPath)
|
||||
|
134
integration/windows_rootfs_size_test.go
Normal file
134
integration/windows_rootfs_size_test.go
Normal file
@ -0,0 +1,134 @@
|
||||
//go:build windows
|
||||
// +build windows
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package integration
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
||||
func TestWindowsRootfsSize(t *testing.T) {
|
||||
testPodLogDir := t.TempDir()
|
||||
|
||||
t.Log("Create a sandbox with log directory")
|
||||
sb, sbConfig := PodSandboxConfigWithCleanup(t, "sandbox", "windows-rootfs-size",
|
||||
WithPodLogDirectory(testPodLogDir),
|
||||
)
|
||||
|
||||
var (
|
||||
testImage = GetImage(Pause)
|
||||
containerName = "test-container"
|
||||
)
|
||||
|
||||
EnsureImageExists(t, testImage)
|
||||
|
||||
t.Log("Create a container to run the rootfs size test")
|
||||
|
||||
// Ask for 200GiB disk size
|
||||
rootfsSize := int64(200 * 1024 * 1024 * 1024)
|
||||
cnConfig := ContainerConfig(
|
||||
containerName,
|
||||
testImage,
|
||||
// Execute dir on the root of the image as it'll show the size available.
|
||||
// We're asking for ten times the default volume size so this should be
|
||||
// easy to verify.
|
||||
WithCommand("cmd", "/c", "dir", "/-C", "C:\\"),
|
||||
WithLogPath(containerName),
|
||||
WithWindowsResources(&runtime.WindowsContainerResources{RootfsSizeInBytes: rootfsSize}),
|
||||
)
|
||||
cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
|
||||
require.NoError(t, err)
|
||||
|
||||
t.Log("Start the container")
|
||||
require.NoError(t, runtimeService.StartContainer(cn))
|
||||
|
||||
t.Log("Wait for container to finish running")
|
||||
require.NoError(t, Eventually(func() (bool, error) {
|
||||
s, err := runtimeService.ContainerStatus(cn)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if s.GetState() == runtime.ContainerState_CONTAINER_EXITED {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
}, time.Second, 30*time.Second))
|
||||
|
||||
t.Log("Check container log")
|
||||
content, err := os.ReadFile(filepath.Join(testPodLogDir, containerName))
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Format of output for dir /-C:
|
||||
//
|
||||
// Volume in drive C has no label.
|
||||
// Volume Serial Number is 5CA1-BDE0
|
||||
|
||||
// Directory of C:\
|
||||
//
|
||||
// 05/05/2022 09:36 AM 5510 License.txt
|
||||
// 05/12/2022 08:34 PM <DIR> Users
|
||||
// 05/12/2022 08:34 PM <DIR> Windows
|
||||
// 1 File(s) 5510 bytes
|
||||
// 2 Dir(s) 214545743872 bytes free
|
||||
scanner := bufio.NewScanner(strings.NewReader(string(content)))
|
||||
found := false
|
||||
var (
|
||||
cols []string
|
||||
driveSize int64
|
||||
)
|
||||
for scanner.Scan() {
|
||||
outputLine := scanner.Text()
|
||||
cols = strings.Fields(outputLine)
|
||||
n := len(cols)
|
||||
if n >= 3 {
|
||||
if cols[n-2] == "bytes" && cols[n-1] == "free" {
|
||||
driveSize, err = strconv.ParseInt(cols[n-3], 10, 64)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
t.Log(string(content))
|
||||
t.Fatalf("could not find the size available on the drive")
|
||||
}
|
||||
|
||||
// Compare the bytes available at the root of the drive with the 200GiB we asked for. They won't
|
||||
// match up exactly as space is always occupied but we're giving 300MiB of leeway for content on
|
||||
// the virtual drive.
|
||||
toleranceInMB := int64(300 * 1024 * 1024)
|
||||
if driveSize < (rootfsSize - toleranceInMB) {
|
||||
t.Log(string(content))
|
||||
t.Fatalf("Size of the C:\\ volume is not within 300MiB of 200GiB. It is %d bytes", driveSize)
|
||||
}
|
||||
}
|
@ -40,7 +40,6 @@ import (
|
||||
containerstore "github.com/containerd/containerd/pkg/cri/store/container"
|
||||
"github.com/containerd/containerd/pkg/cri/util"
|
||||
ctrdutil "github.com/containerd/containerd/pkg/cri/util"
|
||||
"github.com/containerd/containerd/snapshots"
|
||||
)
|
||||
|
||||
func init() {
|
||||
@ -184,7 +183,9 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
|
||||
|
||||
log.G(ctx).Debugf("Container %q spec: %#+v", id, spew.NewFormatter(spec))
|
||||
|
||||
snapshotterOpt := snapshots.WithLabels(snapshots.FilterInheritedLabels(config.Annotations))
|
||||
// Grab any platform specific snapshotter opts.
|
||||
sOpts := snapshotterOpts(c.config.ContainerdConfig.Snapshotter, config)
|
||||
|
||||
// Set snapshotter before any other options.
|
||||
opts := []containerd.NewContainerOpts{
|
||||
containerd.WithSnapshotter(c.runtimeSnapshotter(ctx, ociRuntime)),
|
||||
@ -193,7 +194,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
|
||||
// the runtime (runc) a chance to modify (e.g. to create mount
|
||||
// points corresponding to spec.Mounts) before making the
|
||||
// rootfs readonly (requested by spec.Root.Readonly).
|
||||
customopts.WithNewSnapshot(id, containerdImage, snapshotterOpt),
|
||||
customopts.WithNewSnapshot(id, containerdImage, sOpts...),
|
||||
}
|
||||
if len(volumeMounts) > 0 {
|
||||
mountMap := make(map[string]string)
|
||||
|
@ -29,6 +29,7 @@ import (
|
||||
"github.com/containerd/containerd/contrib/apparmor"
|
||||
"github.com/containerd/containerd/contrib/seccomp"
|
||||
"github.com/containerd/containerd/oci"
|
||||
"github.com/containerd/containerd/snapshots"
|
||||
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
selinux "github.com/opencontainers/selinux/go-selinux"
|
||||
@ -597,3 +598,8 @@ func generateUserString(username string, uid, gid *runtime.Int64Value) (string,
|
||||
}
|
||||
return userstr, nil
|
||||
}
|
||||
|
||||
// snapshotterOpts returns any Linux specific snapshotter options for the rootfs snapshot
|
||||
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt {
|
||||
return []snapshots.Opt{}
|
||||
}
|
||||
|
@ -21,6 +21,7 @@ package server
|
||||
|
||||
import (
|
||||
"github.com/containerd/containerd/oci"
|
||||
"github.com/containerd/containerd/snapshots"
|
||||
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
@ -53,3 +54,8 @@ func (c *criService) containerSpec(
|
||||
func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
|
||||
return []oci.SpecOpts{}, nil
|
||||
}
|
||||
|
||||
// snapshotterOpts returns snapshotter options for the rootfs snapshot
|
||||
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt {
|
||||
return []snapshots.Opt{}
|
||||
}
|
||||
|
@ -18,9 +18,11 @@ package server
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/containerd/containerd/oci"
|
||||
"github.com/containerd/containerd/snapshots"
|
||||
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
@ -140,3 +142,22 @@ func (c *criService) containerSpec(
|
||||
func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// snapshotterOpts returns any Windows specific snapshotter options for the r/w layer
|
||||
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt {
|
||||
var opts []snapshots.Opt
|
||||
|
||||
switch snapshotterName {
|
||||
case "windows":
|
||||
rootfsSize := config.GetWindows().GetResources().GetRootfsSizeInBytes()
|
||||
if rootfsSize != 0 {
|
||||
sizeStr := fmt.Sprintf("%d", rootfsSize)
|
||||
labels := map[string]string{
|
||||
"containerd.io/snapshot/windows/rootfs.sizebytes": sizeStr,
|
||||
}
|
||||
opts = append(opts, snapshots.WithLabels(labels))
|
||||
}
|
||||
}
|
||||
|
||||
return opts
|
||||
}
|
||||
|
@ -33,7 +33,6 @@ import (
|
||||
"github.com/Microsoft/go-winio"
|
||||
winfs "github.com/Microsoft/go-winio/pkg/fs"
|
||||
"github.com/Microsoft/hcsshim"
|
||||
"github.com/Microsoft/hcsshim/computestorage"
|
||||
"github.com/Microsoft/hcsshim/pkg/ociwclayer"
|
||||
"github.com/containerd/containerd/errdefs"
|
||||
"github.com/containerd/containerd/log"
|
||||
@ -61,7 +60,12 @@ const (
|
||||
// Label to specify that we should make a scratch space for a UtilityVM.
|
||||
uvmScratchLabel = "containerd.io/snapshot/io.microsoft.vm.storage.scratch"
|
||||
// Label to control a containers scratch space size (sandbox.vhdx).
|
||||
rootfsSizeLabel = "containerd.io/snapshot/io.microsoft.container.storage.rootfs.size-gb"
|
||||
//
|
||||
// Deprecated: use rootfsSizeInBytesLabel
|
||||
rootfsSizeInGBLabel = "containerd.io/snapshot/io.microsoft.container.storage.rootfs.size-gb"
|
||||
// rootfsSizeInBytesLabel is a label to control a Windows containers scratch space
|
||||
// size in bytes.
|
||||
rootfsSizeInBytesLabel = "containerd.io/snapshot/windows/rootfs.sizebytes"
|
||||
)
|
||||
|
||||
type snapshotter struct {
|
||||
@ -381,13 +385,23 @@ func (s *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, k
|
||||
o(&snapshotInfo)
|
||||
}
|
||||
|
||||
var sizeGB int
|
||||
if sizeGBstr, ok := snapshotInfo.Labels[rootfsSizeLabel]; ok {
|
||||
i32, err := strconv.ParseInt(sizeGBstr, 10, 32)
|
||||
var sizeInBytes uint64
|
||||
if sizeGBstr, ok := snapshotInfo.Labels[rootfsSizeInGBLabel]; ok {
|
||||
log.G(ctx).Warnf("%q label is deprecated, please use %q instead.", rootfsSizeInGBLabel, rootfsSizeInBytesLabel)
|
||||
|
||||
sizeInGB, err := strconv.ParseUint(sizeGBstr, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse label %q=%q: %w", rootfsSizeLabel, sizeGBstr, err)
|
||||
return nil, fmt.Errorf("failed to parse label %q=%q: %w", rootfsSizeInGBLabel, sizeGBstr, err)
|
||||
}
|
||||
sizeInBytes = sizeInGB * 1024 * 1024 * 1024
|
||||
}
|
||||
|
||||
// Prefer the newer label in bytes over the deprecated Windows specific GB variant.
|
||||
if sizeBytesStr, ok := snapshotInfo.Labels[rootfsSizeInBytesLabel]; ok {
|
||||
sizeInBytes, err = strconv.ParseUint(sizeBytesStr, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse label %q=%q: %w", rootfsSizeInBytesLabel, sizeBytesStr, err)
|
||||
}
|
||||
sizeGB = int(i32)
|
||||
}
|
||||
|
||||
var makeUVMScratch bool
|
||||
@ -401,7 +415,7 @@ func (s *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, k
|
||||
return nil, fmt.Errorf("failed to make UVM's scratch layer: %w", err)
|
||||
}
|
||||
}
|
||||
if err := s.createScratchLayer(ctx, snDir, parentLayerPaths, sizeGB); err != nil {
|
||||
if err := s.createScratchLayer(ctx, snDir, parentLayerPaths, sizeInBytes); err != nil {
|
||||
return nil, fmt.Errorf("failed to create scratch layer: %w", err)
|
||||
}
|
||||
}
|
||||
@ -423,46 +437,23 @@ func (s *snapshotter) parentIDsToParentPaths(parentIDs []string) []string {
|
||||
}
|
||||
|
||||
// This is essentially a recreation of what HCS' CreateSandboxLayer does with some extra bells and
|
||||
// whistles like expanding the volume if a size is specified. This will create a 1GB scratch
|
||||
// vhdx to be used if a different sized scratch that is not equal to the default of 20 is requested.
|
||||
func (s *snapshotter) createScratchLayer(ctx context.Context, snDir string, parentLayers []string, sizeGB int) error {
|
||||
// whistles like expanding the volume if a size is specified.
|
||||
func (s *snapshotter) createScratchLayer(ctx context.Context, snDir string, parentLayers []string, sizeInBytes uint64) error {
|
||||
parentLen := len(parentLayers)
|
||||
if parentLen == 0 {
|
||||
return errors.New("no parent layers present")
|
||||
}
|
||||
|
||||
baseLayer := parentLayers[parentLen-1]
|
||||
|
||||
var (
|
||||
templateBase = filepath.Join(baseLayer, "blank-base.vhdx")
|
||||
templateDiffDisk = filepath.Join(baseLayer, "blank.vhdx")
|
||||
newDisks = sizeGB > 0 && sizeGB < 20
|
||||
expand = sizeGB > 0 && sizeGB != 20
|
||||
)
|
||||
|
||||
// If a size greater than 0 and less than 20 (the default size produced by hcs)
|
||||
// was specified we make a new set of disks to be used. We make it a 1GB disk and just
|
||||
// expand it to the size specified so for future container runs we don't need to remake a disk.
|
||||
if newDisks {
|
||||
templateBase = filepath.Join(baseLayer, "scratch.vhdx")
|
||||
templateDiffDisk = filepath.Join(baseLayer, "scratch-diff.vhdx")
|
||||
}
|
||||
|
||||
if _, err := os.Stat(templateDiffDisk); os.IsNotExist(err) {
|
||||
// Scratch disk not present so lets make it.
|
||||
if err := computestorage.SetupContainerBaseLayer(ctx, baseLayer, templateBase, templateDiffDisk, 1); err != nil {
|
||||
return fmt.Errorf("failed to create scratch vhdx at %q: %w", baseLayer, err)
|
||||
}
|
||||
}
|
||||
|
||||
templateDiffDisk := filepath.Join(baseLayer, "blank.vhdx")
|
||||
dest := filepath.Join(snDir, "sandbox.vhdx")
|
||||
if err := copyScratchDisk(templateDiffDisk, dest); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if expand {
|
||||
gbToByte := 1024 * 1024 * 1024
|
||||
if err := hcsshim.ExpandSandboxSize(s.info, filepath.Base(snDir), uint64(gbToByte*sizeGB)); err != nil {
|
||||
return fmt.Errorf("failed to expand sandbox vhdx size to %d GB: %w", sizeGB, err)
|
||||
if sizeInBytes != 0 {
|
||||
if err := hcsshim.ExpandSandboxSize(s.info, filepath.Base(snDir), sizeInBytes); err != nil {
|
||||
return fmt.Errorf("failed to expand sandbox vhdx size to %d bytes: %w", sizeInBytes, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
Loading…
Reference in New Issue
Block a user