containerd/pkg/cri/server/container_create_windows.go
Daniel Canter 44e12dc5d8 Windows snapshotter touch ups and new functionality
This change does a couple things to remove some cruft/unused functionality
in the Windows snapshotter, as well as add a way to specify the rootfs
size in bytes for a Windows container via a new field added in the CRI api in
k8s 1.24. Setting the rootfs/scratch volume size was assumed to be working
prior to this but turns out not to be the case.

Previously I'd added a change to pass any annotations in the containerd
snapshot form (containerd.io/snapshot/*) as labels for the containers
rootfs snapshot. This was added as a means for a client to be able to provide
containerd.io/snapshot/io.microsoft.container.storage.rootfs.size-gb as an
annotation and have that be translated to a label and ultimately set the
size for the scratch volume in Windows. However, this actually only worked if
interfacing with the CRI api directly (crictl) as Kubernetes itself will
fail to validate annotations that if split by "/" end up with > 2 parts,
which the snapshot labels will (containerd.io / snapshot / foobarbaz).

With this in mind, passing the annotations and filtering to
containerd.io/snapshot/* is moot, so I've removed this code in favor of
a new `snapshotterOpts()` function that will return platform specific
snapshotter options if ones exist. Now on Windows we can just check if
RootfsSizeInBytes is set on the WindowsContainerResources struct and
then return a snapshotter option that sets the right label.

So all in all this change:
- Gets rid of code to pass CRI annotations as labels down to snapshotters.

- Gets rid of the functionality to create a 1GB sized scratch disk if
the client provided a size < 20GB. This code is not used currently and
has a few logical shortcomings as it won't be able to create the disk
if a container is already running and using the same base layer. WCIFS
(driver that handles the unioning of windows container layers together)
holds open handles to some files that we need to delete to create the
1GB scratch disk is the underlying problem.

- Deprecates the containerd.io/snapshot/io.microsoft.container.storage.rootfs.size-gb
label in favor of a new containerd.io/snapshot/windows/rootfs.sizebytes label.
The previous label/annotation wasn't being used by us, and from a cursory
github search wasn't being used by anyone else either. Now that there is a CRI
field to specify the size, this should just be a field that users can set
on their pod specs and don't need to concern themselves with what it eventually
gets translated to, but non-CRI clients can still use the new label/deprecated
label as usual.

- Add test to cri integration suite to validate expanding the rootfs size.

Signed-off-by: Daniel Canter <dcanter@microsoft.com>
2022-06-06 14:57:07 -07:00

164 lines
5.8 KiB
Go

/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"errors"
"fmt"
"strconv"
"github.com/containerd/containerd/oci"
"github.com/containerd/containerd/snapshots"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/pkg/cri/annotations"
"github.com/containerd/containerd/pkg/cri/config"
customopts "github.com/containerd/containerd/pkg/cri/opts"
)
// No container mounts for windows.
func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount {
return nil
}
func (c *criService) containerSpec(
id string,
sandboxID string,
sandboxPid uint32,
netNSPath string,
containerName string,
imageName string,
config *runtime.ContainerConfig,
sandboxConfig *runtime.PodSandboxConfig,
imageConfig *imagespec.ImageConfig,
extraMounts []*runtime.Mount,
ociRuntime config.Runtime,
) (*runtimespec.Spec, error) {
specOpts := []oci.SpecOpts{
customopts.WithProcessArgs(config, imageConfig),
}
// All containers in a pod need to have HostProcess set if it was set on the pod,
// and vice versa no containers in the pod can be HostProcess if the pods spec
// didn't have the field set. The only case that is valid is if these are the same value.
cntrHpc := config.GetWindows().GetSecurityContext().GetHostProcess()
sandboxHpc := sandboxConfig.GetWindows().GetSecurityContext().GetHostProcess()
if cntrHpc != sandboxHpc {
return nil, errors.New("pod spec and all containers inside must have the HostProcess field set to be valid")
}
if config.GetWorkingDir() != "" {
specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir()))
} else if imageConfig.WorkingDir != "" {
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
}
if config.GetTty() {
specOpts = append(specOpts, oci.WithTTY)
}
// Apply envs from image config first, so that envs from container config
// can override them.
env := append([]string{}, imageConfig.Env...)
for _, e := range config.GetEnvs() {
env = append(env, e.GetKey()+"="+e.GetValue())
}
specOpts = append(specOpts, oci.WithEnv(env))
specOpts = append(specOpts,
// Clear the root location since hcsshim expects it.
// NOTE: readonly rootfs doesn't work on windows.
customopts.WithoutRoot,
customopts.WithWindowsNetworkNamespace(netNSPath),
oci.WithHostname(sandboxConfig.GetHostname()),
)
specOpts = append(specOpts, customopts.WithWindowsMounts(c.os, config, extraMounts), customopts.WithDevices(config))
// Start with the image config user and override below if RunAsUsername is not "".
username := imageConfig.User
windowsConfig := config.GetWindows()
if windowsConfig != nil {
specOpts = append(specOpts, customopts.WithWindowsResources(windowsConfig.GetResources()))
securityCtx := windowsConfig.GetSecurityContext()
if securityCtx != nil {
runAsUser := securityCtx.GetRunAsUsername()
if runAsUser != "" {
username = runAsUser
}
cs := securityCtx.GetCredentialSpec()
if cs != "" {
specOpts = append(specOpts, customopts.WithWindowsCredentialSpec(cs))
}
}
}
// There really isn't a good Windows way to verify that the username is available in the
// image as early as here like there is for Linux. Later on in the stack hcsshim
// will handle the behavior of erroring out if the user isn't available in the image
// when trying to run the init process.
specOpts = append(specOpts, oci.WithUser(username))
for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations,
ociRuntime.PodAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
}
for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
ociRuntime.ContainerAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
}
specOpts = append(specOpts,
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
customopts.WithAnnotation(annotations.SandboxNamespace, sandboxConfig.GetMetadata().GetNamespace()),
customopts.WithAnnotation(annotations.SandboxName, sandboxConfig.GetMetadata().GetName()),
customopts.WithAnnotation(annotations.ContainerName, containerName),
customopts.WithAnnotation(annotations.ImageName, imageName),
customopts.WithAnnotation(annotations.WindowsHostProcess, strconv.FormatBool(sandboxHpc)),
)
return c.runtimeSpec(id, ociRuntime.BaseRuntimeSpec, specOpts...)
}
// No extra spec options needed for windows.
func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
return nil, nil
}
// snapshotterOpts returns any Windows specific snapshotter options for the r/w layer
func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) []snapshots.Opt {
var opts []snapshots.Opt
switch snapshotterName {
case "windows":
rootfsSize := config.GetWindows().GetResources().GetRootfsSizeInBytes()
if rootfsSize != 0 {
sizeStr := fmt.Sprintf("%d", rootfsSize)
labels := map[string]string{
"containerd.io/snapshot/windows/rootfs.sizebytes": sizeStr,
}
opts = append(opts, snapshots.WithLabels(labels))
}
}
return opts
}