637 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			637 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
/*
 | 
						|
   Copyright The containerd Authors.
 | 
						|
 | 
						|
   Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
   you may not use this file except in compliance with the License.
 | 
						|
   You may obtain a copy of the License at
 | 
						|
 | 
						|
       http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 | 
						|
   Unless required by applicable law or agreed to in writing, software
 | 
						|
   distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
   See the License for the specific language governing permissions and
 | 
						|
   limitations under the License.
 | 
						|
*/
 | 
						|
 | 
						|
package server
 | 
						|
 | 
						|
import (
 | 
						|
	"context"
 | 
						|
	"fmt"
 | 
						|
	"path"
 | 
						|
	"path/filepath"
 | 
						|
	"regexp"
 | 
						|
	goruntime "runtime"
 | 
						|
	"strconv"
 | 
						|
	"strings"
 | 
						|
	"time"
 | 
						|
 | 
						|
	"github.com/containerd/typeurl/v2"
 | 
						|
	runtimespec "github.com/opencontainers/runtime-spec/specs-go"
 | 
						|
	runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
 | 
						|
 | 
						|
	containerd "github.com/containerd/containerd/v2/client"
 | 
						|
	"github.com/containerd/containerd/v2/core/containers"
 | 
						|
	crilabels "github.com/containerd/containerd/v2/pkg/cri/labels"
 | 
						|
	containerstore "github.com/containerd/containerd/v2/pkg/cri/store/container"
 | 
						|
	imagestore "github.com/containerd/containerd/v2/pkg/cri/store/image"
 | 
						|
	"github.com/containerd/containerd/v2/pkg/errdefs"
 | 
						|
	clabels "github.com/containerd/containerd/v2/pkg/labels"
 | 
						|
	"github.com/containerd/log"
 | 
						|
)
 | 
						|
 | 
						|
// TODO: Move common helpers for sbserver and podsandbox to a dedicated package once basic services are functional.
 | 
						|
 | 
						|
const (
	// errorStartReason is the exit reason when a container fails to start.
	errorStartReason = "StartError"
	// errorStartExitCode is the exit code when a container fails to start.
	// 128 is the same as Docker's behavior.
	// TODO(windows): Figure out what should be used for windows.
	errorStartExitCode = 128
	// completeExitReason is the exit reason when a container exits with code 0.
	completeExitReason = "Completed"
	// errorExitReason is the exit reason when a container exits with a non-zero code.
	errorExitReason = "Error"
	// oomExitReason is the exit reason when a process in the container is OOM killed.
	oomExitReason = "OOMKilled"

	// sandboxesDir contains all sandbox roots. A sandbox root is the running
	// directory of the sandbox; all files created for the sandbox are
	// placed under this directory.
	sandboxesDir = "sandboxes"
	// containersDir contains all container roots.
	containersDir = "containers"
	// nameDelimiter is the delimiter used to construct container/sandbox names.
	nameDelimiter = "_"

	// defaultIfName is the default network interface for the pods.
	defaultIfName = "eth0"

	// devShm is the default path of /dev/shm.
	devShm = "/dev/shm"
	// etcHosts is the default path of the /etc/hosts file.
	etcHosts = "/etc/hosts"
	// etcHostname is the default path of the /etc/hostname file.
	etcHostname = "/etc/hostname"
	// resolvConfPath is the absolute path of resolv.conf on the host or in a container.
	resolvConfPath = "/etc/resolv.conf"
)
 | 
						|
 | 
						|
// getSandboxRootDir returns the root directory for managing sandbox files,
 | 
						|
// e.g. hosts files.
 | 
						|
func (c *criService) getSandboxRootDir(id string) string {
 | 
						|
	return filepath.Join(c.config.RootDir, sandboxesDir, id)
 | 
						|
}
 | 
						|
 | 
						|
// getVolatileSandboxRootDir returns the root directory for managing volatile sandbox files,
 | 
						|
// e.g. named pipes.
 | 
						|
func (c *criService) getVolatileSandboxRootDir(id string) string {
 | 
						|
	return filepath.Join(c.config.StateDir, sandboxesDir, id)
 | 
						|
}
 | 
						|
 | 
						|
// getSandboxHostname returns the hostname file path inside the sandbox root directory.
 | 
						|
func (c *criService) getSandboxHostname(id string) string {
 | 
						|
	return filepath.Join(c.getSandboxRootDir(id), "hostname")
 | 
						|
}
 | 
						|
 | 
						|
// getSandboxHosts returns the hosts file path inside the sandbox root directory.
 | 
						|
func (c *criService) getSandboxHosts(id string) string {
 | 
						|
	return filepath.Join(c.getSandboxRootDir(id), "hosts")
 | 
						|
}
 | 
						|
 | 
						|
// getResolvPath returns resolv.conf filepath for specified sandbox.
 | 
						|
func (c *criService) getResolvPath(id string) string {
 | 
						|
	return filepath.Join(c.getSandboxRootDir(id), "resolv.conf")
 | 
						|
}
 | 
						|
 | 
						|
// getSandboxDevShm returns the shm file path inside the sandbox root directory.
 | 
						|
func (c *criService) getSandboxDevShm(id string) string {
 | 
						|
	return filepath.Join(c.getVolatileSandboxRootDir(id), "shm")
 | 
						|
}
 | 
						|
 | 
						|
// makeSandboxName generates sandbox name from sandbox metadata. The name
 | 
						|
// generated is unique as long as sandbox metadata is unique.
 | 
						|
func makeSandboxName(s *runtime.PodSandboxMetadata) string {
 | 
						|
	return strings.Join([]string{
 | 
						|
		s.Name,      // 0
 | 
						|
		s.Namespace, // 1
 | 
						|
		s.Uid,       // 2
 | 
						|
		strconv.FormatUint(uint64(s.Attempt), 10), // 3
 | 
						|
	}, nameDelimiter)
 | 
						|
}
 | 
						|
 | 
						|
// makeContainerName generates container name from sandbox and container metadata.
 | 
						|
// The name generated is unique as long as the sandbox container combination is
 | 
						|
// unique.
 | 
						|
func makeContainerName(c *runtime.ContainerMetadata, s *runtime.PodSandboxMetadata) string {
 | 
						|
	return strings.Join([]string{
 | 
						|
		c.Name,      // 0: container name
 | 
						|
		s.Name,      // 1: pod name
 | 
						|
		s.Namespace, // 2: pod namespace
 | 
						|
		s.Uid,       // 3: pod uid
 | 
						|
		strconv.FormatUint(uint64(c.Attempt), 10), // 4: attempt number of creating the container
 | 
						|
	}, nameDelimiter)
 | 
						|
}
 | 
						|
 | 
						|
// getContainerRootDir returns the root directory for managing container files,
 | 
						|
// e.g. state checkpoint.
 | 
						|
func (c *criService) getContainerRootDir(id string) string {
 | 
						|
	return filepath.Join(c.config.RootDir, containersDir, id)
 | 
						|
}
 | 
						|
 | 
						|
// getVolatileContainerRootDir returns the root directory for managing volatile container files,
 | 
						|
// e.g. named pipes.
 | 
						|
func (c *criService) getVolatileContainerRootDir(id string) string {
 | 
						|
	return filepath.Join(c.config.StateDir, containersDir, id)
 | 
						|
}
 | 
						|
 | 
						|
// criContainerStateToString formats CRI container state to string.
 | 
						|
func criContainerStateToString(state runtime.ContainerState) string {
 | 
						|
	return runtime.ContainerState_name[int32(state)]
 | 
						|
}
 | 
						|
 | 
						|
// toContainerdImage converts an image object in image store to containerd image handler.
 | 
						|
func (c *criService) toContainerdImage(ctx context.Context, image imagestore.Image) (containerd.Image, error) {
 | 
						|
	// image should always have at least one reference.
 | 
						|
	if len(image.References) == 0 {
 | 
						|
		return nil, fmt.Errorf("invalid image with no reference %q", image.ID)
 | 
						|
	}
 | 
						|
	return c.client.GetImage(ctx, image.References[0])
 | 
						|
}
 | 
						|
 | 
						|
// getUserFromImage gets uid or user name of the image user.
// If user is numeric, it will be treated as uid; or else, it is treated as user name.
//
// Only the part before the first ':' is considered, so "user:group" and
// "uid:gid" forms are accepted. Exactly one of the two results is meaningful:
//   - (nil, "")   when no user is specified (or the user part is empty),
//   - (&uid, "")  when the user part parses as a base-10 int64,
//   - (nil, name) otherwise.
func getUserFromImage(user string) (*int64, string) {
	// return both empty if user is not specified in the image.
	if user == "" {
		return nil, ""
	}
	// Drop an optional ":group" suffix; only the user part matters here.
	if i := strings.IndexByte(user, ':'); i >= 0 {
		user = user[:i]
	}
	// user could be either uid or user name. Try to interpret as numeric uid.
	uid, err := strconv.ParseInt(user, 10, 64)
	if err != nil {
		// If user is non numeric, assume it's user name.
		return nil, user
	}
	// If user is a numeric uid.
	return &uid, ""
}
 | 
						|
 | 
						|
// validateTargetContainer checks that a container is a valid
 | 
						|
// target for a container using PID NamespaceMode_TARGET.
 | 
						|
// The target container must be in the same sandbox and must be running.
 | 
						|
// Returns the target container for convenience.
 | 
						|
func (c *criService) validateTargetContainer(sandboxID, targetContainerID string) (containerstore.Container, error) {
 | 
						|
	targetContainer, err := c.containerStore.Get(targetContainerID)
 | 
						|
	if err != nil {
 | 
						|
		return containerstore.Container{}, fmt.Errorf("container %q does not exist: %w", targetContainerID, err)
 | 
						|
	}
 | 
						|
 | 
						|
	targetSandboxID := targetContainer.Metadata.SandboxID
 | 
						|
	if targetSandboxID != sandboxID {
 | 
						|
		return containerstore.Container{},
 | 
						|
			fmt.Errorf("container %q (sandbox %s) does not belong to sandbox %s", targetContainerID, targetSandboxID, sandboxID)
 | 
						|
	}
 | 
						|
 | 
						|
	status := targetContainer.Status.Get()
 | 
						|
	if state := status.State(); state != runtime.ContainerState_CONTAINER_RUNNING {
 | 
						|
		return containerstore.Container{}, fmt.Errorf("container %q is not running - in state %s", targetContainerID, state)
 | 
						|
	}
 | 
						|
 | 
						|
	return targetContainer, nil
 | 
						|
}
 | 
						|
 | 
						|
// isInCRIMounts checks whether a destination is in CRI mount list.
 | 
						|
func isInCRIMounts(dst string, mounts []*runtime.Mount) bool {
 | 
						|
	for _, m := range mounts {
 | 
						|
		if filepath.Clean(m.ContainerPath) == filepath.Clean(dst) {
 | 
						|
			return true
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
// filterLabel returns a label filter of the form labels."<k>"=="<v>".
// Use `%q` here because containerd filter needs extra quote to work properly.
func filterLabel(k, v string) string {
	return fmt.Sprintf("labels.%q==%q", k, v)
}
 | 
						|
 | 
						|
// buildLabel builds the labels from config to be passed to containerd
 | 
						|
func buildLabels(configLabels, imageConfigLabels map[string]string, containerType string) map[string]string {
 | 
						|
	labels := make(map[string]string)
 | 
						|
 | 
						|
	for k, v := range imageConfigLabels {
 | 
						|
		if err := clabels.Validate(k, v); err == nil {
 | 
						|
			labels[k] = v
 | 
						|
		} else {
 | 
						|
			// In case the image label is invalid, we output a warning and skip adding it to the
 | 
						|
			// container.
 | 
						|
			log.L.WithError(err).Warnf("unable to add image label with key %s to the container", k)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	// labels from the CRI request (config) will override labels in the image config
 | 
						|
	for k, v := range configLabels {
 | 
						|
		labels[k] = v
 | 
						|
	}
 | 
						|
	labels[crilabels.ContainerKindLabel] = containerType
 | 
						|
	return labels
 | 
						|
}
 | 
						|
 | 
						|
// getRuntimeOptions get runtime options from container metadata.
 | 
						|
func getRuntimeOptions(c containers.Container) (interface{}, error) {
 | 
						|
	from := c.Runtime.Options
 | 
						|
	if from == nil || from.GetValue() == nil {
 | 
						|
		return nil, nil
 | 
						|
	}
 | 
						|
	opts, err := typeurl.UnmarshalAny(from)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
	return opts, nil
 | 
						|
}
 | 
						|
 | 
						|
const (
	// unknownExitCode is the exit code when exit reason is unknown.
	unknownExitCode = 255
	// unknownExitReason is the exit reason when exit reason is unknown.
	unknownExitReason = "Unknown"
)
 | 
						|
 | 
						|
// unknownContainerStatus returns the default container status when its status is unknown.
 | 
						|
func unknownContainerStatus() containerstore.Status {
 | 
						|
	return containerstore.Status{
 | 
						|
		CreatedAt:  0,
 | 
						|
		StartedAt:  0,
 | 
						|
		FinishedAt: 0,
 | 
						|
		ExitCode:   unknownExitCode,
 | 
						|
		Reason:     unknownExitReason,
 | 
						|
		Unknown:    true,
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// getPassthroughAnnotations filters requested pod annotations by comparing
// against permitted annotations for the given runtime.
//
// runtimePodAnnotations is a list of path.Match patterns; a pod annotation is
// kept when its key matches at least one of them. The returned map is never
// nil, even when nothing matches.
func getPassthroughAnnotations(podAnnotations map[string]string,
	runtimePodAnnotations []string) (passthroughAnnotations map[string]string) {
	passthroughAnnotations = make(map[string]string)

	for podAnnotationKey, podAnnotationValue := range podAnnotations {
		for _, pattern := range runtimePodAnnotations {
			// Use path.Match instead of filepath.Match here.
			// filepath.Match treated `\\` as path separator
			// on windows, which is not what we want.
			//
			// The error (path.ErrBadPattern) is deliberately ignored: a
			// malformed pattern simply never matches.
			if ok, _ := path.Match(pattern, podAnnotationKey); ok {
				passthroughAnnotations[podAnnotationKey] = podAnnotationValue
				// One matching pattern is enough; skip the remaining ones.
				break
			}
		}
	}
	return passthroughAnnotations
}
 | 
						|
 | 
						|
// copyResourcesToStatus copys container resource contraints from spec to
 | 
						|
// container status.
 | 
						|
// This will need updates when new fields are added to ContainerResources.
 | 
						|
func copyResourcesToStatus(spec *runtimespec.Spec, status containerstore.Status) containerstore.Status {
 | 
						|
	status.Resources = &runtime.ContainerResources{}
 | 
						|
	if spec.Linux != nil {
 | 
						|
		status.Resources.Linux = &runtime.LinuxContainerResources{}
 | 
						|
 | 
						|
		if spec.Process != nil && spec.Process.OOMScoreAdj != nil {
 | 
						|
			status.Resources.Linux.OomScoreAdj = int64(*spec.Process.OOMScoreAdj)
 | 
						|
		}
 | 
						|
 | 
						|
		if spec.Linux.Resources == nil {
 | 
						|
			return status
 | 
						|
		}
 | 
						|
 | 
						|
		if spec.Linux.Resources.CPU != nil {
 | 
						|
			if spec.Linux.Resources.CPU.Period != nil {
 | 
						|
				status.Resources.Linux.CpuPeriod = int64(*spec.Linux.Resources.CPU.Period)
 | 
						|
			}
 | 
						|
			if spec.Linux.Resources.CPU.Quota != nil {
 | 
						|
				status.Resources.Linux.CpuQuota = *spec.Linux.Resources.CPU.Quota
 | 
						|
			}
 | 
						|
			if spec.Linux.Resources.CPU.Shares != nil {
 | 
						|
				status.Resources.Linux.CpuShares = int64(*spec.Linux.Resources.CPU.Shares)
 | 
						|
			}
 | 
						|
			status.Resources.Linux.CpusetCpus = spec.Linux.Resources.CPU.Cpus
 | 
						|
			status.Resources.Linux.CpusetMems = spec.Linux.Resources.CPU.Mems
 | 
						|
		}
 | 
						|
 | 
						|
		if spec.Linux.Resources.Memory != nil {
 | 
						|
			if spec.Linux.Resources.Memory.Limit != nil {
 | 
						|
				status.Resources.Linux.MemoryLimitInBytes = *spec.Linux.Resources.Memory.Limit
 | 
						|
			}
 | 
						|
			if spec.Linux.Resources.Memory.Swap != nil {
 | 
						|
				status.Resources.Linux.MemorySwapLimitInBytes = *spec.Linux.Resources.Memory.Swap
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		if spec.Linux.Resources.HugepageLimits != nil {
 | 
						|
			hugepageLimits := make([]*runtime.HugepageLimit, 0, len(spec.Linux.Resources.HugepageLimits))
 | 
						|
			for _, l := range spec.Linux.Resources.HugepageLimits {
 | 
						|
				hugepageLimits = append(hugepageLimits, &runtime.HugepageLimit{
 | 
						|
					PageSize: l.Pagesize,
 | 
						|
					Limit:    l.Limit,
 | 
						|
				})
 | 
						|
			}
 | 
						|
			status.Resources.Linux.HugepageLimits = hugepageLimits
 | 
						|
		}
 | 
						|
 | 
						|
		if spec.Linux.Resources.Unified != nil {
 | 
						|
			status.Resources.Linux.Unified = spec.Linux.Resources.Unified
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	if spec.Windows != nil {
 | 
						|
		status.Resources.Windows = &runtime.WindowsContainerResources{}
 | 
						|
		if spec.Windows.Resources == nil {
 | 
						|
			return status
 | 
						|
		}
 | 
						|
 | 
						|
		if spec.Windows.Resources.CPU != nil {
 | 
						|
			if spec.Windows.Resources.CPU.Shares != nil {
 | 
						|
				status.Resources.Windows.CpuShares = int64(*spec.Windows.Resources.CPU.Shares)
 | 
						|
			}
 | 
						|
			if spec.Windows.Resources.CPU.Count != nil {
 | 
						|
				status.Resources.Windows.CpuCount = int64(*spec.Windows.Resources.CPU.Count)
 | 
						|
			}
 | 
						|
			if spec.Windows.Resources.CPU.Maximum != nil {
 | 
						|
				status.Resources.Windows.CpuMaximum = int64(*spec.Windows.Resources.CPU.Maximum)
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		if spec.Windows.Resources.Memory != nil {
 | 
						|
			if spec.Windows.Resources.Memory.Limit != nil {
 | 
						|
				status.Resources.Windows.MemoryLimitInBytes = int64(*spec.Windows.Resources.Memory.Limit)
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		// TODO: Figure out how to get RootfsSizeInBytes
 | 
						|
	}
 | 
						|
	return status
 | 
						|
}
 | 
						|
 | 
						|
func (c *criService) generateAndSendContainerEvent(ctx context.Context, containerID string, sandboxID string, eventType runtime.ContainerEventType) {
 | 
						|
	podSandboxStatus, err := c.getPodSandboxStatus(ctx, sandboxID)
 | 
						|
	if err != nil {
 | 
						|
		log.G(ctx).Warnf("Failed to get podSandbox status for container event for sandboxID %q: %v. Sending the event with nil podSandboxStatus.", sandboxID, err)
 | 
						|
		podSandboxStatus = nil
 | 
						|
	}
 | 
						|
	containerStatuses, err := c.getContainerStatuses(ctx, sandboxID)
 | 
						|
	if err != nil {
 | 
						|
		log.G(ctx).Errorf("Failed to get container statuses for container event for sandboxID %q: %v", sandboxID, err)
 | 
						|
	}
 | 
						|
 | 
						|
	event := runtime.ContainerEventResponse{
 | 
						|
		ContainerId:        containerID,
 | 
						|
		ContainerEventType: eventType,
 | 
						|
		CreatedAt:          time.Now().UnixNano(),
 | 
						|
		PodSandboxStatus:   podSandboxStatus,
 | 
						|
		ContainersStatuses: containerStatuses,
 | 
						|
	}
 | 
						|
 | 
						|
	// TODO(ruiwen-zhao): write events to a cache, storage, or increase the size of the channel
 | 
						|
	select {
 | 
						|
	case c.containerEventsChan <- event:
 | 
						|
	default:
 | 
						|
		containerEventsDroppedCount.Inc()
 | 
						|
		log.G(ctx).Debugf("containerEventsChan is full, discarding event %+v", event)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (c *criService) getPodSandboxStatus(ctx context.Context, podSandboxID string) (*runtime.PodSandboxStatus, error) {
 | 
						|
	request := &runtime.PodSandboxStatusRequest{PodSandboxId: podSandboxID}
 | 
						|
	response, err := c.PodSandboxStatus(ctx, request)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
	return response.GetStatus(), nil
 | 
						|
}
 | 
						|
 | 
						|
func (c *criService) getContainerStatuses(ctx context.Context, podSandboxID string) ([]*runtime.ContainerStatus, error) {
 | 
						|
	response, err := c.ListContainers(ctx, &runtime.ListContainersRequest{
 | 
						|
		Filter: &runtime.ContainerFilter{
 | 
						|
			PodSandboxId: podSandboxID,
 | 
						|
		},
 | 
						|
	})
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
	containerStatuses := []*runtime.ContainerStatus{}
 | 
						|
	for _, container := range response.Containers {
 | 
						|
		statusResp, err := c.ContainerStatus(ctx, &runtime.ContainerStatusRequest{
 | 
						|
			ContainerId: container.Id,
 | 
						|
			Verbose:     false,
 | 
						|
		})
 | 
						|
		if err != nil {
 | 
						|
			if errdefs.IsNotFound(err) {
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			return nil, err
 | 
						|
		}
 | 
						|
		containerStatuses = append(containerStatuses, statusResp.GetStatus())
 | 
						|
	}
 | 
						|
	return containerStatuses, nil
 | 
						|
}
 | 
						|
 | 
						|
// hostNetwork handles checking if host networking was requested.
 | 
						|
func hostNetwork(config *runtime.PodSandboxConfig) bool {
 | 
						|
	var hostNet bool
 | 
						|
	switch goruntime.GOOS {
 | 
						|
	case "windows":
 | 
						|
		// Windows HostProcess pods can only run on the host network
 | 
						|
		hostNet = config.GetWindows().GetSecurityContext().GetHostProcess()
 | 
						|
	case "darwin":
 | 
						|
		// No CNI on Darwin yet.
 | 
						|
		hostNet = true
 | 
						|
	default:
 | 
						|
		// Even on other platforms, the logic containerd uses is to check if NamespaceMode == NODE.
 | 
						|
		// So this handles Linux, as well as any other platforms not governed by the cases above
 | 
						|
		// that have special quirks.
 | 
						|
		hostNet = config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE
 | 
						|
	}
 | 
						|
	return hostNet
 | 
						|
}
 | 
						|
 | 
						|
// getCgroupsPath generates container cgroups path.
//
// When the last element of cgroupsParent ends in ".slice", the result uses
// the "slice:prefix:name" form with the prefix "cri-containerd" and only that
// last element as the slice. Otherwise the result is id joined under
// cgroupsParent.
func getCgroupsPath(cgroupsParent, id string) string {
	base := path.Base(cgroupsParent)
	if strings.HasSuffix(base, ".slice") {
		// For a.slice/b.slice/c.slice, base is c.slice.
		// runc systemd cgroup path format is "slice:prefix:name".
		return strings.Join([]string{base, "cri-containerd", id}, ":")
	}
	return filepath.Join(cgroupsParent, id)
}
 | 
						|
 | 
						|
func toLabel(selinuxOptions *runtime.SELinuxOption) ([]string, error) {
 | 
						|
	var labels []string
 | 
						|
 | 
						|
	if selinuxOptions == nil {
 | 
						|
		return nil, nil
 | 
						|
	}
 | 
						|
	if err := checkSelinuxLevel(selinuxOptions.Level); err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
	if selinuxOptions.User != "" {
 | 
						|
		labels = append(labels, "user:"+selinuxOptions.User)
 | 
						|
	}
 | 
						|
	if selinuxOptions.Role != "" {
 | 
						|
		labels = append(labels, "role:"+selinuxOptions.Role)
 | 
						|
	}
 | 
						|
	if selinuxOptions.Type != "" {
 | 
						|
		labels = append(labels, "type:"+selinuxOptions.Type)
 | 
						|
	}
 | 
						|
	if selinuxOptions.Level != "" {
 | 
						|
		labels = append(labels, "level:"+selinuxOptions.Level)
 | 
						|
	}
 | 
						|
 | 
						|
	return labels, nil
 | 
						|
}
 | 
						|
 | 
						|
// selinuxLevelRegexp is the accepted shape of an SELinux level: a sensitivity
// ("s" plus one digit), an optional "-s<digit>" upper bound, and an optional
// ":"-introduced, comma-separated list of categories ("c" plus 1-4 digits)
// or category ranges ("cN.cM"). It is compiled once instead of on every call.
var selinuxLevelRegexp = regexp.MustCompile(`^s\d(-s\d)??(:c\d{1,4}(\.c\d{1,4})?(,c\d{1,4}(\.c\d{1,4})?)*)?$`)

// checkSelinuxLevel validates the format of an SELinux level string.
// An empty level is accepted; any other value must match
// selinuxLevelRegexp, otherwise an error quoting the level is returned.
func checkSelinuxLevel(level string) error {
	if len(level) == 0 {
		return nil
	}

	if !selinuxLevelRegexp.MatchString(level) {
		return fmt.Errorf("the format of 'level' %q is not correct", level)
	}
	return nil
}
 | 
						|
 | 
						|
func parseUsernsIDMap(runtimeIDMap []*runtime.IDMapping) ([]runtimespec.LinuxIDMapping, error) {
 | 
						|
	var m []runtimespec.LinuxIDMapping
 | 
						|
 | 
						|
	if len(runtimeIDMap) == 0 {
 | 
						|
		return m, nil
 | 
						|
	}
 | 
						|
 | 
						|
	if len(runtimeIDMap) > 1 {
 | 
						|
		// We only accept 1 line, because containerd.WithRemappedSnapshot() only supports that.
 | 
						|
		return m, fmt.Errorf("only one mapping line supported, got %v mapping lines", len(runtimeIDMap))
 | 
						|
	}
 | 
						|
 | 
						|
	// We know len is 1 now.
 | 
						|
	if runtimeIDMap[0] == nil {
 | 
						|
		return m, nil
 | 
						|
	}
 | 
						|
	uidMap := *runtimeIDMap[0]
 | 
						|
 | 
						|
	if uidMap.Length < 1 {
 | 
						|
		return m, fmt.Errorf("invalid mapping length: %v", uidMap.Length)
 | 
						|
	}
 | 
						|
 | 
						|
	m = []runtimespec.LinuxIDMapping{
 | 
						|
		{
 | 
						|
			ContainerID: uidMap.ContainerId,
 | 
						|
			HostID:      uidMap.HostId,
 | 
						|
			Size:        uidMap.Length,
 | 
						|
		},
 | 
						|
	}
 | 
						|
 | 
						|
	return m, nil
 | 
						|
}
 | 
						|
 | 
						|
func parseUsernsIDs(userns *runtime.UserNamespace) (uids, gids []runtimespec.LinuxIDMapping, retErr error) {
 | 
						|
	if userns == nil {
 | 
						|
		// If userns is not set, the kubelet doesn't support this option
 | 
						|
		// and we should just fallback to no userns. This is completely
 | 
						|
		// valid.
 | 
						|
		return nil, nil, nil
 | 
						|
	}
 | 
						|
 | 
						|
	uids, err := parseUsernsIDMap(userns.GetUids())
 | 
						|
	if err != nil {
 | 
						|
		return nil, nil, fmt.Errorf("UID mapping: %w", err)
 | 
						|
	}
 | 
						|
 | 
						|
	gids, err = parseUsernsIDMap(userns.GetGids())
 | 
						|
	if err != nil {
 | 
						|
		return nil, nil, fmt.Errorf("GID mapping: %w", err)
 | 
						|
	}
 | 
						|
 | 
						|
	switch mode := userns.GetMode(); mode {
 | 
						|
	case runtime.NamespaceMode_NODE:
 | 
						|
		if len(uids) != 0 || len(gids) != 0 {
 | 
						|
			return nil, nil, fmt.Errorf("can't use user namespace mode %q with mappings. Got %v UID mappings and %v GID mappings", mode, len(uids), len(gids))
 | 
						|
		}
 | 
						|
	case runtime.NamespaceMode_POD:
 | 
						|
		// This is valid, we will handle it in WithPodNamespaces().
 | 
						|
		if len(uids) == 0 || len(gids) == 0 {
 | 
						|
			return nil, nil, fmt.Errorf("can't use user namespace mode %q without UID and GID mappings", mode)
 | 
						|
		}
 | 
						|
	default:
 | 
						|
		return nil, nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
 | 
						|
	}
 | 
						|
 | 
						|
	return uids, gids, nil
 | 
						|
}
 | 
						|
 | 
						|
// sameUsernsConfig checks if the userns configs are the same. If the mappings
 | 
						|
// on each config are the same but in different order, it returns false.
 | 
						|
// XXX: If the runtime.UserNamespace struct changes, we should update this
 | 
						|
// function accordingly.
 | 
						|
func sameUsernsConfig(a, b *runtime.UserNamespace) bool {
 | 
						|
	// If both are nil, they are the same.
 | 
						|
	if a == nil && b == nil {
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	// If only one is nil, they are different.
 | 
						|
	if a == nil || b == nil {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	// At this point, a is not nil nor b.
 | 
						|
 | 
						|
	if a.GetMode() != b.GetMode() {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
 | 
						|
	aUids, aGids, err := parseUsernsIDs(a)
 | 
						|
	if err != nil {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	bUids, bGids, err := parseUsernsIDs(b)
 | 
						|
	if err != nil {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
 | 
						|
	if !sameMapping(aUids, bUids) {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	if !sameMapping(aGids, bGids) {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	return true
 | 
						|
}
 | 
						|
 | 
						|
// sameMapping checks if the mappings are the same. If the mappings are the same
 | 
						|
// but in different order, it returns false.
 | 
						|
func sameMapping(a, b []runtimespec.LinuxIDMapping) bool {
 | 
						|
	if len(a) != len(b) {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
 | 
						|
	for x := range a {
 | 
						|
		if a[x].ContainerID != b[x].ContainerID {
 | 
						|
			return false
 | 
						|
		}
 | 
						|
		if a[x].HostID != b[x].HostID {
 | 
						|
			return false
 | 
						|
		}
 | 
						|
		if a[x].Size != b[x].Size {
 | 
						|
			return false
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return true
 | 
						|
}
 |