Persist container and sandbox if resource cleanup fails, like teardownPodNetwork
Signed-off-by: Qiutong Song <songqt01@gmail.com>
This commit is contained in:
parent
7a66f70b5b
commit
4f4aad057d
@ -23,6 +23,10 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/containerd/containerd"
|
||||||
|
"github.com/containerd/containerd/containers"
|
||||||
|
"github.com/containerd/typeurl"
|
||||||
|
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
||||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
|
|
||||||
containerstore "github.com/containerd/containerd/pkg/cri/store/container"
|
containerstore "github.com/containerd/containerd/pkg/cri/store/container"
|
||||||
@ -44,3 +48,19 @@ func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.Up
|
|||||||
}
|
}
|
||||||
return &runtime.UpdateContainerResourcesResponse{}, nil
|
return &runtime.UpdateContainerResourcesResponse{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Copied from container_update_resources.go because that file is not built for darwin.
|
||||||
|
// updateContainerSpec updates container spec.
|
||||||
|
func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error {
|
||||||
|
any, err := typeurl.MarshalAny(spec)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to marshal spec %+v: %w", spec, err)
|
||||||
|
}
|
||||||
|
if err := cntr.Update(ctx, func(ctx context.Context, client *containerd.Client, c *containers.Container) error {
|
||||||
|
c.Spec = any
|
||||||
|
return nil
|
||||||
|
}); err != nil {
|
||||||
|
return fmt.Errorf("failed to update container spec: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
@ -70,14 +70,22 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|||||||
}
|
}
|
||||||
name := makeSandboxName(metadata)
|
name := makeSandboxName(metadata)
|
||||||
log.G(ctx).WithField("podsandboxid", id).Debugf("generated id for sandbox name %q", name)
|
log.G(ctx).WithField("podsandboxid", id).Debugf("generated id for sandbox name %q", name)
|
||||||
|
|
||||||
|
// cleanupErr records the last error returned by the critical cleanup operations in deferred functions,
|
||||||
|
// like CNI teardown and stopping the running sandbox task.
|
||||||
|
// If cleanup is not completed for some reason, the CRI-plugin will leave the sandbox
|
||||||
|
// in a not-ready state, which can later be cleaned up by the next execution of the kubelet's syncPod workflow.
|
||||||
|
var cleanupErr error
|
||||||
|
|
||||||
// Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the
|
// Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the
|
||||||
// same sandbox.
|
// same sandbox.
|
||||||
if err := c.sandboxNameIndex.Reserve(name, id); err != nil {
|
if err := c.sandboxNameIndex.Reserve(name, id); err != nil {
|
||||||
return nil, fmt.Errorf("failed to reserve sandbox name %q: %w", name, err)
|
return nil, fmt.Errorf("failed to reserve sandbox name %q: %w", name, err)
|
||||||
}
|
}
|
||||||
defer func() {
|
defer func() {
|
||||||
// Release the name if the function returns with an error.
|
// Release the name if the function returns with an error and all the resource cleanup is done.
|
||||||
if retErr != nil {
|
// When cleanupErr != nil, the name will be cleaned in sandbox_remove.
|
||||||
|
if retErr != nil && cleanupErr == nil {
|
||||||
c.sandboxNameIndex.ReleaseByName(name)
|
c.sandboxNameIndex.ReleaseByName(name)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
@ -111,70 +119,13 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|||||||
}
|
}
|
||||||
log.G(ctx).WithField("podsandboxid", id).Debugf("use OCI runtime %+v", ociRuntime)
|
log.G(ctx).WithField("podsandboxid", id).Debugf("use OCI runtime %+v", ociRuntime)
|
||||||
|
|
||||||
podNetwork := true
|
|
||||||
|
|
||||||
if goruntime.GOOS != "windows" &&
|
|
||||||
config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
|
|
||||||
// Pod network is not needed on linux with host network.
|
|
||||||
podNetwork = false
|
|
||||||
}
|
|
||||||
if goruntime.GOOS == "windows" &&
|
|
||||||
config.GetWindows().GetSecurityContext().GetHostProcess() {
|
|
||||||
//Windows HostProcess pods can only run on the host network
|
|
||||||
podNetwork = false
|
|
||||||
}
|
|
||||||
|
|
||||||
if podNetwork {
|
|
||||||
netStart := time.Now()
|
|
||||||
// If it is not in host network namespace then create a namespace and set the sandbox
|
|
||||||
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
|
|
||||||
// namespaces. If the pod is in host network namespace then both are empty and should not
|
|
||||||
// be used.
|
|
||||||
var netnsMountDir = "/var/run/netns"
|
|
||||||
if c.config.NetNSMountsUnderStateDir {
|
|
||||||
netnsMountDir = filepath.Join(c.config.StateDir, "netns")
|
|
||||||
}
|
|
||||||
sandbox.NetNS, err = netns.NewNetNS(netnsMountDir)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
|
|
||||||
}
|
|
||||||
sandbox.NetNSPath = sandbox.NetNS.GetPath()
|
|
||||||
defer func() {
|
|
||||||
if retErr != nil {
|
|
||||||
deferCtx, deferCancel := ctrdutil.DeferContext()
|
|
||||||
defer deferCancel()
|
|
||||||
// Teardown network if an error is returned.
|
|
||||||
if err := c.teardownPodNetwork(deferCtx, sandbox); err != nil {
|
|
||||||
log.G(ctx).WithError(err).Errorf("Failed to destroy network for sandbox %q", id)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := sandbox.NetNS.Remove(); err != nil {
|
|
||||||
log.G(ctx).WithError(err).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
|
|
||||||
}
|
|
||||||
sandbox.NetNSPath = ""
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Setup network for sandbox.
|
|
||||||
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
|
|
||||||
// rely on the assumption that CRI shim will not be querying the network namespace to check the
|
|
||||||
// network states such as IP.
|
|
||||||
// In future runtime implementation should avoid relying on CRI shim implementation details.
|
|
||||||
// In this case however caching the IP will add a subtle performance enhancement by avoiding
|
|
||||||
// calls to network namespace of the pod to query the IP of the veth interface on every
|
|
||||||
// SandboxStatus request.
|
|
||||||
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
|
|
||||||
}
|
|
||||||
sandboxCreateNetworkTimer.UpdateSince(netStart)
|
|
||||||
}
|
|
||||||
|
|
||||||
runtimeStart := time.Now()
|
runtimeStart := time.Now()
|
||||||
// Create sandbox container.
|
// Create sandbox container.
|
||||||
// NOTE: sandboxContainerSpec SHOULD NOT have side
|
// NOTE: sandboxContainerSpec SHOULD NOT have side
|
||||||
// effect, e.g. accessing/creating files, so that we can test
|
// effect, e.g. accessing/creating files, so that we can test
|
||||||
// it safely.
|
// it safely.
|
||||||
spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, sandbox.NetNSPath, ociRuntime.PodAnnotations)
|
// NOTE: the network namespace path will be created later and updated through the updateNetNamespacePath function
|
||||||
|
spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, "", ociRuntime.PodAnnotations)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to generate sandbox container spec: %w", err)
|
return nil, fmt.Errorf("failed to generate sandbox container spec: %w", err)
|
||||||
}
|
}
|
||||||
@ -222,12 +173,27 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create containerd container: %w", err)
|
return nil, fmt.Errorf("failed to create containerd container: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add container into sandbox store in INIT state.
|
||||||
|
sandbox.Container = container
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
if retErr != nil {
|
// Put the sandbox into the sandbox store when some resource fails to be cleaned up.
|
||||||
|
if retErr != nil && cleanupErr != nil {
|
||||||
|
log.G(ctx).WithError(cleanupErr).Errorf("encountered an error cleaning up failed sandbox %q, marking sandbox state as SANDBOX_UNKNOWN", id)
|
||||||
|
if err := c.sandboxStore.Add(sandbox); err != nil {
|
||||||
|
log.G(ctx).WithError(err).Errorf("failed to add sandbox %+v into store", sandbox)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
// Delete container only if all the resource cleanup is done.
|
||||||
|
if retErr != nil && cleanupErr == nil {
|
||||||
deferCtx, deferCancel := ctrdutil.DeferContext()
|
deferCtx, deferCancel := ctrdutil.DeferContext()
|
||||||
defer deferCancel()
|
defer deferCancel()
|
||||||
if err := container.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil {
|
if cleanupErr = container.Delete(deferCtx, containerd.WithSnapshotCleanup); cleanupErr != nil {
|
||||||
log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id)
|
log.G(ctx).WithError(cleanupErr).Errorf("Failed to delete containerd container %q", id)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
@ -281,6 +247,82 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|||||||
return nil, fmt.Errorf("failed to get sandbox container info: %w", err)
|
return nil, fmt.Errorf("failed to get sandbox container info: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
podNetwork := true
|
||||||
|
|
||||||
|
if goruntime.GOOS != "windows" &&
|
||||||
|
config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
|
||||||
|
// Pod network is not needed on linux with host network.
|
||||||
|
podNetwork = false
|
||||||
|
}
|
||||||
|
if goruntime.GOOS == "windows" &&
|
||||||
|
config.GetWindows().GetSecurityContext().GetHostProcess() {
|
||||||
|
// Windows HostProcess pods can only run on the host network
|
||||||
|
podNetwork = false
|
||||||
|
}
|
||||||
|
|
||||||
|
if podNetwork {
|
||||||
|
netStart := time.Now()
|
||||||
|
|
||||||
|
// If it is not in host network namespace then create a namespace and set the sandbox
|
||||||
|
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
|
||||||
|
// namespaces. If the pod is in host network namespace then both are empty and should not
|
||||||
|
// be used.
|
||||||
|
var netnsMountDir = "/var/run/netns"
|
||||||
|
if c.config.NetNSMountsUnderStateDir {
|
||||||
|
netnsMountDir = filepath.Join(c.config.StateDir, "netns")
|
||||||
|
}
|
||||||
|
sandbox.NetNS, err = netns.NewNetNS(netnsMountDir)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
|
||||||
|
}
|
||||||
|
sandbox.NetNSPath = sandbox.NetNS.GetPath()
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
// Remove the network namespace only if all the resource cleanup is done.
|
||||||
|
if retErr != nil && cleanupErr == nil {
|
||||||
|
if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil {
|
||||||
|
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
sandbox.NetNSPath = ""
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Update network namespace in the container's spec
|
||||||
|
c.updateNetNamespacePath(spec, sandbox.NetNSPath)
|
||||||
|
if err := updateContainerSpec(ctx, container, spec); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to update the network namespace for the sandbox container %q: %w", id, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Define this defer to teardownPodNetwork prior to the setupPodNetwork function call.
|
||||||
|
// This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource creation functions.
|
||||||
|
defer func() {
|
||||||
|
// Teardown the network only if all the resource cleanup is done.
|
||||||
|
if retErr != nil && cleanupErr == nil {
|
||||||
|
deferCtx, deferCancel := ctrdutil.DeferContext()
|
||||||
|
defer deferCancel()
|
||||||
|
// Teardown network if an error is returned.
|
||||||
|
if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil {
|
||||||
|
log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Setup network for sandbox.
|
||||||
|
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
|
||||||
|
// rely on the assumption that CRI shim will not be querying the network namespace to check the
|
||||||
|
// network states such as IP.
|
||||||
|
// In future runtime implementation should avoid relying on CRI shim implementation details.
|
||||||
|
// In this case however caching the IP will add a subtle performance enhancement by avoiding
|
||||||
|
// calls to network namespace of the pod to query the IP of the veth interface on every
|
||||||
|
// SandboxStatus request.
|
||||||
|
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
sandboxCreateNetworkTimer.UpdateSince(netStart)
|
||||||
|
}
|
||||||
|
|
||||||
// Create sandbox task in containerd.
|
// Create sandbox task in containerd.
|
||||||
log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).",
|
log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).",
|
||||||
id, name)
|
id, name)
|
||||||
@ -301,6 +343,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|||||||
// Cleanup the sandbox container if an error is returned.
|
// Cleanup the sandbox container if an error is returned.
|
||||||
if _, err := task.Delete(deferCtx, WithNRISandboxDelete(id), containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
|
if _, err := task.Delete(deferCtx, WithNRISandboxDelete(id), containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
|
||||||
log.G(ctx).WithError(err).Errorf("Failed to delete sandbox container %q", id)
|
log.G(ctx).WithError(err).Errorf("Failed to delete sandbox container %q", id)
|
||||||
|
cleanupErr = err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
@ -339,9 +382,6 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|||||||
return nil, fmt.Errorf("failed to update sandbox status: %w", err)
|
return nil, fmt.Errorf("failed to update sandbox status: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add sandbox into sandbox store in INIT state.
|
|
||||||
sandbox.Container = container
|
|
||||||
|
|
||||||
if err := c.sandboxStore.Add(sandbox); err != nil {
|
if err := c.sandboxStore.Add(sandbox); err != nil {
|
||||||
return nil, fmt.Errorf("failed to add sandbox %+v into store: %w", sandbox, err)
|
return nil, fmt.Errorf("failed to add sandbox %+v into store: %w", sandbox, err)
|
||||||
}
|
}
|
||||||
|
@ -348,3 +348,12 @@ func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
|
|||||||
|
|
||||||
return taskOpts
|
return taskOpts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
|
||||||
|
for i := range spec.Linux.Namespaces {
|
||||||
|
if spec.Linux.Namespaces[i].Type == runtimespec.NetworkNamespace {
|
||||||
|
spec.Linux.Namespaces[i].Path = nsPath
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -54,3 +54,6 @@ func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxCo
|
|||||||
func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
|
func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
|
||||||
return []containerd.NewTaskOpts{}
|
return []containerd.NewTaskOpts{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// updateNetNamespacePath is a no-op here: the body is empty, so neither spec
// nor nsPath is read or modified.
// NOTE(review): presumably this is the variant for platforms other than
// Linux and Windows — confirm against the file's build constraints.
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
|
||||||
|
}
|
||||||
|
@ -111,3 +111,7 @@ func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxCo
|
|||||||
func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
|
func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
|
||||||
|
spec.Windows.Network.NetworkNamespace = nsPath
|
||||||
|
}
|
||||||
|
@ -29,24 +29,24 @@ import (
|
|||||||
// | |
|
// | |
|
||||||
// | Create(Run) | Load
|
// | Create(Run) | Load
|
||||||
// | |
|
// | |
|
||||||
// Start | |
|
// | |
|
||||||
// (failed) | |
|
// | | Start
|
||||||
// +------------------+ +-----------+
|
// | |(failed and not cleaned)
|
||||||
// | | | |
|
// Start |--------------|--------------+
|
||||||
// | | | |
|
//(failed but cleaned)| | |
|
||||||
// | | | |
|
// +------------------+ |-----------+ |
|
||||||
// | | Start(Run) | |
|
// | | Start(Run) | | |
|
||||||
// | | | |
|
// | | | | |
|
||||||
// | PortForward +----v----+ | |
|
// | PortForward +----v----+ | | |
|
||||||
// | +------+ | | |
|
// | +------+ | | | |
|
||||||
// | | | READY <---------+ |
|
// | | | READY <---------+ | |
|
||||||
// | +------> | | |
|
// | +------> | | | |
|
||||||
// | +----+----+ | |
|
// | +----+----+ | | |
|
||||||
// | | | |
|
// | | | | |
|
||||||
// | | Stop/Exit | |
|
// | | Stop/Exit | | |
|
||||||
// | | | |
|
// | | | | |
|
||||||
// | +----v----+ | |
|
// | +----v----+ | | |
|
||||||
// | | <---------+ +----v----+
|
// | | <---------+ +----v--v-+
|
||||||
// | | NOTREADY| | |
|
// | | NOTREADY| | |
|
||||||
// | | <----------------+ UNKNOWN |
|
// | | <----------------+ UNKNOWN |
|
||||||
// | +----+----+ Stop | |
|
// | +----+----+ Stop | |
|
||||||
|
Loading…
Reference in New Issue
Block a user