From 1deaedd38a5490633e2d12614d3071f2186fd4cd Mon Sep 17 00:00:00 2001 From: Samuel Karp Date: Thu, 22 Sep 2022 17:01:38 -0700 Subject: [PATCH] sbserver: persist sandbox during partial teardown Port of 4f4aad057d17caaa1a72e8e0f0e799ce9071549b to sbserver Signed-off-by: Samuel Karp --- pkg/cri/sbserver/podsandbox/helpers.go | 2 + pkg/cri/sbserver/podsandbox/sandbox_delete.go | 10 +- pkg/cri/sbserver/podsandbox/sandbox_run.go | 58 +++++++----- pkg/cri/sbserver/podsandbox/sandbox_stop.go | 5 +- pkg/cri/sbserver/restart.go | 45 +++++++-- pkg/cri/sbserver/sandbox_run.go | 91 ++++++++++++++----- 6 files changed, 151 insertions(+), 60 deletions(-) diff --git a/pkg/cri/sbserver/podsandbox/helpers.go b/pkg/cri/sbserver/podsandbox/helpers.go index 03202e017..d3f90888c 100644 --- a/pkg/cri/sbserver/podsandbox/helpers.go +++ b/pkg/cri/sbserver/podsandbox/helpers.go @@ -63,6 +63,8 @@ const ( sandboxMetadataExtension = criContainerdPrefix + ".sandbox.metadata" // runtimeRunhcsV1 is the runtime type for runhcs. runtimeRunhcsV1 = "io.containerd.runhcs.v1" + // MetadataKey is the key used for storing metadata in the sandbox extensions + MetadataKey = "metadata" ) const ( diff --git a/pkg/cri/sbserver/podsandbox/sandbox_delete.go b/pkg/cri/sbserver/podsandbox/sandbox_delete.go index b40c768ca..73b67e58f 100644 --- a/pkg/cri/sbserver/podsandbox/sandbox_delete.go +++ b/pkg/cri/sbserver/podsandbox/sandbox_delete.go @@ -49,11 +49,13 @@ func (c *Controller) Delete(ctx context.Context, sandboxID string) (*api.Control } // Delete sandbox container. - if err := sandbox.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil { - if !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("failed to delete sandbox container %q: %w", sandboxID, err) + if sandbox.Container != nil { + if err := sandbox.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil { + if !errdefs.IsNotFound(err) { + return nil, fmt.Errorf("failed to delete sandbox container %q: %w", sandboxID, err) + } + log.G(ctx).Tracef("Sandbox controller Delete called for sandbox container %q that does not exist", sandboxID) } - log.G(ctx).Tracef("Sandbox controller Delete called for sandbox container %q that does not exist", sandboxID) } return &api.ControllerDeleteResponse{}, nil diff --git a/pkg/cri/sbserver/podsandbox/sandbox_run.go b/pkg/cri/sbserver/podsandbox/sandbox_run.go index 3f372a3b2..d5aef016f 100644 --- a/pkg/cri/sbserver/podsandbox/sandbox_run.go +++ b/pkg/cri/sbserver/podsandbox/sandbox_run.go @@ -21,6 +21,13 @@ import ( "errors" "fmt" + "github.com/containerd/nri" + v1 "github.com/containerd/nri/types/v1" + "github.com/containerd/typeurl" + "github.com/davecgh/go-spew/spew" + "github.com/opencontainers/selinux/go-selinux" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + "github.com/containerd/containerd" api "github.com/containerd/containerd/api/services/sandbox/v1" containerdio "github.com/containerd/containerd/cio" @@ -33,12 +40,6 @@ import ( ctrdutil "github.com/containerd/containerd/pkg/cri/util" "github.com/containerd/containerd/protobuf" "github.com/containerd/containerd/snapshots" - "github.com/containerd/nri" - v1 "github.com/containerd/nri/types/v1" - "github.com/containerd/typeurl" - "github.com/davecgh/go-spew/spew" - "github.com/opencontainers/selinux/go-selinux" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ) func init() { @@ -46,14 +47,26 @@ func init() { "github.com/containerd/cri/pkg/store/sandbox", "Metadata") } -func (c *Controller) Start(ctx context.Context, id string) (_ *api.ControllerStartResponse, retErr error) { +// Start creates resources required for the sandbox and starts the sandbox. If an error occurs, Start attempts to tear +// down the created resources. If an error occurs while tearing down resources, a zero-valued response is returned +// alongside the error. If the teardown was successful, a nil response is returned with the error. +// TODO(samuelkarp) Determine whether this error indication is reasonable to retain once controller.Delete is implemented. +func (c *Controller) Start(ctx context.Context, id string) (resp *api.ControllerStartResponse, retErr error) { + var cleanupErr error + defer func() { + if retErr != nil && cleanupErr != nil { + log.G(ctx).WithField("id", id).WithError(cleanupErr).Errorf("failed to fully teardown sandbox resources after earlier error: %s", retErr) + resp = &api.ControllerStartResponse{} + } + }() + sandboxInfo, err := c.client.SandboxStore().Get(ctx, id) if err != nil { return nil, fmt.Errorf("unable to find sandbox with id %q: %w", id, err) } var metadata sandboxstore.Metadata - if err := sandboxInfo.GetExtension("metadata", &metadata); err != nil { + if err := sandboxInfo.GetExtension(MetadataKey, &metadata); err != nil { return nil, fmt.Errorf("failed to get sandbox %q metadata: %w", id, err) } @@ -137,11 +150,11 @@ func (c *Controller) Start(ctx context.Context, id string) (_ *api.ControllerSta return nil, fmt.Errorf("failed to create containerd container: %w", err) } defer func() { - if retErr != nil { + if retErr != nil && cleanupErr == nil { deferCtx, deferCancel := ctrdutil.DeferContext() defer deferCancel() - if err := container.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id) + if cleanupErr = container.Delete(deferCtx, containerd.WithSnapshotCleanup); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Errorf("Failed to delete containerd container %q", id) } } }() @@ -153,10 +166,10 @@ func (c *Controller) Start(ctx context.Context, id string) (_ *api.ControllerSta sandboxRootDir, err) } defer func() { - if retErr != nil { + if retErr != nil && cleanupErr == nil { // Cleanup the sandbox root directory. - if err := c.os.RemoveAll(sandboxRootDir); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to remove sandbox root directory %q", + if cleanupErr = c.os.RemoveAll(sandboxRootDir); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove sandbox root directory %q", sandboxRootDir) } } @@ -168,10 +181,10 @@ func (c *Controller) Start(ctx context.Context, id string) (_ *api.ControllerSta volatileSandboxRootDir, err) } defer func() { - if retErr != nil { + if retErr != nil && cleanupErr == nil { // Cleanup the volatile sandbox root directory. - if err := c.os.RemoveAll(volatileSandboxRootDir); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to remove volatile sandbox root directory %q", + if cleanupErr = c.os.RemoveAll(volatileSandboxRootDir); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove volatile sandbox root directory %q", volatileSandboxRootDir) } } @@ -182,9 +195,9 @@ func (c *Controller) Start(ctx context.Context, id string) (_ *api.ControllerSta return nil, fmt.Errorf("failed to setup sandbox files: %w", err) } defer func() { - if retErr != nil { - if err = c.cleanupSandboxFiles(id, config); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to cleanup sandbox files in %q", + if retErr != nil && cleanupErr == nil { + if cleanupErr = c.cleanupSandboxFiles(id, config); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Errorf("Failed to cleanup sandbox files in %q", sandboxRootDir) } } @@ -210,12 +223,13 @@ func (c *Controller) Start(ctx context.Context, id string) (_ *api.ControllerSta return nil, fmt.Errorf("failed to create containerd task: %w", err) } defer func() { - if retErr != nil { + if retErr != nil && cleanupErr == nil { deferCtx, deferCancel := ctrdutil.DeferContext() defer deferCancel() // Cleanup the sandbox container if an error is returned. if _, err := task.Delete(deferCtx, WithNRISandboxDelete(id), containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { log.G(ctx).WithError(err).Errorf("Failed to delete sandbox container %q", id) + cleanupErr = err } } }() @@ -245,7 +259,7 @@ func (c *Controller) Start(ctx context.Context, id string) (_ *api.ControllerSta return nil, fmt.Errorf("failed to start sandbox container task %q: %w", id, err) } - resp := &api.ControllerStartResponse{ + resp = &api.ControllerStartResponse{ SandboxID: id, Pid: task.Pid(), CreatedAt: protobuf.ToTimestamp(info.CreatedAt), diff --git a/pkg/cri/sbserver/podsandbox/sandbox_stop.go b/pkg/cri/sbserver/podsandbox/sandbox_stop.go index 5173431ba..0f817e08f 100644 --- a/pkg/cri/sbserver/podsandbox/sandbox_stop.go +++ b/pkg/cri/sbserver/podsandbox/sandbox_stop.go @@ -21,13 +21,14 @@ import ( "fmt" "syscall" + "github.com/sirupsen/logrus" + eventtypes "github.com/containerd/containerd/api/events" api "github.com/containerd/containerd/api/services/sandbox/v1" "github.com/containerd/containerd/errdefs" sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" ctrdutil "github.com/containerd/containerd/pkg/cri/util" "github.com/containerd/containerd/protobuf" - "github.com/sirupsen/logrus" ) func (c *Controller) Stop(ctx context.Context, sandboxID string) (*api.ControllerStopResponse, error) { @@ -44,7 +45,7 @@ func (c *Controller) Stop(ctx context.Context, sandboxID string) (*api.Controlle // TODO: The Controller maintains its own Status instead of CRI's sandboxStore. // Only stop sandbox container when it's running or unknown. state := sandbox.Status.Get().State - if state == sandboxstore.StateReady || state == sandboxstore.StateUnknown { + if (state == sandboxstore.StateReady || state == sandboxstore.StateUnknown) && sandbox.Container != nil { if err := c.stopSandboxContainer(ctx, sandbox); err != nil { return nil, fmt.Errorf("failed to stop sandbox container %q in %q state: %w", sandboxID, state, err) } diff --git a/pkg/cri/sbserver/restart.go b/pkg/cri/sbserver/restart.go index 6bc91968c..547dd1093 100644 --- a/pkg/cri/sbserver/restart.go +++ b/pkg/cri/sbserver/restart.go @@ -30,6 +30,7 @@ import ( "github.com/containerd/containerd/errdefs" containerdimages "github.com/containerd/containerd/images" "github.com/containerd/containerd/log" + "github.com/containerd/containerd/pkg/cri/sbserver/podsandbox" "github.com/containerd/containerd/platforms" "github.com/containerd/typeurl" "golang.org/x/sync/errgroup" @@ -82,6 +83,28 @@ func (c *criService) recover(ctx context.Context) error { return err } + // Recover sandboxes in the new SandboxStore + storedSandboxes, err := c.client.SandboxStore().List(ctx) + if err != nil { + return fmt.Errorf("failed to list sandboxes from API: %w", err) + } + for _, sbx := range storedSandboxes { + if _, err := c.sandboxStore.Get(sbx.ID); err == nil { + continue + } + metadata := sandboxstore.Metadata{} + err := sbx.GetExtension(podsandbox.MetadataKey, &metadata) + if err != nil { + return fmt.Errorf("failed to get metadata for stored sandbox %q: %w", sbx.ID, err) + } + sb := sandboxstore.NewSandbox(metadata, sandboxstore.Status{State: sandboxstore.StateUnknown}) + // Load network namespace. + sb.NetNS = getNetNS(&metadata) + if err := c.sandboxStore.Add(sb); err != nil { + return fmt.Errorf("failed to add stored sandbox %q to store: %w", sbx.ID, err) + } + } + // Recover all containers. containers, err := c.client.Containers(ctx, filterLabel(containerKindLabel, containerKindContainer)) if err != nil { @@ -427,15 +450,7 @@ func (c *criService) loadSandbox(ctx context.Context, cntr containerd.Container) sandbox.Container = cntr // Load network namespace. - if goruntime.GOOS != "windows" && - meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE { - // Don't need to load netns for host network sandbox. - return sandbox, nil - } - if goruntime.GOOS == "windows" && meta.Config.GetWindows().GetSecurityContext().GetHostProcess() { - return sandbox, nil - } - sandbox.NetNS = netns.LoadNetNS(meta.NetNSPath) + sandbox.NetNS = getNetNS(meta) // It doesn't matter whether task is running or not. If it is running, sandbox // status will be `READY`; if it is not running, sandbox status will be `NOT_READY`, @@ -443,6 +458,18 @@ func (c *criService) loadSandbox(ctx context.Context, cntr containerd.Container) return sandbox, nil } +func getNetNS(meta *sandboxstore.Metadata) *netns.NetNS { + if goruntime.GOOS != "windows" && + meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE { + // Don't need to load netns for host network sandbox. + return nil + } + if goruntime.GOOS == "windows" && meta.Config.GetWindows().GetSecurityContext().GetHostProcess() { + return nil + } + return netns.LoadNetNS(meta.NetNSPath) +} + // loadImages loads images from containerd. func (c *criService) loadImages(ctx context.Context, cImages []containerd.Image) { snapshotter := c.config.ContainerdConfig.Snapshotter diff --git a/pkg/cri/sbserver/sandbox_run.go b/pkg/cri/sbserver/sandbox_run.go index 8bc0ca499..aec116fd0 100644 --- a/pkg/cri/sbserver/sandbox_run.go +++ b/pkg/cri/sbserver/sandbox_run.go @@ -27,14 +27,16 @@ import ( "strings" "time" - eventtypes "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/protobuf" - sb "github.com/containerd/containerd/sandbox" "github.com/containerd/go-cni" "github.com/containerd/typeurl" "github.com/sirupsen/logrus" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + eventtypes "github.com/containerd/containerd/api/events" + "github.com/containerd/containerd/pkg/cri/sbserver/podsandbox" + "github.com/containerd/containerd/protobuf" + sb "github.com/containerd/containerd/sandbox" + "github.com/containerd/containerd/log" "github.com/containerd/containerd/pkg/cri/annotations" criconfig "github.com/containerd/containerd/pkg/cri/config" @@ -64,6 +66,13 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox } name := makeSandboxName(metadata) log.G(ctx).WithField("podsandboxid", id).Debugf("generated id for sandbox name %q", name) + + // cleanupErr records the last error returned by the critical cleanup operations in deferred functions, + // like CNI teardown and stopping the running sandbox task. + // If cleanup is not completed for some reason, the CRI-plugin will leave the sandbox + // in a not-ready state, which can later be cleaned up by the next execution of the kubelet's syncPod workflow. + var cleanupErr error + // Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the // same sandbox. if err := c.sandboxNameIndex.Reserve(name, id); err != nil { @@ -71,7 +80,8 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox } defer func() { // Release the name if the function returns with an error. - if retErr != nil { + // When cleanupErr != nil, the name will be cleaned in sandbox_remove. + if retErr != nil && cleanupErr == nil { c.sandboxNameIndex.ReleaseByName(name) } }() @@ -98,6 +108,25 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox }, ) + if _, err := c.client.SandboxStore().Create(ctx, sandboxInfo); err != nil { + return nil, fmt.Errorf("failed to save sandbox metadata: %w", err) + } + defer func() { + if retErr != nil && cleanupErr == nil { + cleanupErr = c.client.SandboxStore().Delete(ctx, id) + } + }() + + defer func() { + // Put the sandbox into sandbox store when some resources fail to be cleaned. + if retErr != nil && cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Errorf("encountered an error cleaning up failed sandbox %q, marking sandbox state as SANDBOX_UNKNOWN", id) + if err := c.sandboxStore.Add(sandbox); err != nil { + log.G(ctx).WithError(err).Errorf("failed to add sandbox %+v into store", sandbox) + } + } + }() + var ( podNetwork = true err error @@ -110,7 +139,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox } if goruntime.GOOS == "windows" && config.GetWindows().GetSecurityContext().GetHostProcess() { - //Windows HostProcess pods can only run on the host network + // Windows HostProcess pods can only run on the host network podNetwork = false } @@ -128,20 +157,32 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox if err != nil { return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err) } + // Update network namespace in the store, which is used to generate the container's spec sandbox.NetNSPath = sandbox.NetNS.GetPath() defer func() { - if retErr != nil { + // Remove the network namespace only if all the resource cleanup is done + if retErr != nil && cleanupErr == nil { + if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id) + return + } + sandbox.NetNSPath = "" + } + }() + + // Define this defer to teardownPodNetwork prior to the setupPodNetwork function call. + // This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource + // creation functions. + defer func() { + // Remove the network namespace only if all the resource cleanup is done. + if retErr != nil && cleanupErr == nil { deferCtx, deferCancel := ctrdutil.DeferContext() defer deferCancel() // Teardown network if an error is returned. - if err := c.teardownPodNetwork(deferCtx, sandbox); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to destroy network for sandbox %q", id) + if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id) } - if err := sandbox.NetNS.Remove(); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id) - } - sandbox.NetNSPath = "" } }() @@ -159,8 +200,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox sandboxCreateNetworkTimer.UpdateSince(netStart) } - // Save sandbox metadata to store - if err := sandboxInfo.AddExtension("metadata", &sandbox.Metadata); err != nil { + if err := sandboxInfo.AddExtension(podsandbox.MetadataKey, &sandbox.Metadata); err != nil { return nil, fmt.Errorf("unable to save sandbox %q to store: %w", id, err) } @@ -169,8 +209,9 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox return nil, fmt.Errorf("failed to get sandbox controller: %w", err) } - if _, err := c.client.SandboxStore().Create(ctx, sandboxInfo); err != nil { - return nil, fmt.Errorf("failed to save sandbox metadata: %w", err) + // Save sandbox metadata to store + if sandboxInfo, err = c.client.SandboxStore().Update(ctx, sandboxInfo, "extensions"); err != nil { + return nil, fmt.Errorf("unable to update extensions for sandbox %q: %w", id, err) } runtimeStart := time.Now() @@ -181,8 +222,19 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox resp, err := controller.Start(ctx, id) if err != nil { + sandbox.Container, _ = c.client.LoadContainer(ctx, id) + if resp != nil && resp.SandboxID == "" && resp.Pid == 0 && resp.CreatedAt == nil && len(resp.Labels) == 0 { + // if resp is a non-nil zero-value, an error occurred during cleanup + cleanupErr = fmt.Errorf("failed to cleanup sandbox") + } return nil, fmt.Errorf("failed to start sandbox %q: %w", id, err) } + // TODO: get rid of this. sandbox object should no longer have Container field. + container, err := c.client.LoadContainer(ctx, id) + if err != nil { + return nil, fmt.Errorf("failed to load container %q for sandbox: %w", id, err) + } + sandbox.Container = container labels := resp.GetLabels() if labels == nil { @@ -201,14 +253,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox return nil, fmt.Errorf("failed to update sandbox status: %w", err) } - // TODO: get rid of this. sandbox object should no longer have Container field. - container, err := c.client.LoadContainer(ctx, id) - if err != nil { - return nil, fmt.Errorf("failed to load container %q for sandbox: %w", id, err) - } // Add sandbox into sandbox store in INIT state. - sandbox.Container = container - if err := c.sandboxStore.Add(sandbox); err != nil { return nil, fmt.Errorf("failed to add sandbox %+v into store: %w", sandbox, err) }