CRI Sbserver: Make PodSandboxStatus friendlier to shim crashes
Currently, if you're using the shim-mode sandbox server support and the shim that's hosting the Sandbox API dies for any unintentional reason (segfault, OOM, etc.), PodSandboxStatus is effectively wedged: it keeps returning an error instead of a status. If we didn't go through the usual k8s flow of Stop->Remove and we still have an entry in our sandbox store, then no longer having a shim mapping means the exit was likely unintentional, so in that case we can report the sandbox as NOTREADY rather than erroring out. Signed-off-by: Danny Canter <danny@dcantah.dev>
This commit is contained in:
parent
7cd72cce99
commit
7a7519a780
@ -21,6 +21,7 @@ import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd/errdefs"
|
||||
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
@ -42,12 +43,32 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox
|
||||
return nil, fmt.Errorf("failed to get sandbox controller: %w", err)
|
||||
}
|
||||
|
||||
var (
|
||||
createdAt time.Time
|
||||
state string
|
||||
info map[string]string
|
||||
)
|
||||
cstatus, err := controller.Status(ctx, sandbox.ID, r.GetVerbose())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query controller status: %w", err)
|
||||
// If the shim died unexpectedly (segfault etc.) let's set the state as
|
||||
// NOTREADY and not just error out to make k8s and clients like crictl
|
||||
// happy. If we get back ErrNotFound from controller.Status above while
|
||||
// we're using the shim-mode controller, this is a decent indicator it
|
||||
// exited unexpectedly. We can use the fact that we successfully retrieved
|
||||
// the sandbox object from the store above to tell that this is true, otherwise
|
||||
// if we followed the normal k8s convention of StopPodSandbox -> RemovePodSandbox,
|
||||
// we wouldn't have that object in the store anymore.
|
||||
if !errdefs.IsNotFound(err) {
|
||||
return nil, fmt.Errorf("failed to query controller status: %w", err)
|
||||
}
|
||||
state = runtime.PodSandboxState_SANDBOX_NOTREADY.String()
|
||||
} else {
|
||||
state = cstatus.State
|
||||
createdAt = cstatus.CreatedAt
|
||||
info = cstatus.Info
|
||||
}
|
||||
|
||||
status := toCRISandboxStatus(sandbox.Metadata, cstatus.State, cstatus.CreatedAt, ip, additionalIPs)
|
||||
status := toCRISandboxStatus(sandbox.Metadata, state, createdAt, ip, additionalIPs)
|
||||
if status.GetCreatedAt() == 0 {
|
||||
// CRI doesn't allow CreatedAt == 0.
|
||||
sandboxInfo, err := c.client.SandboxStore().Get(ctx, sandbox.ID)
|
||||
@ -59,7 +80,7 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox
|
||||
|
||||
return &runtime.PodSandboxStatusResponse{
|
||||
Status: status,
|
||||
Info: cstatus.Info,
|
||||
Info: info,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user