Support unknown state for sandbox and container
Signed-off-by: Lantao Liu <lantaol@google.com>
This commit is contained in:
parent
4dc6f6d0b5
commit
83af4dad87
@ -98,9 +98,12 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta
|
|||||||
// container will not be started or removed again.
|
// container will not be started or removed again.
|
||||||
func setContainerRemoving(container containerstore.Container) error {
|
func setContainerRemoving(container containerstore.Container) error {
|
||||||
return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
|
return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
|
||||||
// Do not remove container if it's still running.
|
// Do not remove container if it's still running or unknown.
|
||||||
if status.State() == runtime.ContainerState_CONTAINER_RUNNING {
|
if status.State() == runtime.ContainerState_CONTAINER_RUNNING {
|
||||||
return status, errors.New("container is still running")
|
return status, errors.New("container is still running, to stop first")
|
||||||
|
}
|
||||||
|
if status.State() == runtime.ContainerState_CONTAINER_UNKNOWN {
|
||||||
|
return status, errors.New("container state is unknown, to stop first")
|
||||||
}
|
}
|
||||||
if status.Removing {
|
if status.Removing {
|
||||||
return status, errors.New("container is already in removing state")
|
return status, errors.New("container is already in removing state")
|
||||||
|
@ -60,6 +60,15 @@ func (c *criService) ContainerStatus(ctx context.Context, r *runtime.ContainerSt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
status := toCRIContainerStatus(container, spec, imageRef)
|
status := toCRIContainerStatus(container, spec, imageRef)
|
||||||
|
if status.GetCreatedAt() == 0 {
|
||||||
|
// CRI doesn't allow CreatedAt == 0.
|
||||||
|
info, err := container.Container.Info(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.Wrapf(err, "failed to get CreatedAt in %q state", status.State)
|
||||||
|
}
|
||||||
|
status.CreatedAt = info.CreatedAt.UnixNano()
|
||||||
|
}
|
||||||
|
|
||||||
info, err := toCRIContainerInfo(ctx, container, r.GetVerbose())
|
info, err := toCRIContainerInfo(ctx, container, r.GetVerbose())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Wrap(err, "failed to get verbose container info")
|
return nil, errors.Wrap(err, "failed to get verbose container info")
|
||||||
|
@ -19,6 +19,8 @@ package server
|
|||||||
import (
|
import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/containerd/containerd"
|
||||||
|
eventtypes "github.com/containerd/containerd/api/events"
|
||||||
"github.com/containerd/containerd/errdefs"
|
"github.com/containerd/containerd/errdefs"
|
||||||
"github.com/docker/docker/pkg/signal"
|
"github.com/docker/docker/pkg/signal"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
@ -60,8 +62,9 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore
|
|||||||
// Return without error if container is not running. This makes sure that
|
// Return without error if container is not running. This makes sure that
|
||||||
// stop only takes real action after the container is started.
|
// stop only takes real action after the container is started.
|
||||||
state := container.Status.Get().State()
|
state := container.Status.Get().State()
|
||||||
if state != runtime.ContainerState_CONTAINER_RUNNING {
|
if state != runtime.ContainerState_CONTAINER_RUNNING &&
|
||||||
logrus.Infof("Container to stop %q is not running, current state %q",
|
state != runtime.ContainerState_CONTAINER_UNKNOWN {
|
||||||
|
logrus.Infof("Container to stop %q must be in running or unknown state, current state %q",
|
||||||
id, criContainerStateToString(state))
|
id, criContainerStateToString(state))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -69,10 +72,40 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore
|
|||||||
task, err := container.Container.Task(ctx, nil)
|
task, err := container.Container.Task(ctx, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if !errdefs.IsNotFound(err) {
|
if !errdefs.IsNotFound(err) {
|
||||||
return errors.Wrapf(err, "failed to stop container, task not found for container %q", id)
|
return errors.Wrapf(err, "failed to get task for container %q", id)
|
||||||
}
|
}
|
||||||
|
// Don't return for unknown state, some cleanup needs to be done.
|
||||||
|
if state != runtime.ContainerState_CONTAINER_UNKNOWN {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
// Task is an interface, explicitly set it to nil just in case.
|
||||||
|
task = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle unknown state.
|
||||||
|
if state == runtime.ContainerState_CONTAINER_UNKNOWN {
|
||||||
|
status, err := getTaskStatus(ctx, task)
|
||||||
|
if err != nil {
|
||||||
|
return errors.Wrapf(err, "failed to get task status for %q", id)
|
||||||
|
}
|
||||||
|
switch status.Status {
case containerd.Running, containerd.Created:
	// The task is still alive; fall out of the switch and continue with
	// the normal stop path below.
case containerd.Stopped:
	// The task has exited. If it exited after containerd started, the
	// event monitor will receive its exit event; if it exited before
	// containerd started, the event monitor will never receive it.
	// We can't tell the two cases apart because the task state was not
	// successfully loaded during containerd start (the container is in
	// UNKNOWN state), so always do the cleanup here in case the exit
	// event was missed.
	return cleanupUnknownContainer(ctx, id, status, container)
default:
	// err is nil at this point (it was checked right after getTaskStatus),
	// and errors.Wrapf returns nil when handed a nil error, which would
	// report success for an unsupported status. Create a fresh error.
	return errors.Errorf("unsupported task status %q", status.Status)
}
|
||||||
|
}
|
||||||
|
|
||||||
// We only need to kill the task. The event handler will Delete the
|
// We only need to kill the task. The event handler will Delete the
|
||||||
// task from containerd after it handles the Exited event.
|
// task from containerd after it handles the Exited event.
|
||||||
@ -141,3 +174,21 @@ func (c *criService) waitContainerStop(ctx context.Context, container containers
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cleanupUnknownContainer cleanup stopped container in unknown state.
|
||||||
|
func cleanupUnknownContainer(ctx context.Context, id string, status containerd.Status,
|
||||||
|
cntr containerstore.Container) error {
|
||||||
|
// Reuse handleContainerExit to do the cleanup.
|
||||||
|
// NOTE(random-liu): If the task did exit after containerd started, both
|
||||||
|
// the event monitor and the cleanup function would update the container
|
||||||
|
// state. The final container state will be whatever being updated first.
|
||||||
|
// There is no way to completely avoid this race condition, and for best
|
||||||
|
// effort unknown state container cleanup, this seems acceptable.
|
||||||
|
return handleContainerExit(ctx, &eventtypes.TaskExit{
|
||||||
|
ContainerID: id,
|
||||||
|
ID: id,
|
||||||
|
Pid: 0,
|
||||||
|
ExitStatus: status.ExitStatus,
|
||||||
|
ExitedAt: status.ExitTime,
|
||||||
|
}, cntr)
|
||||||
|
}
|
||||||
|
@ -260,7 +260,16 @@ func handleContainerExit(ctx context.Context, e *eventtypes.TaskExit, cntr conta
|
|||||||
// Attach container IO so that `Delete` could cleanup the stream properly.
|
// Attach container IO so that `Delete` could cleanup the stream properly.
|
||||||
task, err := cntr.Container.Task(ctx,
|
task, err := cntr.Container.Task(ctx,
|
||||||
func(*containerdio.FIFOSet) (containerdio.IO, error) {
|
func(*containerdio.FIFOSet) (containerdio.IO, error) {
|
||||||
|
// We can't directly return cntr.IO here, because
|
||||||
|
// even if cntr.IO is nil, the cio.IO interface
|
||||||
|
// is not.
|
||||||
|
// See https://tour.golang.org/methods/12:
|
||||||
|
// Note that an interface value that holds a nil
|
||||||
|
// concrete value is itself non-nil.
|
||||||
|
if cntr.IO != nil {
|
||||||
return cntr.IO, nil
|
return cntr.IO, nil
|
||||||
|
}
|
||||||
|
return nil, nil
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -25,9 +25,12 @@ import (
|
|||||||
"regexp"
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/BurntSushi/toml"
|
"github.com/BurntSushi/toml"
|
||||||
|
"github.com/containerd/containerd"
|
||||||
"github.com/containerd/containerd/containers"
|
"github.com/containerd/containerd/containers"
|
||||||
|
"github.com/containerd/containerd/errdefs"
|
||||||
"github.com/containerd/containerd/runtime/linux/runctypes"
|
"github.com/containerd/containerd/runtime/linux/runctypes"
|
||||||
runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
|
runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
|
||||||
"github.com/containerd/typeurl"
|
"github.com/containerd/typeurl"
|
||||||
@ -44,7 +47,9 @@ import (
|
|||||||
runtimeoptions "github.com/containerd/cri/pkg/api/runtimeoptions/v1"
|
runtimeoptions "github.com/containerd/cri/pkg/api/runtimeoptions/v1"
|
||||||
criconfig "github.com/containerd/cri/pkg/config"
|
criconfig "github.com/containerd/cri/pkg/config"
|
||||||
"github.com/containerd/cri/pkg/store"
|
"github.com/containerd/cri/pkg/store"
|
||||||
|
containerstore "github.com/containerd/cri/pkg/store/container"
|
||||||
imagestore "github.com/containerd/cri/pkg/store/image"
|
imagestore "github.com/containerd/cri/pkg/store/image"
|
||||||
|
sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
|
||||||
"github.com/containerd/cri/pkg/util"
|
"github.com/containerd/cri/pkg/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -524,3 +529,53 @@ func restrictOOMScoreAdj(preferredOOMScoreAdj int) (int, error) {
|
|||||||
}
|
}
|
||||||
return preferredOOMScoreAdj, nil
|
return preferredOOMScoreAdj, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
// unknownExitCode is the exit code when exit reason is unknown.
|
||||||
|
unknownExitCode = 255
|
||||||
|
// unknownExitReason is the exit reason when exit reason is unknown.
|
||||||
|
unknownExitReason = "Unknown"
|
||||||
|
)
|
||||||
|
|
||||||
|
// unknownContainerStatus returns the default container status when its status is unknown.
|
||||||
|
func unknownContainerStatus() containerstore.Status {
|
||||||
|
return containerstore.Status{
|
||||||
|
CreatedAt: 0,
|
||||||
|
StartedAt: 0,
|
||||||
|
FinishedAt: 0,
|
||||||
|
ExitCode: unknownExitCode,
|
||||||
|
Reason: unknownExitReason,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// unknownSandboxStatus returns the default sandbox status when its status is unknown.
|
||||||
|
func unknownSandboxStatus() sandboxstore.Status {
|
||||||
|
return sandboxstore.Status{
|
||||||
|
State: sandboxstore.StateUnknown,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// unknownExitStatus generates containerd.Status for container exited with unknown exit code.
|
||||||
|
func unknownExitStatus() containerd.Status {
|
||||||
|
return containerd.Status{
|
||||||
|
Status: containerd.Stopped,
|
||||||
|
ExitStatus: unknownExitCode,
|
||||||
|
ExitTime: time.Now(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// getTaskStatus returns status for a given task. It returns unknown exit status if
|
||||||
|
// the task is nil or not found.
|
||||||
|
func getTaskStatus(ctx context.Context, task containerd.Task) (containerd.Status, error) {
|
||||||
|
if task == nil {
|
||||||
|
return unknownExitStatus(), nil
|
||||||
|
}
|
||||||
|
status, err := task.Status(ctx)
|
||||||
|
if err != nil {
|
||||||
|
if !errdefs.IsNotFound(err) {
|
||||||
|
return containerd.Status{}, err
|
||||||
|
}
|
||||||
|
return unknownExitStatus(), nil
|
||||||
|
}
|
||||||
|
return status, nil
|
||||||
|
}
|
||||||
|
@ -179,8 +179,9 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe
|
|||||||
status = unknownContainerStatus()
|
status = unknownContainerStatus()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load up-to-date status from containerd.
|
|
||||||
var containerIO *cio.ContainerIO
|
var containerIO *cio.ContainerIO
|
||||||
|
err = func() error {
|
||||||
|
// Load up-to-date status from containerd.
|
||||||
t, err := cntr.Task(ctx, func(fifos *containerdio.FIFOSet) (_ containerdio.IO, err error) {
|
t, err := cntr.Task(ctx, func(fifos *containerdio.FIFOSet) (_ containerdio.IO, err error) {
|
||||||
stdoutWC, stderrWC, err := c.createContainerLoggers(meta.LogPath, meta.Config.GetTty())
|
stdoutWC, stderrWC, err := c.createContainerLoggers(meta.LogPath, meta.Config.GetTty())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -207,7 +208,7 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe
|
|||||||
return containerIO, nil
|
return containerIO, nil
|
||||||
})
|
})
|
||||||
if err != nil && !errdefs.IsNotFound(err) {
|
if err != nil && !errdefs.IsNotFound(err) {
|
||||||
return container, errors.Wrap(err, "failed to load task")
|
return errors.Wrap(err, "failed to load task")
|
||||||
}
|
}
|
||||||
var s containerd.Status
|
var s containerd.Status
|
||||||
var notFound bool
|
var notFound bool
|
||||||
@ -220,7 +221,7 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
// It's still possible that task is deleted during this window.
|
// It's still possible that task is deleted during this window.
|
||||||
if !errdefs.IsNotFound(err) {
|
if !errdefs.IsNotFound(err) {
|
||||||
return container, errors.Wrap(err, "failed to get task status")
|
return errors.Wrap(err, "failed to get task status")
|
||||||
}
|
}
|
||||||
notFound = true
|
notFound = true
|
||||||
}
|
}
|
||||||
@ -237,7 +238,7 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe
|
|||||||
cio.WithNewFIFOs(volatileContainerDir, meta.Config.GetTty(), meta.Config.GetStdin()),
|
cio.WithNewFIFOs(volatileContainerDir, meta.Config.GetTty(), meta.Config.GetStdin()),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return container, errors.Wrap(err, "failed to create container io")
|
return errors.Wrap(err, "failed to create container io")
|
||||||
}
|
}
|
||||||
case runtime.ContainerState_CONTAINER_RUNNING:
|
case runtime.ContainerState_CONTAINER_RUNNING:
|
||||||
// Container was in running state, but its task has been deleted,
|
// Container was in running state, but its task has been deleted,
|
||||||
@ -256,17 +257,17 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe
|
|||||||
// gets restarted during container start.
|
// gets restarted during container start.
|
||||||
// Container must be in `CREATED` state.
|
// Container must be in `CREATED` state.
|
||||||
if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
|
if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
|
||||||
return container, errors.Wrap(err, "failed to delete task")
|
return errors.Wrap(err, "failed to delete task")
|
||||||
}
|
}
|
||||||
if status.State() != runtime.ContainerState_CONTAINER_CREATED {
|
if status.State() != runtime.ContainerState_CONTAINER_CREATED {
|
||||||
return container, errors.Errorf("unexpected container state for created task: %q", status.State())
|
return errors.Errorf("unexpected container state for created task: %q", status.State())
|
||||||
}
|
}
|
||||||
case containerd.Running:
|
case containerd.Running:
|
||||||
// Task is running. Container must be in `RUNNING` state, based on our assumption that
|
// Task is running. Container must be in `RUNNING` state, based on our assumption that
|
||||||
// "task should not be started when containerd is down".
|
// "task should not be started when containerd is down".
|
||||||
switch status.State() {
|
switch status.State() {
|
||||||
case runtime.ContainerState_CONTAINER_EXITED:
|
case runtime.ContainerState_CONTAINER_EXITED:
|
||||||
return container, errors.Errorf("unexpected container state for running task: %q", status.State())
|
return errors.Errorf("unexpected container state for running task: %q", status.State())
|
||||||
case runtime.ContainerState_CONTAINER_RUNNING:
|
case runtime.ContainerState_CONTAINER_RUNNING:
|
||||||
default:
|
default:
|
||||||
// This may happen if containerd gets restarted after task is started, but
|
// This may happen if containerd gets restarted after task is started, but
|
||||||
@ -277,42 +278,31 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe
|
|||||||
case containerd.Stopped:
|
case containerd.Stopped:
|
||||||
// Task is stopped. Update status and delete the task.
|
// Task is stopped. Update status and delete the task.
|
||||||
if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
|
if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
|
||||||
return container, errors.Wrap(err, "failed to delete task")
|
return errors.Wrap(err, "failed to delete task")
|
||||||
}
|
}
|
||||||
status.FinishedAt = s.ExitTime.UnixNano()
|
status.FinishedAt = s.ExitTime.UnixNano()
|
||||||
status.ExitCode = int32(s.ExitStatus)
|
status.ExitCode = int32(s.ExitStatus)
|
||||||
default:
|
default:
|
||||||
return container, errors.Errorf("unexpected task status %q", s.Status)
|
return errors.Errorf("unexpected task status %q", s.Status)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
|
}()
|
||||||
|
if err != nil {
|
||||||
|
logrus.WithError(err).Errorf("Failed to load container status for %q", id)
|
||||||
|
status = unknownContainerStatus()
|
||||||
|
}
|
||||||
opts := []containerstore.Opts{
|
opts := []containerstore.Opts{
|
||||||
containerstore.WithStatus(status, containerDir),
|
containerstore.WithStatus(status, containerDir),
|
||||||
containerstore.WithContainer(cntr),
|
containerstore.WithContainer(cntr),
|
||||||
}
|
}
|
||||||
|
// containerIO could be nil for container in unknown state.
|
||||||
if containerIO != nil {
|
if containerIO != nil {
|
||||||
opts = append(opts, containerstore.WithContainerIO(containerIO))
|
opts = append(opts, containerstore.WithContainerIO(containerIO))
|
||||||
}
|
}
|
||||||
return containerstore.NewContainer(*meta, opts...)
|
return containerstore.NewContainer(*meta, opts...)
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
|
||||||
// unknownExitCode is the exit code when exit reason is unknown.
|
|
||||||
unknownExitCode = 255
|
|
||||||
// unknownExitReason is the exit reason when exit reason is unknown.
|
|
||||||
unknownExitReason = "Unknown"
|
|
||||||
)
|
|
||||||
|
|
||||||
// unknownContainerStatus returns the default container status when its status is unknown.
|
|
||||||
func unknownContainerStatus() containerstore.Status {
|
|
||||||
return containerstore.Status{
|
|
||||||
CreatedAt: 0,
|
|
||||||
StartedAt: 0,
|
|
||||||
FinishedAt: 0,
|
|
||||||
ExitCode: unknownExitCode,
|
|
||||||
Reason: unknownExitReason,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// loadSandbox loads sandbox from containerd.
|
// loadSandbox loads sandbox from containerd.
|
||||||
func loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) {
|
func loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) {
|
||||||
ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout)
|
ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout)
|
||||||
@ -333,61 +323,59 @@ func loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.S
|
|||||||
}
|
}
|
||||||
meta := data.(*sandboxstore.Metadata)
|
meta := data.(*sandboxstore.Metadata)
|
||||||
|
|
||||||
|
s, err := func() (sandboxstore.Status, error) {
|
||||||
|
status := unknownSandboxStatus()
|
||||||
// Load sandbox created timestamp.
|
// Load sandbox created timestamp.
|
||||||
info, err := cntr.Info(ctx)
|
info, err := cntr.Info(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return sandbox, errors.Wrap(err, "failed to get sandbox container info")
|
return status, errors.Wrap(err, "failed to get sandbox container info")
|
||||||
}
|
}
|
||||||
createdAt := info.CreatedAt
|
status.CreatedAt = info.CreatedAt
|
||||||
|
|
||||||
// Load sandbox status.
|
// Load sandbox state.
|
||||||
t, err := cntr.Task(ctx, nil)
|
t, err := cntr.Task(ctx, nil)
|
||||||
if err != nil && !errdefs.IsNotFound(err) {
|
if err != nil && !errdefs.IsNotFound(err) {
|
||||||
return sandbox, errors.Wrap(err, "failed to load task")
|
return status, errors.Wrap(err, "failed to load task")
|
||||||
}
|
}
|
||||||
var s containerd.Status
|
var taskStatus containerd.Status
|
||||||
var notFound bool
|
var notFound bool
|
||||||
if errdefs.IsNotFound(err) {
|
if errdefs.IsNotFound(err) {
|
||||||
// Task is not found.
|
// Task is not found.
|
||||||
notFound = true
|
notFound = true
|
||||||
} else {
|
} else {
|
||||||
// Task is found. Get task status.
|
// Task is found. Get task status.
|
||||||
s, err = t.Status(ctx)
|
taskStatus, err = t.Status(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// It's still possible that task is deleted during this window.
|
// It's still possible that task is deleted during this window.
|
||||||
if !errdefs.IsNotFound(err) {
|
if !errdefs.IsNotFound(err) {
|
||||||
return sandbox, errors.Wrap(err, "failed to get task status")
|
return status, errors.Wrap(err, "failed to get task status")
|
||||||
}
|
}
|
||||||
notFound = true
|
notFound = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
var state sandboxstore.State
|
|
||||||
var pid uint32
|
|
||||||
if notFound {
|
if notFound {
|
||||||
// Task does not exist, set sandbox state as NOTREADY.
|
// Task does not exist, set sandbox state as NOTREADY.
|
||||||
state = sandboxstore.StateNotReady
|
status.State = sandboxstore.StateNotReady
|
||||||
} else {
|
} else {
|
||||||
if s.Status == containerd.Running {
|
if taskStatus.Status == containerd.Running {
|
||||||
// Task is running, set sandbox state as READY.
|
// Task is running, set sandbox state as READY.
|
||||||
state = sandboxstore.StateReady
|
status.State = sandboxstore.StateReady
|
||||||
pid = t.Pid()
|
status.Pid = t.Pid()
|
||||||
} else {
|
} else {
|
||||||
// Task is not running. Delete the task and set sandbox state as NOTREADY.
|
// Task is not running. Delete the task and set sandbox state as NOTREADY.
|
||||||
if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
|
if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
|
||||||
return sandbox, errors.Wrap(err, "failed to delete task")
|
return status, errors.Wrap(err, "failed to delete task")
|
||||||
}
|
}
|
||||||
state = sandboxstore.StateNotReady
|
status.State = sandboxstore.StateNotReady
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return status, nil
|
||||||
|
}()
|
||||||
|
if err != nil {
|
||||||
|
logrus.WithError(err).Errorf("Failed to load sandbox status for %q", cntr.ID())
|
||||||
|
}
|
||||||
|
|
||||||
sandbox = sandboxstore.NewSandbox(
|
sandbox = sandboxstore.NewSandbox(*meta, s)
|
||||||
*meta,
|
|
||||||
sandboxstore.Status{
|
|
||||||
Pid: pid,
|
|
||||||
CreatedAt: createdAt,
|
|
||||||
State: state,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
sandbox.Container = cntr
|
sandbox.Container = cntr
|
||||||
|
|
||||||
// Load network namespace.
|
// Load network namespace.
|
||||||
|
@ -63,6 +63,10 @@ func TestToCRISandbox(t *testing.T) {
|
|||||||
state: sandboxstore.StateNotReady,
|
state: sandboxstore.StateNotReady,
|
||||||
expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
|
expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
|
||||||
},
|
},
|
||||||
|
"sandbox state unknown": {
|
||||||
|
state: sandboxstore.StateUnknown,
|
||||||
|
expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
|
||||||
|
},
|
||||||
} {
|
} {
|
||||||
status := sandboxstore.Status{
|
status := sandboxstore.Status{
|
||||||
CreatedAt: createdAt,
|
CreatedAt: createdAt,
|
||||||
|
@ -46,8 +46,9 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS
|
|||||||
// Use the full sandbox id.
|
// Use the full sandbox id.
|
||||||
id := sandbox.ID
|
id := sandbox.ID
|
||||||
|
|
||||||
// Return error if sandbox container is still running.
|
// Return error if sandbox container is still running or unknown.
|
||||||
if sandbox.Status.Get().State == sandboxstore.StateReady {
|
state := sandbox.Status.Get().State
|
||||||
|
if state == sandboxstore.StateReady || state == sandboxstore.StateUnknown {
|
||||||
return nil, errors.Errorf("sandbox container %q is not fully stopped", id)
|
return nil, errors.Errorf("sandbox container %q is not fully stopped", id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,6 +42,14 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox
|
|||||||
return nil, errors.Wrap(err, "failed to get sandbox ip")
|
return nil, errors.Wrap(err, "failed to get sandbox ip")
|
||||||
}
|
}
|
||||||
status := toCRISandboxStatus(sandbox.Metadata, sandbox.Status.Get(), ip)
|
status := toCRISandboxStatus(sandbox.Metadata, sandbox.Status.Get(), ip)
|
||||||
|
if status.GetCreatedAt() == 0 {
|
||||||
|
// CRI doesn't allow CreatedAt == 0.
|
||||||
|
info, err := sandbox.Container.Info(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.Wrapf(err, "failed to get CreatedAt for sandbox container in %q state", status.State)
|
||||||
|
}
|
||||||
|
status.CreatedAt = info.CreatedAt.UnixNano()
|
||||||
|
}
|
||||||
if !r.GetVerbose() {
|
if !r.GetVerbose() {
|
||||||
return &runtime.PodSandboxStatusResponse{Status: status}, nil
|
return &runtime.PodSandboxStatusResponse{Status: status}, nil
|
||||||
}
|
}
|
||||||
|
@ -86,6 +86,10 @@ func TestPodSandboxStatus(t *testing.T) {
|
|||||||
state: sandboxstore.StateNotReady,
|
state: sandboxstore.StateNotReady,
|
||||||
expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
|
expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
|
||||||
},
|
},
|
||||||
|
"sandbox state unknown": {
|
||||||
|
state: sandboxstore.StateUnknown,
|
||||||
|
expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
|
||||||
|
},
|
||||||
} {
|
} {
|
||||||
t.Logf("TestCase: %s", desc)
|
t.Logf("TestCase: %s", desc)
|
||||||
status := sandboxstore.Status{
|
status := sandboxstore.Status{
|
||||||
|
@ -19,6 +19,8 @@ package server
|
|||||||
import (
|
import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/containerd/containerd"
|
||||||
|
eventtypes "github.com/containerd/containerd/api/events"
|
||||||
"github.com/containerd/containerd/errdefs"
|
"github.com/containerd/containerd/errdefs"
|
||||||
cni "github.com/containerd/go-cni"
|
cni "github.com/containerd/go-cni"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
@ -60,10 +62,11 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb
|
|||||||
return nil, errors.Wrap(err, "failed to unmount sandbox files")
|
return nil, errors.Wrap(err, "failed to unmount sandbox files")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only stop sandbox container when it's running.
|
// Only stop sandbox container when it's running or unknown.
|
||||||
if sandbox.Status.Get().State == sandboxstore.StateReady {
|
state := sandbox.Status.Get().State
|
||||||
|
if state == sandboxstore.StateReady || state == sandboxstore.StateUnknown {
|
||||||
if err := c.stopSandboxContainer(ctx, sandbox); err != nil {
|
if err := c.stopSandboxContainer(ctx, sandbox); err != nil {
|
||||||
return nil, errors.Wrapf(err, "failed to stop sandbox container %q", id)
|
return nil, errors.Wrapf(err, "failed to stop sandbox container %q in %q state", id, state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -95,12 +98,36 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb
|
|||||||
// the event monitor handles the `TaskExit` event.
|
// the event monitor handles the `TaskExit` event.
|
||||||
func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxstore.Sandbox) error {
|
func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxstore.Sandbox) error {
|
||||||
container := sandbox.Container
|
container := sandbox.Container
|
||||||
|
state := sandbox.Status.Get().State
|
||||||
task, err := container.Task(ctx, nil)
|
task, err := container.Task(ctx, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errdefs.IsNotFound(err) {
|
if !errdefs.IsNotFound(err) {
|
||||||
|
return errors.Wrap(err, "failed to get sandbox container")
|
||||||
|
}
|
||||||
|
// Don't return for unknown state, some cleanup needs to be done.
|
||||||
|
if state != sandboxstore.StateUnknown {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return errors.Wrap(err, "failed to get sandbox container")
|
// Task is an interface, explicitly set it to nil just in case.
|
||||||
|
task = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle unknown state.
|
||||||
|
// The cleanup logic is the same with container unknown state.
|
||||||
|
if state == sandboxstore.StateUnknown {
|
||||||
|
status, err := getTaskStatus(ctx, task)
|
||||||
|
if err != nil {
|
||||||
|
return errors.Wrapf(err, "failed to get task status for %q", sandbox.ID)
|
||||||
|
}
|
||||||
|
switch status.Status {
case containerd.Running, containerd.Created:
	// The task is still alive; fall out of the switch and continue with
	// killing the sandbox container below.
case containerd.Stopped:
	// The task has exited, explicitly clean up.
	return cleanupUnknownSandbox(ctx, sandbox.ID, status, sandbox)
default:
	// err is nil at this point (it was checked right after getTaskStatus),
	// and errors.Wrapf returns nil when handed a nil error, which would
	// report success for an unsupported status. Create a fresh error.
	return errors.Errorf("unsupported task status %q", status.Status)
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Kill the sandbox container.
|
// Kill the sandbox container.
|
||||||
@ -137,3 +164,16 @@ func (c *criService) teardownPod(id string, path string, config *runtime.PodSand
|
|||||||
cni.WithLabels(labels),
|
cni.WithLabels(labels),
|
||||||
cni.WithCapabilityPortMap(toCNIPortMappings(config.GetPortMappings())))
|
cni.WithCapabilityPortMap(toCNIPortMappings(config.GetPortMappings())))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cleanupUnknownSandbox cleanup stopped sandbox in unknown state.
|
||||||
|
func cleanupUnknownSandbox(ctx context.Context, id string, status containerd.Status,
|
||||||
|
sandbox sandboxstore.Sandbox) error {
|
||||||
|
// Reuse handleSandboxExit to do the cleanup.
|
||||||
|
return handleSandboxExit(ctx, &eventtypes.TaskExit{
|
||||||
|
ContainerID: id,
|
||||||
|
ID: id,
|
||||||
|
Pid: 0,
|
||||||
|
ExitStatus: status.ExitStatus,
|
||||||
|
ExitedAt: status.ExitTime,
|
||||||
|
}, sandbox)
|
||||||
|
}
|
||||||
|
@ -36,7 +36,8 @@ type Container struct {
|
|||||||
Status StatusStorage
|
Status StatusStorage
|
||||||
// Container is the containerd container client.
|
// Container is the containerd container client.
|
||||||
Container containerd.Container
|
Container containerd.Container
|
||||||
// Container IO
|
// Container IO.
|
||||||
|
// IO could only be nil when the container is in unknown state.
|
||||||
IO *cio.ContainerIO
|
IO *cio.ContainerIO
|
||||||
// StopCh is used to propagate the stop information of the container.
|
// StopCh is used to propagate the stop information of the container.
|
||||||
*store.StopCh
|
*store.StopCh
|
||||||
|
Loading…
Reference in New Issue
Block a user