Use wait instead of TaskExit.

Signed-off-by: Lantao Liu <lantaol@google.com>
This commit is contained in:
Lantao Liu
2019-04-11 16:36:44 -07:00
parent ebb0928057
commit d1f9611cb0
18 changed files with 390 additions and 266 deletions

View File

@@ -28,6 +28,7 @@ import (
"golang.org/x/sys/unix"
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
"github.com/containerd/cri/pkg/store"
containerstore "github.com/containerd/cri/pkg/store/container"
)
@@ -74,36 +75,34 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore
return errors.Wrapf(err, "failed to get task for container %q", id)
}
// Don't return for unknown state, some cleanup needs to be done.
if state != runtime.ContainerState_CONTAINER_UNKNOWN {
return nil
if state == runtime.ContainerState_CONTAINER_UNKNOWN {
return cleanupUnknownContainer(ctx, id, container)
}
// Task is an interface, explicitly set it to nil just in case.
task = nil
return nil
}
// Handle unknown state.
if state == runtime.ContainerState_CONTAINER_UNKNOWN {
status, err := getTaskStatus(ctx, task)
// Start an exit handler for containers in unknown state.
waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext())
defer waitCancel()
exitCh, err := task.Wait(waitCtx)
if err != nil {
return errors.Wrapf(err, "failed to get task status for %q", id)
}
switch status.Status {
case containerd.Running, containerd.Created:
// The task is still running, continue stopping the task.
case containerd.Stopped:
// The task has exited. If the task exited after containerd
// started, the event monitor will receive its exit event; if it
// exited before containerd started, the event monitor will never
// receive its exit event.
// However, we can't tell that because the task state was not
// successfully loaded during containerd start (container is
// in UNKNOWN state).
// So always do cleanup here, just in case that we've missed the
// exit event.
return cleanupUnknownContainer(ctx, id, status, container)
default:
return errors.Wrapf(err, "unsupported task status %q", status.Status)
if !errdefs.IsNotFound(err) {
return errors.Wrapf(err, "failed to wait for task for %q", id)
}
return cleanupUnknownContainer(ctx, id, container)
}
exitCtx, exitCancel := context.WithCancel(context.Background())
stopCh := c.eventMonitor.startExitMonitor(exitCtx, id, task.Pid(), exitCh)
defer func() {
exitCancel()
// This ensures that exit monitor is stopped before
// `Wait` is cancelled, so no exit event is generated
// because of the `Wait` cancellation.
<-stopCh
}()
}
// We only need to kill the task. The event handler will Delete the
@@ -176,19 +175,13 @@ func (c *criService) waitContainerStop(ctx context.Context, container containers
}
// cleanupUnknownContainer cleanup stopped container in unknown state.
func cleanupUnknownContainer(ctx context.Context, id string, status containerd.Status,
cntr containerstore.Container) error {
func cleanupUnknownContainer(ctx context.Context, id string, cntr containerstore.Container) error {
// Reuse handleContainerExit to do the cleanup.
// NOTE(random-liu): If the task did exit after containerd started, both
// the event monitor and the cleanup function would update the container
// state. The final container state will be whatever being updated first.
// There is no way to completely avoid this race condition, and for best
// effort unknown state container cleanup, this seems acceptable.
return handleContainerExit(ctx, &eventtypes.TaskExit{
ContainerID: id,
ID: id,
Pid: 0,
ExitStatus: status.ExitStatus,
ExitedAt: status.ExitTime,
ExitStatus: unknownExitCode,
ExitedAt: time.Now(),
}, cntr)
}