Avoid containerd access as much as possible.

Signed-off-by: Lantao Liu <lantaol@google.com>
This commit is contained in:
Lantao Liu
2018-01-25 01:15:45 +00:00
parent 11042a4141
commit df58d6825d
22 changed files with 797 additions and 337 deletions

View File

@@ -28,12 +28,14 @@ import (
"github.com/sirupsen/logrus"
"golang.org/x/net/context"
"github.com/containerd/cri-containerd/pkg/store"
containerstore "github.com/containerd/cri-containerd/pkg/store/container"
sandboxstore "github.com/containerd/cri-containerd/pkg/store/sandbox"
)
// eventMonitor monitors containerd event and updates internal state correspondingly.
// TODO(random-liu): [P1] Is it possible to drop event during containerd is running?
// TODO(random-liu): [P1] Figure out is it possible to drop event during containerd
// is running. If it is, we should do periodically list to sync state with containerd.
type eventMonitor struct {
containerStore *containerstore.Store
sandboxStore *sandboxstore.Store
@@ -106,57 +108,22 @@ func (em *eventMonitor) handleEvent(evt *events.Envelope) {
e := any.(*eventtypes.TaskExit)
logrus.Infof("TaskExit event %+v", e)
cntr, err := em.containerStore.Get(e.ContainerID)
if err != nil {
if _, err := em.sandboxStore.Get(e.ContainerID); err == nil {
return
}
if err == nil {
handleContainerExit(e, cntr)
return
} else if err != store.ErrNotExist {
logrus.WithError(err).Errorf("Failed to get container %q", e.ContainerID)
return
}
if e.Pid != cntr.Status.Get().Pid {
// Non-init process died, ignore the event.
// Use GetAll to include sandbox in unknown state.
sb, err := em.sandboxStore.GetAll(e.ContainerID)
if err == nil {
handleSandboxExit(e, sb)
return
} else if err != store.ErrNotExist {
logrus.WithError(err).Errorf("Failed to get sandbox %q", e.ContainerID)
return
}
// Attach container IO so that `Delete` could cleanup the stream properly.
task, err := cntr.Container.Task(context.Background(),
func(*containerdio.FIFOSet) (containerdio.IO, error) {
return cntr.IO, nil
},
)
if err != nil {
if !errdefs.IsNotFound(err) {
logrus.WithError(err).Errorf("failed to stop container, task not found for container %q", e.ContainerID)
return
}
} else {
// TODO(random-liu): [P1] This may block the loop, we may want to spawn a worker
if _, err = task.Delete(context.Background()); err != nil {
// TODO(random-liu): [P0] Enqueue the event and retry.
if !errdefs.IsNotFound(err) {
logrus.WithError(err).Errorf("failed to stop container %q", e.ContainerID)
return
}
// Move on to make sure container status is updated.
}
}
err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
// If FinishedAt has been set (e.g. with start failure), keep as
// it is.
if status.FinishedAt != 0 {
return status, nil
}
status.Pid = 0
status.FinishedAt = e.ExitedAt.UnixNano()
status.ExitCode = int32(e.ExitStatus)
return status, nil
})
if err != nil {
logrus.WithError(err).Errorf("Failed to update container %q state", e.ContainerID)
// TODO(random-liu): [P0] Enqueue the event and retry.
return
}
// Using channel to propagate the information of container stop
cntr.Stop()
case *eventtypes.TaskOOM:
e := any.(*eventtypes.TaskOOM)
logrus.Infof("TaskOOM event %+v", e)
@@ -177,3 +144,95 @@ func (em *eventMonitor) handleEvent(evt *events.Envelope) {
}
}
}
// handleContainerExit handles TaskExit event for container.
func handleContainerExit(e *eventtypes.TaskExit, cntr containerstore.Container) {
if e.Pid != cntr.Status.Get().Pid {
// Non-init process died, ignore the event.
return
}
// Attach container IO so that `Delete` could cleanup the stream properly.
task, err := cntr.Container.Task(context.Background(),
func(*containerdio.FIFOSet) (containerdio.IO, error) {
return cntr.IO, nil
},
)
if err != nil {
if !errdefs.IsNotFound(err) {
logrus.WithError(err).Errorf("failed to load task for container %q", e.ContainerID)
return
}
} else {
// TODO(random-liu): [P1] This may block the loop, we may want to spawn a worker
if _, err = task.Delete(context.Background()); err != nil {
// TODO(random-liu): [P0] Enqueue the event and retry.
if !errdefs.IsNotFound(err) {
logrus.WithError(err).Errorf("failed to stop container %q", e.ContainerID)
return
}
// Move on to make sure container status is updated.
}
}
err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
// If FinishedAt has been set (e.g. with start failure), keep as
// it is.
if status.FinishedAt != 0 {
return status, nil
}
status.Pid = 0
status.FinishedAt = e.ExitedAt.UnixNano()
status.ExitCode = int32(e.ExitStatus)
return status, nil
})
if err != nil {
logrus.WithError(err).Errorf("Failed to update container %q state", e.ContainerID)
// TODO(random-liu): [P0] Enqueue the event and retry.
return
}
// Using channel to propagate the information of container stop
cntr.Stop()
}
// handleSandboxExit handles TaskExit event for sandbox.
func handleSandboxExit(e *eventtypes.TaskExit, sb sandboxstore.Sandbox) {
if e.Pid != sb.Status.Get().Pid {
// Non-init process died, ignore the event.
return
}
// No stream attached to sandbox container.
task, err := sb.Container.Task(context.Background(), nil)
if err != nil {
if !errdefs.IsNotFound(err) {
logrus.WithError(err).Errorf("failed to load task for sandbox %q", e.ContainerID)
return
}
} else {
// TODO(random-liu): [P1] This may block the loop, we may want to spawn a worker
if _, err = task.Delete(context.Background()); err != nil {
// TODO(random-liu): [P0] Enqueue the event and retry.
if !errdefs.IsNotFound(err) {
logrus.WithError(err).Errorf("failed to stop sandbox %q", e.ContainerID)
return
}
// Move on to make sure container status is updated.
}
}
err = sb.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
// NOTE(random-liu): We SHOULD NOT change UNKNOWN state here.
// If sandbox state is UNKNOWN when event monitor receives an TaskExit event,
// it means that sandbox start has failed. In that case, `RunPodSandbox` will
// cleanup everything immediately.
// Once sandbox state goes out of UNKNOWN, it becomes visable to the user, which
// is not what we want.
if status.State != sandboxstore.StateUnknown {
status.State = sandboxstore.StateNotReady
}
status.Pid = 0
return status, nil
})
if err != nil {
logrus.WithError(err).Errorf("Failed to update sandbox %q state", e.ContainerID)
// TODO(random-liu): [P0] Enqueue the event and retry.
return
}
}