Use wait instead of TaskExit.

Signed-off-by: Lantao Liu <lantaol@google.com>
Author: Lantao Liu
Date:   2019-04-11 16:36:44 -07:00
Parent: ebb0928057
Commit: d1f9611cb0
18 changed files with 390 additions and 266 deletions
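At a high level, this change replaces the global subscription to /tasks/exit events with a per-task exit monitor: each start/load path calls task.Wait before task.Start and hands the resulting exit channel to eventMonitor.startExitMonitor. The condensed sketch below is a hypothetical helper, not part of the diff; it borrows names such as criService, ctrdutil, containerstore, and startExitMonitor from the changed files and assumes the package's existing imports.

func (c *criService) monitorAfterStart(ctx context.Context, cntr containerstore.Container, task containerd.Task) error {
	// wait is a long running background request, no timeout needed; issuing it
	// before Start guarantees the exit status cannot be missed.
	exitCh, err := task.Wait(ctrdutil.NamespacedContext())
	if err != nil {
		return errors.Wrap(err, "failed to wait for containerd task")
	}
	if err := task.Start(ctx); err != nil {
		return errors.Wrapf(err, "failed to start containerd task %q", cntr.ID)
	}
	// Persist Pid and StartedAt first, then hand the exit channel to the event
	// monitor, so the TaskExit handler always observes the updated state.
	if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
		status.Pid = task.Pid()
		status.StartedAt = time.Now().UnixNano()
		return status, nil
	}); err != nil {
		return errors.Wrapf(err, "failed to update container %q state", cntr.ID)
	}
	c.eventMonitor.startExitMonitor(context.Background(), cntr.ID, task.Pid(), exitCh)
	return nil
}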


@ -105,6 +105,9 @@ func setContainerRemoving(container containerstore.Container) error {
if status.State() == runtime.ContainerState_CONTAINER_UNKNOWN { if status.State() == runtime.ContainerState_CONTAINER_UNKNOWN {
return status, errors.New("container state is unknown, to stop first") return status, errors.New("container state is unknown, to stop first")
} }
if status.Starting {
return status, errors.New("container is in starting state, can't be removed")
}
if status.Removing { if status.Removing {
return status, errors.New("container is already in removing state") return status, errors.New("container is already in removing state")
} }


@ -40,6 +40,13 @@ func TestSetContainerRemoving(t *testing.T) {
}, },
expectErr: true, expectErr: true,
}, },
"should return error when container is in starting state": {
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
Starting: true,
},
expectErr: true,
},
"should return error when container is in removing state": { "should return error when container is in removing state": {
status: containerstore.Status{ status: containerstore.Status{
CreatedAt: time.Now().UnixNano(), CreatedAt: time.Now().UnixNano(),
@ -71,6 +78,8 @@ func TestSetContainerRemoving(t *testing.T) {
} else { } else {
assert.NoError(t, err) assert.NoError(t, err)
assert.True(t, container.Status.Get().Removing, "removing should be set") assert.True(t, container.Status.Get().Removing, "removing should be set")
assert.NoError(t, resetContainerRemoving(container))
assert.False(t, container.Status.Get().Removing, "removing should be reset")
} }
} }
} }


@ -38,64 +38,48 @@ import (
// StartContainer starts the container. // StartContainer starts the container.
func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) { func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) {
container, err := c.containerStore.Get(r.GetContainerId()) cntr, err := c.containerStore.Get(r.GetContainerId())
if err != nil { if err != nil {
return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId()) return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
} }
var startErr error
// update container status in one transaction to avoid race with event monitor.
if err := container.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
// Always apply status change no matter startContainer fails or not. Because startContainer
// may change container state no matter it fails or succeeds.
startErr = c.startContainer(ctx, container, &status)
return status, nil
}); startErr != nil {
return nil, startErr
} else if err != nil {
return nil, errors.Wrapf(err, "failed to update container %q metadata", container.ID)
}
return &runtime.StartContainerResponse{}, nil
}
// startContainer actually starts the container. The function needs to be run in one transaction. Any updates
// to the status passed in will be applied no matter the function returns error or not.
func (c *criService) startContainer(ctx context.Context,
cntr containerstore.Container,
status *containerstore.Status) (retErr error) {
id := cntr.ID id := cntr.ID
meta := cntr.Metadata meta := cntr.Metadata
container := cntr.Container container := cntr.Container
config := meta.Config config := meta.Config
// Return error if container is not in created state. // Set starting state to prevent other start/remove operations against this container
if status.State() != runtime.ContainerState_CONTAINER_CREATED { // while it's being started.
return errors.Errorf("container %q is in %s state", id, criContainerStateToString(status.State())) if err := setContainerStarting(cntr); err != nil {
return nil, errors.Wrapf(err, "failed to set starting state for container %q", id)
} }
// Do not start the container when there is a removal in progress.
if status.Removing {
return errors.Errorf("container %q is in removing state", id)
}
defer func() { defer func() {
if retErr != nil { if retErr != nil {
// Set container to exited if fail to start. // Set container to exited if fail to start.
status.Pid = 0 if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
status.FinishedAt = time.Now().UnixNano() status.Pid = 0
status.ExitCode = errorStartExitCode status.FinishedAt = time.Now().UnixNano()
status.Reason = errorStartReason status.ExitCode = errorStartExitCode
status.Message = retErr.Error() status.Reason = errorStartReason
status.Message = retErr.Error()
return status, nil
}); err != nil {
logrus.WithError(err).Errorf("failed to set start failure state for container %q", id)
}
}
if err := resetContainerStarting(cntr); err != nil {
logrus.WithError(err).Errorf("failed to reset starting state for container %q", id)
} }
}() }()
// Get sandbox config from sandbox store. // Get sandbox config from sandbox store.
sandbox, err := c.sandboxStore.Get(meta.SandboxID) sandbox, err := c.sandboxStore.Get(meta.SandboxID)
if err != nil { if err != nil {
return errors.Wrapf(err, "sandbox %q not found", meta.SandboxID) return nil, errors.Wrapf(err, "sandbox %q not found", meta.SandboxID)
} }
sandboxID := meta.SandboxID sandboxID := meta.SandboxID
if sandbox.Status.Get().State != sandboxstore.StateReady { if sandbox.Status.Get().State != sandboxstore.StateReady {
return errors.Errorf("sandbox container %q is not running", sandboxID) return nil, errors.Errorf("sandbox container %q is not running", sandboxID)
} }
ioCreation := func(id string) (_ containerdio.IO, err error) { ioCreation := func(id string) (_ containerdio.IO, err error) {
@ -110,7 +94,7 @@ func (c *criService) startContainer(ctx context.Context,
ctrInfo, err := container.Info(ctx) ctrInfo, err := container.Info(ctx)
if err != nil { if err != nil {
return errors.Wrap(err, "failed to get container info") return nil, errors.Wrap(err, "failed to get container info")
} }
var taskOpts []containerd.NewTaskOpts var taskOpts []containerd.NewTaskOpts
@ -120,7 +104,7 @@ func (c *criService) startContainer(ctx context.Context,
} }
task, err := container.NewTask(ctx, ioCreation, taskOpts...) task, err := container.NewTask(ctx, ioCreation, taskOpts...)
if err != nil { if err != nil {
return errors.Wrap(err, "failed to create containerd task") return nil, errors.Wrap(err, "failed to create containerd task")
} }
defer func() { defer func() {
if retErr != nil { if retErr != nil {
@ -133,15 +117,61 @@ func (c *criService) startContainer(ctx context.Context,
} }
}() }()
// wait is a long running background request, no timeout needed.
exitCh, err := task.Wait(ctrdutil.NamespacedContext())
if err != nil {
return nil, errors.Wrap(err, "failed to wait for containerd task")
}
// Start containerd task. // Start containerd task.
if err := task.Start(ctx); err != nil { if err := task.Start(ctx); err != nil {
return errors.Wrapf(err, "failed to start containerd task %q", id) return nil, errors.Wrapf(err, "failed to start containerd task %q", id)
} }
// Update container start timestamp. // Update container start timestamp.
status.Pid = task.Pid() if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
status.StartedAt = time.Now().UnixNano() status.Pid = task.Pid()
return nil status.StartedAt = time.Now().UnixNano()
return status, nil
}); err != nil {
return nil, errors.Wrapf(err, "failed to update container %q state", id)
}
// start the monitor after updating container state, this ensures that
// event monitor receives the TaskExit event and update container state
// after this.
c.eventMonitor.startExitMonitor(context.Background(), id, task.Pid(), exitCh)
return &runtime.StartContainerResponse{}, nil
}
// setContainerStarting sets the container into starting state. In starting state, the
// container will not be removed or started again.
func setContainerStarting(container containerstore.Container) error {
return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
// Return error if container is not in created state.
if status.State() != runtime.ContainerState_CONTAINER_CREATED {
return status, errors.Errorf("container is in %s state", criContainerStateToString(status.State()))
}
// Do not start the container when there is a removal in progress.
if status.Removing {
return status, errors.New("container is in removing state, can't be started")
}
if status.Starting {
return status, errors.New("container is already in starting state")
}
status.Starting = true
return status, nil
})
}
// resetContainerStarting resets the container starting state on start failure. So
// that we could remove the container later.
func resetContainerStarting(container containerstore.Container) error {
return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
status.Starting = false
return status, nil
})
} }
// createContainerLoggers creates container loggers and return write closer for stdout and stderr. // createContainerLoggers creates container loggers and return write closer for stdout and stderr.


@ -0,0 +1,98 @@
/*
Copyright 2019 The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
containerstore "github.com/containerd/cri/pkg/store/container"
)
// TestSetContainerStarting tests setContainerStarting sets starting
// state correctly.
func TestSetContainerStarting(t *testing.T) {
testID := "test-id"
for desc, test := range map[string]struct {
status containerstore.Status
expectErr bool
}{
"should not return error when container is in created state": {
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
},
expectErr: false,
},
"should return error when container is in running state": {
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
StartedAt: time.Now().UnixNano(),
},
expectErr: true,
},
"should return error when container is in exited state": {
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
StartedAt: time.Now().UnixNano(),
FinishedAt: time.Now().UnixNano(),
},
expectErr: true,
},
"should return error when container is in unknown state": {
status: containerstore.Status{
CreatedAt: 0,
StartedAt: 0,
FinishedAt: 0,
},
expectErr: true,
},
"should return error when container is in starting state": {
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
Starting: true,
},
expectErr: true,
},
"should return error when container is in removing state": {
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
Removing: true,
},
expectErr: true,
},
} {
t.Logf("TestCase %q", desc)
container, err := containerstore.NewContainer(
containerstore.Metadata{ID: testID},
containerstore.WithFakeStatus(test.status),
)
assert.NoError(t, err)
err = setContainerStarting(container)
if test.expectErr {
assert.Error(t, err)
assert.Equal(t, test.status, container.Status.Get(), "metadata should not be updated")
} else {
assert.NoError(t, err)
assert.True(t, container.Status.Get().Starting, "starting should be set")
assert.NoError(t, resetContainerStarting(container))
assert.False(t, container.Status.Get().Starting, "starting should be reset")
}
}
}


@ -28,6 +28,7 @@ import (
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
"github.com/containerd/cri/pkg/store" "github.com/containerd/cri/pkg/store"
containerstore "github.com/containerd/cri/pkg/store/container" containerstore "github.com/containerd/cri/pkg/store/container"
) )
@ -74,36 +75,34 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore
return errors.Wrapf(err, "failed to get task for container %q", id) return errors.Wrapf(err, "failed to get task for container %q", id)
} }
// Don't return for unknown state, some cleanup needs to be done. // Don't return for unknown state, some cleanup needs to be done.
if state != runtime.ContainerState_CONTAINER_UNKNOWN { if state == runtime.ContainerState_CONTAINER_UNKNOWN {
return nil return cleanupUnknownContainer(ctx, id, container)
} }
// Task is an interface, explicitly set it to nil just in case. return nil
task = nil
} }
// Handle unknown state. // Handle unknown state.
if state == runtime.ContainerState_CONTAINER_UNKNOWN { if state == runtime.ContainerState_CONTAINER_UNKNOWN {
status, err := getTaskStatus(ctx, task) // Start an exit handler for containers in unknown state.
waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext())
defer waitCancel()
exitCh, err := task.Wait(waitCtx)
if err != nil { if err != nil {
return errors.Wrapf(err, "failed to get task status for %q", id) if !errdefs.IsNotFound(err) {
} return errors.Wrapf(err, "failed to wait for task for %q", id)
switch status.Status { }
case containerd.Running, containerd.Created: return cleanupUnknownContainer(ctx, id, container)
// The task is still running, continue stopping the task.
case containerd.Stopped:
// The task has exited. If the task exited after containerd
// started, the event monitor will receive its exit event; if it
// exited before containerd started, the event monitor will never
// receive its exit event.
// However, we can't tell that because the task state was not
// successfully loaded during containerd start (container is
// in UNKNOWN state).
// So always do cleanup here, just in case that we've missed the
// exit event.
return cleanupUnknownContainer(ctx, id, status, container)
default:
return errors.Wrapf(err, "unsupported task status %q", status.Status)
} }
exitCtx, exitCancel := context.WithCancel(context.Background())
stopCh := c.eventMonitor.startExitMonitor(exitCtx, id, task.Pid(), exitCh)
defer func() {
exitCancel()
// This ensures that exit monitor is stopped before
// `Wait` is cancelled, so no exit event is generated
// because of the `Wait` cancellation.
<-stopCh
}()
} }
// We only need to kill the task. The event handler will Delete the // We only need to kill the task. The event handler will Delete the
@ -176,19 +175,13 @@ func (c *criService) waitContainerStop(ctx context.Context, container containers
} }
// cleanupUnknownContainer cleanup stopped container in unknown state. // cleanupUnknownContainer cleanup stopped container in unknown state.
func cleanupUnknownContainer(ctx context.Context, id string, status containerd.Status, func cleanupUnknownContainer(ctx context.Context, id string, cntr containerstore.Container) error {
cntr containerstore.Container) error {
// Reuse handleContainerExit to do the cleanup. // Reuse handleContainerExit to do the cleanup.
// NOTE(random-liu): If the task did exit after containerd started, both
// the event monitor and the cleanup function would update the container
// state. The final container state will be whatever being updated first.
// There is no way to completely avoid this race condition, and for best
// effort unknown state container cleanup, this seems acceptable.
return handleContainerExit(ctx, &eventtypes.TaskExit{ return handleContainerExit(ctx, &eventtypes.TaskExit{
ContainerID: id, ContainerID: id,
ID: id, ID: id,
Pid: 0, Pid: 0,
ExitStatus: status.ExitStatus, ExitStatus: unknownExitCode,
ExitedAt: status.ExitTime, ExitedAt: time.Now(),
}, cntr) }, cntr)
} }


@ -50,13 +50,17 @@ const (
// Add a timeout for each event handling, events that timeout will be requeued and // Add a timeout for each event handling, events that timeout will be requeued and
// handled again in the future. // handled again in the future.
handleEventTimeout = 10 * time.Second handleEventTimeout = 10 * time.Second
exitChannelSize = 1024
) )
// eventMonitor monitors containerd event and updates internal state correspondingly. // eventMonitor monitors containerd event and updates internal state correspondingly.
// TODO(random-liu): Handle event for each container in a separate goroutine. // TODO(random-liu): Handle event for each container in a separate goroutine.
type eventMonitor struct { type eventMonitor struct {
c *criService c *criService
ch <-chan *events.Envelope ch <-chan *events.Envelope
// exitCh receives container/sandbox exit events from exit monitors.
exitCh chan *eventtypes.TaskExit
errCh <-chan error errCh <-chan error
ctx context.Context ctx context.Context
cancel context.CancelFunc cancel context.CancelFunc
@ -89,6 +93,7 @@ func newEventMonitor(c *criService) *eventMonitor {
c: c, c: c,
ctx: ctx, ctx: ctx,
cancel: cancel, cancel: cancel,
exitCh: make(chan *eventtypes.TaskExit, exitChannelSize),
backOff: newBackOff(), backOff: newBackOff(),
} }
} }
@ -98,13 +103,38 @@ func (em *eventMonitor) subscribe(subscriber events.Subscriber) {
// note: filters are any match, if you want any match but not in namespace foo // note: filters are any match, if you want any match but not in namespace foo
// then you have to manually filter namespace foo // then you have to manually filter namespace foo
filters := []string{ filters := []string{
`topic=="/tasks/exit"`,
`topic=="/tasks/oom"`, `topic=="/tasks/oom"`,
`topic~="/images/"`, `topic~="/images/"`,
} }
em.ch, em.errCh = subscriber.Subscribe(em.ctx, filters...) em.ch, em.errCh = subscriber.Subscribe(em.ctx, filters...)
} }
// startExitMonitor starts an exit monitor for a given container/sandbox.
func (em *eventMonitor) startExitMonitor(ctx context.Context, id string, pid uint32, exitCh <-chan containerd.ExitStatus) <-chan struct{} {
stopCh := make(chan struct{})
go func() {
defer close(stopCh)
select {
case exitRes := <-exitCh:
exitStatus, exitedAt, err := exitRes.Result()
if err != nil {
logrus.WithError(err).Errorf("Failed to get task exit status for %q", id)
exitStatus = unknownExitCode
exitedAt = time.Now()
}
em.exitCh <- &eventtypes.TaskExit{
ContainerID: id,
ID: id,
Pid: pid,
ExitStatus: exitStatus,
ExitedAt: exitedAt,
}
case <-ctx.Done():
}
}()
return stopCh
}
func convertEvent(e *gogotypes.Any) (string, interface{}, error) { func convertEvent(e *gogotypes.Any) (string, interface{}, error) {
id := "" id := ""
evt, err := typeurl.UnmarshalAny(e) evt, err := typeurl.UnmarshalAny(e)
@ -113,8 +143,6 @@ func convertEvent(e *gogotypes.Any) (string, interface{}, error) {
} }
switch e := evt.(type) { switch e := evt.(type) {
case *eventtypes.TaskExit:
id = e.ContainerID
case *eventtypes.TaskOOM: case *eventtypes.TaskOOM:
id = e.ContainerID id = e.ContainerID
case *eventtypes.ImageCreate: case *eventtypes.ImageCreate:
@ -142,6 +170,18 @@ func (em *eventMonitor) start() <-chan error {
defer close(errCh) defer close(errCh)
for { for {
select { select {
case e := <-em.exitCh:
logrus.Debugf("Received exit event %+v", e)
id := e.ID
if em.backOff.isInBackOff(id) {
logrus.Infof("Events for %q is in backoff, enqueue event %+v", id, e)
em.backOff.enBackOff(id, e)
break
}
if err := em.handleEvent(e); err != nil {
logrus.WithError(err).Errorf("Failed to handle exit event %+v for %s", e, id)
em.backOff.enBackOff(id, e)
}
case e := <-em.ch: case e := <-em.ch:
logrus.Debugf("Received containerd event timestamp - %v, namespace - %q, topic - %q", e.Timestamp, e.Namespace, e.Topic) logrus.Debugf("Received containerd event timestamp - %v, namespace - %q, topic - %q", e.Timestamp, e.Namespace, e.Topic)
if e.Namespace != constants.K8sContainerdNamespace { if e.Namespace != constants.K8sContainerdNamespace {
@ -213,8 +253,7 @@ func (em *eventMonitor) handleEvent(any interface{}) error {
} else if err != store.ErrNotExist { } else if err != store.ErrNotExist {
return errors.Wrap(err, "can't find container for TaskExit event") return errors.Wrap(err, "can't find container for TaskExit event")
} }
// Use GetAll to include sandbox in init state. sb, err := em.c.sandboxStore.Get(e.ID)
sb, err := em.c.sandboxStore.GetAll(e.ID)
if err == nil { if err == nil {
if err := handleSandboxExit(ctx, e, sb); err != nil { if err := handleSandboxExit(ctx, e, sb); err != nil {
return errors.Wrap(err, "failed to handle sandbox TaskExit event") return errors.Wrap(err, "failed to handle sandbox TaskExit event")
@ -322,15 +361,7 @@ func handleSandboxExit(ctx context.Context, e *eventtypes.TaskExit, sb sandboxst
} }
} }
err = sb.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) { err = sb.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
// NOTE(random-liu): We SHOULD NOT change INIT state here. status.State = sandboxstore.StateNotReady
// If sandbox state is INIT when event monitor receives an TaskExit event,
// it means that sandbox start has failed. In that case, `RunPodSandbox` will
// cleanup everything immediately.
// Once sandbox state goes out of INIT, it becomes visable to the user, which
// is not what we want.
if status.State != sandboxstore.StateInit {
status.State = sandboxstore.StateNotReady
}
status.Pid = 0 status.Pid = 0
return status, nil return status, nil
}) })


@ -21,9 +21,7 @@ import (
"time" "time"
eventtypes "github.com/containerd/containerd/api/events" eventtypes "github.com/containerd/containerd/api/events"
//"github.com/containerd/containerd/api/services/events/v1"
"github.com/containerd/typeurl" "github.com/containerd/typeurl"
//gogotypes "github.com/gogo/protobuf/types"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"k8s.io/apimachinery/pkg/util/clock" "k8s.io/apimachinery/pkg/util/clock"
) )
@ -35,22 +33,22 @@ func TestBackOff(t *testing.T) {
inputQueues := map[string]*backOffQueue{ inputQueues := map[string]*backOffQueue{
"container1": { "container1": {
events: []interface{}{ events: []interface{}{
&eventtypes.TaskExit{ContainerID: "container1", ID: "1"}, &eventtypes.TaskOOM{ContainerID: "container1"},
&eventtypes.TaskExit{ContainerID: "container1", ID: "2"}, &eventtypes.TaskOOM{ContainerID: "container1"},
}, },
}, },
"container2": { "container2": {
events: []interface{}{ events: []interface{}{
&eventtypes.TaskExit{ContainerID: "container2", ID: "1"}, &eventtypes.TaskOOM{ContainerID: "container2"},
&eventtypes.TaskExit{ContainerID: "container2", ID: "2"}, &eventtypes.TaskOOM{ContainerID: "container2"},
}, },
}, },
} }
expectedQueues := map[string]*backOffQueue{ expectedQueues := map[string]*backOffQueue{
"container2": { "container2": {
events: []interface{}{ events: []interface{}{
&eventtypes.TaskExit{ContainerID: "container2", ID: "1"}, &eventtypes.TaskOOM{ContainerID: "container2"},
&eventtypes.TaskExit{ContainerID: "container2", ID: "2"}, &eventtypes.TaskOOM{ContainerID: "container2"},
}, },
expireTime: testClock.Now().Add(backOffInitDuration), expireTime: testClock.Now().Add(backOffInitDuration),
duration: backOffInitDuration, duration: backOffInitDuration,
@ -58,8 +56,8 @@ func TestBackOff(t *testing.T) {
}, },
"container1": { "container1": {
events: []interface{}{ events: []interface{}{
&eventtypes.TaskExit{ContainerID: "container1", ID: "1"}, &eventtypes.TaskOOM{ContainerID: "container1"},
&eventtypes.TaskExit{ContainerID: "container1", ID: "2"}, &eventtypes.TaskOOM{ContainerID: "container1"},
}, },
expireTime: testClock.Now().Add(backOffInitDuration), expireTime: testClock.Now().Add(backOffInitDuration),
duration: backOffInitDuration, duration: backOffInitDuration,


@ -23,12 +23,9 @@ import (
"regexp" "regexp"
"strconv" "strconv"
"strings" "strings"
"time"
"github.com/BurntSushi/toml" "github.com/BurntSushi/toml"
"github.com/containerd/containerd"
"github.com/containerd/containerd/containers" "github.com/containerd/containerd/containers"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/runtime/linux/runctypes" "github.com/containerd/containerd/runtime/linux/runctypes"
runcoptions "github.com/containerd/containerd/runtime/v2/runc/options" runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
"github.com/containerd/typeurl" "github.com/containerd/typeurl"
@ -477,31 +474,6 @@ func unknownSandboxStatus() sandboxstore.Status {
} }
} }
// unknownExitStatus generates containerd.Status for container exited with unknown exit code.
func unknownExitStatus() containerd.Status {
return containerd.Status{
Status: containerd.Stopped,
ExitStatus: unknownExitCode,
ExitTime: time.Now(),
}
}
// getTaskStatus returns status for a given task. It returns unknown exit status if
// the task is nil or not found.
func getTaskStatus(ctx context.Context, task containerd.Task) (containerd.Status, error) {
if task == nil {
return unknownExitStatus(), nil
}
status, err := task.Status(ctx)
if err != nil {
if !errdefs.IsNotFound(err) {
return containerd.Status{}, err
}
return unknownExitStatus(), nil
}
return status, nil
}
// getPassthroughAnnotations filters requested pod annotations by comparing // getPassthroughAnnotations filters requested pod annotations by comparing
// against permitted annotations for the given runtime. // against permitted annotations for the given runtime.
func getPassthroughAnnotations(podAnnotations map[string]string, func getPassthroughAnnotations(podAnnotations map[string]string,


@ -34,6 +34,7 @@ import (
"golang.org/x/net/context" "golang.org/x/net/context"
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
"github.com/containerd/cri/pkg/netns" "github.com/containerd/cri/pkg/netns"
cio "github.com/containerd/cri/pkg/server/io" cio "github.com/containerd/cri/pkg/server/io"
containerstore "github.com/containerd/cri/pkg/store/container" containerstore "github.com/containerd/cri/pkg/store/container"
@ -57,7 +58,7 @@ func (c *criService) recover(ctx context.Context) error {
return errors.Wrap(err, "failed to list sandbox containers") return errors.Wrap(err, "failed to list sandbox containers")
} }
for _, sandbox := range sandboxes { for _, sandbox := range sandboxes {
sb, err := loadSandbox(ctx, sandbox) sb, err := c.loadSandbox(ctx, sandbox)
if err != nil { if err != nil {
logrus.WithError(err).Errorf("Failed to load sandbox %q", sandbox.ID()) logrus.WithError(err).Errorf("Failed to load sandbox %q", sandbox.ID())
continue continue
@ -275,6 +276,22 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe
status.StartedAt = time.Now().UnixNano() status.StartedAt = time.Now().UnixNano()
status.Pid = t.Pid() status.Pid = t.Pid()
} }
// Wait for the task for exit monitor.
// wait is a long running background request, no timeout needed.
exitCh, err := t.Wait(ctrdutil.NamespacedContext())
if err != nil {
if !errdefs.IsNotFound(err) {
return errors.Wrap(err, "failed to wait for task")
}
// Container was in running state, but its task has been deleted,
// set unknown exited state.
status.FinishedAt = time.Now().UnixNano()
status.ExitCode = unknownExitCode
status.Reason = unknownExitReason
} else {
// Start exit monitor.
c.eventMonitor.startExitMonitor(context.Background(), id, status.Pid, exitCh)
}
case containerd.Stopped: case containerd.Stopped:
// Task is stopped. Update status and delete the task. // Task is stopped. Update status and delete the task.
if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
@ -304,7 +321,7 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe
} }
// loadSandbox loads sandbox from containerd. // loadSandbox loads sandbox from containerd.
func loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) { func (c *criService) loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) {
ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout) ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout)
defer cancel() defer cancel()
var sandbox sandboxstore.Sandbox var sandbox sandboxstore.Sandbox
@ -358,9 +375,20 @@ func loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.S
status.State = sandboxstore.StateNotReady status.State = sandboxstore.StateNotReady
} else { } else {
if taskStatus.Status == containerd.Running { if taskStatus.Status == containerd.Running {
// Task is running, set sandbox state as READY. // Wait for the task for sandbox monitor.
status.State = sandboxstore.StateReady // wait is a long running background request, no timeout needed.
status.Pid = t.Pid() exitCh, err := t.Wait(ctrdutil.NamespacedContext())
if err != nil {
if !errdefs.IsNotFound(err) {
return status, errors.Wrap(err, "failed to wait for task")
}
status.State = sandboxstore.StateNotReady
} else {
// Task is running, set sandbox state as READY.
status.State = sandboxstore.StateReady
status.Pid = t.Pid()
c.eventMonitor.startExitMonitor(context.Background(), meta.ID, status.Pid, exitCh)
}
} else { } else {
// Task is not running. Delete the task and set sandbox state as NOTREADY. // Task is not running. Delete the task and set sandbox state as NOTREADY.
if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {


@ -87,7 +87,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
RuntimeHandler: r.GetRuntimeHandler(), RuntimeHandler: r.GetRuntimeHandler(),
}, },
sandboxstore.Status{ sandboxstore.Status{
State: sandboxstore.StateInit, State: sandboxstore.StateUnknown,
}, },
) )
@ -253,88 +253,65 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
if err != nil { if err != nil {
return nil, errors.Wrap(err, "failed to get sandbox container info") return nil, errors.Wrap(err, "failed to get sandbox container info")
} }
// Create sandbox task in containerd.
log.Tracef("Create sandbox container (id=%q, name=%q).",
id, name)
var taskOpts []containerd.NewTaskOpts
// TODO(random-liu): Remove this after shim v1 is deprecated.
if c.config.NoPivot && ociRuntime.Type == linuxRuntime {
taskOpts = append(taskOpts, containerd.WithNoPivotRoot)
}
// We don't need stdio for sandbox container.
task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...)
if err != nil {
return nil, errors.Wrap(err, "failed to create containerd task")
}
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Cleanup the sandbox container if an error is returned.
if _, err := task.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
logrus.WithError(err).Errorf("Failed to delete sandbox container %q", id)
}
}
}()
// wait is a long running background request, no timeout needed.
exitCh, err := task.Wait(ctrdutil.NamespacedContext())
if err != nil {
return nil, errors.Wrap(err, "failed to wait for sandbox container task")
}
if err := task.Start(ctx); err != nil {
return nil, errors.Wrapf(err, "failed to start sandbox container task %q", id)
}
if err := sandbox.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) { if err := sandbox.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
// Set the pod sandbox as ready after successfully start sandbox container.
status.Pid = task.Pid()
status.State = sandboxstore.StateReady
status.CreatedAt = info.CreatedAt status.CreatedAt = info.CreatedAt
return status, nil return status, nil
}); err != nil { }); err != nil {
return nil, errors.Wrap(err, "failed to update sandbox created timestamp") return nil, errors.Wrap(err, "failed to update sandbox status")
} }
// Add sandbox into sandbox store in INIT state. // Add sandbox into sandbox store in INIT state.
sandbox.Container = container sandbox.Container = container
if err := c.sandboxStore.Add(sandbox); err != nil { if err := c.sandboxStore.Add(sandbox); err != nil {
return nil, errors.Wrapf(err, "failed to add sandbox %+v into store", sandbox) return nil, errors.Wrapf(err, "failed to add sandbox %+v into store", sandbox)
} }
defer func() {
// Delete sandbox from sandbox store if there is an error. // start the monitor after adding sandbox into the store, this ensures
if retErr != nil { // that sandbox is in the store, when event monitor receives the TaskExit event.
c.sandboxStore.Delete(id)
}
}()
// NOTE(random-liu): Sandbox state only stay in INIT state after this point
// and before the end of this function.
// * If `Update` succeeds, sandbox state will become READY in one transaction.
// * If `Update` fails, sandbox will be removed from the store in the defer above.
// * If containerd stops at any point before `Update` finishes, because sandbox
// state is not checkpointed, it will be recovered from corresponding containerd task
// status during restart:
// * If the task is running, sandbox state will be READY,
// * Or else, sandbox state will be NOTREADY.
// //
// In any case, sandbox will leave INIT state, so it's safe to ignore sandbox // TaskOOM from containerd may come before sandbox is added to store,
// in INIT state in other functions. // but we don't care about sandbox TaskOOM right now, so it is fine.
c.eventMonitor.startExitMonitor(context.Background(), id, task.Pid(), exitCh)
// Start sandbox container in one transaction to avoid race condition with
// event monitor.
if err := sandbox.Status.Update(func(status sandboxstore.Status) (_ sandboxstore.Status, retErr error) {
// NOTE(random-liu): We should not change the sandbox state to NOTREADY
// if `Update` fails.
//
// If `Update` fails, the sandbox will be cleaned up by all the defers
// above. We should not let user see this sandbox, or else they will
// see the sandbox disappear after the defer clean up, which may confuse
// them.
//
// Given so, we should keep the sandbox in INIT state if `Update` fails,
// and ignore sandbox in INIT state in all the inspection functions.
// Create sandbox task in containerd.
log.Tracef("Create sandbox container (id=%q, name=%q).",
id, name)
var taskOpts []containerd.NewTaskOpts
// TODO(random-liu): Remove this after shim v1 is deprecated.
if c.config.NoPivot && ociRuntime.Type == linuxRuntime {
taskOpts = append(taskOpts, containerd.WithNoPivotRoot)
}
// We don't need stdio for sandbox container.
task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...)
if err != nil {
return status, errors.Wrap(err, "failed to create containerd task")
}
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Cleanup the sandbox container if an error is returned.
// It's possible that task is deleted by event monitor.
if _, err := task.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
logrus.WithError(err).Errorf("Failed to delete sandbox container %q", id)
}
}
}()
if err := task.Start(ctx); err != nil {
return status, errors.Wrapf(err, "failed to start sandbox container task %q", id)
}
// Set the pod sandbox as ready after successfully start sandbox container.
status.Pid = task.Pid()
status.State = sandboxstore.StateReady
return status, nil
}); err != nil {
return nil, errors.Wrap(err, "failed to start sandbox container")
}
return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil
} }


@ -19,7 +19,6 @@ package server
import ( import (
"time" "time"
"github.com/containerd/containerd"
eventtypes "github.com/containerd/containerd/api/events" eventtypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/errdefs"
cni "github.com/containerd/go-cni" cni "github.com/containerd/go-cni"
@ -29,6 +28,7 @@ import (
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
sandboxstore "github.com/containerd/cri/pkg/store/sandbox" sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
) )
@ -97,6 +97,7 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb
// `task.Delete` is not called here because it will be called when // `task.Delete` is not called here because it will be called when
// the event monitor handles the `TaskExit` event. // the event monitor handles the `TaskExit` event.
func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxstore.Sandbox) error { func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxstore.Sandbox) error {
id := sandbox.ID
container := sandbox.Container container := sandbox.Container
state := sandbox.Status.Get().State state := sandbox.Status.Get().State
task, err := container.Task(ctx, nil) task, err := container.Task(ctx, nil)
@ -105,29 +106,35 @@ func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxst
return errors.Wrap(err, "failed to get sandbox container") return errors.Wrap(err, "failed to get sandbox container")
} }
// Don't return for unknown state, some cleanup needs to be done. // Don't return for unknown state, some cleanup needs to be done.
if state != sandboxstore.StateUnknown { if state == sandboxstore.StateUnknown {
return nil return cleanupUnknownSandbox(ctx, id, sandbox)
} }
// Task is an interface, explicitly set it to nil just in case. return nil
task = nil
} }
// Handle unknown state. // Handle unknown state.
// The cleanup logic is the same with container unknown state. // The cleanup logic is the same with container unknown state.
if state == sandboxstore.StateUnknown { if state == sandboxstore.StateUnknown {
status, err := getTaskStatus(ctx, task) // Start an exit handler for containers in unknown state.
waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext())
defer waitCancel()
exitCh, err := task.Wait(waitCtx)
if err != nil { if err != nil {
return errors.Wrapf(err, "failed to get task status for %q", sandbox.ID) if !errdefs.IsNotFound(err) {
} return errors.Wrap(err, "failed to wait for task")
switch status.Status { }
case containerd.Running, containerd.Created: return cleanupUnknownSandbox(ctx, id, sandbox)
// The task is still running, continue stopping the task.
case containerd.Stopped:
// The task has exited, explicitly cleanup.
return cleanupUnknownSandbox(ctx, sandbox.ID, status, sandbox)
default:
return errors.Wrapf(err, "unsupported task status %q", status.Status)
} }
exitCtx, exitCancel := context.WithCancel(context.Background())
stopCh := c.eventMonitor.startExitMonitor(exitCtx, id, task.Pid(), exitCh)
defer func() {
exitCancel()
// This ensures that exit monitor is stopped before
// `Wait` is cancelled, so no exit event is generated
// because of the `Wait` cancellation.
<-stopCh
}()
} }
// Kill the sandbox container. // Kill the sandbox container.
@ -166,14 +173,13 @@ func (c *criService) teardownPod(id string, path string, config *runtime.PodSand
} }
// cleanupUnknownSandbox cleanup stopped sandbox in unknown state. // cleanupUnknownSandbox cleanup stopped sandbox in unknown state.
func cleanupUnknownSandbox(ctx context.Context, id string, status containerd.Status, func cleanupUnknownSandbox(ctx context.Context, id string, sandbox sandboxstore.Sandbox) error {
sandbox sandboxstore.Sandbox) error {
// Reuse handleSandboxExit to do the cleanup. // Reuse handleSandboxExit to do the cleanup.
return handleSandboxExit(ctx, &eventtypes.TaskExit{ return handleSandboxExit(ctx, &eventtypes.TaskExit{
ContainerID: id, ContainerID: id,
ID: id, ID: id,
Pid: 0, Pid: 0,
ExitStatus: status.ExitStatus, ExitStatus: unknownExitCode,
ExitedAt: status.ExitTime, ExitedAt: time.Now(),
}, sandbox) }, sandbox)
} }


@ -112,7 +112,7 @@ func TestContainerStore(t *testing.T) {
ExitCode: 3, ExitCode: 3,
Reason: "TestReason-4a333", Reason: "TestReason-4a333",
Message: "TestMessage-4a333", Message: "TestMessage-4a333",
Removing: true, Starting: true,
}, },
"4abcd": { "4abcd": {
Pid: 4, Pid: 4,


@ -88,6 +88,10 @@ type Status struct {
// Human-readable message indicating details about why container is in its // Human-readable message indicating details about why container is in its
// current state. // current state.
Message string Message string
// Starting indicates that the container is in starting state.
// This field doesn't need to be checkpointed.
// TODO(now): Add unit test.
Starting bool `json:"-"`
// Removing indicates that the container is in removing state. // Removing indicates that the container is in removing state.
// This field doesn't need to be checkpointed. // This field doesn't need to be checkpointed.
Removing bool `json:"-"` Removing bool `json:"-"`


@ -75,6 +75,7 @@ func TestStatusEncodeDecode(t *testing.T) {
Reason: "test-reason", Reason: "test-reason",
Message: "test-message", Message: "test-message",
Removing: true, Removing: true,
Starting: true,
} }
assert := assertlib.New(t) assert := assertlib.New(t)
data, err := s.encode() data, err := s.encode()
@ -82,6 +83,7 @@ func TestStatusEncodeDecode(t *testing.T) {
newS := &Status{} newS := &Status{}
assert.NoError(newS.decode(data)) assert.NoError(newS.decode(data))
s.Removing = false // Removing should not be encoded. s.Removing = false // Removing should not be encoded.
s.Starting = false // Starting should not be encoded.
assert.Equal(s, newS) assert.Equal(s, newS)
unsupported, err := json.Marshal(&versionedStatus{ unsupported, err := json.Marshal(&versionedStatus{


@ -86,22 +86,9 @@ func (s *Store) Add(sb Sandbox) error {
return nil return nil
} }
// Get returns the sandbox with specified id. Returns store.ErrNotExist // Get returns the sandbox with specified id.
// if the sandbox doesn't exist. // Returns store.ErrNotExist if the sandbox doesn't exist.
func (s *Store) Get(id string) (Sandbox, error) { func (s *Store) Get(id string) (Sandbox, error) {
sb, err := s.GetAll(id)
if err != nil {
return sb, err
}
if sb.Status.Get().State == StateInit {
return Sandbox{}, store.ErrNotExist
}
return sb, nil
}
// GetAll returns the sandbox with specified id, including sandbox in unknown
// state. Returns store.ErrNotExist if the sandbox doesn't exist.
func (s *Store) GetAll(id string) (Sandbox, error) {
s.lock.RLock() s.lock.RLock()
defer s.lock.RUnlock() defer s.lock.RUnlock()
id, err := s.idIndex.Get(id) id, err := s.idIndex.Get(id)
@ -123,9 +110,6 @@ func (s *Store) List() []Sandbox {
defer s.lock.RUnlock() defer s.lock.RUnlock()
var sandboxes []Sandbox var sandboxes []Sandbox
for _, sb := range s.sandboxes { for _, sb := range s.sandboxes {
if sb.Status.Get().State == StateInit {
continue
}
sandboxes = append(sandboxes, sb) sandboxes = append(sandboxes, sb)
} }
return sandboxes return sandboxes


@ -106,7 +106,7 @@ func TestSandboxStore(t *testing.T) {
}, },
NetNSPath: "TestNetNS-3defg", NetNSPath: "TestNetNS-3defg",
}, },
Status{State: StateInit}, Status{State: StateUnknown},
) )
assert := assertlib.New(t) assert := assertlib.New(t)
s := NewStore() s := NewStore()
@ -125,21 +125,16 @@ func TestSandboxStore(t *testing.T) {
assert.Equal(sb, got) assert.Equal(sb, got)
} }
t.Logf("should not be able to get unknown sandbox") t.Logf("should be able to get sandbox in unknown state with Get")
got, err := s.Get(unknown.ID) got, err := s.Get(unknown.ID)
assert.Equal(store.ErrNotExist, err)
assert.Equal(Sandbox{}, got)
t.Logf("should be able to get unknown sandbox with GetAll")
got, err = s.GetAll(unknown.ID)
assert.NoError(err) assert.NoError(err)
assert.Equal(unknown, got) assert.Equal(unknown, got)
t.Logf("should be able to list sandboxes") t.Logf("should be able to list sandboxes")
sbNum := len(sandboxes) + 1
sbs := s.List() sbs := s.List()
assert.Len(sbs, len(sandboxes)) assert.Len(sbs, sbNum)
sbNum := len(sandboxes)
for testID, v := range sandboxes { for testID, v := range sandboxes {
truncID := genTruncIndex(testID) truncID := genTruncIndex(testID)


@ -26,11 +26,11 @@ import (
// | | // | |
// | Create(Run) | Load // | Create(Run) | Load
// | | // | |
// Start +----v----+ | // Start | |
// (failed) | | | // (failed) | |
// +-------------+ INIT | +-----------+ // +------------------+ +-----------+
// | | | | | // | | | |
// | +----+----+ | | // | | | |
// | | | | // | | | |
// | | Start(Run) | | // | | Start(Run) | |
// | | | | // | | | |
@ -53,23 +53,17 @@ import (
// +-------------> DELETED // +-------------> DELETED
// State is the sandbox state we use in containerd/cri. // State is the sandbox state we use in containerd/cri.
// It includes init and unknown, which are internal states not defined in CRI. // It includes unknown, which is an internal state not defined in CRI.
// The state mapping from internal states to CRI states: // The state mapping from internal states to CRI states:
// * ready -> ready // * ready -> ready
// * not ready -> not ready // * not ready -> not ready
// * init -> not exist
// * unknown -> not ready // * unknown -> not ready
type State uint32 type State uint32
const ( const (
// StateInit is init state of sandbox. Sandbox
// is in init state before its corresponding sandbox container
// is created. Sandbox in init state should be ignored by most
// functions, unless the caller needs to update sandbox state.
StateInit State = iota
// StateReady is ready state, it means sandbox container // StateReady is ready state, it means sandbox container
// is running. // is running.
StateReady StateReady State = iota
// StateNotReady is notready state, it ONLY means sandbox // StateNotReady is notready state, it ONLY means sandbox
// container is not running. // container is not running.
// StopPodSandbox should still be called for NOTREADY sandbox to // StopPodSandbox should still be called for NOTREADY sandbox to


@ -28,7 +28,7 @@ func TestStatus(t *testing.T) {
testStatus := Status{ testStatus := Status{
Pid: 123, Pid: 123,
CreatedAt: time.Now(), CreatedAt: time.Now(),
State: StateInit, State: StateUnknown,
} }
updateStatus := Status{ updateStatus := Status{
Pid: 456, Pid: 456,