Signed-off-by: Lantao Liu <lantaol@google.com>
This commit is contained in:
Lantao Liu
2019-04-26 10:54:51 -07:00
parent 3a3f0aac88
commit 5d17ed2302
20 changed files with 435 additions and 301 deletions

View File

@@ -17,6 +17,7 @@ limitations under the License.
package cri
import (
"context"
"flag"
"path/filepath"
"time"
@@ -73,7 +74,7 @@ func initCRIService(ic *plugin.InitContext) (interface{}, error) {
}
log.G(ctx).Infof("Start cri plugin with config %+v", c)
if err := validateConfig(&c); err != nil {
if err := validateConfig(ctx, &c); err != nil {
return nil, errors.Wrap(err, "invalid config")
}
@@ -111,14 +112,36 @@ func initCRIService(ic *plugin.InitContext) (interface{}, error) {
}
// validateConfig validates the given configuration.
func validateConfig(c *criconfig.Config) error {
// It is an error to provide both an UntrustedWorkloadRuntime & define an 'untrusted' runtime.
if _, ok := c.ContainerdConfig.Runtimes[criconfig.RuntimeUntrusted]; ok {
if c.ContainerdConfig.UntrustedWorkloadRuntime.Type != "" {
return errors.New("conflicting definitions: configuration includes untrusted_workload_runtime and runtimes['untrusted']")
}
func validateConfig(ctx context.Context, c *criconfig.Config) error {
if c.ContainerdConfig.Runtimes == nil {
c.ContainerdConfig.Runtimes = make(map[string]criconfig.Runtime)
}
// Validation for deprecated untrusted_workload_runtime.
if c.ContainerdConfig.UntrustedWorkloadRuntime.Type != "" {
log.G(ctx).Warning("`untrusted_workload_runtime` is deprecated, please use `untrusted` runtime in `runtimes` instead")
if _, ok := c.ContainerdConfig.Runtimes[criconfig.RuntimeUntrusted]; ok {
return errors.Errorf("conflicting definitions: configuration includes both `untrusted_workload_runtime` and `runtimes[%q]`", criconfig.RuntimeUntrusted)
}
c.ContainerdConfig.Runtimes[criconfig.RuntimeUntrusted] = c.ContainerdConfig.UntrustedWorkloadRuntime
}
// Validation for deprecated default_runtime field.
if c.ContainerdConfig.DefaultRuntime.Type != "" {
log.G(ctx).Warning("`default_runtime` is deprecated, please use `default_runtime_name` to reference the default configuration you have defined in `runtimes`")
c.ContainerdConfig.DefaultRuntimeName = criconfig.RuntimeDefault
c.ContainerdConfig.Runtimes[criconfig.RuntimeDefault] = c.ContainerdConfig.DefaultRuntime
}
// Validation for default_runtime_name
if c.ContainerdConfig.DefaultRuntimeName == "" {
return errors.New("`default_runtime_name` is empty")
}
if _, ok := c.ContainerdConfig.Runtimes[c.ContainerdConfig.DefaultRuntimeName]; !ok {
return errors.New("no corresponding runtime configured in `runtimes` for `default_runtime_name`")
}
// Validation for stream_idle_timeout
if c.StreamIdleTimeout != "" {
if _, err := time.ParseDuration(c.StreamIdleTimeout); err != nil {
return errors.Wrap(err, "invalid stream idle timeout")

View File

@@ -47,14 +47,14 @@ type Runtime struct {
type ContainerdConfig struct {
// Snapshotter is the snapshotter used by containerd.
Snapshotter string `toml:"snapshotter" json:"snapshotter"`
// DefaultRuntimeName is the default runtime name to use from the runtimes table.
DefaultRuntimeName string `toml:"default_runtime_name" json:"defaultRuntimeName"`
// DefaultRuntime is the default runtime to use in containerd.
// This runtime is used when no runtime handler (or the empty string) is provided.
// DEPRECATED: use DefaultRuntimeName instead. Remove in containerd 1.4.
DefaultRuntime Runtime `toml:"default_runtime" json:"defaultRuntime"`
// UntrustedWorkloadRuntime is a runtime to run untrusted workloads on it.
// DEPRECATED: use Runtimes instead. If provided, this runtime is mapped to the runtime handler
// named 'untrusted'. It is a configuration error to provide both the (now deprecated)
// UntrustedWorkloadRuntime and a handler in the Runtimes handler map (below) for 'untrusted'
// workloads at the same time. Please provide one or the other.
// DEPRECATED: use `untrusted` runtime in Runtimes instead. Remove in containerd 1.4.
UntrustedWorkloadRuntime Runtime `toml:"untrusted_workload_runtime" json:"untrustedWorkloadRuntime"`
// Runtimes is a map from CRI RuntimeHandler strings, which specify types of runtime
// configurations, to the matching configurations.
@@ -195,13 +195,14 @@ func DefaultConfig() PluginConfig {
NetworkPluginConfTemplate: "",
},
ContainerdConfig: ContainerdConfig{
Snapshotter: containerd.DefaultSnapshotter,
DefaultRuntime: Runtime{
Type: "io.containerd.runtime.v1.linux",
Engine: "",
Root: "",
Snapshotter: containerd.DefaultSnapshotter,
DefaultRuntimeName: "runc",
NoPivot: false,
Runtimes: map[string]Runtime{
"runc": {
Type: "io.containerd.runc.v1",
},
},
NoPivot: false,
},
StreamServerAddress: "127.0.0.1",
StreamServerPort: "0",
@@ -229,4 +230,6 @@ func DefaultConfig() PluginConfig {
const (
// RuntimeUntrusted is the implicit runtime defined for ContainerdConfig.UntrustedWorkloadRuntime
RuntimeUntrusted = "untrusted"
// RuntimeDefault is the implicit runtime defined for ContainerdConfig.DefaultRuntime
RuntimeDefault = "default"
)

View File

@@ -105,6 +105,9 @@ func setContainerRemoving(container containerstore.Container) error {
if status.State() == runtime.ContainerState_CONTAINER_UNKNOWN {
return status, errors.New("container state is unknown, to stop first")
}
if status.Starting {
return status, errors.New("container is in starting state, can't be removed")
}
if status.Removing {
return status, errors.New("container is already in removing state")
}

View File

@@ -38,64 +38,48 @@ import (
// StartContainer starts the container.
func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) {
container, err := c.containerStore.Get(r.GetContainerId())
cntr, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
}
var startErr error
// update container status in one transaction to avoid race with event monitor.
if err := container.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
// Always apply status change no matter startContainer fails or not. Because startContainer
// may change container state no matter it fails or succeeds.
startErr = c.startContainer(ctx, container, &status)
return status, nil
}); startErr != nil {
return nil, startErr
} else if err != nil {
return nil, errors.Wrapf(err, "failed to update container %q metadata", container.ID)
}
return &runtime.StartContainerResponse{}, nil
}
// startContainer actually starts the container. The function needs to be run in one transaction. Any updates
// to the status passed in will be applied no matter the function returns error or not.
func (c *criService) startContainer(ctx context.Context,
cntr containerstore.Container,
status *containerstore.Status) (retErr error) {
id := cntr.ID
meta := cntr.Metadata
container := cntr.Container
config := meta.Config
// Return error if container is not in created state.
if status.State() != runtime.ContainerState_CONTAINER_CREATED {
return errors.Errorf("container %q is in %s state", id, criContainerStateToString(status.State()))
// Set starting state to prevent other start/remove operations against this container
// while it's being started.
if err := setContainerStarting(cntr); err != nil {
return nil, errors.Wrapf(err, "failed to set starting state for container %q", id)
}
// Do not start the container when there is a removal in progress.
if status.Removing {
return errors.Errorf("container %q is in removing state", id)
}
defer func() {
if retErr != nil {
// Set container to exited if fail to start.
status.Pid = 0
status.FinishedAt = time.Now().UnixNano()
status.ExitCode = errorStartExitCode
status.Reason = errorStartReason
status.Message = retErr.Error()
if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
status.Pid = 0
status.FinishedAt = time.Now().UnixNano()
status.ExitCode = errorStartExitCode
status.Reason = errorStartReason
status.Message = retErr.Error()
return status, nil
}); err != nil {
logrus.WithError(err).Errorf("failed to set start failure state for container %q", id)
}
}
if err := resetContainerStarting(cntr); err != nil {
logrus.WithError(err).Errorf("failed to reset starting state for container %q", id)
}
}()
// Get sandbox config from sandbox store.
sandbox, err := c.sandboxStore.Get(meta.SandboxID)
if err != nil {
return errors.Wrapf(err, "sandbox %q not found", meta.SandboxID)
return nil, errors.Wrapf(err, "sandbox %q not found", meta.SandboxID)
}
sandboxID := meta.SandboxID
if sandbox.Status.Get().State != sandboxstore.StateReady {
return errors.Errorf("sandbox container %q is not running", sandboxID)
return nil, errors.Errorf("sandbox container %q is not running", sandboxID)
}
ioCreation := func(id string) (_ containerdio.IO, err error) {
@@ -110,7 +94,7 @@ func (c *criService) startContainer(ctx context.Context,
ctrInfo, err := container.Info(ctx)
if err != nil {
return errors.Wrap(err, "failed to get container info")
return nil, errors.Wrap(err, "failed to get container info")
}
var taskOpts []containerd.NewTaskOpts
@@ -120,7 +104,7 @@ func (c *criService) startContainer(ctx context.Context,
}
task, err := container.NewTask(ctx, ioCreation, taskOpts...)
if err != nil {
return errors.Wrap(err, "failed to create containerd task")
return nil, errors.Wrap(err, "failed to create containerd task")
}
defer func() {
if retErr != nil {
@@ -133,15 +117,61 @@ func (c *criService) startContainer(ctx context.Context,
}
}()
// wait is a long running background request, no timeout needed.
exitCh, err := task.Wait(ctrdutil.NamespacedContext())
if err != nil {
return nil, errors.Wrap(err, "failed to wait for containerd task")
}
// Start containerd task.
if err := task.Start(ctx); err != nil {
return errors.Wrapf(err, "failed to start containerd task %q", id)
return nil, errors.Wrapf(err, "failed to start containerd task %q", id)
}
// Update container start timestamp.
status.Pid = task.Pid()
status.StartedAt = time.Now().UnixNano()
return nil
if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
status.Pid = task.Pid()
status.StartedAt = time.Now().UnixNano()
return status, nil
}); err != nil {
return nil, errors.Wrapf(err, "failed to update container %q state", id)
}
// start the monitor after updating container state, this ensures that
// event monitor receives the TaskExit event and update container state
// after this.
c.eventMonitor.startExitMonitor(context.Background(), id, task.Pid(), exitCh)
return &runtime.StartContainerResponse{}, nil
}
// setContainerStarting sets the container into starting state. In starting state, the
// container will not be removed or started again.
func setContainerStarting(container containerstore.Container) error {
return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
// Return error if container is not in created state.
if status.State() != runtime.ContainerState_CONTAINER_CREATED {
return status, errors.Errorf("container is in %s state", criContainerStateToString(status.State()))
}
// Do not start the container when there is a removal in progress.
if status.Removing {
return status, errors.New("container is in removing state, can't be started")
}
if status.Starting {
return status, errors.New("container is already in starting state")
}
status.Starting = true
return status, nil
})
}
// resetContainerStarting resets the container starting state on start failure. So
// that we could remove the container later.
func resetContainerStarting(container containerstore.Container) error {
return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
status.Starting = false
return status, nil
})
}
// createContainerLoggers creates container loggers and return write closer for stdout and stderr.

View File

@@ -28,6 +28,7 @@ import (
"golang.org/x/sys/unix"
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
"github.com/containerd/cri/pkg/store"
containerstore "github.com/containerd/cri/pkg/store/container"
)
@@ -74,36 +75,34 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore
return errors.Wrapf(err, "failed to get task for container %q", id)
}
// Don't return for unknown state, some cleanup needs to be done.
if state != runtime.ContainerState_CONTAINER_UNKNOWN {
return nil
if state == runtime.ContainerState_CONTAINER_UNKNOWN {
return cleanupUnknownContainer(ctx, id, container)
}
// Task is an interface, explicitly set it to nil just in case.
task = nil
return nil
}
// Handle unknown state.
if state == runtime.ContainerState_CONTAINER_UNKNOWN {
status, err := getTaskStatus(ctx, task)
// Start an exit handler for containers in unknown state.
waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext())
defer waitCancel()
exitCh, err := task.Wait(waitCtx)
if err != nil {
return errors.Wrapf(err, "failed to get task status for %q", id)
}
switch status.Status {
case containerd.Running, containerd.Created:
// The task is still running, continue stopping the task.
case containerd.Stopped:
// The task has exited. If the task exited after containerd
// started, the event monitor will receive its exit event; if it
// exited before containerd started, the event monitor will never
// receive its exit event.
// However, we can't tell that because the task state was not
// successfully loaded during containerd start (container is
// in UNKNOWN state).
// So always do cleanup here, just in case that we've missed the
// exit event.
return cleanupUnknownContainer(ctx, id, status, container)
default:
return errors.Wrapf(err, "unsupported task status %q", status.Status)
if !errdefs.IsNotFound(err) {
return errors.Wrapf(err, "failed to wait for task for %q", id)
}
return cleanupUnknownContainer(ctx, id, container)
}
exitCtx, exitCancel := context.WithCancel(context.Background())
stopCh := c.eventMonitor.startExitMonitor(exitCtx, id, task.Pid(), exitCh)
defer func() {
exitCancel()
// This ensures that exit monitor is stopped before
// `Wait` is cancelled, so no exit event is generated
// because of the `Wait` cancellation.
<-stopCh
}()
}
// We only need to kill the task. The event handler will Delete the
@@ -176,19 +175,13 @@ func (c *criService) waitContainerStop(ctx context.Context, container containers
}
// cleanupUnknownContainer cleanup stopped container in unknown state.
func cleanupUnknownContainer(ctx context.Context, id string, status containerd.Status,
cntr containerstore.Container) error {
func cleanupUnknownContainer(ctx context.Context, id string, cntr containerstore.Container) error {
// Reuse handleContainerExit to do the cleanup.
// NOTE(random-liu): If the task did exit after containerd started, both
// the event monitor and the cleanup function would update the container
// state. The final container state will be whatever being updated first.
// There is no way to completely avoid this race condition, and for best
// effort unknown state container cleanup, this seems acceptable.
return handleContainerExit(ctx, &eventtypes.TaskExit{
ContainerID: id,
ID: id,
Pid: 0,
ExitStatus: status.ExitStatus,
ExitedAt: status.ExitTime,
ExitStatus: unknownExitCode,
ExitedAt: time.Now(),
}, cntr)
}

View File

@@ -50,13 +50,17 @@ const (
// Add a timeout for each event handling, events that timeout will be requeued and
// handled again in the future.
handleEventTimeout = 10 * time.Second
exitChannelSize = 1024
)
// eventMonitor monitors containerd event and updates internal state correspondingly.
// TODO(random-liu): Handle event for each container in a separate goroutine.
type eventMonitor struct {
c *criService
ch <-chan *events.Envelope
c *criService
ch <-chan *events.Envelope
// exitCh receives container/sandbox exit events from exit monitors.
exitCh chan *eventtypes.TaskExit
errCh <-chan error
ctx context.Context
cancel context.CancelFunc
@@ -89,6 +93,7 @@ func newEventMonitor(c *criService) *eventMonitor {
c: c,
ctx: ctx,
cancel: cancel,
exitCh: make(chan *eventtypes.TaskExit, exitChannelSize),
backOff: newBackOff(),
}
}
@@ -98,13 +103,38 @@ func (em *eventMonitor) subscribe(subscriber events.Subscriber) {
// note: filters are any match, if you want any match but not in namespace foo
// then you have to manually filter namespace foo
filters := []string{
`topic=="/tasks/exit"`,
`topic=="/tasks/oom"`,
`topic~="/images/"`,
}
em.ch, em.errCh = subscriber.Subscribe(em.ctx, filters...)
}
// startExitMonitor starts an exit monitor for a given container/sandbox.
func (em *eventMonitor) startExitMonitor(ctx context.Context, id string, pid uint32, exitCh <-chan containerd.ExitStatus) <-chan struct{} {
stopCh := make(chan struct{})
go func() {
defer close(stopCh)
select {
case exitRes := <-exitCh:
exitStatus, exitedAt, err := exitRes.Result()
if err != nil {
logrus.WithError(err).Errorf("Failed to get task exit status for %q", id)
exitStatus = unknownExitCode
exitedAt = time.Now()
}
em.exitCh <- &eventtypes.TaskExit{
ContainerID: id,
ID: id,
Pid: pid,
ExitStatus: exitStatus,
ExitedAt: exitedAt,
}
case <-ctx.Done():
}
}()
return stopCh
}
func convertEvent(e *gogotypes.Any) (string, interface{}, error) {
id := ""
evt, err := typeurl.UnmarshalAny(e)
@@ -113,8 +143,6 @@ func convertEvent(e *gogotypes.Any) (string, interface{}, error) {
}
switch e := evt.(type) {
case *eventtypes.TaskExit:
id = e.ContainerID
case *eventtypes.TaskOOM:
id = e.ContainerID
case *eventtypes.ImageCreate:
@@ -142,6 +170,18 @@ func (em *eventMonitor) start() <-chan error {
defer close(errCh)
for {
select {
case e := <-em.exitCh:
logrus.Debugf("Received exit event %+v", e)
id := e.ID
if em.backOff.isInBackOff(id) {
logrus.Infof("Events for %q is in backoff, enqueue event %+v", id, e)
em.backOff.enBackOff(id, e)
break
}
if err := em.handleEvent(e); err != nil {
logrus.WithError(err).Errorf("Failed to handle exit event %+v for %s", e, id)
em.backOff.enBackOff(id, e)
}
case e := <-em.ch:
logrus.Debugf("Received containerd event timestamp - %v, namespace - %q, topic - %q", e.Timestamp, e.Namespace, e.Topic)
if e.Namespace != constants.K8sContainerdNamespace {
@@ -213,8 +253,7 @@ func (em *eventMonitor) handleEvent(any interface{}) error {
} else if err != store.ErrNotExist {
return errors.Wrap(err, "can't find container for TaskExit event")
}
// Use GetAll to include sandbox in init state.
sb, err := em.c.sandboxStore.GetAll(e.ID)
sb, err := em.c.sandboxStore.Get(e.ID)
if err == nil {
if err := handleSandboxExit(ctx, e, sb); err != nil {
return errors.Wrap(err, "failed to handle sandbox TaskExit event")
@@ -322,15 +361,7 @@ func handleSandboxExit(ctx context.Context, e *eventtypes.TaskExit, sb sandboxst
}
}
err = sb.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
// NOTE(random-liu): We SHOULD NOT change INIT state here.
// If sandbox state is INIT when event monitor receives an TaskExit event,
// it means that sandbox start has failed. In that case, `RunPodSandbox` will
// cleanup everything immediately.
// Once sandbox state goes out of INIT, it becomes visable to the user, which
// is not what we want.
if status.State != sandboxstore.StateInit {
status.State = sandboxstore.StateNotReady
}
status.State = sandboxstore.StateNotReady
status.Pid = 0
return status, nil
})

View File

@@ -23,12 +23,9 @@ import (
"regexp"
"strconv"
"strings"
"time"
"github.com/BurntSushi/toml"
"github.com/containerd/containerd"
"github.com/containerd/containerd/containers"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/runtime/linux/runctypes"
runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
"github.com/containerd/typeurl"
@@ -477,31 +474,6 @@ func unknownSandboxStatus() sandboxstore.Status {
}
}
// unknownExitStatus generates containerd.Status for container exited with unknown exit code.
func unknownExitStatus() containerd.Status {
return containerd.Status{
Status: containerd.Stopped,
ExitStatus: unknownExitCode,
ExitTime: time.Now(),
}
}
// getTaskStatus returns status for a given task. It returns unknown exit status if
// the task is nil or not found.
func getTaskStatus(ctx context.Context, task containerd.Task) (containerd.Status, error) {
if task == nil {
return unknownExitStatus(), nil
}
status, err := task.Status(ctx)
if err != nil {
if !errdefs.IsNotFound(err) {
return containerd.Status{}, err
}
return unknownExitStatus(), nil
}
return status, nil
}
// getPassthroughAnnotations filters requested pod annotations by comparing
// against permitted annotations for the given runtime.
func getPassthroughAnnotations(podAnnotations map[string]string,

View File

@@ -34,6 +34,7 @@ import (
"golang.org/x/net/context"
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
"github.com/containerd/cri/pkg/netns"
cio "github.com/containerd/cri/pkg/server/io"
containerstore "github.com/containerd/cri/pkg/store/container"
@@ -57,7 +58,7 @@ func (c *criService) recover(ctx context.Context) error {
return errors.Wrap(err, "failed to list sandbox containers")
}
for _, sandbox := range sandboxes {
sb, err := loadSandbox(ctx, sandbox)
sb, err := c.loadSandbox(ctx, sandbox)
if err != nil {
logrus.WithError(err).Errorf("Failed to load sandbox %q", sandbox.ID())
continue
@@ -275,6 +276,22 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe
status.StartedAt = time.Now().UnixNano()
status.Pid = t.Pid()
}
// Wait for the task for exit monitor.
// wait is a long running background request, no timeout needed.
exitCh, err := t.Wait(ctrdutil.NamespacedContext())
if err != nil {
if !errdefs.IsNotFound(err) {
return errors.Wrap(err, "failed to wait for task")
}
// Container was in running state, but its task has been deleted,
// set unknown exited state.
status.FinishedAt = time.Now().UnixNano()
status.ExitCode = unknownExitCode
status.Reason = unknownExitReason
} else {
// Start exit monitor.
c.eventMonitor.startExitMonitor(context.Background(), id, status.Pid, exitCh)
}
case containerd.Stopped:
// Task is stopped. Updata status and delete the task.
if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
@@ -304,7 +321,7 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe
}
// loadSandbox loads sandbox from containerd.
func loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) {
func (c *criService) loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) {
ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout)
defer cancel()
var sandbox sandboxstore.Sandbox
@@ -358,9 +375,20 @@ func loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.S
status.State = sandboxstore.StateNotReady
} else {
if taskStatus.Status == containerd.Running {
// Task is running, set sandbox state as READY.
status.State = sandboxstore.StateReady
status.Pid = t.Pid()
// Wait for the task for sandbox monitor.
// wait is a long running background request, no timeout needed.
exitCh, err := t.Wait(ctrdutil.NamespacedContext())
if err != nil {
if !errdefs.IsNotFound(err) {
return status, errors.Wrap(err, "failed to wait for task")
}
status.State = sandboxstore.StateNotReady
} else {
// Task is running, set sandbox state as READY.
status.State = sandboxstore.StateReady
status.Pid = t.Pid()
c.eventMonitor.startExitMonitor(context.Background(), meta.ID, status.Pid, exitCh)
}
} else {
// Task is not running. Delete the task and set sandbox state as NOTREADY.
if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {

View File

@@ -87,7 +87,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
RuntimeHandler: r.GetRuntimeHandler(),
},
sandboxstore.Status{
State: sandboxstore.StateInit,
State: sandboxstore.StateUnknown,
},
)
@@ -253,88 +253,65 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
if err != nil {
return nil, errors.Wrap(err, "failed to get sandbox container info")
}
// Create sandbox task in containerd.
log.Tracef("Create sandbox container (id=%q, name=%q).",
id, name)
var taskOpts []containerd.NewTaskOpts
// TODO(random-liu): Remove this after shim v1 is deprecated.
if c.config.NoPivot && ociRuntime.Type == linuxRuntime {
taskOpts = append(taskOpts, containerd.WithNoPivotRoot)
}
// We don't need stdio for sandbox container.
task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...)
if err != nil {
return nil, errors.Wrap(err, "failed to create containerd task")
}
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Cleanup the sandbox container if an error is returned.
if _, err := task.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
logrus.WithError(err).Errorf("Failed to delete sandbox container %q", id)
}
}
}()
// wait is a long running background request, no timeout needed.
exitCh, err := task.Wait(ctrdutil.NamespacedContext())
if err != nil {
return nil, errors.Wrap(err, "failed to wait for sandbox container task")
}
if err := task.Start(ctx); err != nil {
return nil, errors.Wrapf(err, "failed to start sandbox container task %q", id)
}
if err := sandbox.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
// Set the pod sandbox as ready after successfully start sandbox container.
status.Pid = task.Pid()
status.State = sandboxstore.StateReady
status.CreatedAt = info.CreatedAt
return status, nil
}); err != nil {
return nil, errors.Wrap(err, "failed to update sandbox created timestamp")
return nil, errors.Wrap(err, "failed to update sandbox status")
}
// Add sandbox into sandbox store in INIT state.
sandbox.Container = container
if err := c.sandboxStore.Add(sandbox); err != nil {
return nil, errors.Wrapf(err, "failed to add sandbox %+v into store", sandbox)
}
defer func() {
// Delete sandbox from sandbox store if there is an error.
if retErr != nil {
c.sandboxStore.Delete(id)
}
}()
// NOTE(random-liu): Sandbox state only stay in INIT state after this point
// and before the end of this function.
// * If `Update` succeeds, sandbox state will become READY in one transaction.
// * If `Update` fails, sandbox will be removed from the store in the defer above.
// * If containerd stops at any point before `Update` finishes, because sandbox
// state is not checkpointed, it will be recovered from corresponding containerd task
// status during restart:
// * If the task is running, sandbox state will be READY,
// * Or else, sandbox state will be NOTREADY.
// start the monitor after adding sandbox into the store, this ensures
// that sandbox is in the store, when event monitor receives the TaskExit event.
//
// In any case, sandbox will leave INIT state, so it's safe to ignore sandbox
// in INIT state in other functions.
// Start sandbox container in one transaction to avoid race condition with
// event monitor.
if err := sandbox.Status.Update(func(status sandboxstore.Status) (_ sandboxstore.Status, retErr error) {
// NOTE(random-liu): We should not change the sandbox state to NOTREADY
// if `Update` fails.
//
// If `Update` fails, the sandbox will be cleaned up by all the defers
// above. We should not let user see this sandbox, or else they will
// see the sandbox disappear after the defer clean up, which may confuse
// them.
//
// Given so, we should keep the sandbox in INIT state if `Update` fails,
// and ignore sandbox in INIT state in all the inspection functions.
// Create sandbox task in containerd.
log.Tracef("Create sandbox container (id=%q, name=%q).",
id, name)
var taskOpts []containerd.NewTaskOpts
// TODO(random-liu): Remove this after shim v1 is deprecated.
if c.config.NoPivot && ociRuntime.Type == linuxRuntime {
taskOpts = append(taskOpts, containerd.WithNoPivotRoot)
}
// We don't need stdio for sandbox container.
task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...)
if err != nil {
return status, errors.Wrap(err, "failed to create containerd task")
}
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Cleanup the sandbox container if an error is returned.
// It's possible that task is deleted by event monitor.
if _, err := task.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
logrus.WithError(err).Errorf("Failed to delete sandbox container %q", id)
}
}
}()
if err := task.Start(ctx); err != nil {
return status, errors.Wrapf(err, "failed to start sandbox container task %q", id)
}
// Set the pod sandbox as ready after successfully start sandbox container.
status.Pid = task.Pid()
status.State = sandboxstore.StateReady
return status, nil
}); err != nil {
return nil, errors.Wrap(err, "failed to start sandbox container")
}
// TaskOOM from containerd may come before sandbox is added to store,
// but we don't care about sandbox TaskOOM right now, so it is fine.
c.eventMonitor.startExitMonitor(context.Background(), id, task.Pid(), exitCh)
return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil
}
@@ -651,16 +628,11 @@ func (c *criService) getSandboxRuntime(config *runtime.PodSandboxConfig, runtime
return criconfig.Runtime{}, errors.New("untrusted workload with host access is not allowed")
}
// Handle the deprecated UntrustedWorkloadRuntime.
if c.config.ContainerdConfig.UntrustedWorkloadRuntime.Type != "" {
return c.config.ContainerdConfig.UntrustedWorkloadRuntime, nil
}
runtimeHandler = criconfig.RuntimeUntrusted
}
if runtimeHandler == "" {
return c.config.ContainerdConfig.DefaultRuntime, nil
runtimeHandler = c.config.ContainerdConfig.DefaultRuntimeName
}
handler, ok := c.config.ContainerdConfig.Runtimes[runtimeHandler]

View File

@@ -19,7 +19,6 @@ package server
import (
"time"
"github.com/containerd/containerd"
eventtypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/errdefs"
cni "github.com/containerd/go-cni"
@@ -29,6 +28,7 @@ import (
"golang.org/x/sys/unix"
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
)
@@ -97,6 +97,7 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb
// `task.Delete` is not called here because it will be called when
// the event monitor handles the `TaskExit` event.
func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxstore.Sandbox) error {
id := sandbox.ID
container := sandbox.Container
state := sandbox.Status.Get().State
task, err := container.Task(ctx, nil)
@@ -105,29 +106,35 @@ func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxst
return errors.Wrap(err, "failed to get sandbox container")
}
// Don't return for unknown state, some cleanup needs to be done.
if state != sandboxstore.StateUnknown {
return nil
if state == sandboxstore.StateUnknown {
return cleanupUnknownSandbox(ctx, id, sandbox)
}
// Task is an interface, explicitly set it to nil just in case.
task = nil
return nil
}
// Handle unknown state.
// The cleanup logic is the same with container unknown state.
if state == sandboxstore.StateUnknown {
status, err := getTaskStatus(ctx, task)
// Start an exit handler for containers in unknown state.
waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext())
defer waitCancel()
exitCh, err := task.Wait(waitCtx)
if err != nil {
return errors.Wrapf(err, "failed to get task status for %q", sandbox.ID)
}
switch status.Status {
case containerd.Running, containerd.Created:
// The task is still running, continue stopping the task.
case containerd.Stopped:
// The task has exited, explicitly cleanup.
return cleanupUnknownSandbox(ctx, sandbox.ID, status, sandbox)
default:
return errors.Wrapf(err, "unsupported task status %q", status.Status)
if !errdefs.IsNotFound(err) {
return errors.Wrap(err, "failed to wait for task")
}
return cleanupUnknownSandbox(ctx, id, sandbox)
}
exitCtx, exitCancel := context.WithCancel(context.Background())
stopCh := c.eventMonitor.startExitMonitor(exitCtx, id, task.Pid(), exitCh)
defer func() {
exitCancel()
// This ensures that exit monitor is stopped before
// `Wait` is cancelled, so no exit event is generated
// because of the `Wait` cancellation.
<-stopCh
}()
}
// Kill the sandbox container.
@@ -166,14 +173,13 @@ func (c *criService) teardownPod(id string, path string, config *runtime.PodSand
}
// cleanupUnknownSandbox cleanup stopped sandbox in unknown state.
func cleanupUnknownSandbox(ctx context.Context, id string, status containerd.Status,
sandbox sandboxstore.Sandbox) error {
func cleanupUnknownSandbox(ctx context.Context, id string, sandbox sandboxstore.Sandbox) error {
// Reuse handleSandboxExit to do the cleanup.
return handleSandboxExit(ctx, &eventtypes.TaskExit{
ContainerID: id,
ID: id,
Pid: 0,
ExitStatus: status.ExitStatus,
ExitedAt: status.ExitTime,
ExitStatus: unknownExitCode,
ExitedAt: time.Now(),
}, sandbox)
}

View File

@@ -88,6 +88,10 @@ type Status struct {
// Human-readable message indicating details about why container is in its
// current state.
Message string
// Starting indicates that the container is in starting state.
// This field doesn't need to be checkpointed.
// TODO(now): Add unit test.
Starting bool `json:"-"`
// Removing indicates that the container is in removing state.
// This field doesn't need to be checkpointed.
Removing bool `json:"-"`

View File

@@ -86,22 +86,9 @@ func (s *Store) Add(sb Sandbox) error {
return nil
}
// Get returns the sandbox with specified id. Returns store.ErrNotExist
// if the sandbox doesn't exist.
// Get returns the sandbox with specified id.
// Returns store.ErrNotExist if the sandbox doesn't exist.
func (s *Store) Get(id string) (Sandbox, error) {
sb, err := s.GetAll(id)
if err != nil {
return sb, err
}
if sb.Status.Get().State == StateInit {
return Sandbox{}, store.ErrNotExist
}
return sb, nil
}
// GetAll returns the sandbox with specified id, including sandbox in unknown
// state. Returns store.ErrNotExist if the sandbox doesn't exist.
func (s *Store) GetAll(id string) (Sandbox, error) {
s.lock.RLock()
defer s.lock.RUnlock()
id, err := s.idIndex.Get(id)
@@ -123,9 +110,6 @@ func (s *Store) List() []Sandbox {
defer s.lock.RUnlock()
var sandboxes []Sandbox
for _, sb := range s.sandboxes {
if sb.Status.Get().State == StateInit {
continue
}
sandboxes = append(sandboxes, sb)
}
return sandboxes

View File

@@ -26,11 +26,11 @@ import (
// | |
// | Create(Run) | Load
// | |
// Start +----v----+ |
// (failed) | | |
// +-------------+ INIT | +-----------+
// | | | | |
// | +----+----+ | |
// Start | |
// (failed) | |
// +------------------+ +-----------+
// | | | |
// | | | |
// | | | |
// | | Start(Run) | |
// | | | |
@@ -53,23 +53,17 @@ import (
// +-------------> DELETED
// State is the sandbox state we use in containerd/cri.
// It includes init and unknown, which are internal states not defined in CRI.
// It includes unknown, which is internal states not defined in CRI.
// The state mapping from internal states to CRI states:
// * ready -> ready
// * not ready -> not ready
// * init -> not exist
// * unknown -> not ready
type State uint32
const (
// StateInit is init state of sandbox. Sandbox
// is in init state before its corresponding sandbox container
// is created. Sandbox in init state should be ignored by most
// functions, unless the caller needs to update sandbox state.
StateInit State = iota
// StateReady is ready state, it means sandbox container
// is running.
StateReady
StateReady = iota
// StateNotReady is notready state, it ONLY means sandbox
// container is not running.
// StopPodSandbox should still be called for NOTREADY sandbox to

View File

@@ -1,11 +1,11 @@
github.com/beorn7/perks 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9
github.com/BurntSushi/toml a368813c5e648fee92e5f6c30e3944ff9d5e8895
github.com/containerd/cgroups 1152b960fcee041f50df15cdc67c29dbccf801ef
github.com/BurntSushi/toml v0.3.1
github.com/containerd/cgroups 4994991857f9b0ae8dc439551e8bebdbb4bf66c1
github.com/containerd/console c12b1e7919c14469339a5d38f2f8ed9b64a9de23
github.com/containerd/containerd f2a20ead833f8caf3ffc12be058d6ce668b4ebed
github.com/containerd/containerd 32e788a8be3ab4418265693d9e742c30495fdd4c
github.com/containerd/continuity bd77b46c8352f74eb12c85bdc01f4b90f69d66b4
github.com/containerd/fifo 3d5202aec260678c48179c56f40e6f38a095738c
github.com/containerd/go-cni 40bcf8ec8acd7372be1d77031d585d5d8e561c90
github.com/containerd/go-cni 891c2a41e18144b2d7921f971d6c9789a68046b2
github.com/containerd/go-runc 5a6d9f37cfa36b15efba46dc7ea349fa9b7143c3
github.com/containerd/ttrpc f02858b1457c5ca3aaec3a0803eb0d59f96e41d6
github.com/containerd/typeurl a93fcdb778cd272c6e9b3028b2f42d813e785d40
@@ -21,35 +21,35 @@ github.com/docker/go-units v0.3.1
github.com/docker/spdystream 449fdfce4d962303d702fec724ef0ad181c92528
github.com/emicklei/go-restful v2.2.1
github.com/godbus/dbus v3
github.com/gogo/googleapis 08a7655d27152912db7aaf4f983275eaf8d128ef
github.com/gogo/protobuf v1.0.0
github.com/golang/protobuf v1.1.0
github.com/gogo/googleapis v1.2.0
github.com/gogo/protobuf v1.2.1
github.com/golang/protobuf v1.2.0
github.com/google/gofuzz 44d81051d367757e1c7c6a5a86423ece9afcf63c
github.com/grpc-ecosystem/go-grpc-prometheus v1.1
github.com/json-iterator/go 1.1.5
github.com/matttproud/golang_protobuf_extensions v1.0.0
github.com/matttproud/golang_protobuf_extensions v1.0.1
github.com/Microsoft/go-winio c599b533b43b1363d7d7c6cfda5ede70ed73ff13
github.com/Microsoft/hcsshim 8abdbb8205e4192c68b5f84c31197156f31be517
github.com/modern-go/concurrent 1.0.3
github.com/modern-go/reflect2 1.0.1
github.com/opencontainers/go-digest c9281466c8b2f606084ac71339773efd177436e7
github.com/opencontainers/image-spec v1.0.1
github.com/opencontainers/runc 12f6a991201fdb8f82579582d5e00e28fba06d0a
github.com/opencontainers/runtime-spec eba862dc2470385a233c7507392675cbeadf7353
github.com/opencontainers/runc 029124da7af7360afa781a0234d1b083550f797c
github.com/opencontainers/runtime-spec 29686dbc5559d93fb1ef402eeda3e35c38d75af4
github.com/opencontainers/selinux v1.2.1
github.com/pkg/errors v0.8.0
github.com/pkg/errors v0.8.1
github.com/pmezard/go-difflib v1.0.0
github.com/prometheus/client_golang f4fb1b73fb099f396a7f0036bf86aa8def4ed823
github.com/prometheus/client_model 99fa1f4be8e564e8a6b613da7fa6f46c9edafc6c
github.com/prometheus/common 89604d197083d4781071d3c65855d24ecfb0a563
github.com/prometheus/procfs cb4147076ac75738c9a7d279075a253c0cc5acbd
github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0
github.com/sirupsen/logrus v1.0.3
github.com/sirupsen/logrus v1.4.1
github.com/stretchr/testify v1.1.4
github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16
github.com/tchap/go-patricia v2.2.6
github.com/urfave/cli 7bc6a0acffa589f415f88aca16cc1de5ffd66f9c
go.etcd.io/bbolt v1.3.1-etcd.8
go.etcd.io/bbolt v1.3.2
golang.org/x/crypto 49796115aa4b964c318aad4f3084fdb41e9aa067
golang.org/x/net b3756b4b77d7b13260a0a2ec658753cf48922eac
golang.org/x/oauth2 a6bd8cefa1811bd24b86f8902872e4e8225f74c4