Merge pull request #570 from miaoyq/fixes-569

Use channel to pass the stop info instead of polling for container stop
This commit is contained in:
Lantao Liu 2018-01-25 10:27:18 -08:00 committed by GitHub
commit 11042a4141
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 44 additions and 50 deletions

View File

@ -28,15 +28,10 @@ import (
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
"k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
"github.com/containerd/cri-containerd/pkg/store"
containerstore "github.com/containerd/cri-containerd/pkg/store/container" containerstore "github.com/containerd/cri-containerd/pkg/store/container"
) )
const ( const (
// stopCheckPollInterval is the the interval to check whether a container
// is stopped successfully.
stopCheckPollInterval = 100 * time.Millisecond
// killContainerTimeout is the timeout that we wait for the container to // killContainerTimeout is the timeout that we wait for the container to
// be SIGKILLed. // be SIGKILLed.
killContainerTimeout = 2 * time.Minute killContainerTimeout = 2 * time.Minute
@ -104,7 +99,7 @@ func (c *criContainerdService) stopContainer(ctx context.Context, container cont
} }
} }
err = c.waitContainerStop(ctx, id, timeout) err = c.waitContainerStop(ctx, container, timeout)
if err == nil { if err == nil {
return nil return nil
} }
@ -130,41 +125,22 @@ func (c *criContainerdService) stopContainer(ctx context.Context, container cont
} }
// Wait for a fixed timeout until container stop is observed by event monitor. // Wait for a fixed timeout until container stop is observed by event monitor.
if err := c.waitContainerStop(ctx, id, killContainerTimeout); err != nil { if err := c.waitContainerStop(ctx, container, killContainerTimeout); err != nil {
return fmt.Errorf("an error occurs during waiting for container %q to stop: %v", id, err) return fmt.Errorf("an error occurs during waiting for container %q to stop: %v", id, err)
} }
return nil return nil
} }
// waitContainerStop polls container state until timeout exceeds or container is stopped. // waitContainerStop waits for container to be stopped until timeout exceeds or centext is cancelled.
func (c *criContainerdService) waitContainerStop(ctx context.Context, id string, timeout time.Duration) error { func (c *criContainerdService) waitContainerStop(ctx context.Context, container containerstore.Container, timeout time.Duration) error {
ticker := time.NewTicker(stopCheckPollInterval)
defer ticker.Stop()
timeoutTimer := time.NewTimer(timeout) timeoutTimer := time.NewTimer(timeout)
defer timeoutTimer.Stop() defer timeoutTimer.Stop()
for { select {
// Poll once before waiting for stopCheckPollInterval. case <-ctx.Done():
container, err := c.containerStore.Get(id) return fmt.Errorf("wait container %q is cancelled", container.ID)
if err != nil { case <-timeoutTimer.C:
if err != store.ErrNotExist { return fmt.Errorf("wait container %q stop timeout", container.ID)
return fmt.Errorf("failed to get container %q: %v", id, err) case <-container.Stopped():
} return nil
// Do not return error here because container was removed means
// it is already stopped.
logrus.Warnf("Container %q was removed during stopping", id)
return nil
}
// TODO(random-liu): Use channel with event handler instead of polling.
if container.Status.Get().State() == runtime.ContainerState_CONTAINER_EXITED {
return nil
}
select {
case <-ctx.Done():
return fmt.Errorf("wait container %q is cancelled", id)
case <-timeoutTimer.C:
return fmt.Errorf("wait container %q stop timeout", id)
case <-ticker.C:
continue
}
} }
} }

View File

@ -39,7 +39,7 @@ func TestWaitContainerStop(t *testing.T) {
CreatedAt: time.Now().UnixNano(), CreatedAt: time.Now().UnixNano(),
StartedAt: time.Now().UnixNano(), StartedAt: time.Now().UnixNano(),
}, },
timeout: 2 * stopCheckPollInterval, timeout: 200 * time.Millisecond,
expectErr: true, expectErr: true,
}, },
"should return error if context is cancelled": { "should return error if context is cancelled": {
@ -51,11 +51,6 @@ func TestWaitContainerStop(t *testing.T) {
cancel: true, cancel: true,
expectErr: true, expectErr: true,
}, },
"should not return error if container is removed before timeout": {
status: nil,
timeout: time.Hour,
expectErr: false,
},
"should not return error if container is stopped before timeout": { "should not return error if container is stopped before timeout": {
status: &containerstore.Status{ status: &containerstore.Status{
CreatedAt: time.Now().UnixNano(), CreatedAt: time.Now().UnixNano(),
@ -67,13 +62,15 @@ func TestWaitContainerStop(t *testing.T) {
}, },
} { } {
c := newTestCRIContainerdService() c := newTestCRIContainerdService()
if test.status != nil { container, err := containerstore.NewContainer(
container, err := containerstore.NewContainer( containerstore.Metadata{ID: id},
containerstore.Metadata{ID: id}, containerstore.WithFakeStatus(*test.status),
containerstore.WithFakeStatus(*test.status), )
) assert.NoError(t, err)
assert.NoError(t, err) assert.NoError(t, c.containerStore.Add(container))
assert.NoError(t, c.containerStore.Add(container)) if test.status.FinishedAt != 0 {
// Fake the TaskExit event
container.Stop()
} }
ctx := context.Background() ctx := context.Background()
if test.cancel { if test.cancel {
@ -81,7 +78,7 @@ func TestWaitContainerStop(t *testing.T) {
cancel() cancel()
ctx = cancelledCtx ctx = cancelledCtx
} }
err := c.waitContainerStop(ctx, id, test.timeout) err = c.waitContainerStop(ctx, container, test.timeout)
assert.Equal(t, test.expectErr, err != nil, desc) assert.Equal(t, test.expectErr, err != nil, desc)
} }
} }

View File

@ -155,6 +155,8 @@ func (em *eventMonitor) handleEvent(evt *events.Envelope) {
// TODO(random-liu): [P0] Enqueue the event and retry. // TODO(random-liu): [P0] Enqueue the event and retry.
return return
} }
// Using channel to propagate the information of container stop
cntr.Stop()
case *eventtypes.TaskOOM: case *eventtypes.TaskOOM:
e := any.(*eventtypes.TaskOOM) e := any.(*eventtypes.TaskOOM)
logrus.Infof("TaskOOM event %+v", e) logrus.Infof("TaskOOM event %+v", e)

View File

@ -21,6 +21,7 @@ import (
"github.com/containerd/containerd" "github.com/containerd/containerd"
"github.com/docker/docker/pkg/truncindex" "github.com/docker/docker/pkg/truncindex"
"k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
cio "github.com/containerd/cri-containerd/pkg/server/io" cio "github.com/containerd/cri-containerd/pkg/server/io"
"github.com/containerd/cri-containerd/pkg/store" "github.com/containerd/cri-containerd/pkg/store"
@ -37,7 +38,8 @@ type Container struct {
Container containerd.Container Container containerd.Container
// Container IO // Container IO
IO *cio.ContainerIO IO *cio.ContainerIO
// TODO(random-liu): Add stop channel to get rid of stop poll waiting. // StopCh is used to propagate the stop information of the container.
StopCh
} }
// Opts sets specific information to newly created Container. // Opts sets specific information to newly created Container.
@ -67,6 +69,9 @@ func WithStatus(status Status, root string) Opts {
return err return err
} }
c.Status = s c.Status = s
if s.Get().State() == runtime.ContainerState_CONTAINER_EXITED {
c.Stop()
}
return nil return nil
} }
} }
@ -75,6 +80,7 @@ func WithStatus(status Status, root string) Opts {
func NewContainer(metadata Metadata, opts ...Opts) (Container, error) { func NewContainer(metadata Metadata, opts ...Opts) (Container, error) {
c := Container{ c := Container{
Metadata: metadata, Metadata: metadata,
StopCh: StopCh(make(chan struct{})),
} }
for _, o := range opts { for _, o := range opts {
if err := o(&c); err != nil { if err := o(&c); err != nil {
@ -89,6 +95,19 @@ func (c *Container) Delete() error {
return c.Status.Delete() return c.Status.Delete()
} }
// StopCh is used to propagate the stop information of a container.
type StopCh chan struct{}
// Stop close stopCh of the container.
func (ch StopCh) Stop() {
close(ch)
}
// Stopped return the stopCh of the container as a readonly channel.
func (ch StopCh) Stopped() <-chan struct{} {
return ch
}
// Store stores all Containers. // Store stores all Containers.
type Store struct { type Store struct {
lock sync.RWMutex lock sync.RWMutex