Fix Delete race.
Signed-off-by: Lantao Liu <lantaol@google.com>
This commit is contained in:
parent
7050011faa
commit
bd09d31777
@ -71,10 +71,10 @@ func (c *criContainerdService) StopContainer(ctx context.Context, r *runtime.Sto
|
|||||||
glog.V(2).Infof("Stop container %q with signal %v", id, stopSignal)
|
glog.V(2).Infof("Stop container %q with signal %v", id, stopSignal)
|
||||||
_, err = c.containerService.Kill(ctx, &execution.KillRequest{ID: id, Signal: uint32(stopSignal)})
|
_, err = c.containerService.Kill(ctx, &execution.KillRequest{ID: id, Signal: uint32(stopSignal)})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if isContainerdContainerNotExistError(err) {
|
if !isContainerdContainerNotExistError(err) && !isRuncProcessAlreadyFinishedError(err) {
|
||||||
return &runtime.StopContainerResponse{}, nil
|
return nil, fmt.Errorf("failed to stop container %q: %v", id, err)
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("failed to stop container %q: %v", id, err)
|
// Move on to make sure container status is updated.
|
||||||
}
|
}
|
||||||
|
|
||||||
err = c.waitContainerStop(ctx, id, time.Duration(r.GetTimeout())*time.Second)
|
err = c.waitContainerStop(ctx, id, time.Duration(r.GetTimeout())*time.Second)
|
||||||
@ -84,20 +84,19 @@ func (c *criContainerdService) StopContainer(ctx context.Context, r *runtime.Sto
|
|||||||
glog.Errorf("Stop container %q timed out: %v", id, err)
|
glog.Errorf("Stop container %q timed out: %v", id, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
glog.V(2).Infof("Delete container from containerd %q", id)
|
// Event handler will Delete the container from containerd after it handles the Exited event.
|
||||||
// Delete sends SIGKILL to the container in the containerd version we are using.
|
glog.V(2).Infof("Kill container %q", id)
|
||||||
// TODO(random-liu): Replace with `Kill` to avoid race soon.
|
_, err = c.containerService.Kill(ctx, &execution.KillRequest{ID: id, Signal: uint32(unix.SIGKILL)})
|
||||||
_, err = c.containerService.Delete(ctx, &execution.DeleteRequest{ID: id})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if isContainerdContainerNotExistError(err) {
|
if !isContainerdContainerNotExistError(err) && !isRuncProcessAlreadyFinishedError(err) {
|
||||||
return &runtime.StopContainerResponse{}, nil
|
return nil, fmt.Errorf("failed to kill container %q: %v", id, err)
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("failed to delete container %q: %v", id, err)
|
// Move on to make sure container status is updated.
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait forever until container stop is observed by event monitor.
|
// Wait for a fixed timeout until container stop is observed by event monitor.
|
||||||
if err := c.waitContainerStop(ctx, id, killContainerTimeout); err != nil {
|
if err := c.waitContainerStop(ctx, id, killContainerTimeout); err != nil {
|
||||||
return nil, fmt.Errorf("error occurs during waiting for container %q to stop: %v",
|
return nil, fmt.Errorf("an error occurs during waiting for container %q to stop: %v",
|
||||||
id, err)
|
id, err)
|
||||||
}
|
}
|
||||||
return &runtime.StopContainerResponse{}, nil
|
return &runtime.StopContainerResponse{}, nil
|
||||||
|
@ -25,6 +25,7 @@ import (
|
|||||||
"github.com/containerd/containerd/api/types/container"
|
"github.com/containerd/containerd/api/types/container"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"golang.org/x/net/context"
|
"golang.org/x/net/context"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1"
|
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1"
|
||||||
|
|
||||||
"github.com/kubernetes-incubator/cri-containerd/pkg/metadata"
|
"github.com/kubernetes-incubator/cri-containerd/pkg/metadata"
|
||||||
@ -106,17 +107,15 @@ func TestStopContainer(t *testing.T) {
|
|||||||
for desc, test := range map[string]struct {
|
for desc, test := range map[string]struct {
|
||||||
metadata *metadata.ContainerMetadata
|
metadata *metadata.ContainerMetadata
|
||||||
containerdContainer *container.Container
|
containerdContainer *container.Container
|
||||||
killErr error
|
stopErr error
|
||||||
deleteErr error
|
|
||||||
discardEvents int
|
|
||||||
noTimeout bool
|
noTimeout bool
|
||||||
expectErr bool
|
expectErr bool
|
||||||
expectCalls []string
|
expectCalls []servertesting.CalledDetail
|
||||||
}{
|
}{
|
||||||
"should return error when container does not exist": {
|
"should return error when container does not exist": {
|
||||||
metadata: nil,
|
metadata: nil,
|
||||||
expectErr: true,
|
expectErr: true,
|
||||||
expectCalls: []string{},
|
expectCalls: []servertesting.CalledDetail{},
|
||||||
},
|
},
|
||||||
"should not return error when container is not running": {
|
"should not return error when container is not running": {
|
||||||
metadata: &metadata.ContainerMetadata{
|
metadata: &metadata.ContainerMetadata{
|
||||||
@ -124,52 +123,99 @@ func TestStopContainer(t *testing.T) {
|
|||||||
CreatedAt: time.Now().UnixNano(),
|
CreatedAt: time.Now().UnixNano(),
|
||||||
},
|
},
|
||||||
expectErr: false,
|
expectErr: false,
|
||||||
expectCalls: []string{},
|
expectCalls: []servertesting.CalledDetail{},
|
||||||
},
|
},
|
||||||
"should not return error if containerd container does not exist": {
|
"should not return error if containerd container does not exist": {
|
||||||
metadata: &testMetadata,
|
metadata: &testMetadata,
|
||||||
expectErr: false,
|
containerdContainer: &testContainer,
|
||||||
expectCalls: []string{"kill"},
|
// Since it's hard to inject an event while `StopContainer` is running,
|
||||||
|
// we only test the case where the first stop returns an error but the
|
||||||
|
// container status has not been updated yet.
|
||||||
|
// We also leverage this behavior to test that when the graceful
|
||||||
|
// stop doesn't take effect, the container is SIGKILL-ed.
|
||||||
|
stopErr: servertesting.ContainerNotExistError,
|
||||||
|
expectErr: false,
|
||||||
|
expectCalls: []servertesting.CalledDetail{
|
||||||
|
{
|
||||||
|
Name: "kill",
|
||||||
|
Argument: &execution.KillRequest{ID: testID, Signal: uint32(unix.SIGTERM)},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "kill",
|
||||||
|
Argument: &execution.KillRequest{ID: testID, Signal: uint32(unix.SIGKILL)},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "delete",
|
||||||
|
Argument: &execution.DeleteRequest{ID: testID},
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"should not return error if containerd container is killed": {
|
"should not return error if containerd container process already finished": {
|
||||||
|
metadata: &testMetadata,
|
||||||
|
containerdContainer: &testContainer,
|
||||||
|
stopErr: errors.New("os: process already finished"),
|
||||||
|
expectErr: false,
|
||||||
|
expectCalls: []servertesting.CalledDetail{
|
||||||
|
{
|
||||||
|
Name: "kill",
|
||||||
|
Argument: &execution.KillRequest{ID: testID, Signal: uint32(unix.SIGTERM)},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "kill",
|
||||||
|
Argument: &execution.KillRequest{ID: testID, Signal: uint32(unix.SIGKILL)},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "delete",
|
||||||
|
Argument: &execution.DeleteRequest{ID: testID},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"should return error if graceful stop returns random error": {
|
||||||
|
metadata: &testMetadata,
|
||||||
|
containerdContainer: &testContainer,
|
||||||
|
stopErr: errors.New("random stop error"),
|
||||||
|
expectErr: true,
|
||||||
|
expectCalls: []servertesting.CalledDetail{
|
||||||
|
{
|
||||||
|
Name: "kill",
|
||||||
|
Argument: &execution.KillRequest{ID: testID, Signal: uint32(unix.SIGTERM)},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"should not return error if containerd container is gracefully stopped": {
|
||||||
metadata: &testMetadata,
|
metadata: &testMetadata,
|
||||||
containerdContainer: &testContainer,
|
containerdContainer: &testContainer,
|
||||||
expectErr: false,
|
expectErr: false,
|
||||||
// deleted by the event monitor.
|
// deleted by the event monitor.
|
||||||
expectCalls: []string{"kill", "delete"},
|
expectCalls: []servertesting.CalledDetail{
|
||||||
},
|
{
|
||||||
"should not return error if containerd container is deleted": {
|
Name: "kill",
|
||||||
metadata: &testMetadata,
|
Argument: &execution.KillRequest{ID: testID, Signal: uint32(unix.SIGTERM)},
|
||||||
containerdContainer: &testContainer,
|
},
|
||||||
// discard killed events to force a delete. This is only
|
{
|
||||||
// for testing. Actually real containerd should only generate
|
Name: "delete",
|
||||||
// one EXIT event.
|
Argument: &execution.DeleteRequest{ID: testID},
|
||||||
discardEvents: 1,
|
},
|
||||||
expectErr: false,
|
},
|
||||||
// one more delete from the event monitor.
|
|
||||||
expectCalls: []string{"kill", "delete", "delete"},
|
|
||||||
},
|
|
||||||
"should return error if kill failed": {
|
|
||||||
metadata: &testMetadata,
|
|
||||||
containerdContainer: &testContainer,
|
|
||||||
killErr: errors.New("random error"),
|
|
||||||
expectErr: true,
|
|
||||||
expectCalls: []string{"kill"},
|
|
||||||
},
|
},
|
||||||
"should directly kill container if timeout is 0": {
|
"should directly kill container if timeout is 0": {
|
||||||
metadata: &testMetadata,
|
metadata: &testMetadata,
|
||||||
containerdContainer: &testContainer,
|
containerdContainer: &testContainer,
|
||||||
noTimeout: true,
|
noTimeout: true,
|
||||||
expectCalls: []string{"delete", "delete"},
|
expectErr: false,
|
||||||
},
|
expectCalls: []servertesting.CalledDetail{
|
||||||
"should return error if delete failed": {
|
{
|
||||||
metadata: &testMetadata,
|
Name: "kill",
|
||||||
containerdContainer: &testContainer,
|
Argument: &execution.KillRequest{ID: testID, Signal: uint32(unix.SIGKILL)},
|
||||||
deleteErr: errors.New("random error"),
|
},
|
||||||
discardEvents: 1,
|
{
|
||||||
expectErr: true,
|
Name: "delete",
|
||||||
expectCalls: []string{"kill", "delete"},
|
Argument: &execution.DeleteRequest{ID: testID},
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
// TODO(random-liu): Test "should return error if both failed" after we have
|
||||||
|
// a fake clock for tests.
|
||||||
} {
|
} {
|
||||||
t.Logf("TestCase %q", desc)
|
t.Logf("TestCase %q", desc)
|
||||||
c := newTestCRIContainerdService()
|
c := newTestCRIContainerdService()
|
||||||
@ -185,28 +231,19 @@ func TestStopContainer(t *testing.T) {
|
|||||||
if test.containerdContainer != nil {
|
if test.containerdContainer != nil {
|
||||||
fake.SetFakeContainers([]container.Container{*test.containerdContainer})
|
fake.SetFakeContainers([]container.Container{*test.containerdContainer})
|
||||||
}
|
}
|
||||||
if test.killErr != nil {
|
if test.stopErr != nil {
|
||||||
fake.InjectError("kill", test.killErr)
|
fake.InjectError("kill", test.stopErr)
|
||||||
}
|
|
||||||
if test.deleteErr != nil {
|
|
||||||
fake.InjectError("delete", test.deleteErr)
|
|
||||||
}
|
}
|
||||||
eventClient, err := fake.Events(context.Background(), &execution.EventsRequest{})
|
eventClient, err := fake.Events(context.Background(), &execution.EventsRequest{})
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
// Start a simple test event monitor.
|
// Start a simple test event monitor.
|
||||||
go func(e execution.ContainerService_EventsClient, discard int) {
|
go func(e execution.ContainerService_EventsClient) {
|
||||||
for {
|
for {
|
||||||
e, err := e.Recv() // nolint: vetshadow
|
if err := c.handleEventStream(e); err != nil { // nolint: vetshadow
|
||||||
if err != nil {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if discard > 0 {
|
|
||||||
discard--
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
c.handleEvent(e)
|
|
||||||
}
|
}
|
||||||
}(eventClient, test.discardEvents)
|
}(eventClient)
|
||||||
fake.ClearCalls()
|
fake.ClearCalls()
|
||||||
timeout := int64(1)
|
timeout := int64(1)
|
||||||
if test.noTimeout {
|
if test.noTimeout {
|
||||||
@ -225,6 +262,6 @@ func TestStopContainer(t *testing.T) {
|
|||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.NotNil(t, resp)
|
assert.NotNil(t, resp)
|
||||||
}
|
}
|
||||||
assert.Equal(t, test.expectCalls, fake.GetCalledNames())
|
assert.Equal(t, test.expectCalls, fake.GetCalledDetails())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -226,6 +226,13 @@ func isContainerdContainerNotExistError(grpcError error) bool {
|
|||||||
return grpc.ErrorDesc(grpcError) == containerd.ErrContainerNotExist.Error()
|
return grpc.ErrorDesc(grpcError) == containerd.ErrContainerNotExist.Error()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isRuncProcessAlreadyFinishedError checks whether a grpc error is a process already
|
||||||
|
// finished error.
|
||||||
|
// TODO(random-liu): Containerd should expose this error in api. (containerd#999)
|
||||||
|
func isRuncProcessAlreadyFinishedError(grpcError error) bool {
|
||||||
|
return strings.Contains(grpc.ErrorDesc(grpcError), "os: process already finished")
|
||||||
|
}
|
||||||
|
|
||||||
// getSandbox gets the sandbox metadata from the sandbox store. It returns nil without
|
// getSandbox gets the sandbox metadata from the sandbox store. It returns nil without
|
||||||
// error if the sandbox metadata is not found. It also tries to get full sandbox id and
|
// error if the sandbox metadata is not found. It also tries to get full sandbox id and
|
||||||
// retry if the sandbox metadata is not found with the initial id.
|
// retry if the sandbox metadata is not found with the initial id.
|
||||||
|
@ -31,7 +31,8 @@ import (
|
|||||||
"google.golang.org/grpc/codes"
|
"google.golang.org/grpc/codes"
|
||||||
)
|
)
|
||||||
|
|
||||||
var containerNotExistError = grpc.Errorf(codes.Unknown, containerd.ErrContainerNotExist.Error())
|
// ContainerNotExistError is the fake error returned when container does not exist.
|
||||||
|
var ContainerNotExistError = grpc.Errorf(codes.Unknown, containerd.ErrContainerNotExist.Error())
|
||||||
|
|
||||||
// CalledDetail is the struct that contains the called function name and arguments.
|
// CalledDetail is the struct that contains the called function name and arguments.
|
||||||
type CalledDetail struct {
|
type CalledDetail struct {
|
||||||
@ -229,7 +230,7 @@ func (f *FakeExecutionClient) Start(ctx context.Context, startOpts *execution.St
|
|||||||
}
|
}
|
||||||
c, ok := f.ContainerList[startOpts.ID]
|
c, ok := f.ContainerList[startOpts.ID]
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, containerNotExistError
|
return nil, ContainerNotExistError
|
||||||
}
|
}
|
||||||
f.sendEvent(&container.Event{
|
f.sendEvent(&container.Event{
|
||||||
ID: c.ID,
|
ID: c.ID,
|
||||||
@ -260,7 +261,7 @@ func (f *FakeExecutionClient) Delete(ctx context.Context, deleteOpts *execution.
|
|||||||
}
|
}
|
||||||
c, ok := f.ContainerList[deleteOpts.ID]
|
c, ok := f.ContainerList[deleteOpts.ID]
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, containerNotExistError
|
return nil, ContainerNotExistError
|
||||||
}
|
}
|
||||||
delete(f.ContainerList, deleteOpts.ID)
|
delete(f.ContainerList, deleteOpts.ID)
|
||||||
f.sendEvent(&container.Event{
|
f.sendEvent(&container.Event{
|
||||||
@ -281,7 +282,7 @@ func (f *FakeExecutionClient) Info(ctx context.Context, infoOpts *execution.Info
|
|||||||
}
|
}
|
||||||
c, ok := f.ContainerList[infoOpts.ID]
|
c, ok := f.ContainerList[infoOpts.ID]
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, containerNotExistError
|
return nil, ContainerNotExistError
|
||||||
}
|
}
|
||||||
return &c, nil
|
return &c, nil
|
||||||
}
|
}
|
||||||
@ -315,7 +316,7 @@ func (f *FakeExecutionClient) Kill(ctx context.Context, killOpts *execution.Kill
|
|||||||
}
|
}
|
||||||
c, ok := f.ContainerList[killOpts.ID]
|
c, ok := f.ContainerList[killOpts.ID]
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, containerNotExistError
|
return nil, ContainerNotExistError
|
||||||
}
|
}
|
||||||
c.Status = container.Status_STOPPED
|
c.Status = container.Status_STOPPED
|
||||||
f.ContainerList[killOpts.ID] = c
|
f.ContainerList[killOpts.ID] = c
|
||||||
|
Loading…
Reference in New Issue
Block a user