Handle locking and errors for process state
ref: #1464 This tries to solve issues with races around process state. First, it adds the process mutex around the state call so that any state changes, deletions, etc. will be handled in order. Second, for IsNotExist errors from the runtime, return a stopped state if a process has been removed from the underlying OCI runtime but not from the shim yet. This shouldn't happen with the lock from above, but it's hard to verify this issue. Third, handle shim disconnections and return an ErrNotFound. Fourth, don't abort returning all tasks if one task is unable to return its state. Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
parent
aa8bd16ae7
commit
951c129bf1
@ -222,8 +222,13 @@ func (p *initProcess) ExitedAt() time.Time {
|
||||
}
|
||||
|
||||
func (p *initProcess) Status(ctx context.Context) (string, error) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
c, err := p.runtime.State(ctx, p.id)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return "stopped", nil
|
||||
}
|
||||
return "", p.runtimeError(err, "OCI runtime state failed")
|
||||
}
|
||||
return c.Status, nil
|
||||
|
@ -5,6 +5,8 @@ package linux
|
||||
import (
|
||||
"context"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
|
||||
"github.com/containerd/cgroups"
|
||||
"github.com/containerd/containerd/api/types/task"
|
||||
"github.com/containerd/containerd/errdefs"
|
||||
@ -63,7 +65,10 @@ func (t *Task) State(ctx context.Context) (runtime.State, error) {
|
||||
ID: t.id,
|
||||
})
|
||||
if err != nil {
|
||||
return runtime.State{}, errdefs.FromGRPC(err)
|
||||
if err != grpc.ErrServerStopped {
|
||||
return runtime.State{}, errdefs.FromGRPC(err)
|
||||
}
|
||||
return runtime.State{}, errdefs.ErrNotFound
|
||||
}
|
||||
var status runtime.Status
|
||||
switch response.Status {
|
||||
|
@ -274,17 +274,22 @@ func (s *Service) List(ctx context.Context, r *api.ListTasksRequest) (*api.ListT
|
||||
if err != nil {
|
||||
return nil, errdefs.ToGRPC(err)
|
||||
}
|
||||
for _, t := range tasks {
|
||||
tt, err := processFromContainerd(ctx, t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp.Tasks = append(resp.Tasks, tt)
|
||||
}
|
||||
addTasks(ctx, resp, tasks)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func addTasks(ctx context.Context, r *api.ListTasksResponse, tasks []runtime.Task) {
|
||||
for _, t := range tasks {
|
||||
tt, err := processFromContainerd(ctx, t)
|
||||
if err != nil {
|
||||
log.G(ctx).WithError(err).WithField("id", t.ID()).Error("converting task to protobuf")
|
||||
continue
|
||||
}
|
||||
r.Tasks = append(r.Tasks, tt)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Service) Pause(ctx context.Context, r *api.PauseTaskRequest) (*google_protobuf.Empty, error) {
|
||||
t, err := s.getTask(ctx, r.ContainerID)
|
||||
if err != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user