containerd-shim-runc-v2: avoid potential deadlock in create handler
After PR #8617, the create handler of containerd-shim-runc-v2 calls handleStarted() to record the init process and handle its exit. The init process would not normally exit so early, but if this scenario occurs, handleStarted() will call handleProcessExit(), which causes a deadlock: create() has already acquired s.mu, and handleProcessExit() tries to lock it again. So I added a parameter muLocked to handleStarted to indicate whether s.mu is currently held, which determines whether handleProcessExit needs to lock it. Fix: #9103 Signed-off-by: Chen Yiyang <cyyzero@qq.com>
This commit is contained in:
parent
6604ff6c55
commit
68dd47ef70
@ -137,11 +137,14 @@ type containerProcess struct {
|
|||||||
// that its exit can be handled efficiently. If the process has already exited,
|
// that its exit can be handled efficiently. If the process has already exited,
|
||||||
// it handles the exit immediately. handleStarted should be called after the
|
// it handles the exit immediately. handleStarted should be called after the
|
||||||
// event announcing the start of the process has been published.
|
// event announcing the start of the process has been published.
|
||||||
|
// Note that handleStarted needs to be aware of whether s.mu is already held
|
||||||
|
// when it is called. If s.mu has been held, we don't need to lock it when
|
||||||
|
// calling handleProcessExit.
|
||||||
//
|
//
|
||||||
// The returned cleanup closure releases resources used to handle early exits.
|
// The returned cleanup closure releases resources used to handle early exits.
|
||||||
// It must be called before the caller of preStart returns, otherwise severe
|
// It must be called before the caller of preStart returns, otherwise severe
|
||||||
// memory leaks will occur.
|
// memory leaks will occur.
|
||||||
func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Container, process.Process), cleanup func()) {
|
func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Container, process.Process, bool), cleanup func()) {
|
||||||
exits := make(map[int][]runcC.Exit)
|
exits := make(map[int][]runcC.Exit)
|
||||||
|
|
||||||
s.lifecycleMu.Lock()
|
s.lifecycleMu.Lock()
|
||||||
@ -165,7 +168,7 @@ func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Containe
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
handleStarted = func(c *runc.Container, p process.Process) {
|
handleStarted = func(c *runc.Container, p process.Process, muLocked bool) {
|
||||||
var pid int
|
var pid int
|
||||||
if p != nil {
|
if p != nil {
|
||||||
pid = p.Pid()
|
pid = p.Pid()
|
||||||
@ -180,7 +183,13 @@ func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Containe
|
|||||||
} else if exited {
|
} else if exited {
|
||||||
s.lifecycleMu.Unlock()
|
s.lifecycleMu.Unlock()
|
||||||
for _, ee := range ees {
|
for _, ee := range ees {
|
||||||
s.handleProcessExit(ee, c, p)
|
if muLocked {
|
||||||
|
s.handleProcessExit(ee, c, p)
|
||||||
|
} else {
|
||||||
|
s.mu.Lock()
|
||||||
|
s.handleProcessExit(ee, c, p)
|
||||||
|
s.mu.Unlock()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
s.running[pid] = append(s.running[pid], containerProcess{
|
s.running[pid] = append(s.running[pid], containerProcess{
|
||||||
@ -235,7 +244,7 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *
|
|||||||
// could happen would also cause the container.Pid() call above to
|
// could happen would also cause the container.Pid() call above to
|
||||||
// nil-dereference panic.
|
// nil-dereference panic.
|
||||||
proc, _ := container.Process("")
|
proc, _ := container.Process("")
|
||||||
handleStarted(container, proc)
|
handleStarted(container, proc, true)
|
||||||
|
|
||||||
return &taskAPI.CreateTaskResponse{
|
return &taskAPI.CreateTaskResponse{
|
||||||
Pid: uint32(container.Pid()),
|
Pid: uint32(container.Pid()),
|
||||||
@ -262,7 +271,7 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.
|
|||||||
defer cleanup()
|
defer cleanup()
|
||||||
p, err := container.Start(ctx, r)
|
p, err := container.Start(ctx, r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
handleStarted(container, p)
|
handleStarted(container, p, false)
|
||||||
return nil, errdefs.ToGRPC(err)
|
return nil, errdefs.ToGRPC(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -302,7 +311,7 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.
|
|||||||
Pid: uint32(p.Pid()),
|
Pid: uint32(p.Pid()),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
handleStarted(container, p)
|
handleStarted(container, p, false)
|
||||||
return &taskAPI.StartResponse{
|
return &taskAPI.StartResponse{
|
||||||
Pid: uint32(p.Pid()),
|
Pid: uint32(p.Pid()),
|
||||||
}, nil
|
}, nil
|
||||||
@ -631,7 +640,9 @@ func (s *service) processExits() {
|
|||||||
s.lifecycleMu.Unlock()
|
s.lifecycleMu.Unlock()
|
||||||
|
|
||||||
for _, cp := range cps {
|
for _, cp := range cps {
|
||||||
|
s.mu.Lock()
|
||||||
s.handleProcessExit(e, cp.Container, cp.Process)
|
s.handleProcessExit(e, cp.Container, cp.Process)
|
||||||
|
s.mu.Unlock()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -640,10 +651,8 @@ func (s *service) send(evt interface{}) {
|
|||||||
s.events <- evt
|
s.events <- evt
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// s.mu must be locked when calling handleProcessExit
|
||||||
func (s *service) handleProcessExit(e runcC.Exit, c *runc.Container, p process.Process) {
|
func (s *service) handleProcessExit(e runcC.Exit, c *runc.Container, p process.Process) {
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
|
|
||||||
if ip, ok := p.(*process.Init); ok {
|
if ip, ok := p.(*process.Init); ok {
|
||||||
// Ensure all children are killed
|
// Ensure all children are killed
|
||||||
if runc.ShouldKillAllOnExit(s.context, c.Bundle) {
|
if runc.ShouldKillAllOnExit(s.context, c.Bundle) {
|
||||||
|
Loading…
Reference in New Issue
Block a user