cri: handle sandbox/container exit event separately

The event monitor handles exit events one by one. If something goes wrong while deleting a task, it slows down the termination of Pods. To reduce the impact, the exit event watcher should handle each exit event separately. If handling fails, the watcher should put the event into the backoff queue and retry it.

Signed-off-by: Wei Fu <fuweid89@gmail.com>
2020-10-31 17:39:10 +08:00
parent 643bb9b66d
commit e56de63099
6 changed files with 117 additions and 36 deletions
--- a/pkg/cri/server/restart.go
+++ b/pkg/cri/server/restart.go
@@ -290,7 +290,7 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe
 					status.Reason = unknownExitReason
 				} else {
 					// Start exit monitor.
-					c.eventMonitor.startExitMonitor(context.Background(), id, status.Pid, exitCh)
+					c.eventMonitor.startContainerExitMonitor(context.Background(), id, status.Pid, exitCh)
 				}
 			case containerd.Stopped:
 				// Task is stopped. Updata status and delete the task.
@@ -389,7 +389,7 @@ func (c *criService) loadSandbox(ctx context.Context, cntr containerd.Container)
 					// Task is running, set sandbox state as READY.
 					status.State = sandboxstore.StateReady
 					status.Pid = t.Pid()
-					c.eventMonitor.startExitMonitor(context.Background(), meta.ID, status.Pid, exitCh)
+					c.eventMonitor.startSandboxExitMonitor(context.Background(), meta.ID, status.Pid, exitCh)
 				}
 			} else {
 				// Task is not running. Delete the task and set sandbox state as NOTREADY.