From 6604ff6c55a51285fb88cc081078ba74785af32d Mon Sep 17 00:00:00 2001 From: Chen Yiyang Date: Sat, 16 Sep 2023 19:33:18 +0800 Subject: [PATCH 1/3] containerd-shim-runc-v2: remove unnecessary `s.getContainer()` Previous code has already called `getContainer()`, just pass it into `s.getContainerPids` to reduce unnecessary lock and map lookup. Signed-off-by: Chen Yiyang --- runtime/v2/runc/task/service.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/runtime/v2/runc/task/service.go b/runtime/v2/runc/task/service.go index ff09c045a..060c48774 100644 --- a/runtime/v2/runc/task/service.go +++ b/runtime/v2/runc/task/service.go @@ -462,7 +462,7 @@ func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.Pi if err != nil { return nil, err } - pids, err := s.getContainerPids(ctx, r.ID) + pids, err := s.getContainerPids(ctx, container) if err != nil { return nil, errdefs.ToGRPC(err) } @@ -664,16 +664,12 @@ func (s *service) handleProcessExit(e runcC.Exit, c *runc.Container, p process.P }) } -func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { - container, err := s.getContainer(id) - if err != nil { - return nil, err - } +func (s *service) getContainerPids(ctx context.Context, container *runc.Container) ([]uint32, error) { p, err := container.Process("") if err != nil { return nil, errdefs.ToGRPC(err) } - ps, err := p.(*process.Init).Runtime().Ps(ctx, id) + ps, err := p.(*process.Init).Runtime().Ps(ctx, container.ID) if err != nil { return nil, err } From 68dd47ef70ca787c7779f7b855c36c596864d289 Mon Sep 17 00:00:00 2001 From: Chen Yiyang Date: Sat, 16 Sep 2023 20:49:54 +0800 Subject: [PATCH 2/3] containerd-shim-runc-v2: avoid potential deadlock in create handler After pr #8617, create handler of containerd-shim-runc-v2 will call handleStarted() to record the init process and handle its exit. Init process wouldn't quit so early in normal circumstances. But if this screnario occurs, handleStarted() will call handleProcessExit(), which will cause deadlock because create() had acquired s.mu, and handleProcessExit() will try to lock it again. So, I added a parameter muLocked to handleStarted to indicate whether or not s.mu is currently locked, and thus deciding whether or not to lock it when calling handleProcessExit. Fix: #9103 Signed-off-by: Chen Yiyang --- runtime/v2/runc/task/service.go | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/runtime/v2/runc/task/service.go b/runtime/v2/runc/task/service.go index 060c48774..4b97c7eb9 100644 --- a/runtime/v2/runc/task/service.go +++ b/runtime/v2/runc/task/service.go @@ -137,11 +137,14 @@ type containerProcess struct { // that its exit can be handled efficiently. If the process has already exited, // it handles the exit immediately. handleStarted should be called after the // event announcing the start of the process has been published. +// Note that handleStarted needs to be aware of whether s.mu is already held +// when it is called. If s.mu has been held, we don't need to lock it when +// calling handleProcessExit. // // The returned cleanup closure releases resources used to handle early exits. // It must be called before the caller of preStart returns, otherwise severe // memory leaks will occur. -func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Container, process.Process), cleanup func()) { +func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Container, process.Process, bool), cleanup func()) { exits := make(map[int][]runcC.Exit) s.lifecycleMu.Lock() @@ -165,7 +168,7 @@ func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Containe } } - handleStarted = func(c *runc.Container, p process.Process) { + handleStarted = func(c *runc.Container, p process.Process, muLocked bool) { var pid int if p != nil { pid = p.Pid() @@ -180,7 +183,13 @@ func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Containe } else if exited { s.lifecycleMu.Unlock() for _, ee := range ees { - s.handleProcessExit(ee, c, p) + if muLocked { + s.handleProcessExit(ee, c, p) + } else { + s.mu.Lock() + s.handleProcessExit(ee, c, p) + s.mu.Unlock() + } } } else { s.running[pid] = append(s.running[pid], containerProcess{ @@ -235,7 +244,7 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ * // could happen would also cause the container.Pid() call above to // nil-deference panic. proc, _ := container.Process("") - handleStarted(container, proc) + handleStarted(container, proc, true) return &taskAPI.CreateTaskResponse{ Pid: uint32(container.Pid()), @@ -262,7 +271,7 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI. defer cleanup() p, err := container.Start(ctx, r) if err != nil { - handleStarted(container, p) + handleStarted(container, p, false) return nil, errdefs.ToGRPC(err) } @@ -302,7 +311,7 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI. Pid: uint32(p.Pid()), }) } - handleStarted(container, p) + handleStarted(container, p, false) return &taskAPI.StartResponse{ Pid: uint32(p.Pid()), }, nil @@ -631,7 +640,9 @@ func (s *service) processExits() { s.lifecycleMu.Unlock() for _, cp := range cps { + s.mu.Lock() s.handleProcessExit(e, cp.Container, cp.Process) + s.mu.Unlock() } } } @@ -640,10 +651,8 @@ func (s *service) send(evt interface{}) { s.events <- evt } +// s.mu must be locked when calling handleProcessExit func (s *service) handleProcessExit(e runcC.Exit, c *runc.Container, p process.Process) { - s.mu.Lock() - defer s.mu.Unlock() - if ip, ok := p.(*process.Init); ok { // Ensure all children are killed if runc.ShouldKillAllOnExit(s.context, c.Bundle) { From 11a7751af5729b93c574eb7d30521e7d0027d7e0 Mon Sep 17 00:00:00 2001 From: Wei Fu Date: Wed, 20 Sep 2023 15:23:42 +0800 Subject: [PATCH 3/3] *: add runc-fp as runc wrapper to inject failpoint Signed-off-by: Wei Fu --- Makefile | 5 + integration/client/container_linux_test.go | 79 +++++++++++++ .../failpoint/cmd/runc-fp/issue9103.go | 69 +++++++++++ integration/failpoint/cmd/runc-fp/main.go | 108 ++++++++++++++++++ script/setup/install-failpoint-binaries | 4 + 5 files changed, 265 insertions(+) create mode 100644 integration/failpoint/cmd/runc-fp/issue9103.go create mode 100644 integration/failpoint/cmd/runc-fp/main.go diff --git a/Makefile b/Makefile index dcc8030a2..668a8d1d6 100644 --- a/Makefile +++ b/Makefile @@ -236,6 +236,11 @@ bin/cni-bridge-fp: integration/failpoint/cmd/cni-bridge-fp FORCE @echo "$(WHALE) $@" @$(GO) build ${GO_BUILD_FLAGS} -o $@ ./integration/failpoint/cmd/cni-bridge-fp +# build runc-fp as runc wrapper to support failpoint, only used by integration test +bin/runc-fp: integration/failpoint/cmd/runc-fp FORCE + @echo "$(WHALE) $@" + @$(GO) build ${GO_BUILD_FLAGS} -o $@ ./integration/failpoint/cmd/runc-fp + benchmark: ## run benchmarks tests @echo "$(WHALE) $@" @$(GO) test ${TESTFLAGS} -bench . -run Benchmark -test.root diff --git a/integration/client/container_linux_test.go b/integration/client/container_linux_test.go index 8ecdbf822..19fd7aa2f 100644 --- a/integration/client/container_linux_test.go +++ b/integration/client/container_linux_test.go @@ -41,7 +41,9 @@ import ( "github.com/containerd/containerd/plugin" "github.com/containerd/containerd/runtime/v2/runc/options" "github.com/containerd/containerd/sys" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/require" exec "golang.org/x/sys/execabs" "golang.org/x/sys/unix" ) @@ -1417,3 +1419,80 @@ func TestShimOOMScore(t *testing.T) { case <-statusC: } } + +// TestIssue9103 is used as regression case for issue 9103. +// +// The runc-fp will kill the init process so that the shim should return stopped +// status after container.NewTask. It's used to simulate that the runc-init +// might be killed by oom-kill. +func TestIssue9103(t *testing.T) { + if os.Getenv("RUNC_FLAVOR") == "crun" { + t.Skip("skip it when using crun") + } + + client, err := newClient(t, address) + require.NoError(t, err) + defer client.Close() + + var ( + image Image + ctx, cancel = testContext(t) + id = t.Name() + ) + defer cancel() + + image, err = client.GetImage(ctx, testImage) + require.NoError(t, err) + + for idx, tc := range []struct { + desc string + cntrOpts []NewContainerOpts + expectedStatus ProcessStatus + }{ + { + desc: "should be created status", + cntrOpts: []NewContainerOpts{ + WithNewSpec(oci.WithImageConfig(image), + withProcessArgs("sleep", "30"), + ), + }, + expectedStatus: Created, + }, + { + desc: "should be stopped status if init has been killed", + cntrOpts: []NewContainerOpts{ + WithNewSpec(oci.WithImageConfig(image), + withProcessArgs("sleep", "30"), + oci.WithAnnotations(map[string]string{ + "oci.runc.failpoint.profile": "issue9103", + }), + ), + WithRuntime(client.Runtime(), &options.Options{ + BinaryName: "runc-fp", + }), + }, + expectedStatus: Stopped, + }, + } { + tc := tc + tName := fmt.Sprintf("%s%d", id, idx) + t.Run(tc.desc, func(t *testing.T) { + container, err := client.NewContainer(ctx, tName, + append([]NewContainerOpts{WithNewSnapshot(tName, image)}, tc.cntrOpts...)..., + ) + require.NoError(t, err) + defer container.Delete(ctx, WithSnapshotCleanup) + + cctx, ccancel := context.WithTimeout(ctx, 30*time.Second) + task, err := container.NewTask(cctx, empty()) + ccancel() + require.NoError(t, err) + + defer task.Delete(ctx, WithProcessKill) + + status, err := task.Status(ctx) + require.NoError(t, err) + require.Equal(t, status.Status, tc.expectedStatus) + }) + } +} diff --git a/integration/failpoint/cmd/runc-fp/issue9103.go b/integration/failpoint/cmd/runc-fp/issue9103.go new file mode 100644 index 000000000..699603391 --- /dev/null +++ b/integration/failpoint/cmd/runc-fp/issue9103.go @@ -0,0 +1,69 @@ +//go:build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package main + +import ( + "context" + "fmt" + "os" + "strconv" + "strings" + "syscall" + "time" +) + +// issue9103KillInitAfterCreate kills the runc.Init process after creating +// command returns successfully. +// +// REF: https://github.com/containerd/containerd/issues/9103 +func issue9103KillInitAfterCreate(ctx context.Context, method invoker) error { + isCreated := strings.Contains(strings.Join(os.Args, ","), ",create,") + + if err := method(ctx); err != nil { + return err + } + + if !isCreated { + return nil + } + + initPidPath := "init.pid" + data, err := os.ReadFile(initPidPath) + if err != nil { + return fmt.Errorf("failed to read %s: %w", initPidPath, err) + } + + pid, err := strconv.Atoi(string(data)) + if err != nil { + return fmt.Errorf("failed to get init pid from string %s: %w", string(data), err) + } + + if pid <= 0 { + return fmt.Errorf("unexpected init pid %v", pid) + } + + if err := syscall.Kill(pid, syscall.SIGKILL); err != nil { + return fmt.Errorf("failed to kill the init pid %v: %w", pid, err) + } + + // Ensure that the containerd-shim has received the SIGCHLD and start + // to cleanup + time.Sleep(3 * time.Second) + return nil +} diff --git a/integration/failpoint/cmd/runc-fp/main.go b/integration/failpoint/cmd/runc-fp/main.go new file mode 100644 index 000000000..7b6f8f6fc --- /dev/null +++ b/integration/failpoint/cmd/runc-fp/main.go @@ -0,0 +1,108 @@ +//go:build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package main + +import ( + "context" + "fmt" + "os" + "os/exec" + "syscall" + + "github.com/containerd/containerd/oci" + "github.com/sirupsen/logrus" +) + +const ( + failpointProfileKey = "oci.runc.failpoint.profile" +) + +type invoker func(context.Context) error + +type invokerInterceptor func(context.Context, invoker) error + +var ( + failpointProfiles = map[string]invokerInterceptor{ + "issue9103": issue9103KillInitAfterCreate, + } +) + +// setupLog setups messages into log file. +func setupLog() { + // containerd/go-runc always add --log option + idx := 2 + for ; idx < len(os.Args); idx++ { + if os.Args[idx] == "--log" { + break + } + } + + if idx >= len(os.Args)-1 || os.Args[idx] != "--log" { + panic("option --log required") + } + + logFile := os.Args[idx+1] + f, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0o644) + if err != nil { + panic(fmt.Errorf("failed to open %s: %w", logFile, err)) + } + + logrus.SetOutput(f) + logrus.SetFormatter(new(logrus.JSONFormatter)) +} + +func main() { + setupLog() + + fpProfile, err := failpointProfileFromOCIAnnotation() + if err != nil { + logrus.WithError(err).Fatal("failed to get failpoint profile") + } + + ctx := context.Background() + if err := fpProfile(ctx, defaultRuncInvoker); err != nil { + logrus.WithError(err).Fatal("failed to exec failpoint profile") + } +} + +// defaultRuncInvoker is to call the runc command with same arguments. +func defaultRuncInvoker(ctx context.Context) error { + cmd := exec.CommandContext(ctx, "runc", os.Args[1:]...) + cmd.SysProcAttr = &syscall.SysProcAttr{Pdeathsig: syscall.SIGKILL} + return cmd.Run() +} + +// failpointProfileFromOCIAnnotation gets the profile from OCI annotations. +func failpointProfileFromOCIAnnotation() (invokerInterceptor, error) { + spec, err := oci.ReadSpec(oci.ConfigFilename) + if err != nil { + return nil, fmt.Errorf("failed to read %s: %w", oci.ConfigFilename, err) + } + + profileName, ok := spec.Annotations[failpointProfileKey] + if !ok { + return nil, fmt.Errorf("failpoint profile is required") + } + + fp, ok := failpointProfiles[profileName] + if !ok { + return nil, fmt.Errorf("no such failpoint profile %s", profileName) + } + return fp, nil +} diff --git a/script/setup/install-failpoint-binaries b/script/setup/install-failpoint-binaries index 533eb5423..8e708ea45 100755 --- a/script/setup/install-failpoint-binaries +++ b/script/setup/install-failpoint-binaries @@ -33,3 +33,7 @@ sudo install bin/cni-bridge-fp "${CNI_BIN_DIR}" SHIM_BIN_DIR=${SHIM_BIN_DIR:-"/usr/local/bin"} make bin/containerd-shim-runc-fp-v1 sudo install bin/containerd-shim-runc-fp-v1 "${SHIM_BIN_DIR}" + +RUNCFP_BIN_DIR=${RUNCFP_BIN_DIR:-"/usr/local/bin"} +make bin/runc-fp +sudo install bin/runc-fp "${RUNCFP_BIN_DIR}"