diff --git a/Makefile b/Makefile index 68f16237c..9064288d6 100644 --- a/Makefile +++ b/Makefile @@ -236,6 +236,11 @@ bin/cni-bridge-fp: integration/failpoint/cmd/cni-bridge-fp FORCE @echo "$(WHALE) $@" @$(GO) build ${GO_BUILD_FLAGS} -o $@ ./integration/failpoint/cmd/cni-bridge-fp +# build runc-fp as runc wrapper to support failpoint, only used by integration test +bin/runc-fp: integration/failpoint/cmd/runc-fp FORCE + @echo "$(WHALE) $@" + @$(GO) build ${GO_BUILD_FLAGS} -o $@ ./integration/failpoint/cmd/runc-fp + benchmark: ## run benchmarks tests @echo "$(WHALE) $@" @$(GO) test ${TESTFLAGS} -bench . -run Benchmark -test.root diff --git a/integration/client/container_linux_test.go b/integration/client/container_linux_test.go index 8ecdbf822..19fd7aa2f 100644 --- a/integration/client/container_linux_test.go +++ b/integration/client/container_linux_test.go @@ -41,7 +41,9 @@ import ( "github.com/containerd/containerd/plugin" "github.com/containerd/containerd/runtime/v2/runc/options" "github.com/containerd/containerd/sys" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/require" exec "golang.org/x/sys/execabs" "golang.org/x/sys/unix" ) @@ -1417,3 +1419,80 @@ func TestShimOOMScore(t *testing.T) { case <-statusC: } } + +// TestIssue9103 is used as regression case for issue 9103. +// +// The runc-fp will kill the init process so that the shim should return stopped +// status after container.NewTask. It's used to simulate that the runc-init +// might be killed by oom-kill. +func TestIssue9103(t *testing.T) { + if os.Getenv("RUNC_FLAVOR") == "crun" { + t.Skip("skip it when using crun") + } + + client, err := newClient(t, address) + require.NoError(t, err) + defer client.Close() + + var ( + image Image + ctx, cancel = testContext(t) + id = t.Name() + ) + defer cancel() + + image, err = client.GetImage(ctx, testImage) + require.NoError(t, err) + + for idx, tc := range []struct { + desc string + cntrOpts []NewContainerOpts + expectedStatus ProcessStatus + }{ + { + desc: "should be created status", + cntrOpts: []NewContainerOpts{ + WithNewSpec(oci.WithImageConfig(image), + withProcessArgs("sleep", "30"), + ), + }, + expectedStatus: Created, + }, + { + desc: "should be stopped status if init has been killed", + cntrOpts: []NewContainerOpts{ + WithNewSpec(oci.WithImageConfig(image), + withProcessArgs("sleep", "30"), + oci.WithAnnotations(map[string]string{ + "oci.runc.failpoint.profile": "issue9103", + }), + ), + WithRuntime(client.Runtime(), &options.Options{ + BinaryName: "runc-fp", + }), + }, + expectedStatus: Stopped, + }, + } { + tc := tc + tName := fmt.Sprintf("%s%d", id, idx) + t.Run(tc.desc, func(t *testing.T) { + container, err := client.NewContainer(ctx, tName, + append([]NewContainerOpts{WithNewSnapshot(tName, image)}, tc.cntrOpts...)..., + ) + require.NoError(t, err) + defer container.Delete(ctx, WithSnapshotCleanup) + + cctx, ccancel := context.WithTimeout(ctx, 30*time.Second) + task, err := container.NewTask(cctx, empty()) + ccancel() + require.NoError(t, err) + + defer task.Delete(ctx, WithProcessKill) + + status, err := task.Status(ctx) + require.NoError(t, err) + require.Equal(t, status.Status, tc.expectedStatus) + }) + } +} diff --git a/integration/failpoint/cmd/runc-fp/issue9103.go b/integration/failpoint/cmd/runc-fp/issue9103.go new file mode 100644 index 000000000..699603391 --- /dev/null +++ b/integration/failpoint/cmd/runc-fp/issue9103.go @@ -0,0 +1,69 @@ +//go:build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package main + +import ( + "context" + "fmt" + "os" + "strconv" + "strings" + "syscall" + "time" +) + +// issue9103KillInitAfterCreate kills the runc.Init process after creating +// command returns successfully. +// +// REF: https://github.com/containerd/containerd/issues/9103 +func issue9103KillInitAfterCreate(ctx context.Context, method invoker) error { + isCreated := strings.Contains(strings.Join(os.Args, ","), ",create,") + + if err := method(ctx); err != nil { + return err + } + + if !isCreated { + return nil + } + + initPidPath := "init.pid" + data, err := os.ReadFile(initPidPath) + if err != nil { + return fmt.Errorf("failed to read %s: %w", initPidPath, err) + } + + pid, err := strconv.Atoi(string(data)) + if err != nil { + return fmt.Errorf("failed to get init pid from string %s: %w", string(data), err) + } + + if pid <= 0 { + return fmt.Errorf("unexpected init pid %v", pid) + } + + if err := syscall.Kill(pid, syscall.SIGKILL); err != nil { + return fmt.Errorf("failed to kill the init pid %v: %w", pid, err) + } + + // Ensure that the containerd-shim has received the SIGCHLD and start + // to cleanup + time.Sleep(3 * time.Second) + return nil +} diff --git a/integration/failpoint/cmd/runc-fp/main.go b/integration/failpoint/cmd/runc-fp/main.go new file mode 100644 index 000000000..7b6f8f6fc --- /dev/null +++ b/integration/failpoint/cmd/runc-fp/main.go @@ -0,0 +1,108 @@ +//go:build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package main + +import ( + "context" + "fmt" + "os" + "os/exec" + "syscall" + + "github.com/containerd/containerd/oci" + "github.com/sirupsen/logrus" +) + +const ( + failpointProfileKey = "oci.runc.failpoint.profile" +) + +type invoker func(context.Context) error + +type invokerInterceptor func(context.Context, invoker) error + +var ( + failpointProfiles = map[string]invokerInterceptor{ + "issue9103": issue9103KillInitAfterCreate, + } +) + +// setupLog setups messages into log file. +func setupLog() { + // containerd/go-runc always add --log option + idx := 2 + for ; idx < len(os.Args); idx++ { + if os.Args[idx] == "--log" { + break + } + } + + if idx >= len(os.Args)-1 || os.Args[idx] != "--log" { + panic("option --log required") + } + + logFile := os.Args[idx+1] + f, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0o644) + if err != nil { + panic(fmt.Errorf("failed to open %s: %w", logFile, err)) + } + + logrus.SetOutput(f) + logrus.SetFormatter(new(logrus.JSONFormatter)) +} + +func main() { + setupLog() + + fpProfile, err := failpointProfileFromOCIAnnotation() + if err != nil { + logrus.WithError(err).Fatal("failed to get failpoint profile") + } + + ctx := context.Background() + if err := fpProfile(ctx, defaultRuncInvoker); err != nil { + logrus.WithError(err).Fatal("failed to exec failpoint profile") + } +} + +// defaultRuncInvoker is to call the runc command with same arguments. +func defaultRuncInvoker(ctx context.Context) error { + cmd := exec.CommandContext(ctx, "runc", os.Args[1:]...) + cmd.SysProcAttr = &syscall.SysProcAttr{Pdeathsig: syscall.SIGKILL} + return cmd.Run() +} + +// failpointProfileFromOCIAnnotation gets the profile from OCI annotations. +func failpointProfileFromOCIAnnotation() (invokerInterceptor, error) { + spec, err := oci.ReadSpec(oci.ConfigFilename) + if err != nil { + return nil, fmt.Errorf("failed to read %s: %w", oci.ConfigFilename, err) + } + + profileName, ok := spec.Annotations[failpointProfileKey] + if !ok { + return nil, fmt.Errorf("failpoint profile is required") + } + + fp, ok := failpointProfiles[profileName] + if !ok { + return nil, fmt.Errorf("no such failpoint profile %s", profileName) + } + return fp, nil +} diff --git a/runtime/v2/runc/task/service.go b/runtime/v2/runc/task/service.go index ff09c045a..4b97c7eb9 100644 --- a/runtime/v2/runc/task/service.go +++ b/runtime/v2/runc/task/service.go @@ -137,11 +137,14 @@ type containerProcess struct { // that its exit can be handled efficiently. If the process has already exited, // it handles the exit immediately. handleStarted should be called after the // event announcing the start of the process has been published. +// Note that handleStarted needs to be aware of whether s.mu is already held +// when it is called. If s.mu has been held, we don't need to lock it when +// calling handleProcessExit. // // The returned cleanup closure releases resources used to handle early exits. // It must be called before the caller of preStart returns, otherwise severe // memory leaks will occur. -func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Container, process.Process), cleanup func()) { +func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Container, process.Process, bool), cleanup func()) { exits := make(map[int][]runcC.Exit) s.lifecycleMu.Lock() @@ -165,7 +168,7 @@ func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Containe } } - handleStarted = func(c *runc.Container, p process.Process) { + handleStarted = func(c *runc.Container, p process.Process, muLocked bool) { var pid int if p != nil { pid = p.Pid() @@ -180,7 +183,13 @@ func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Containe } else if exited { s.lifecycleMu.Unlock() for _, ee := range ees { - s.handleProcessExit(ee, c, p) + if muLocked { + s.handleProcessExit(ee, c, p) + } else { + s.mu.Lock() + s.handleProcessExit(ee, c, p) + s.mu.Unlock() + } } } else { s.running[pid] = append(s.running[pid], containerProcess{ @@ -235,7 +244,7 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ * // could happen would also cause the container.Pid() call above to // nil-deference panic. proc, _ := container.Process("") - handleStarted(container, proc) + handleStarted(container, proc, true) return &taskAPI.CreateTaskResponse{ Pid: uint32(container.Pid()), @@ -262,7 +271,7 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI. defer cleanup() p, err := container.Start(ctx, r) if err != nil { - handleStarted(container, p) + handleStarted(container, p, false) return nil, errdefs.ToGRPC(err) } @@ -302,7 +311,7 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI. Pid: uint32(p.Pid()), }) } - handleStarted(container, p) + handleStarted(container, p, false) return &taskAPI.StartResponse{ Pid: uint32(p.Pid()), }, nil @@ -462,7 +471,7 @@ func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.Pi if err != nil { return nil, err } - pids, err := s.getContainerPids(ctx, r.ID) + pids, err := s.getContainerPids(ctx, container) if err != nil { return nil, errdefs.ToGRPC(err) } @@ -631,7 +640,9 @@ func (s *service) processExits() { s.lifecycleMu.Unlock() for _, cp := range cps { + s.mu.Lock() s.handleProcessExit(e, cp.Container, cp.Process) + s.mu.Unlock() } } } @@ -640,10 +651,8 @@ func (s *service) send(evt interface{}) { s.events <- evt } +// s.mu must be locked when calling handleProcessExit func (s *service) handleProcessExit(e runcC.Exit, c *runc.Container, p process.Process) { - s.mu.Lock() - defer s.mu.Unlock() - if ip, ok := p.(*process.Init); ok { // Ensure all children are killed if runc.ShouldKillAllOnExit(s.context, c.Bundle) { @@ -664,16 +673,12 @@ func (s *service) handleProcessExit(e runcC.Exit, c *runc.Container, p process.P }) } -func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { - container, err := s.getContainer(id) - if err != nil { - return nil, err - } +func (s *service) getContainerPids(ctx context.Context, container *runc.Container) ([]uint32, error) { p, err := container.Process("") if err != nil { return nil, errdefs.ToGRPC(err) } - ps, err := p.(*process.Init).Runtime().Ps(ctx, id) + ps, err := p.(*process.Init).Runtime().Ps(ctx, container.ID) if err != nil { return nil, err } diff --git a/script/setup/install-failpoint-binaries b/script/setup/install-failpoint-binaries index 533eb5423..8e708ea45 100755 --- a/script/setup/install-failpoint-binaries +++ b/script/setup/install-failpoint-binaries @@ -33,3 +33,7 @@ sudo install bin/cni-bridge-fp "${CNI_BIN_DIR}" SHIM_BIN_DIR=${SHIM_BIN_DIR:-"/usr/local/bin"} make bin/containerd-shim-runc-fp-v1 sudo install bin/containerd-shim-runc-fp-v1 "${SHIM_BIN_DIR}" + +RUNCFP_BIN_DIR=${RUNCFP_BIN_DIR:-"/usr/local/bin"} +make bin/runc-fp +sudo install bin/runc-fp "${RUNCFP_BIN_DIR}"