*: add runc-fp as runc wrapper to inject failpoint

Signed-off-by: Wei Fu <fuweid89@gmail.com>
This commit is contained in:
Wei Fu 2023-09-20 15:23:42 +08:00 committed by Sigma
parent 68dd47ef70
commit 11a7751af5
5 changed files with 265 additions and 0 deletions

View File

@ -236,6 +236,11 @@ bin/cni-bridge-fp: integration/failpoint/cmd/cni-bridge-fp FORCE
@echo "$(WHALE) $@"
@$(GO) build ${GO_BUILD_FLAGS} -o $@ ./integration/failpoint/cmd/cni-bridge-fp
# build runc-fp, a runc wrapper that injects failpoints; only used by integration tests
bin/runc-fp: integration/failpoint/cmd/runc-fp FORCE
@echo "$(WHALE) $@"
@$(GO) build ${GO_BUILD_FLAGS} -o $@ ./integration/failpoint/cmd/runc-fp
benchmark: ## run benchmarks tests
@echo "$(WHALE) $@"
@$(GO) test ${TESTFLAGS} -bench . -run Benchmark -test.root

View File

@ -41,7 +41,9 @@ import (
"github.com/containerd/containerd/plugin"
"github.com/containerd/containerd/runtime/v2/runc/options"
"github.com/containerd/containerd/sys"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/require"
exec "golang.org/x/sys/execabs"
"golang.org/x/sys/unix"
)
@ -1417,3 +1419,80 @@ func TestShimOOMScore(t *testing.T) {
case <-statusC:
}
}
// TestIssue9103 is used as regression case for issue 9103.
//
// The runc-fp will kill the init process so that the shim should return stopped
// status after container.NewTask. It's used to simulate that the runc-init
// might be killed by oom-kill.
//
// Two cases are exercised: a plain container, whose task must report Created
// after NewTask, and a container run through the runc-fp wrapper with the
// "issue9103" failpoint profile, whose task must report Stopped.
func TestIssue9103(t *testing.T) {
	if os.Getenv("RUNC_FLAVOR") == "crun" {
		t.Skip("skip it when using crun")
	}

	client, err := newClient(t, address)
	require.NoError(t, err)
	defer client.Close()

	var (
		image       Image
		ctx, cancel = testContext(t)
		id          = t.Name()
	)
	defer cancel()

	image, err = client.GetImage(ctx, testImage)
	require.NoError(t, err)

	for idx, tc := range []struct {
		desc           string
		cntrOpts       []NewContainerOpts
		expectedStatus ProcessStatus
	}{
		{
			desc: "should be created status",
			cntrOpts: []NewContainerOpts{
				WithNewSpec(oci.WithImageConfig(image),
					withProcessArgs("sleep", "30"),
				),
			},
			expectedStatus: Created,
		},
		{
			desc: "should be stopped status if init has been killed",
			cntrOpts: []NewContainerOpts{
				WithNewSpec(oci.WithImageConfig(image),
					withProcessArgs("sleep", "30"),
					// The annotation selects the failpoint profile that
					// runc-fp applies.
					oci.WithAnnotations(map[string]string{
						"oci.runc.failpoint.profile": "issue9103",
					}),
				),
				// Use the runc-fp wrapper binary instead of the default runtime binary.
				WithRuntime(client.Runtime(), &options.Options{
					BinaryName: "runc-fp",
				}),
			},
			expectedStatus: Stopped,
		},
	} {
		tc := tc
		// Each case gets its own container/snapshot name derived from the test name.
		tName := fmt.Sprintf("%s%d", id, idx)
		t.Run(tc.desc, func(t *testing.T) {
			container, err := client.NewContainer(ctx, tName,
				append([]NewContainerOpts{WithNewSnapshot(tName, image)}, tc.cntrOpts...)...,
			)
			require.NoError(t, err)
			defer container.Delete(ctx, WithSnapshotCleanup)

			// Bound task creation so a hung shim fails the test instead of
			// blocking it.
			cctx, ccancel := context.WithTimeout(ctx, 30*time.Second)
			task, err := container.NewTask(cctx, empty())
			ccancel()
			require.NoError(t, err)
			defer task.Delete(ctx, WithProcessKill)

			status, err := task.Status(ctx)
			require.NoError(t, err)
			// require.Equal takes (expected, actual); keeping that order makes
			// the failure output label the two values correctly.
			require.Equal(t, tc.expectedStatus, status.Status)
		})
	}
}

View File

@ -0,0 +1,69 @@
//go:build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"context"
"fmt"
"os"
"strconv"
"strings"
"syscall"
"time"
)
// issue9103KillInitAfterCreate kills the runc.Init process after creating
// command returns successfully.
//
// The wrapped method is always invoked first and its error, if any, is
// returned unchanged. Only when the command line contains a standalone
// "create" argument does the failpoint go on to read the init pid from
// "init.pid", send SIGKILL to that pid and then sleep for three seconds.
//
// REF: https://github.com/containerd/containerd/issues/9103
func issue9103KillInitAfterCreate(ctx context.Context, method invoker) error {
	// Joining with "," and searching for ",create," matches an argument that
	// is exactly "create" and has arguments on both sides of it.
	isCreated := strings.Contains(strings.Join(os.Args, ","), ",create,")

	if err := method(ctx); err != nil {
		return err
	}

	if !isCreated {
		return nil
	}

	// Relative path: resolved against the current working directory
	// (presumably the bundle directory -- confirm against the caller).
	initPidPath := "init.pid"

	data, err := os.ReadFile(initPidPath)
	if err != nil {
		return fmt.Errorf("failed to read %s: %w", initPidPath, err)
	}

	// strconv.Atoi rejects surrounding whitespace, so tolerate a pid file
	// that ends with a newline.
	pid, err := strconv.Atoi(strings.TrimSpace(string(data)))
	if err != nil {
		return fmt.Errorf("failed to get init pid from string %s: %w", string(data), err)
	}

	// kill(2) gives non-positive pids a special meaning (process groups /
	// all processes), so never pass one through.
	if pid <= 0 {
		return fmt.Errorf("unexpected init pid %v", pid)
	}

	if err := syscall.Kill(pid, syscall.SIGKILL); err != nil {
		return fmt.Errorf("failed to kill the init pid %v: %w", pid, err)
	}

	// Ensure that the containerd-shim has received the SIGCHLD and start
	// to cleanup
	time.Sleep(3 * time.Second)
	return nil
}

View File

@ -0,0 +1,108 @@
//go:build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"context"
"fmt"
"os"
"os/exec"
"syscall"
"github.com/containerd/containerd/oci"
"github.com/sirupsen/logrus"
)
const (
// failpointProfileKey is the OCI annotation key whose value names the
// failpoint profile to apply.
failpointProfileKey = "oci.runc.failpoint.profile"
)
// invoker performs the wrapped command; main passes defaultRuncInvoker here.
type invoker func(context.Context) error
// invokerInterceptor is a failpoint profile: it receives the invoker and
// decides when to call it and what to do around that call.
type invokerInterceptor func(context.Context, invoker) error
var (
// failpointProfiles maps a profile name (the annotation value) to the
// interceptor implementing it.
failpointProfiles = map[string]invokerInterceptor{
"issue9103": issue9103KillInitAfterCreate,
}
)
// setupLog points logrus at the file given by the --log command-line option
// and switches it to JSON formatting.
//
// It panics when --log is missing, when it has no value following it, or when
// the log file cannot be opened.
func setupLog() {
	// containerd/go-runc always add --log option
	//
	// Scan every argument after the program name; starting any later would
	// miss a --log that happens to be the first option.
	logIdx := -1
	for i := 1; i < len(os.Args); i++ {
		if os.Args[i] == "--log" {
			logIdx = i
			break
		}
	}

	// The option must be present and must be followed by its value.
	if logIdx < 0 || logIdx >= len(os.Args)-1 {
		panic("option --log required")
	}

	logFile := os.Args[logIdx+1]
	// The file is opened in append mode with O_SYNC and is intentionally kept
	// open: logrus writes to it for the rest of the process lifetime.
	f, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0o644)
	if err != nil {
		panic(fmt.Errorf("failed to open %s: %w", logFile, err))
	}

	logrus.SetOutput(f)
	logrus.SetFormatter(new(logrus.JSONFormatter))
}
// main configures logging, resolves the failpoint profile from the OCI
// annotations and runs that profile around the default runc invocation.
// Any failure is logged at fatal level.
func main() {
	setupLog()

	interceptor, err := failpointProfileFromOCIAnnotation()
	if err != nil {
		logrus.WithError(err).Fatal("failed to get failpoint profile")
	}

	// The profile decides when (and around what) the real runc gets called.
	err = interceptor(context.Background(), defaultRuncInvoker)
	if err != nil {
		logrus.WithError(err).Fatal("failed to exec failpoint profile")
	}
}
// defaultRuncInvoker runs the "runc" binary with the same arguments this
// wrapper received (os.Args[1:]) and returns the result of running it.
//
// NOTE(review): Stdin/Stdout/Stderr are not set on the command, so per os/exec
// the child is attached to the null device -- confirm this is intended.
func defaultRuncInvoker(ctx context.Context) error {
	forwarded := os.Args[1:]

	runc := exec.CommandContext(ctx, "runc", forwarded...)
	// Have the kernel SIGKILL the child if this wrapper dies first.
	runc.SysProcAttr = &syscall.SysProcAttr{
		Pdeathsig: syscall.SIGKILL,
	}

	return runc.Run()
}
// failpointProfileFromOCIAnnotation reads the OCI spec from oci.ConfigFilename
// and returns the interceptor registered for the profile named by the
// failpointProfileKey annotation.
//
// An error is returned when the spec cannot be read, when the annotation is
// absent, or when no profile is registered under the annotated name.
func failpointProfileFromOCIAnnotation() (invokerInterceptor, error) {
	spec, err := oci.ReadSpec(oci.ConfigFilename)
	if err != nil {
		return nil, fmt.Errorf("failed to read %s: %w", oci.ConfigFilename, err)
	}

	name, annotated := spec.Annotations[failpointProfileKey]
	if !annotated {
		return nil, fmt.Errorf("failpoint profile is required")
	}

	if interceptor, known := failpointProfiles[name]; known {
		return interceptor, nil
	}
	return nil, fmt.Errorf("no such failpoint profile %s", name)
}

View File

@ -33,3 +33,7 @@ sudo install bin/cni-bridge-fp "${CNI_BIN_DIR}"
SHIM_BIN_DIR=${SHIM_BIN_DIR:-"/usr/local/bin"}
make bin/containerd-shim-runc-fp-v1
sudo install bin/containerd-shim-runc-fp-v1 "${SHIM_BIN_DIR}"
# Build the runc-fp wrapper and install it into RUNCFP_BIN_DIR
# (defaults to /usr/local/bin when the variable is unset or empty).
RUNCFP_BIN_DIR=${RUNCFP_BIN_DIR:-"/usr/local/bin"}
make bin/runc-fp
sudo install bin/runc-fp "${RUNCFP_BIN_DIR}"