Merge pull request #10649 from samuelkarp/shim-exec-fp-test
integration: regression test for issue 10589
This commit is contained in:
commit
e013322eed
@ -34,14 +34,18 @@ import (
|
||||
"github.com/containerd/cgroups/v3/cgroup1"
|
||||
cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2"
|
||||
"github.com/containerd/containerd/api/types/runc/options"
|
||||
"github.com/containerd/errdefs"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
. "github.com/containerd/containerd/v2/client"
|
||||
"github.com/containerd/containerd/v2/core/containers"
|
||||
"github.com/containerd/containerd/v2/integration/failpoint"
|
||||
"github.com/containerd/containerd/v2/pkg/cio"
|
||||
"github.com/containerd/containerd/v2/pkg/fifosync"
|
||||
"github.com/containerd/containerd/v2/pkg/oci"
|
||||
"github.com/containerd/containerd/v2/pkg/shim"
|
||||
"github.com/containerd/containerd/v2/pkg/sys"
|
||||
"github.com/containerd/containerd/v2/plugins"
|
||||
"github.com/containerd/errdefs"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/stretchr/testify/require"
|
||||
@ -1551,3 +1555,207 @@ func TestIssue9103(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestIssue10589 is used as regression case for issue 10589.
|
||||
//
|
||||
// This issue was caused by a race between init exits and new exec process tracking inside the shim. The test operates
|
||||
// by controlling the time between when the shim invokes "runc exec" and when the actual "runc exec" is triggered. This
|
||||
// allows validating that races for shim state tracking between pre- and post-start of the exec process do not exist.
|
||||
//
|
||||
// The workflow is as follows:
|
||||
// 1. Create a container as normal
|
||||
// 2. Make an exec1 using runc-fp with delayexec
|
||||
// 3. Wait until the exec is waiting to start (triggered by delayexec)
|
||||
// 4. Kill the container init process (signalling it is easiest)
|
||||
// 5. Make an exec2 using runc-fp with delayexec
|
||||
// 6. Wait until the exec is waiting to start
|
||||
// 7. Allow exec1 to proceed
|
||||
// 8. Allow exec2 to proceed
|
||||
// 9. See that the container has exited and all execs have exited too
|
||||
//
|
||||
// https://github.com/containerd/containerd/issues/10589
|
||||
func TestIssue10589(t *testing.T) {
|
||||
if f := os.Getenv("RUNC_FLAVOR"); f != "" && f != "runc" {
|
||||
t.Skip("test requires runc")
|
||||
}
|
||||
|
||||
client, err := newClient(t, address)
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(func() {
|
||||
client.Close()
|
||||
})
|
||||
|
||||
var (
|
||||
image Image
|
||||
ctx, cancel = testContext(t)
|
||||
id = t.Name()
|
||||
)
|
||||
t.Cleanup(cancel)
|
||||
|
||||
image, err = client.GetImage(ctx, testImage)
|
||||
require.NoError(t, err)
|
||||
|
||||
// 1. Create a sleeping container
|
||||
t.Log("1. Create a sleeping container")
|
||||
container, err := client.NewContainer(ctx, id,
|
||||
WithNewSnapshot(id, image),
|
||||
WithNewSpec(oci.WithImageConfig(image),
|
||||
withProcessArgs("sleep", "inf"),
|
||||
oci.WithAnnotations(map[string]string{
|
||||
"oci.runc.failpoint.profile": "delayExec",
|
||||
}),
|
||||
),
|
||||
WithRuntime(client.Runtime(), &options.Options{
|
||||
BinaryName: "runc-fp",
|
||||
}),
|
||||
)
|
||||
require.NoError(t, err, "create container")
|
||||
t.Cleanup(func() {
|
||||
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
err := container.Delete(ctx, WithSnapshotCleanup)
|
||||
if err != nil {
|
||||
t.Log("delete err", err)
|
||||
}
|
||||
cancel()
|
||||
})
|
||||
|
||||
task, err := container.NewTask(ctx, empty())
|
||||
require.NoError(t, err, "create task")
|
||||
t.Cleanup(func() {
|
||||
ctx, cancel := context.WithTimeout(ctx, 2*time.Second)
|
||||
st, err := task.Delete(ctx, WithProcessKill)
|
||||
t.Log("exit status", st)
|
||||
if err != nil {
|
||||
t.Log("kill err", err)
|
||||
}
|
||||
cancel()
|
||||
})
|
||||
|
||||
err = task.Start(ctx)
|
||||
require.NoError(t, err, "start container")
|
||||
|
||||
status, err := task.Status(ctx)
|
||||
require.NoError(t, err, "container status")
|
||||
require.Equal(t, Running, status.Status)
|
||||
|
||||
// 2. Create an exec
|
||||
t.Log("2. Create exec1")
|
||||
exec1ReadyFifo, err := fifosync.NewWaiter(filepath.Join(t.TempDir(), "exec1-ready.fifo"), 0600)
|
||||
require.NoError(t, err, "create exec1 ready fifo")
|
||||
exec1DelayFifo, err := fifosync.NewTrigger(filepath.Join(t.TempDir(), "exec1-delay.fifo"), 0600)
|
||||
require.NoError(t, err, "create exec1 delay fifo")
|
||||
exec1, err := task.Exec(ctx, "exec1", &specs.Process{
|
||||
Args: []string{"/bin/sleep", "301"},
|
||||
Cwd: "/",
|
||||
Env: []string{
|
||||
failpoint.DelayExecReadyEnv + "=" + exec1ReadyFifo.Name(),
|
||||
failpoint.DelayExecDelayEnv + "=" + exec1DelayFifo.Name(),
|
||||
},
|
||||
}, cio.NullIO)
|
||||
require.NoError(t, err, "create exec1")
|
||||
|
||||
exec1done := make(chan struct{})
|
||||
go func() {
|
||||
defer close(exec1done)
|
||||
t.Log("Starting exec1")
|
||||
err := exec1.Start(ctx)
|
||||
assert.Error(t, err, "start exec1")
|
||||
t.Logf("error starting exec1: %s", err)
|
||||
}()
|
||||
|
||||
// 3. Wait until the exec is waiting to start
|
||||
t.Log("3. Wait until exec1 is waiting to start")
|
||||
err = exec1ReadyFifo.Wait()
|
||||
require.NoError(t, err, "open exec1 fifo")
|
||||
|
||||
// 4. Kill the container init process
|
||||
t.Log("4. Kill the container init process")
|
||||
target := task.Pid()
|
||||
t.Logf("Killing main pid (%v) of container %s", target, container.ID())
|
||||
syscall.Kill(int(target), syscall.SIGKILL)
|
||||
status, err = task.Status(ctx)
|
||||
require.NoError(t, err, "container status")
|
||||
t.Log("container status", status.Status)
|
||||
|
||||
// 5. Make an exec (2) using this failpoint
|
||||
t.Log("5. Create exec2")
|
||||
exec2ReadyFifo, err := fifosync.NewWaiter(filepath.Join(t.TempDir(), "exec2-ready.fifo"), 0600)
|
||||
require.NoError(t, err, "create exec2 ready fifo: %q", exec2ReadyFifo)
|
||||
exec2DelayFifo, err := fifosync.NewTrigger(filepath.Join(t.TempDir(), "exec2-delay.fifo"), 0600)
|
||||
require.NoError(t, err, "create exec2 delay fifo: %q", exec2DelayFifo)
|
||||
exec2, err := task.Exec(ctx, "exec2", &specs.Process{
|
||||
Args: []string{"/bin/sleep", "302"},
|
||||
Cwd: "/",
|
||||
Env: []string{
|
||||
failpoint.DelayExecReadyEnv + "=" + exec2ReadyFifo.Name(),
|
||||
failpoint.DelayExecDelayEnv + "=" + exec2DelayFifo.Name(),
|
||||
},
|
||||
}, cio.NullIO)
|
||||
require.NoError(t, err, "create exec2")
|
||||
|
||||
exec2done := make(chan struct{})
|
||||
didExec2Run := true
|
||||
go func() {
|
||||
defer close(exec2done)
|
||||
t.Log("Starting exec2")
|
||||
err := exec2.Start(ctx)
|
||||
assert.Error(t, err, "start exec2")
|
||||
t.Logf("error starting exec2: %s", err)
|
||||
}()
|
||||
|
||||
// 6. Wait until the exec is waiting to start
|
||||
t.Log("6. Wait until exec2 is waiting to start")
|
||||
exec2ready := make(chan struct{})
|
||||
go func() {
|
||||
exec2ReadyFifo.Wait()
|
||||
close(exec2ready)
|
||||
}()
|
||||
select {
|
||||
case <-exec2ready:
|
||||
case <-exec2done:
|
||||
didExec2Run = false
|
||||
}
|
||||
|
||||
// 7. Allow exec=1 to proceed
|
||||
t.Log("7. Allow exec=1 to proceed")
|
||||
err = exec1DelayFifo.Trigger()
|
||||
assert.NoError(t, err, "trigger exec1 fifo")
|
||||
status, err = task.Status(ctx)
|
||||
require.NoError(t, err, "container status")
|
||||
t.Log("container status", status.Status)
|
||||
<-exec1done
|
||||
status, err = task.Status(ctx)
|
||||
require.NoError(t, err, "container status")
|
||||
t.Log("container status", status.Status)
|
||||
|
||||
// 8. Allow exec=2 to proceed
|
||||
if didExec2Run {
|
||||
t.Log("8. Allow exec2 to proceed")
|
||||
err = exec2DelayFifo.Trigger()
|
||||
assert.NoError(t, err, "trigger exec2 fifo")
|
||||
status, err = task.Status(ctx)
|
||||
require.NoError(t, err, "container status")
|
||||
t.Log("container status", status.Status)
|
||||
<-exec2done
|
||||
status, err = task.Status(ctx)
|
||||
require.NoError(t, err, "container status")
|
||||
t.Log("container status", status.Status)
|
||||
} else {
|
||||
t.Log("8. Skip exec2")
|
||||
}
|
||||
|
||||
// 9. Validate
|
||||
t.Log("9. Validate")
|
||||
status, err = exec1.Status(ctx)
|
||||
require.NoError(t, err, "exec1 status")
|
||||
t.Logf("exec1 status: %s", status.Status)
|
||||
assert.Equal(t, Created, status.Status)
|
||||
status, err = exec2.Status(ctx)
|
||||
require.NoError(t, err, "exec2 status")
|
||||
t.Logf("exec2 status: %s", status.Status)
|
||||
assert.Equal(t, Created, status.Status)
|
||||
status, err = task.Status(ctx)
|
||||
t.Logf("task status: %s", status.Status)
|
||||
require.NoError(t, err, "container status")
|
||||
assert.Equal(t, Stopped, status.Status)
|
||||
}
|
||||
|
131
integration/failpoint/cmd/runc-fp/delayexec.go
Normal file
131
integration/failpoint/cmd/runc-fp/delayexec.go
Normal file
@ -0,0 +1,131 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/containerd/containerd/v2/integration/failpoint"
|
||||
"github.com/containerd/containerd/v2/pkg/fifosync"
|
||||
)
|
||||
|
||||
// delayExec delays an "exec" command until a trigger is received from the calling test program. This can be used to
|
||||
// test races around container lifecycle and exec processes.
|
||||
func delayExec(ctx context.Context, method invoker) error {
|
||||
isExec := strings.Contains(strings.Join(os.Args, ","), ",exec,")
|
||||
if !isExec {
|
||||
if err := method(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
logrus.Debug("EXEC!")
|
||||
|
||||
if err := delay(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := method(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func delay() error {
|
||||
ready, delay, err := fifoFromProcessEnv()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := ready.Trigger(); err != nil {
|
||||
return err
|
||||
}
|
||||
return delay.Wait()
|
||||
}
|
||||
|
||||
// fifoFromProcessEnv finds a fifo specified in the environment variables of an exec process
|
||||
func fifoFromProcessEnv() (fifosync.Trigger, fifosync.Waiter, error) {
|
||||
env, err := processEnvironment()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
readyName, ok := env[failpoint.DelayExecReadyEnv]
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("fifo: failed to find %q env var in %v", failpoint.DelayExecReadyEnv, env)
|
||||
}
|
||||
delayName, ok := env[failpoint.DelayExecDelayEnv]
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("fifo: failed to find %q env var in %v", failpoint.DelayExecDelayEnv, env)
|
||||
}
|
||||
logrus.WithField("ready", readyName).WithField("delay", delayName).Debug("Found FIFOs!")
|
||||
readyFIFO, err := fifosync.NewTrigger(readyName, 0600)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
delayFIFO, err := fifosync.NewWaiter(delayName, 0600)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return readyFIFO, delayFIFO, nil
|
||||
}
|
||||
|
||||
func processEnvironment() (map[string]string, error) {
|
||||
idx := 2
|
||||
for ; idx < len(os.Args); idx++ {
|
||||
if os.Args[idx] == "--process" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if idx >= len(os.Args)-1 || os.Args[idx] != "--process" {
|
||||
return nil, errors.New("env: option --process required")
|
||||
}
|
||||
|
||||
specFile := os.Args[idx+1]
|
||||
f, err := os.OpenFile(specFile, os.O_RDONLY, 0o644)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("env: failed to open %s: %w", specFile, err)
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(f)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("env: failed to read spec from %q", specFile)
|
||||
}
|
||||
var spec specs.Process
|
||||
if err := json.Unmarshal(b, &spec); err != nil {
|
||||
return nil, fmt.Errorf("env: failed to unmarshal spec from %q: %w", specFile, err)
|
||||
}
|
||||
|
||||
// XXX: env vars can be specified multiple times, but we only keep one
|
||||
env := make(map[string]string)
|
||||
for _, e := range spec.Env {
|
||||
k, v, _ := strings.Cut(e, "=")
|
||||
env[k] = v
|
||||
}
|
||||
|
||||
return env, nil
|
||||
}
|
@ -25,8 +25,9 @@ import (
|
||||
"os/exec"
|
||||
"syscall"
|
||||
|
||||
"github.com/containerd/containerd/v2/pkg/oci"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/containerd/containerd/v2/pkg/oci"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -40,6 +41,7 @@ type invokerInterceptor func(context.Context, invoker) error
|
||||
var (
|
||||
failpointProfiles = map[string]invokerInterceptor{
|
||||
"issue9103": issue9103KillInitAfterCreate,
|
||||
"delayExec": delayExec,
|
||||
}
|
||||
)
|
||||
|
||||
|
22
integration/failpoint/const.go
Normal file
22
integration/failpoint/const.go
Normal file
@ -0,0 +1,22 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package failpoint
|
||||
|
||||
const (
|
||||
DelayExecReadyEnv = "_RUNC_FP_DELAY_EXEC_READY"
|
||||
DelayExecDelayEnv = "_RUNC_FP_DELAY_EXEC_DELAY"
|
||||
)
|
125
pkg/fifosync/fifo_unix.go
Normal file
125
pkg/fifosync/fifo_unix.go
Normal file
@ -0,0 +1,125 @@
|
||||
//go:build unix
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
Package fifosync provides a pattern on Unix-like operating systems for synchronizing across processes using Unix FIFOs
|
||||
(named pipes).
|
||||
*/
|
||||
package fifosync
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// Trigger is a FIFO which is used to signal another process to proceed.
|
||||
type Trigger interface {
|
||||
// Name returns the name of the trigger
|
||||
Name() string
|
||||
// Trigger triggers another process to proceed.
|
||||
Trigger() error
|
||||
}
|
||||
|
||||
// Waiter is a FIFO which is used to wait for trigger provided by another process.
|
||||
type Waiter interface {
|
||||
// Name returns the name of the waiter
|
||||
Name() string
|
||||
// Wait waits for a trigger from another process.
|
||||
Wait() error
|
||||
}
|
||||
|
||||
type fifo struct {
|
||||
name string
|
||||
}
|
||||
|
||||
// NewTrigger creates a new Trigger
|
||||
func NewTrigger(name string, mode uint32) (Trigger, error) {
|
||||
return new(name, mode)
|
||||
}
|
||||
|
||||
// NewWaiter creates a new Waiter
|
||||
func NewWaiter(name string, mode uint32) (Waiter, error) {
|
||||
return new(name, mode)
|
||||
}
|
||||
|
||||
// New creates a new FIFO if it does not already exist. Use AsTrigger or AsWaiter to convert the new FIFO to a Trigger
|
||||
// or Waiter.
|
||||
func new(name string, mode uint32) (*fifo, error) {
|
||||
s, err := os.Stat(name)
|
||||
exist := true
|
||||
if err != nil {
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
return nil, fmt.Errorf("fifo: failed to stat %q: %w", name, err)
|
||||
}
|
||||
exist = false
|
||||
}
|
||||
if s != nil && s.Mode()&os.ModeNamedPipe == 0 {
|
||||
return nil, fmt.Errorf("fifo: not a named pipe: %q", name)
|
||||
}
|
||||
if !exist {
|
||||
err = unix.Mkfifo(name, mode)
|
||||
if err != nil && !errors.Is(err, unix.EEXIST) {
|
||||
return nil, fmt.Errorf("fifo: failed to create %q: %w", name, err)
|
||||
}
|
||||
}
|
||||
return &fifo{
|
||||
name: name,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (f *fifo) Name() string {
|
||||
return f.name
|
||||
}
|
||||
|
||||
// AsTrigger converts the FIFO to a Trigger.
|
||||
func (f *fifo) AsTrigger() Trigger {
|
||||
return f
|
||||
}
|
||||
|
||||
// Trigger triggers another process to proceed.
|
||||
func (f *fifo) Trigger() error {
|
||||
file, err := os.OpenFile(f.name, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("fifo: failed to open %s: %w", f.name, err)
|
||||
}
|
||||
defer file.Close()
|
||||
_, err = io.ReadAll(file)
|
||||
return err
|
||||
}
|
||||
|
||||
// AsWaiter converts the FIFO to a Waiter.
|
||||
func (f *fifo) AsWaiter() Waiter {
|
||||
return f
|
||||
}
|
||||
|
||||
// Wait waits for a trigger from another process.
|
||||
func (f *fifo) Wait() error {
|
||||
fd, err := unix.Open(f.name, unix.O_WRONLY, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("fifo: failed to open %s: %w", f.name, err)
|
||||
}
|
||||
defer unix.Close(fd)
|
||||
if _, err := unix.Write(fd, []byte("0")); err != nil {
|
||||
return fmt.Errorf("failed to write to %d: %w", fd, err)
|
||||
}
|
||||
return nil
|
||||
}
|
Loading…
Reference in New Issue
Block a user