Merge pull request #5904 from qiutongs/ip-leakage-fix

This commit is contained in:
Derek McGowan 2022-09-29 18:14:35 -07:00 committed by GitHub
commit 1cc38f8df7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 324 additions and 88 deletions

View File

@ -164,6 +164,15 @@ func WithPodHostname(hostname string) PodSandboxOpts {
}
}
// WithPodLabels returns a PodSandboxOpts that copies every entry of kvs into
// the labels of the pod sandbox config, overwriting keys that already exist.
func WithPodLabels(kvs map[string]string) PodSandboxOpts {
	return func(p *runtime.PodSandboxConfig) {
		// Assigning into a nil map panics, so allocate the label map first
		// when the config was built without one and there is something to add.
		if p.Labels == nil && len(kvs) > 0 {
			p.Labels = make(map[string]string, len(kvs))
		}
		for k, v := range kvs {
			p.Labels[k] = v
		}
	}
}
// PodSandboxConfig generates a pod sandbox config for test.
func PodSandboxConfig(name, ns string, opts ...PodSandboxOpts) *runtime.PodSandboxConfig {
config := &runtime.PodSandboxConfig{
@ -176,6 +185,7 @@ func PodSandboxConfig(name, ns string, opts ...PodSandboxOpts) *runtime.PodSandb
},
Linux: &runtime.LinuxPodSandboxConfig{},
Annotations: make(map[string]string),
Labels: make(map[string]string),
}
for _, opt := range opts {
opt(config)

View File

@ -28,10 +28,10 @@ import (
"strings"
"testing"
"github.com/stretchr/testify/require"
criapiv1 "k8s.io/cri-api/pkg/apis/runtime/v1"
"github.com/containerd/containerd/pkg/failpoint"
"github.com/stretchr/testify/require"
)
const (
@ -89,6 +89,146 @@ func TestRunPodSandboxWithShimStartFailure(t *testing.T) {
require.Equal(t, true, strings.Contains(err.Error(), "no hard feelings"))
}
// TestRunPodSandboxWithShimDeleteFailure should keep the sandbox record if
// failed to rollback shim by shim.Delete API.
//
// The test injects a one-shot failure into both the shim Start and Delete
// calls, so RunPodSandbox fails and the rollback of the shim fails as well.
// It then checks that the sandbox is still listed in the NOTREADY state with a
// non-empty IP, optionally restarts containerd and checks the state again, and
// finally removes the sandbox. It is skipped on non-linux platforms and when
// ENABLE_CRI_SANDBOXES is set.
func TestRunPodSandboxWithShimDeleteFailure(t *testing.T) {
	if runtime.GOOS != "linux" {
		t.Skip()
	}
	if os.Getenv("ENABLE_CRI_SANDBOXES") != "" {
		t.Skip()
	}

	// testCase builds the subtest body; restart selects whether containerd is
	// restarted before the leaked sandbox is removed.
	testCase := func(restart bool) func(*testing.T) {
		return func(t *testing.T) {
			t.Log("Init PodSandboxConfig with specific label")
			// The subtest name is used as the label key so the sandbox can be
			// looked up through a label selector below.
			labels := map[string]string{
				t.Name(): "true",
			}
			sbConfig := PodSandboxConfig(t.Name(), "failpoint", WithPodLabels(labels))

			t.Log("Inject Shim failpoint")
			injectShimFailpoint(t, sbConfig, map[string]string{
				"Start":  "1*error(failed to start shim)",
				"Delete": "1*error(please retry)", // inject failpoint during rollback shim
			})

			t.Log("Create a sandbox")
			_, err := runtimeService.RunPodSandbox(sbConfig, failpointRuntimeHandler)
			require.Error(t, err)
			require.ErrorContains(t, err, "failed to start shim")

			t.Log("ListPodSandbox with the specific label")
			l, err := runtimeService.ListPodSandbox(&criapiv1.PodSandboxFilter{LabelSelector: labels})
			require.NoError(t, err)
			require.Len(t, l, 1)

			// require.Equal takes (expected, actual); keeping that order makes
			// the failure output label the two values correctly.
			sb := l[0]
			require.Equal(t, criapiv1.PodSandboxState_SANDBOX_NOTREADY, sb.State)
			require.Equal(t, sbConfig.Metadata.Name, sb.Metadata.Name)
			require.Equal(t, sbConfig.Metadata.Namespace, sb.Metadata.Namespace)
			require.Equal(t, sbConfig.Metadata.Uid, sb.Metadata.Uid)
			require.Equal(t, sbConfig.Metadata.Attempt, sb.Metadata.Attempt)

			t.Log("Check PodSandboxStatus")
			sbStatus, err := runtimeService.PodSandboxStatus(sb.Id)
			require.NoError(t, err)
			require.Equal(t, criapiv1.PodSandboxState_SANDBOX_NOTREADY, sbStatus.State)
			require.Greater(t, len(sbStatus.Network.Ip), 0)

			if restart {
				t.Log("Restart containerd")
				RestartContainerd(t)

				// After the restart the sandbox is looked up by ID rather than
				// by label.
				t.Log("ListPodSandbox with the specific label")
				l, err = runtimeService.ListPodSandbox(&criapiv1.PodSandboxFilter{Id: sb.Id})
				require.NoError(t, err)
				require.Len(t, l, 1)
				require.Equal(t, criapiv1.PodSandboxState_SANDBOX_NOTREADY, l[0].State)

				t.Log("Check PodSandboxStatus")
				// Plain assignment: reuse the outer variables instead of
				// shadowing them inside this branch.
				sbStatus, err = runtimeService.PodSandboxStatus(sb.Id)
				require.NoError(t, err)
				t.Log(sbStatus.Network)
				require.Equal(t, criapiv1.PodSandboxState_SANDBOX_NOTREADY, sbStatus.State)
			}

			t.Log("Cleanup leaky sandbox")
			err = runtimeService.RemovePodSandbox(sb.Id)
			require.NoError(t, err)
		}
	}

	t.Run("CleanupAfterRestart", testCase(true))
	t.Run("JustCleanup", testCase(false))
}
// TestRunPodSandboxWithShimStartAndTeardownCNIFailure should keep the sandbox
// record if failed to rollback CNI API.
//
// The test injects a one-shot failure into the shim Start call and into the
// CNI Del command, so RunPodSandbox fails and the network teardown performed
// during rollback fails as well. It then checks that the sandbox is still
// listed in the NOTREADY state, optionally restarts containerd and checks the
// state again, and finally removes the sandbox. It is skipped on non-linux
// platforms and when ENABLE_CRI_SANDBOXES is set.
func TestRunPodSandboxWithShimStartAndTeardownCNIFailure(t *testing.T) {
	if runtime.GOOS != "linux" {
		t.Skip()
	}
	if os.Getenv("ENABLE_CRI_SANDBOXES") != "" {
		t.Skip()
	}

	// testCase builds the subtest body; restart selects whether containerd is
	// restarted before the leaked sandbox is removed.
	testCase := func(restart bool) func(*testing.T) {
		return func(t *testing.T) {
			t.Log("Init PodSandboxConfig with specific key")
			// The subtest name is used as the label key so the sandbox can be
			// looked up through a label selector below.
			labels := map[string]string{
				t.Name(): "true",
			}
			sbConfig := PodSandboxConfig(t.Name(), "failpoint", WithPodLabels(labels))

			t.Log("Inject Shim failpoint")
			injectShimFailpoint(t, sbConfig, map[string]string{
				"Start": "1*error(failed to start shim)",
			})

			t.Log("Inject CNI failpoint")
			conf := &failpointConf{
				Del: "1*error(please retry)",
			}
			injectCNIFailpoint(t, sbConfig, conf)

			t.Log("Create a sandbox")
			_, err := runtimeService.RunPodSandbox(sbConfig, failpointRuntimeHandler)
			require.Error(t, err)
			require.ErrorContains(t, err, "failed to start shim")

			t.Log("ListPodSandbox with the specific label")
			l, err := runtimeService.ListPodSandbox(&criapiv1.PodSandboxFilter{LabelSelector: labels})
			require.NoError(t, err)
			require.Len(t, l, 1)

			// require.Equal takes (expected, actual); keeping that order makes
			// the failure output label the two values correctly.
			sb := l[0]
			require.Equal(t, criapiv1.PodSandboxState_SANDBOX_NOTREADY, sb.State)
			require.Equal(t, sbConfig.Metadata.Name, sb.Metadata.Name)
			require.Equal(t, sbConfig.Metadata.Namespace, sb.Metadata.Namespace)
			require.Equal(t, sbConfig.Metadata.Uid, sb.Metadata.Uid)
			require.Equal(t, sbConfig.Metadata.Attempt, sb.Metadata.Attempt)

			if restart {
				t.Log("Restart containerd")
				RestartContainerd(t)

				// After the restart the sandbox is looked up by ID rather than
				// by label.
				t.Log("ListPodSandbox with the specific label")
				l, err = runtimeService.ListPodSandbox(&criapiv1.PodSandboxFilter{Id: sb.Id})
				require.NoError(t, err)
				require.Len(t, l, 1)
				require.Equal(t, criapiv1.PodSandboxState_SANDBOX_NOTREADY, l[0].State)
			}

			t.Log("Cleanup leaky sandbox")
			err = runtimeService.RemovePodSandbox(sb.Id)
			require.NoError(t, err)
		}
	}

	t.Run("CleanupAfterRestart", testCase(true))
	t.Run("JustCleanup", testCase(false))
}
// failpointConf is used to describe cmdAdd/cmdDel/cmdCheck command's failpoint.
type failpointConf struct {
Add string `json:"cmdAdd"`
@ -101,7 +241,7 @@ func injectCNIFailpoint(t *testing.T, sbConfig *criapiv1.PodSandboxConfig, conf
metadata := sbConfig.Metadata
fpFilename := filepath.Join(stateDir,
fmt.Sprintf("%s-%s.json", metadata.Namespace, metadata.Name))
fmt.Sprintf("%s-%s.json", metadata.Namespace, strings.Replace(metadata.Name, "/", "-", -1)))
data, err := json.Marshal(conf)
require.NoError(t, err)

View File

@ -23,6 +23,10 @@ import (
"context"
"fmt"
"github.com/containerd/containerd"
"github.com/containerd/containerd/containers"
"github.com/containerd/typeurl"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
containerstore "github.com/containerd/containerd/pkg/cri/store/container"
@ -44,3 +48,19 @@ func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.Up
}
return &runtime.UpdateContainerResourcesResponse{}, nil
}
// TODO: Copied from container_update_resources.go because that file is not built for darwin.
// updateContainerSpec marshals spec with typeurl and stores the result as the
// Spec of the container record behind cntr.
//
// It returns a wrapped error when marshalling the spec fails or when the
// container update is rejected; otherwise it returns nil.
func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error {
	// Named specAny rather than "any" so the predeclared identifier any is not
	// shadowed for the rest of the function.
	specAny, err := typeurl.MarshalAny(spec)
	if err != nil {
		return fmt.Errorf("failed to marshal spec %+v: %w", spec, err)
	}
	if err := cntr.Update(ctx, func(ctx context.Context, client *containerd.Client, c *containers.Container) error {
		c.Spec = specAny
		return nil
	}); err != nil {
		return fmt.Errorf("failed to update container spec: %w", err)
	}
	return nil
}

View File

@ -70,14 +70,22 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
}
name := makeSandboxName(metadata)
log.G(ctx).WithField("podsandboxid", id).Debugf("generated id for sandbox name %q", name)
// cleanupErr records the last error returned by the critical cleanup operations in deferred functions,
// like CNI teardown and stopping the running sandbox task.
// If cleanup is not completed for some reason, the CRI-plugin will leave the sandbox
// in a not-ready state, which can later be cleaned up by the next execution of the kubelet's syncPod workflow.
var cleanupErr error
// Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the
// same sandbox.
if err := c.sandboxNameIndex.Reserve(name, id); err != nil {
return nil, fmt.Errorf("failed to reserve sandbox name %q: %w", name, err)
}
defer func() {
// Release the name if the function returns with an error.
if retErr != nil {
// Release the name if the function returns with an error and all the resource cleanup is done.
// When cleanupErr != nil, the name will be cleaned in sandbox_remove.
if retErr != nil && cleanupErr == nil {
c.sandboxNameIndex.ReleaseByName(name)
}
}()
@ -111,70 +119,13 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
}
log.G(ctx).WithField("podsandboxid", id).Debugf("use OCI runtime %+v", ociRuntime)
podNetwork := true
if goruntime.GOOS != "windows" &&
config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
// Pod network is not needed on linux with host network.
podNetwork = false
}
if goruntime.GOOS == "windows" &&
config.GetWindows().GetSecurityContext().GetHostProcess() {
//Windows HostProcess pods can only run on the host network
podNetwork = false
}
if podNetwork {
netStart := time.Now()
// If it is not in host network namespace then create a namespace and set the sandbox
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
// namespaces. If the pod is in host network namespace then both are empty and should not
// be used.
var netnsMountDir = "/var/run/netns"
if c.config.NetNSMountsUnderStateDir {
netnsMountDir = filepath.Join(c.config.StateDir, "netns")
}
sandbox.NetNS, err = netns.NewNetNS(netnsMountDir)
if err != nil {
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
}
sandbox.NetNSPath = sandbox.NetNS.GetPath()
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Teardown network if an error is returned.
if err := c.teardownPodNetwork(deferCtx, sandbox); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to destroy network for sandbox %q", id)
}
if err := sandbox.NetNS.Remove(); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
}
sandbox.NetNSPath = ""
}
}()
// Setup network for sandbox.
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
// rely on the assumption that CRI shim will not be querying the network namespace to check the
// network states such as IP.
// In future runtime implementation should avoid relying on CRI shim implementation details.
// In this case however caching the IP will add a subtle performance enhancement by avoiding
// calls to network namespace of the pod to query the IP of the veth interface on every
// SandboxStatus request.
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
}
sandboxCreateNetworkTimer.UpdateSince(netStart)
}
runtimeStart := time.Now()
// Create sandbox container.
// NOTE: sandboxContainerSpec SHOULD NOT have side
// effect, e.g. accessing/creating files, so that we can test
// it safely.
spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, sandbox.NetNSPath, ociRuntime.PodAnnotations)
// NOTE: the network namespace path is created later and set in the spec through the updateNetNamespacePath function
spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, "", ociRuntime.PodAnnotations)
if err != nil {
return nil, fmt.Errorf("failed to generate sandbox container spec: %w", err)
}
@ -222,12 +173,27 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
if err != nil {
return nil, fmt.Errorf("failed to create containerd container: %w", err)
}
// Add container into sandbox store in INIT state.
sandbox.Container = container
defer func() {
if retErr != nil {
// Put the sandbox into the sandbox store when some resource fails to be cleaned up.
if retErr != nil && cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("encountered an error cleaning up failed sandbox %q, marking sandbox state as SANDBOX_UNKNOWN", id)
if err := c.sandboxStore.Add(sandbox); err != nil {
log.G(ctx).WithError(err).Errorf("failed to add sandbox %+v into store", sandbox)
}
}
}()
defer func() {
// Delete container only if all the resource cleanup is done.
if retErr != nil && cleanupErr == nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
if err := container.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id)
if cleanupErr = container.Delete(deferCtx, containerd.WithSnapshotCleanup); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to delete containerd container %q", id)
}
}
}()
@ -281,6 +247,82 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
return nil, fmt.Errorf("failed to get sandbox container info: %w", err)
}
podNetwork := true
if goruntime.GOOS != "windows" &&
config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
// Pod network is not needed on linux with host network.
podNetwork = false
}
if goruntime.GOOS == "windows" &&
config.GetWindows().GetSecurityContext().GetHostProcess() {
// Windows HostProcess pods can only run on the host network
podNetwork = false
}
if podNetwork {
netStart := time.Now()
// If it is not in host network namespace then create a namespace and set the sandbox
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
// namespaces. If the pod is in host network namespace then both are empty and should not
// be used.
var netnsMountDir = "/var/run/netns"
if c.config.NetNSMountsUnderStateDir {
netnsMountDir = filepath.Join(c.config.StateDir, "netns")
}
sandbox.NetNS, err = netns.NewNetNS(netnsMountDir)
if err != nil {
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
}
sandbox.NetNSPath = sandbox.NetNS.GetPath()
defer func() {
// Remove the network namespace only if all the resource cleanup is done.
if retErr != nil && cleanupErr == nil {
if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
return
}
sandbox.NetNSPath = ""
}
}()
// Update network namespace in the container's spec
c.updateNetNamespacePath(spec, sandbox.NetNSPath)
if err := updateContainerSpec(ctx, container, spec); err != nil {
return nil, fmt.Errorf("failed to update the network namespace for the sandbox container %q: %w", id, err)
}
// Define this defer to teardownPodNetwork prior to the setupPodNetwork function call.
// This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource creation functions.
defer func() {
// Teardown the network only if all the resource cleanup is done.
if retErr != nil && cleanupErr == nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Teardown network if an error is returned.
if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id)
}
}
}()
// Setup network for sandbox.
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
// rely on the assumption that CRI shim will not be querying the network namespace to check the
// network states such as IP.
// In future runtime implementation should avoid relying on CRI shim implementation details.
// In this case however caching the IP will add a subtle performance enhancement by avoiding
// calls to network namespace of the pod to query the IP of the veth interface on every
// SandboxStatus request.
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
}
sandboxCreateNetworkTimer.UpdateSince(netStart)
}
// Create sandbox task in containerd.
log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).",
id, name)
@ -301,6 +343,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
// Cleanup the sandbox container if an error is returned.
if _, err := task.Delete(deferCtx, WithNRISandboxDelete(id), containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
log.G(ctx).WithError(err).Errorf("Failed to delete sandbox container %q", id)
cleanupErr = err
}
}
}()
@ -339,9 +382,6 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
return nil, fmt.Errorf("failed to update sandbox status: %w", err)
}
// Add sandbox into sandbox store in INIT state.
sandbox.Container = container
if err := c.sandboxStore.Add(sandbox); err != nil {
return nil, fmt.Errorf("failed to add sandbox %+v into store: %w", sandbox, err)
}

View File

@ -348,3 +348,12 @@ func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
return taskOpts
}
// updateNetNamespacePath sets nsPath as the path of the first network
// namespace entry found in spec.Linux.Namespaces. The spec is left unchanged
// when no entry of that type exists.
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
	namespaces := spec.Linux.Namespaces
	for idx := range namespaces {
		// Take the address of the element so the write lands in the spec's
		// own slice rather than in a copy.
		ns := &namespaces[idx]
		if ns.Type != runtimespec.NetworkNamespace {
			continue
		}
		ns.Path = nsPath
		return
	}
}

View File

@ -54,3 +54,6 @@ func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxCo
func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
return []containerd.NewTaskOpts{}
}
// updateNetNamespacePath is a no-op in this variant: spec and nsPath are
// accepted but never read, and spec is left untouched.
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
}

View File

@ -111,3 +111,7 @@ func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxCo
func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
return nil
}
// updateNetNamespacePath records nsPath as the network namespace of the
// Windows section of spec. spec.Windows and spec.Windows.Network are
// dereferenced without a nil check, so both must already be populated.
func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
	network := spec.Windows.Network
	network.NetworkNamespace = nsPath
}

View File

@ -29,24 +29,24 @@ import (
// | |
// | Create(Run) | Load
// | |
// Start | |
// (failed) | |
// +------------------+ +-----------+
// | | | |
// | | | |
// | | | |
// | | Start(Run) | |
// | | | |
// | PortForward +----v----+ | |
// | +------+ | | |
// | | | READY <---------+ |
// | +------> | | |
// | +----+----+ | |
// | | | |
// | | Stop/Exit | |
// | | | |
// | +----v----+ | |
// | | <---------+ +----v----+
// | |
// | | Start
// | |(failed and not cleaned)
// Start |--------------|--------------+
//(failed but cleaned)| | |
// +------------------+ |-----------+ |
// | | Start(Run) | | |
// | | | | |
// | PortForward +----v----+ | | |
// | +------+ | | | |
// | | | READY <---------+ | |
// | +------> | | | |
// | +----+----+ | | |
// | | | | |
// | | Stop/Exit | | |
// | | | | |
// | +----v----+ | | |
// | | <---------+ +----v--v-+
// | | NOTREADY| | |
// | | <----------------+ UNKNOWN |
// | +----+----+ Stop | |

View File

@ -42,7 +42,17 @@ mkdir -p "${REPORT_DIR}"
test_setup "${REPORT_DIR}"
# Run integration test.
${sudo} bin/cri-integration.test --test.run="${FOCUS}" --test.v \
CMD=""
if [ -n "${sudo}" ]; then
CMD+="${sudo} "
# sudo strips environment variables, so add ENABLE_CRI_SANDBOXES back if present
if [ -n "${ENABLE_CRI_SANDBOXES}" ]; then
CMD+="ENABLE_CRI_SANDBOXES='${ENABLE_CRI_SANDBOXES}' "
fi
fi
CMD+="${PWD}/bin/cri-integration.test"
${CMD} --test.run="${FOCUS}" --test.v \
--cri-endpoint="${CONTAINERD_SOCK}" \
--cri-root="${CRI_ROOT}" \
--runtime-handler="${RUNTIME}" \