Merge pull request #949 from Random-Liu/fix-ip-leakage

Fix ip leakage
This commit is contained in:
Lantao Liu 2018-10-12 01:18:12 -07:00 committed by GitHub
commit 728f636e32
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 188 additions and 48 deletions

View File

@ -19,7 +19,6 @@ package integration
import (
"sort"
"testing"
"time"
"github.com/containerd/containerd"
"github.com/stretchr/testify/assert"
@ -140,27 +139,8 @@ func TestContainerdRestart(t *testing.T) {
imagesBeforeRestart, err := imageService.ListImages(nil)
assert.NoError(t, err)
t.Logf("Kill containerd")
require.NoError(t, KillProcess("containerd"))
defer func() {
assert.NoError(t, Eventually(func() (bool, error) {
return ConnectDaemons() == nil, nil
}, time.Second, 30*time.Second), "make sure containerd is running before test finish")
}()
t.Logf("Wait until containerd is killed")
require.NoError(t, Eventually(func() (bool, error) {
pid, err := PidOf("containerd")
if err != nil {
return false, err
}
return pid == 0, nil
}, time.Second, 30*time.Second), "wait for containerd to be killed")
t.Logf("Wait until containerd is restarted")
require.NoError(t, Eventually(func() (bool, error) {
return ConnectDaemons() == nil, nil
}, time.Second, 30*time.Second), "wait for containerd to be restarted")
t.Logf("Restart containerd")
RestartContainerd(t)
t.Logf("Check sandbox and container state after restart")
loadedSandboxes, err := runtimeService.ListPodSandbox(&runtime.PodSandboxFilter{})

View File

@ -17,14 +17,23 @@ limitations under the License.
package integration
import (
"encoding/json"
"io/ioutil"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/containerd/containerd"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/net/context"
"golang.org/x/sys/unix"
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
"github.com/containerd/cri/pkg/server"
)
func TestSandboxCleanRemove(t *testing.T) {
@ -66,3 +75,100 @@ func TestSandboxCleanRemove(t *testing.T) {
assert.NoError(t, runtimeService.StopPodSandbox(sb))
assert.NoError(t, runtimeService.RemovePodSandbox(sb))
}
func TestSandboxRemoveWithoutIPLeakage(t *testing.T) {
ctx := context.Background()
const hostLocalCheckpointDir = "/var/lib/cni"
t.Logf("Make sure host-local ipam is in use")
config, err := CRIConfig()
require.NoError(t, err)
fs, err := ioutil.ReadDir(config.NetworkPluginConfDir)
require.NoError(t, err)
require.NotEmpty(t, fs)
f := filepath.Join(config.NetworkPluginConfDir, fs[0].Name())
cniConfig, err := ioutil.ReadFile(f)
require.NoError(t, err)
if !strings.Contains(string(cniConfig), "host-local") {
t.Skip("host-local ipam is not in use")
}
t.Logf("Create a sandbox")
sbConfig := PodSandboxConfig("sandbox", "remove-without-ip-leakage")
sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
require.NoError(t, err)
defer func() {
// Make sure the sandbox is cleaned up in any case.
runtimeService.StopPodSandbox(sb)
runtimeService.RemovePodSandbox(sb)
}()
t.Logf("Get pod information")
client, err := RawRuntimeClient()
require.NoError(t, err)
resp, err := client.PodSandboxStatus(ctx, &runtime.PodSandboxStatusRequest{
PodSandboxId: sb,
Verbose: true,
})
require.NoError(t, err)
status := resp.GetStatus()
info := resp.GetInfo()
ip := status.GetNetwork().GetIp()
require.NotEmpty(t, ip)
var sbInfo server.SandboxInfo
require.NoError(t, json.Unmarshal([]byte(info["info"]), &sbInfo))
require.NotNil(t, sbInfo.RuntimeSpec.Linux)
var netNS string
for _, n := range sbInfo.RuntimeSpec.Linux.Namespaces {
if n.Type == runtimespec.NetworkNamespace {
netNS = n.Path
}
}
require.NotEmpty(t, netNS, "network namespace should be set")
t.Logf("Should be able to find the pod ip in host-local checkpoint")
checkIP := func(ip string) bool {
found := false
filepath.Walk(hostLocalCheckpointDir, func(_ string, info os.FileInfo, _ error) error {
if info != nil && info.Name() == ip {
found = true
}
return nil
})
return found
}
require.True(t, checkIP(ip))
t.Logf("Kill sandbox container")
require.NoError(t, KillPid(int(sbInfo.Pid)))
t.Logf("Unmount network namespace")
// The umount will take effect after containerd is stopped.
require.NoError(t, unix.Unmount(netNS, unix.MNT_DETACH))
t.Logf("Restart containerd")
RestartContainerd(t)
t.Logf("Sandbox state should be NOTREADY")
assert.NoError(t, Eventually(func() (bool, error) {
status, err := runtimeService.PodSandboxStatus(sb)
if err != nil {
return false, err
}
return status.GetState() == runtime.PodSandboxState_SANDBOX_NOTREADY, nil
}, time.Second, 30*time.Second), "sandbox state should become NOTREADY")
t.Logf("Network namespace should have been removed")
_, err = os.Stat(netNS)
assert.True(t, os.IsNotExist(err))
t.Logf("Should still be able to find the pod ip in host-local checkpoint")
assert.True(t, checkIP(ip))
t.Logf("Should be able to remove the sandbox after properly stopped")
assert.NoError(t, runtimeService.StopPodSandbox(sb))
assert.NoError(t, runtimeService.RemovePodSandbox(sb))
t.Logf("Should not be able to find the pod ip in host-local checkpoint")
assert.False(t, checkIP(ip))
}

View File

@ -24,11 +24,14 @@ import (
"os/exec"
"strconv"
"strings"
"testing"
"time"
"github.com/containerd/containerd"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/grpc"
"k8s.io/kubernetes/pkg/kubelet/apis/cri"
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
@ -295,6 +298,15 @@ func KillProcess(name string) error {
return nil
}
// KillPid kills the process by pid. kill is used.
func KillPid(pid int) error {
output, err := exec.Command("kill", strconv.Itoa(pid)).CombinedOutput()
if err != nil {
return errors.Errorf("failed to kill %d - error: %v, output: %q", pid, err, output)
}
return nil
}
// PidOf returns pid of a process by name.
func PidOf(name string) (int, error) {
b, err := exec.Command("pidof", name).CombinedOutput()
@ -308,8 +320,8 @@ func PidOf(name string) (int, error) {
return strconv.Atoi(output)
}
// CRIConfig gets current cri config from containerd.
func CRIConfig() (*criconfig.Config, error) {
// RawRuntimeClient returns a raw grpc runtime service client.
func RawRuntimeClient() (runtime.RuntimeServiceClient, error) {
addr, dialer, err := kubeletutil.GetAddressAndDialer(*criEndpoint)
if err != nil {
return nil, errors.Wrap(err, "failed to get dialer")
@ -320,8 +332,16 @@ func CRIConfig() (*criconfig.Config, error) {
if err != nil {
return nil, errors.Wrap(err, "failed to connect cri endpoint")
}
client := runtime.NewRuntimeServiceClient(conn)
resp, err := client.Status(ctx, &runtime.StatusRequest{Verbose: true})
return runtime.NewRuntimeServiceClient(conn), nil
}
// CRIConfig gets current cri config from containerd.
func CRIConfig() (*criconfig.Config, error) {
client, err := RawRuntimeClient()
if err != nil {
return nil, errors.Wrap(err, "failed to get raw runtime client")
}
resp, err := client.Status(context.Background(), &runtime.StatusRequest{Verbose: true})
if err != nil {
return nil, errors.Wrap(err, "failed to get status")
}
@ -331,3 +351,21 @@ func CRIConfig() (*criconfig.Config, error) {
}
return config, nil
}
func RestartContainerd(t *testing.T) {
require.NoError(t, KillProcess("containerd"))
// Use assert so that the 3rd wait always runs, this makes sure
// containerd is running before this function returns.
assert.NoError(t, Eventually(func() (bool, error) {
pid, err := PidOf("containerd")
if err != nil {
return false, err
}
return pid == 0, nil
}, time.Second, 30*time.Second), "wait for containerd to be killed")
require.NoError(t, Eventually(func() (bool, error) {
return ConnectDaemons() == nil, nil
}, time.Second, 30*time.Second), "wait for containerd to be restarted")
}

View File

@ -99,7 +99,8 @@ func toCRIContainerStatus(container containerstore.Container, spec *runtime.Imag
}
}
type containerInfo struct {
// ContainerInfo is extra information for a container.
type ContainerInfo struct {
// TODO(random-liu): Add sandboxID in CRI container status.
SandboxID string `json:"sandboxID"`
Pid uint32 `json:"pid"`
@ -122,7 +123,7 @@ func toCRIContainerInfo(ctx context.Context, container containerstore.Container,
status := container.Status.Get()
// TODO(random-liu): Change CRI status info to use array instead of map.
ci := &containerInfo{
ci := &ContainerInfo{
SandboxID: container.SandboxID,
Pid: status.Pid,
Removing: status.Removing,

View File

@ -99,8 +99,9 @@ func toCRISandboxStatus(meta sandboxstore.Metadata, status sandboxstore.Status,
}
}
// SandboxInfo is extra information for sandbox.
// TODO (mikebrow): discuss predefining constants structures for some or all of these field names in CRI
type sandboxInfo struct {
type SandboxInfo struct {
Pid uint32 `json:"pid"`
Status string `json:"processStatus"`
NetNSClosed bool `json:"netNamespaceClosed"`
@ -132,7 +133,7 @@ func toCRISandboxInfo(ctx context.Context, sandbox sandboxstore.Sandbox) (map[st
processStatus = taskStatus.Status
}
si := &sandboxInfo{
si := &SandboxInfo{
Pid: sandbox.Status.Get().Pid,
RuntimeHandler: sandbox.RuntimeHandler,
Status: string(processStatus),

View File

@ -17,7 +17,6 @@ limitations under the License.
package server
import (
"os"
"time"
"github.com/containerd/containerd"
@ -60,23 +59,21 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb
}
// Teardown network for sandbox.
if sandbox.NetNSPath != "" && sandbox.NetNS != nil {
if _, err := os.Stat(sandbox.NetNSPath); err != nil {
if !os.IsNotExist(err) {
return nil, errors.Wrapf(err, "failed to stat network namespace path %s", sandbox.NetNSPath)
}
} else {
if teardownErr := c.teardownPod(id, sandbox.NetNSPath, sandbox.Config); teardownErr != nil {
return nil, errors.Wrapf(teardownErr, "failed to destroy network for sandbox %q", id)
}
if sandbox.NetNSPath != "" {
netNSPath := sandbox.NetNSPath
if sandbox.NetNS == nil || sandbox.NetNS.Closed() {
// Use empty netns path if netns is not available. This is defined in:
// https://github.com/containernetworking/cni/blob/v0.7.0-alpha1/SPEC.md
netNSPath = ""
}
/*TODO:It is still possible that containerd crashes after we teardown the network, but before we remove the network namespace.
In that case, we'll not be able to remove the sandbox anymore. The chance is slim, but we should be aware of that.
In the future, once TearDownPod is idempotent, this will be fixed.*/
//Close the sandbox network namespace if it was created
if err = sandbox.NetNS.Remove(); err != nil {
return nil, errors.Wrapf(err, "failed to remove network namespace for sandbox %q", id)
if err := c.teardownPod(id, netNSPath, sandbox.Config); err != nil {
return nil, errors.Wrapf(err, "failed to destroy network for sandbox %q", id)
}
// Close the sandbox network namespace if it was created
if sandbox.NetNS != nil {
if err = sandbox.NetNS.Remove(); err != nil {
return nil, errors.Wrapf(err, "failed to remove network namespace for sandbox %q", id)
}
}
}

View File

@ -27,6 +27,13 @@ import (
osinterface "github.com/containerd/cri/pkg/os"
)
// The NetNS library assumes only containerd manages the lifecycle of the
// network namespace mount. The only case that netns will be unmounted by
// someone else is node reboot.
// If this assumption is broken, NetNS won't be aware of the external
// unmount, and there will be a state mismatch.
// TODO(random-liu): Don't cache state, always load from the system.
// ErrClosedNetNS is the error returned when network namespace is closed.
var ErrClosedNetNS = errors.New("network namespace is closed")

View File

@ -6,7 +6,7 @@ github.com/containerd/console c12b1e7919c14469339a5d38f2f8ed9b64a9de23
github.com/containerd/containerd 15f19d7a67fa322e6de0ef4c6a1bf9da0f056554
github.com/containerd/continuity bd77b46c8352f74eb12c85bdc01f4b90f69d66b4
github.com/containerd/fifo 3d5202aec260678c48179c56f40e6f38a095738c
github.com/containerd/go-cni 6d7b509a054a3cb1c35ed1865d4fde2f0cb547cd
github.com/containerd/go-cni 40bcf8ec8acd7372be1d77031d585d5d8e561c90
github.com/containerd/go-runc 5a6d9f37cfa36b15efba46dc7ea349fa9b7143c3
github.com/containerd/ttrpc 2a805f71863501300ae1976d29f0454ae003e85a
github.com/containerd/typeurl a93fcdb778cd272c6e9b3028b2f42d813e785d40

View File

@ -18,6 +18,7 @@ package cni
import (
"fmt"
"strings"
"sync"
cnilibrary "github.com/containernetworking/cni/libcni"
@ -127,6 +128,15 @@ func (c *libcni) Remove(id string, path string, opts ...NamespaceOpts) error {
}
for _, network := range c.networks {
if err := network.Remove(ns); err != nil {
// Based on CNI spec v0.7.0, empty network namespace is allowed to
// do best effort cleanup. However, it is not handled consistently
// right now:
// https://github.com/containernetworking/plugins/issues/210
// TODO(random-liu): Remove the error handling when the issue is
// fixed and the CNI spec v0.6.0 support is deprecated.
if path == "" && strings.Contains(err.Error(), "no such file or directory") {
continue
}
return err
}
}