Manage mount lifecycle and remove cached state

Signed-off-by: Lantao Liu <lantaol@google.com>
This commit is contained in:
Lantao Liu 2018-10-10 17:20:40 -07:00
parent 36893c3ec9
commit c1740d8291
10 changed files with 267 additions and 167 deletions

220
pkg/netns/netns.go Normal file
View File

@ -0,0 +1,220 @@
/*
Copyright 2018 The Containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Copyright 2018 CNI authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package netns
import (
"crypto/rand"
"fmt"
"os"
"path"
"runtime"
"sync"
cnins "github.com/containernetworking/plugins/pkg/ns"
"github.com/docker/docker/pkg/symlink"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
osinterface "github.com/containerd/cri/pkg/os"
)
const nsRunDir = "/var/run/netns"
// Some of the following functions are migrated from
// https://github.com/containernetworking/plugins/blob/master/pkg/testutils/netns_linux.go
// newNS creates a new persistent (bind-mounted) network namespace and returns the
// path to the network namespace.
func newNS() (nsPath string, err error) {
b := make([]byte, 16)
if _, err := rand.Reader.Read(b); err != nil {
return "", errors.Wrap(err, "failed to generate random netns name")
}
// Create the directory for mounting network namespaces
// This needs to be a shared mountpoint in case it is mounted in to
// other namespaces (containers)
if err := os.MkdirAll(nsRunDir, 0755); err != nil {
return "", err
}
// create an empty file at the mount point
nsName := fmt.Sprintf("cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
nsPath = path.Join(nsRunDir, nsName)
mountPointFd, err := os.Create(nsPath)
if err != nil {
return "", err
}
mountPointFd.Close()
defer func() {
// Ensure the mount point is cleaned up on errors
if err != nil {
os.RemoveAll(nsPath) // nolint: errcheck
}
}()
var wg sync.WaitGroup
wg.Add(1)
// do namespace work in a dedicated goroutine, so that we can safely
// Lock/Unlock OSThread without upsetting the lock/unlock state of
// the caller of this function
go (func() {
defer wg.Done()
runtime.LockOSThread()
// Don't unlock. By not unlocking, golang will kill the OS thread when the
// goroutine is done (for go1.10+)
var origNS cnins.NetNS
origNS, err = cnins.GetNS(getCurrentThreadNetNSPath())
if err != nil {
return
}
defer origNS.Close()
// create a new netns on the current thread
err = unix.Unshare(unix.CLONE_NEWNET)
if err != nil {
return
}
// Put this thread back to the orig ns, since it might get reused (pre go1.10)
defer origNS.Set() // nolint: errcheck
// bind mount the netns from the current thread (from /proc) onto the
// mount point. This causes the namespace to persist, even when there
// are no threads in the ns.
err = unix.Mount(getCurrentThreadNetNSPath(), nsPath, "none", unix.MS_BIND, "")
if err != nil {
err = errors.Wrapf(err, "failed to bind mount ns at %s", nsPath)
}
})()
wg.Wait()
if err != nil {
return "", errors.Wrap(err, "failed to create namespace")
}
return nsPath, nil
}
// unmountNS unmounts the NS held by the netns object. unmountNS is idempotent.
func unmountNS(path string) error {
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
return nil
}
return errors.Wrap(err, "failed to stat netns")
}
path, err := symlink.FollowSymlinkInScope(path, "/")
if err != nil {
return errors.Wrap(err, "failed to follow symlink")
}
if err := osinterface.Unmount(path); err != nil && !os.IsNotExist(err) {
return errors.Wrap(err, "failed to umount netns")
}
if err := os.RemoveAll(path); err != nil {
return errors.Wrap(err, "failed to remove netns")
}
return nil
}
// getCurrentThreadNetNSPath copied from pkg/ns
func getCurrentThreadNetNSPath() string {
// /proc/self/ns/net returns the namespace of the main thread, not
// of whatever thread this goroutine is running on. Make sure we
// use the thread's net namespace since the thread is switching around
return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
}
// NetNS holds network namespace.
type NetNS struct {
path string
}
// NewNetNS creates a network namespace.
func NewNetNS() (*NetNS, error) {
path, err := newNS()
if err != nil {
return nil, errors.Wrap(err, "failed to setup netns")
}
return &NetNS{path: path}, nil
}
// LoadNetNS loads existing network namespace.
func LoadNetNS(path string) *NetNS {
return &NetNS{path: path}
}
// Remove removes network namepace. Remove is idempotent, meaning it might
// be invoked multiple times and provides consistent result.
func (n *NetNS) Remove() error {
return unmountNS(n.path)
}
// Closed checks whether the network namespace has been closed.
func (n *NetNS) Closed() (bool, error) {
ns, err := cnins.GetNS(n.path)
if err != nil {
if _, ok := err.(cnins.NSPathNotExistErr); ok {
// The network namespace has already been removed.
return true, nil
}
if _, ok := err.(cnins.NSPathNotNSErr); ok {
// The network namespace is not mounted, remove it.
if err := os.RemoveAll(n.path); err != nil {
return false, errors.Wrap(err, "remove netns")
}
return true, nil
}
return false, errors.Wrap(err, "get netns fd")
}
if err := ns.Close(); err != nil {
return false, errors.Wrap(err, "close netns fd")
}
return false, nil
}
// GetPath returns network namespace path for sandbox container
func (n *NetNS) GetPath() string {
return n.path
}
// Do runs a function in the network namespace.
func (n *NetNS) Do(f func(cnins.NetNS) error) error {
ns, err := cnins.GetNS(n.path)
if err != nil {
return errors.Wrap(err, "get netns fd")
}
defer ns.Close() // nolint: errcheck
return ns.Do(f)
}

View File

@ -353,7 +353,7 @@ func checkSelinuxLevel(level string) (bool, error) {
matched, err := regexp.MatchString(`^s\d(-s\d)??(:c\d{1,4}((.c\d{1,4})?,c\d{1,4})*(.c\d{1,4})?(,c\d{1,4}(.c\d{1,4})?)*)?$`, level)
if err != nil || !matched {
return false, fmt.Errorf("the format of 'level' %q is not correct: %v", level, err)
return false, errors.Wrapf(err, "the format of 'level' %q is not correct", level)
}
return true, nil
}

View File

@ -34,6 +34,7 @@ import (
"golang.org/x/net/context"
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
"github.com/containerd/cri/pkg/netns"
cio "github.com/containerd/cri/pkg/server/io"
containerstore "github.com/containerd/cri/pkg/store/container"
sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
@ -394,14 +395,7 @@ func loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.S
// Don't need to load netns for host network sandbox.
return sandbox, nil
}
netNS, err := sandboxstore.LoadNetNS(meta.NetNSPath)
if err != nil {
if err != sandboxstore.ErrClosedNetNS {
return sandbox, errors.Wrapf(err, "failed to load netns %q", meta.NetNSPath)
}
netNS = nil
}
sandbox.NetNS = netNS
sandbox.NetNS = netns.LoadNetNS(meta.NetNSPath)
// It doesn't matter whether task is running or not. If it is running, sandbox
// status will be `READY`; if it is not running, sandbox status will be `NOT_READY`,

View File

@ -59,10 +59,12 @@ func (c *criService) portForward(id string, port int32, stream io.ReadWriteClose
securityContext := s.Config.GetLinux().GetSecurityContext()
hostNet := securityContext.GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE
if !hostNet {
if s.NetNS == nil || s.NetNS.Closed() {
if closed, err := s.NetNS.Closed(); err != nil {
return errors.Wrapf(err, "failed to check netwok namespace closed for sandbox %q", id)
} else if closed {
return errors.Errorf("network namespace for sandbox %q is closed", id)
}
netNSDo = s.NetNS.GetNs().Do
netNSDo = s.NetNS.Do
netNSPath = s.NetNS.GetPath()
} else {
// Run the function directly for host network.

View File

@ -52,8 +52,13 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS
}
// Return error if sandbox network namespace is not closed yet.
if sandbox.NetNS != nil && !sandbox.NetNS.Closed() {
return nil, errors.Errorf("sandbox network namespace %q is not fully closed", sandbox.NetNS.GetPath())
if sandbox.NetNS != nil {
nsPath := sandbox.NetNS.GetPath()
if closed, err := sandbox.NetNS.Closed(); err != nil {
return nil, errors.Wrapf(err, "failed to check sandbox network namespace %q closed", nsPath)
} else if !closed {
return nil, errors.Errorf("sandbox network namespace %q is not fully closed", nsPath)
}
}
// Remove all containers inside the sandbox.

View File

@ -41,6 +41,7 @@ import (
customopts "github.com/containerd/cri/pkg/containerd/opts"
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
"github.com/containerd/cri/pkg/log"
"github.com/containerd/cri/pkg/netns"
sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
"github.com/containerd/cri/pkg/util"
)
@ -104,7 +105,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
// namespaces. If the pod is in host network namespace then both are empty and should not
// be used.
sandbox.NetNS, err = sandboxstore.NewNetNS()
sandbox.NetNS, err = netns.NewNetNS()
if err != nil {
return nil, errors.Wrapf(err, "failed to create network namespace for sandbox %q", id)
}

View File

@ -36,7 +36,10 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox
return nil, errors.Wrap(err, "an error occurred when try to find sandbox")
}
ip := c.getIP(sandbox)
ip, err := c.getIP(sandbox)
if err != nil {
return nil, errors.Wrap(err, "failed to get sandbox ip")
}
status := toCRISandboxStatus(sandbox.Metadata, sandbox.Status.Get(), ip)
if !r.GetVerbose() {
return &runtime.PodSandboxStatusResponse{Status: status}, nil
@ -54,21 +57,22 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox
}, nil
}
func (c *criService) getIP(sandbox sandboxstore.Sandbox) string {
func (c *criService) getIP(sandbox sandboxstore.Sandbox) (string, error) {
config := sandbox.Config
if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
// For sandboxes using the node network we are not
// responsible for reporting the IP.
return ""
return "", nil
}
// The network namespace has been closed.
if sandbox.NetNS == nil || sandbox.NetNS.Closed() {
return ""
if closed, err := sandbox.NetNS.Closed(); err != nil {
return "", errors.Wrap(err, "check network namespace closed")
} else if closed {
return "", nil
}
return sandbox.IP
return sandbox.IP, nil
}
// toCRISandboxStatus converts sandbox metadata into CRI pod sandbox status.
@ -146,9 +150,13 @@ func toCRISandboxInfo(ctx context.Context, sandbox sandboxstore.Sandbox) (map[st
si.Status = "deleted"
}
if sandbox.NetNSPath != "" {
if sandbox.NetNS != nil {
// Add network closed information if sandbox is not using host network.
si.NetNSClosed = (sandbox.NetNS == nil || sandbox.NetNS.Closed())
closed, err := sandbox.NetNS.Closed()
if err != nil {
return nil, errors.Wrap(err, "failed to check network namespace closed")
}
si.NetNSClosed = closed
}
spec, err := container.Spec(ctx)

View File

@ -59,23 +59,22 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb
}
// Teardown network for sandbox.
if sandbox.NetNSPath != "" {
if sandbox.NetNS != nil {
netNSPath := sandbox.NetNSPath
if sandbox.NetNS == nil || sandbox.NetNS.Closed() {
// Use empty netns path if netns is not available. This is defined in:
// https://github.com/containernetworking/cni/blob/v0.7.0-alpha1/SPEC.md
if closed, err := sandbox.NetNS.Closed(); err != nil {
return nil, errors.Wrap(err, "failed to check network namespace closed")
} else if closed {
netNSPath = ""
}
if err := c.teardownPod(id, netNSPath, sandbox.Config); err != nil {
return nil, errors.Wrapf(err, "failed to destroy network for sandbox %q", id)
}
// Close the sandbox network namespace if it was created
if sandbox.NetNS != nil {
if err = sandbox.NetNS.Remove(); err != nil {
return nil, errors.Wrapf(err, "failed to remove network namespace for sandbox %q", id)
}
}
}
logrus.Infof("TearDown network for sandbox %q successfully", id)

View File

@ -1,132 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sandbox
import (
"os"
"sync"
cnins "github.com/containernetworking/plugins/pkg/ns"
"github.com/docker/docker/pkg/symlink"
"github.com/pkg/errors"
osinterface "github.com/containerd/cri/pkg/os"
)
// The NetNS library assumes only containerd manages the lifecycle of the
// network namespace mount. The only case that netns will be unmounted by
// someone else is node reboot.
// If this assumption is broken, NetNS won't be aware of the external
// unmount, and there will be a state mismatch.
// TODO(random-liu): Don't cache state, always load from the system.
// ErrClosedNetNS is the error returned when network namespace is closed.
var ErrClosedNetNS = errors.New("network namespace is closed")
// NetNS holds network namespace for sandbox
type NetNS struct {
sync.Mutex
ns cnins.NetNS
closed bool
restored bool
}
// NewNetNS creates a network namespace for the sandbox
func NewNetNS() (*NetNS, error) {
netns, err := cnins.NewNS()
if err != nil {
return nil, errors.Wrap(err, "failed to setup network namespace")
}
n := new(NetNS)
n.ns = netns
return n, nil
}
// LoadNetNS loads existing network namespace. It returns ErrClosedNetNS
// if the network namespace has already been closed.
func LoadNetNS(path string) (*NetNS, error) {
ns, err := cnins.GetNS(path)
if err != nil {
if _, ok := err.(cnins.NSPathNotExistErr); ok {
return nil, ErrClosedNetNS
}
if _, ok := err.(cnins.NSPathNotNSErr); ok {
// Do best effort cleanup.
os.RemoveAll(path) // nolint: errcheck
return nil, ErrClosedNetNS
}
return nil, errors.Wrap(err, "failed to load network namespace")
}
return &NetNS{ns: ns, restored: true}, nil
}
// Remove removes network namepace if it exists and not closed. Remove is idempotent,
// meaning it might be invoked multiple times and provides consistent result.
func (n *NetNS) Remove() error {
n.Lock()
defer n.Unlock()
if !n.closed {
err := n.ns.Close()
if err != nil {
return errors.Wrap(err, "failed to close network namespace")
}
n.closed = true
}
if n.restored {
path := n.ns.Path()
// Check netns existence.
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
return nil
}
return errors.Wrap(err, "failed to stat netns")
}
path, err := symlink.FollowSymlinkInScope(path, "/")
if err != nil {
return errors.Wrap(err, "failed to follow symlink")
}
if err := osinterface.Unmount(path); err != nil && !os.IsNotExist(err) {
return errors.Wrap(err, "failed to umount netns")
}
if err := os.RemoveAll(path); err != nil {
return errors.Wrap(err, "failed to remove netns")
}
n.restored = false
}
return nil
}
// Closed checks whether the network namespace has been closed.
func (n *NetNS) Closed() bool {
n.Lock()
defer n.Unlock()
return n.closed && !n.restored
}
// GetPath returns network namespace path for sandbox container
func (n *NetNS) GetPath() string {
n.Lock()
defer n.Unlock()
return n.ns.Path()
}
// GetNs returns the network namespace handle
func (n *NetNS) GetNs() cnins.NetNS {
n.Lock()
defer n.Unlock()
return n.ns
}

View File

@ -22,6 +22,7 @@ import (
"github.com/containerd/containerd"
"github.com/docker/docker/pkg/truncindex"
"github.com/containerd/cri/pkg/netns"
"github.com/containerd/cri/pkg/store"
)
@ -32,10 +33,12 @@ type Sandbox struct {
Metadata
// Status stores the status of the sandbox.
Status StatusStorage
// Container is the containerd sandbox container client
// Container is the containerd sandbox container client.
Container containerd.Container
// CNI network namespace client
NetNS *NetNS
// CNI network namespace client.
// For hostnetwork pod, this is always nil;
// For non hostnetwork pod, this should never be nil.
NetNS *netns.NetNS
// StopCh is used to propagate the stop information of the sandbox.
*store.StopCh
}