diff --git a/pkg/netns/netns.go b/pkg/netns/netns.go new file mode 100644 index 000000000..56da0b6f2 --- /dev/null +++ b/pkg/netns/netns.go @@ -0,0 +1,220 @@ +/* +Copyright 2018 The Containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Copyright 2018 CNI authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package netns + +import ( + "crypto/rand" + "fmt" + "os" + "path" + "runtime" + "sync" + + cnins "github.com/containernetworking/plugins/pkg/ns" + "github.com/docker/docker/pkg/symlink" + "github.com/pkg/errors" + "golang.org/x/sys/unix" + + osinterface "github.com/containerd/cri/pkg/os" +) + +const nsRunDir = "/var/run/netns" + +// Some of the following functions are migrated from +// https://github.com/containernetworking/plugins/blob/master/pkg/testutils/netns_linux.go + +// newNS creates a new persistent (bind-mounted) network namespace and returns the +// path to the network namespace. 
+func newNS() (nsPath string, err error) {
+	b := make([]byte, 16)
+	if _, err := rand.Read(b); err != nil {
+		return "", errors.Wrap(err, "failed to generate random netns name")
+	}
+
+	// Create the directory for mounting network namespaces. This needs to be a
+	// shared mountpoint in case it is mounted in to other namespaces (containers)
+	if err := os.MkdirAll(nsRunDir, 0755); err != nil {
+		return "", errors.Wrapf(err, "failed to create netns directory %q", nsRunDir)
+	}
+
+	// create an empty file at the mount point
+	nsName := fmt.Sprintf("cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
+	mountPoint := path.Join(nsRunDir, nsName)
+	mountPointFd, err := os.Create(mountPoint)
+	if err != nil {
+		return "", errors.Wrapf(err, "failed to create netns mount point %q", mountPoint)
+	}
+	mountPointFd.Close() // nolint: errcheck
+
+	defer func() {
+		// Ensure the mount point is cleaned up on errors. nsPath cannot be used
+		// here: `return "", err` resets it before deferred functions run.
+		if err != nil {
+			os.RemoveAll(mountPoint) // nolint: errcheck
+		}
+	}()
+
+	var wg sync.WaitGroup
+	wg.Add(1)
+
+	// do namespace work in a dedicated goroutine, so that we can safely
+	// Lock/Unlock OSThread without upsetting the lock/unlock state of
+	// the caller of this function
+	go (func() {
+		defer wg.Done()
+		runtime.LockOSThread()
+		// Don't unlock. By not unlocking, golang will kill the OS thread when the
+		// goroutine is done (for go1.10+)
+
+		var origNS cnins.NetNS
+		origNS, err = cnins.GetNS(getCurrentThreadNetNSPath())
+		if err != nil {
+			return
+		}
+		defer origNS.Close()
+
+		// create a new netns on the current thread
+		err = unix.Unshare(unix.CLONE_NEWNET)
+		if err != nil {
+			return
+		}
+
+		// Put this thread back to the orig ns, since it might get reused (pre go1.10)
+		defer origNS.Set() // nolint: errcheck
+
+		// bind mount the netns from the current thread (from /proc) onto the
+		// mount point. This causes the namespace to persist, even when there
+		// are no threads in the ns.
+		err = unix.Mount(getCurrentThreadNetNSPath(), mountPoint, "none", unix.MS_BIND, "")
+		if err != nil {
+			err = errors.Wrapf(err, "failed to bind mount ns at %s", mountPoint)
+		}
+	})()
+	wg.Wait()
+
+	if err != nil {
+		return "", errors.Wrap(err, "failed to create namespace")
+	}
+
+	return mountPoint, nil
+}
+
+// unmountNS unmounts and removes the netns mount at path. unmountNS is idempotent.
+func unmountNS(path string) error {
+	if _, err := os.Stat(path); err != nil {
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return errors.Wrap(err, "failed to stat netns")
+	}
+	path, err := symlink.FollowSymlinkInScope(path, "/")
+	if err != nil {
+		return errors.Wrap(err, "failed to follow symlink")
+	}
+	if err := osinterface.Unmount(path); err != nil && !os.IsNotExist(err) {
+		return errors.Wrap(err, "failed to umount netns")
+	}
+	if err := os.RemoveAll(path); err != nil {
+		return errors.Wrap(err, "failed to remove netns")
+	}
+	return nil
+}
+
+// getCurrentThreadNetNSPath returns the current thread's netns path (copied from pkg/ns).
+func getCurrentThreadNetNSPath() string {
+	// /proc/self/ns/net returns the namespace of the main thread, not
+	// of whatever thread this goroutine is running on. Make sure we
+	// use the thread's net namespace since the thread is switching around
+	return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
+}
+
+// NetNS holds the path of a network namespace.
+type NetNS struct {
+	path string
+}
+
+// NewNetNS creates a new network namespace and returns a NetNS for its path.
+func NewNetNS() (*NetNS, error) {
+	path, err := newNS()
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to setup netns")
+	}
+	return &NetNS{path: path}, nil
+}
+
+// LoadNetNS returns a NetNS for an existing path; the path is not checked here.
+func LoadNetNS(path string) *NetNS {
+	return &NetNS{path: path}
+}
+
+// Remove removes network namespace. Remove is idempotent, meaning it might
+// be invoked multiple times and provides consistent result.
+func (n *NetNS) Remove() error {
+	return unmountNS(n.path)
+}
+
+// Closed reports whether the netns path is gone or is no longer a netns mount.
+func (n *NetNS) Closed() (bool, error) {
+	ns, err := cnins.GetNS(n.path)
+	if err != nil {
+		if _, ok := err.(cnins.NSPathNotExistErr); ok {
+			// The network namespace has already been removed.
+			return true, nil
+		}
+		if _, ok := err.(cnins.NSPathNotNSErr); ok {
+			// The network namespace is not mounted, remove it.
+			if err := os.RemoveAll(n.path); err != nil {
+				return false, errors.Wrap(err, "remove netns")
+			}
+			return true, nil
+		}
+		return false, errors.Wrap(err, "get netns fd")
+	}
+	if err := ns.Close(); err != nil {
+		return false, errors.Wrap(err, "close netns fd")
+	}
+	return false, nil
+}
+
+// GetPath returns the network namespace path this NetNS was created or loaded with.
+func (n *NetNS) GetPath() string {
+	return n.path
+}
+
+// Do opens the network namespace at n.path and runs f in it.
+func (n *NetNS) Do(f func(cnins.NetNS) error) error {
+	ns, err := cnins.GetNS(n.path)
+	if err != nil {
+		return errors.Wrap(err, "get netns fd")
+	}
+	defer ns.Close() // nolint: errcheck
+	return ns.Do(f)
+}
diff --git a/pkg/server/helpers.go b/pkg/server/helpers.go
index 9b2744c94..0605f4084 100644
--- a/pkg/server/helpers.go
+++ b/pkg/server/helpers.go
@@ -353,7 +353,10 @@ func checkSelinuxLevel(level string) (bool, error) {
 
 	matched, err := regexp.MatchString(`^s\d(-s\d)??(:c\d{1,4}((.c\d{1,4})?,c\d{1,4})*(.c\d{1,4})?(,c\d{1,4}(.c\d{1,4})?)*)?$`, level)
-	if err != nil || !matched {
-		return false, fmt.Errorf("the format of 'level' %q is not correct: %v", level, err)
+	if err != nil {
+		return false, errors.Wrapf(err, "the format of 'level' %q is not correct", level)
+	}
+	if !matched {
+		return false, errors.Errorf("the format of 'level' %q is not correct", level)
 	}
 	return true, nil
 }
diff --git a/pkg/server/restart.go b/pkg/server/restart.go
index 18d8d60c6..c842a5cac 100644
--- a/pkg/server/restart.go
+++ b/pkg/server/restart.go
@@ -34,6 +34,7 @@ import (
 	"golang.org/x/net/context"
 	runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
 
+	"github.com/containerd/cri/pkg/netns"
 	cio "github.com/containerd/cri/pkg/server/io"
 	containerstore "github.com/containerd/cri/pkg/store/container"
 	sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
@@ -394,14 +395,7 @@ func loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.S
 		// Don't need to load netns for host network sandbox.
 		return sandbox, nil
 	}
-	netNS, err := sandboxstore.LoadNetNS(meta.NetNSPath)
-	if err != nil {
-		if err != sandboxstore.ErrClosedNetNS {
-			return sandbox, errors.Wrapf(err, "failed to load netns %q", meta.NetNSPath)
-		}
-		netNS = nil
-	}
-	sandbox.NetNS = netNS
+	sandbox.NetNS = netns.LoadNetNS(meta.NetNSPath)
 
 	// It doesn't matter whether task is running or not. If it is running, sandbox
 	// status will be `READY`; if it is not running, sandbox status will be `NOT_READY`,
diff --git a/pkg/server/sandbox_portforward.go b/pkg/server/sandbox_portforward.go
index 7106cb673..834de4cbc 100644
--- a/pkg/server/sandbox_portforward.go
+++ b/pkg/server/sandbox_portforward.go
@@ -59,10 +59,12 @@ func (c *criService) portForward(id string, port int32, stream io.ReadWriteClose
 	securityContext := s.Config.GetLinux().GetSecurityContext()
 	hostNet := securityContext.GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE
 	if !hostNet {
-		if s.NetNS == nil || s.NetNS.Closed() {
+		if closed, err := s.NetNS.Closed(); err != nil {
+			return errors.Wrapf(err, "failed to check network namespace closed for sandbox %q", id)
+		} else if closed {
 			return errors.Errorf("network namespace for sandbox %q is closed", id)
 		}
-		netNSDo = s.NetNS.GetNs().Do
+		netNSDo = s.NetNS.Do
 		netNSPath = s.NetNS.GetPath()
 	} else {
 		// Run the function directly for host network.
diff --git a/pkg/server/sandbox_remove.go b/pkg/server/sandbox_remove.go index 3d3849a8d..29ebd44ed 100644 --- a/pkg/server/sandbox_remove.go +++ b/pkg/server/sandbox_remove.go @@ -52,8 +52,13 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS } // Return error if sandbox network namespace is not closed yet. - if sandbox.NetNS != nil && !sandbox.NetNS.Closed() { - return nil, errors.Errorf("sandbox network namespace %q is not fully closed", sandbox.NetNS.GetPath()) + if sandbox.NetNS != nil { + nsPath := sandbox.NetNS.GetPath() + if closed, err := sandbox.NetNS.Closed(); err != nil { + return nil, errors.Wrapf(err, "failed to check sandbox network namespace %q closed", nsPath) + } else if !closed { + return nil, errors.Errorf("sandbox network namespace %q is not fully closed", nsPath) + } } // Remove all containers inside the sandbox. diff --git a/pkg/server/sandbox_run.go b/pkg/server/sandbox_run.go index 8b514caf1..468a389c5 100644 --- a/pkg/server/sandbox_run.go +++ b/pkg/server/sandbox_run.go @@ -41,6 +41,7 @@ import ( customopts "github.com/containerd/cri/pkg/containerd/opts" ctrdutil "github.com/containerd/cri/pkg/containerd/util" "github.com/containerd/cri/pkg/log" + "github.com/containerd/cri/pkg/netns" sandboxstore "github.com/containerd/cri/pkg/store/sandbox" "github.com/containerd/cri/pkg/util" ) @@ -104,7 +105,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox // handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network // namespaces. If the pod is in host network namespace then both are empty and should not // be used. 
- sandbox.NetNS, err = sandboxstore.NewNetNS() + sandbox.NetNS, err = netns.NewNetNS() if err != nil { return nil, errors.Wrapf(err, "failed to create network namespace for sandbox %q", id) } diff --git a/pkg/server/sandbox_status.go b/pkg/server/sandbox_status.go index 7965a1a3f..98a07f838 100644 --- a/pkg/server/sandbox_status.go +++ b/pkg/server/sandbox_status.go @@ -36,7 +36,10 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox return nil, errors.Wrap(err, "an error occurred when try to find sandbox") } - ip := c.getIP(sandbox) + ip, err := c.getIP(sandbox) + if err != nil { + return nil, errors.Wrap(err, "failed to get sandbox ip") + } status := toCRISandboxStatus(sandbox.Metadata, sandbox.Status.Get(), ip) if !r.GetVerbose() { return &runtime.PodSandboxStatusResponse{Status: status}, nil @@ -54,21 +57,22 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox }, nil } -func (c *criService) getIP(sandbox sandboxstore.Sandbox) string { +func (c *criService) getIP(sandbox sandboxstore.Sandbox) (string, error) { config := sandbox.Config if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE { // For sandboxes using the node network we are not // responsible for reporting the IP. - return "" + return "", nil } - // The network namespace has been closed. - if sandbox.NetNS == nil || sandbox.NetNS.Closed() { - return "" + if closed, err := sandbox.NetNS.Closed(); err != nil { + return "", errors.Wrap(err, "check network namespace closed") + } else if closed { + return "", nil } - return sandbox.IP + return sandbox.IP, nil } // toCRISandboxStatus converts sandbox metadata into CRI pod sandbox status. @@ -146,9 +150,13 @@ func toCRISandboxInfo(ctx context.Context, sandbox sandboxstore.Sandbox) (map[st si.Status = "deleted" } - if sandbox.NetNSPath != "" { + if sandbox.NetNS != nil { // Add network closed information if sandbox is not using host network. 
- si.NetNSClosed = (sandbox.NetNS == nil || sandbox.NetNS.Closed()) + closed, err := sandbox.NetNS.Closed() + if err != nil { + return nil, errors.Wrap(err, "failed to check network namespace closed") + } + si.NetNSClosed = closed } spec, err := container.Spec(ctx) diff --git a/pkg/server/sandbox_stop.go b/pkg/server/sandbox_stop.go index e0f203309..11c231ce5 100644 --- a/pkg/server/sandbox_stop.go +++ b/pkg/server/sandbox_stop.go @@ -59,21 +59,20 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb } // Teardown network for sandbox. - if sandbox.NetNSPath != "" { + if sandbox.NetNS != nil { netNSPath := sandbox.NetNSPath - if sandbox.NetNS == nil || sandbox.NetNS.Closed() { - // Use empty netns path if netns is not available. This is defined in: - // https://github.com/containernetworking/cni/blob/v0.7.0-alpha1/SPEC.md + // Use empty netns path if netns is not available. This is defined in: + // https://github.com/containernetworking/cni/blob/v0.7.0-alpha1/SPEC.md + if closed, err := sandbox.NetNS.Closed(); err != nil { + return nil, errors.Wrap(err, "failed to check network namespace closed") + } else if closed { netNSPath = "" } if err := c.teardownPod(id, netNSPath, sandbox.Config); err != nil { return nil, errors.Wrapf(err, "failed to destroy network for sandbox %q", id) } - // Close the sandbox network namespace if it was created - if sandbox.NetNS != nil { - if err = sandbox.NetNS.Remove(); err != nil { - return nil, errors.Wrapf(err, "failed to remove network namespace for sandbox %q", id) - } + if err = sandbox.NetNS.Remove(); err != nil { + return nil, errors.Wrapf(err, "failed to remove network namespace for sandbox %q", id) } } diff --git a/pkg/store/sandbox/netns.go b/pkg/store/sandbox/netns.go deleted file mode 100644 index 8a08194cb..000000000 --- a/pkg/store/sandbox/netns.go +++ /dev/null @@ -1,132 +0,0 @@ -/* -Copyright 2017 The Kubernetes Authors. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package sandbox - -import ( - "os" - "sync" - - cnins "github.com/containernetworking/plugins/pkg/ns" - "github.com/docker/docker/pkg/symlink" - "github.com/pkg/errors" - - osinterface "github.com/containerd/cri/pkg/os" -) - -// The NetNS library assumes only containerd manages the lifecycle of the -// network namespace mount. The only case that netns will be unmounted by -// someone else is node reboot. -// If this assumption is broken, NetNS won't be aware of the external -// unmount, and there will be a state mismatch. -// TODO(random-liu): Don't cache state, always load from the system. - -// ErrClosedNetNS is the error returned when network namespace is closed. -var ErrClosedNetNS = errors.New("network namespace is closed") - -// NetNS holds network namespace for sandbox -type NetNS struct { - sync.Mutex - ns cnins.NetNS - closed bool - restored bool -} - -// NewNetNS creates a network namespace for the sandbox -func NewNetNS() (*NetNS, error) { - netns, err := cnins.NewNS() - if err != nil { - return nil, errors.Wrap(err, "failed to setup network namespace") - } - n := new(NetNS) - n.ns = netns - return n, nil -} - -// LoadNetNS loads existing network namespace. It returns ErrClosedNetNS -// if the network namespace has already been closed. 
-func LoadNetNS(path string) (*NetNS, error) { - ns, err := cnins.GetNS(path) - if err != nil { - if _, ok := err.(cnins.NSPathNotExistErr); ok { - return nil, ErrClosedNetNS - } - if _, ok := err.(cnins.NSPathNotNSErr); ok { - // Do best effort cleanup. - os.RemoveAll(path) // nolint: errcheck - return nil, ErrClosedNetNS - } - return nil, errors.Wrap(err, "failed to load network namespace") - } - return &NetNS{ns: ns, restored: true}, nil -} - -// Remove removes network namepace if it exists and not closed. Remove is idempotent, -// meaning it might be invoked multiple times and provides consistent result. -func (n *NetNS) Remove() error { - n.Lock() - defer n.Unlock() - if !n.closed { - err := n.ns.Close() - if err != nil { - return errors.Wrap(err, "failed to close network namespace") - } - n.closed = true - } - if n.restored { - path := n.ns.Path() - // Check netns existence. - if _, err := os.Stat(path); err != nil { - if os.IsNotExist(err) { - return nil - } - return errors.Wrap(err, "failed to stat netns") - } - path, err := symlink.FollowSymlinkInScope(path, "/") - if err != nil { - return errors.Wrap(err, "failed to follow symlink") - } - if err := osinterface.Unmount(path); err != nil && !os.IsNotExist(err) { - return errors.Wrap(err, "failed to umount netns") - } - if err := os.RemoveAll(path); err != nil { - return errors.Wrap(err, "failed to remove netns") - } - n.restored = false - } - return nil -} - -// Closed checks whether the network namespace has been closed. 
-func (n *NetNS) Closed() bool { - n.Lock() - defer n.Unlock() - return n.closed && !n.restored -} - -// GetPath returns network namespace path for sandbox container -func (n *NetNS) GetPath() string { - n.Lock() - defer n.Unlock() - return n.ns.Path() -} - -// GetNs returns the network namespace handle -func (n *NetNS) GetNs() cnins.NetNS { - n.Lock() - defer n.Unlock() - return n.ns -} diff --git a/pkg/store/sandbox/sandbox.go b/pkg/store/sandbox/sandbox.go index 4e4b6b8e2..764b2760b 100644 --- a/pkg/store/sandbox/sandbox.go +++ b/pkg/store/sandbox/sandbox.go @@ -22,6 +22,7 @@ import ( "github.com/containerd/containerd" "github.com/docker/docker/pkg/truncindex" + "github.com/containerd/cri/pkg/netns" "github.com/containerd/cri/pkg/store" ) @@ -32,10 +33,12 @@ type Sandbox struct { Metadata // Status stores the status of the sandbox. Status StatusStorage - // Container is the containerd sandbox container client + // Container is the containerd sandbox container client. Container containerd.Container - // CNI network namespace client - NetNS *NetNS + // CNI network namespace client. + // For hostnetwork pod, this is always nil; + // For non hostnetwork pod, this should never be nil. + NetNS *netns.NetNS // StopCh is used to propagate the stop information of the sandbox. *store.StopCh }