Merge pull request #138 from abhinandanpb/p_netns

Creating sandbox namespace
This commit is contained in:
Lantao Liu 2017-08-24 11:26:21 -07:00 committed by GitHub
commit 36da027c20
17 changed files with 772 additions and 57 deletions

View File

@ -43,7 +43,7 @@ func TestToCRISandbox(t *testing.T) {
Name: "test-name",
Config: config,
CreatedAt: createdAt,
NetNS: "test-netns",
NetNSPath: "test-netns",
}
state := runtime.PodSandboxState_SANDBOX_READY
expect := &runtime.PodSandbox{

View File

@ -78,8 +78,6 @@ func (c *criContainerdService) RemovePodSandbox(ctx context.Context, r *runtime.
}
}
// TODO(random-liu): [P1] Remove permanent namespace once used.
// Cleanup the sandbox root directory.
sandboxRootDir := getSandboxRootDir(c.rootDir, id)
if err := c.os.RemoveAll(sandboxRootDir); err != nil {

View File

@ -77,9 +77,49 @@ func (c *criContainerdService) RunPodSandbox(ctx context.Context, r *runtime.Run
if err != nil {
return nil, fmt.Errorf("failed to get sandbox image %q: %v", c.sandboxImage, err)
}
//Create Network Namespace if it is not in host network
hostNet := config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostNetwork()
if !hostNet {
// If it is not in host network namespace then create a namespace and set the sandbox
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
// namespaces. If the pod is in host network namespace then both are empty and should not
// be used.
sandbox.NetNS, err = sandboxstore.NewNetNS()
if err != nil {
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %v", id, err)
}
sandbox.NetNSPath = sandbox.NetNS.GetPath()
defer func() {
if retErr != nil {
if err := sandbox.NetNS.Remove(); err != nil {
glog.Errorf("Failed to remove network namespace %s for sandbox %q: %v", sandbox.NetNSPath, id, err)
}
sandbox.NetNSPath = ""
}
}()
// Setup network for sandbox.
podNetwork := ocicni.PodNetwork{
Name: config.GetMetadata().GetName(),
Namespace: config.GetMetadata().GetNamespace(),
ID: id,
NetNS: sandbox.NetNSPath,
PortMappings: toCNIPortMappings(config.GetPortMappings()),
}
if err = c.netPlugin.SetUpPod(podNetwork); err != nil {
return nil, fmt.Errorf("failed to setup network for sandbox %q: %v", id, err)
}
defer func() {
if retErr != nil {
// Teardown network if an error is returned.
if err := c.netPlugin.TearDownPod(podNetwork); err != nil {
glog.Errorf("Failed to destroy network for sandbox %q: %v", id, err)
}
}
}()
}
// Create sandbox container.
spec, err := c.generateSandboxContainerSpec(id, config, image.Config)
spec, err := c.generateSandboxContainerSpec(id, config, image.Config, sandbox.NetNSPath)
if err != nil {
return nil, fmt.Errorf("failed to generate sandbox container spec: %v", err)
}
@ -165,30 +205,6 @@ func (c *criContainerdService) RunPodSandbox(ctx context.Context, r *runtime.Run
}()
sandbox.Pid = task.Pid()
sandbox.NetNS = getNetworkNamespace(task.Pid())
if !config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostNetwork() {
// Setup network for sandbox.
// TODO(random-liu): [P2] Replace with permanent network namespace.
podNetwork := ocicni.PodNetwork{
Name: config.GetMetadata().GetName(),
Namespace: config.GetMetadata().GetNamespace(),
ID: id,
NetNS: sandbox.NetNS,
PortMappings: toCNIPortMappings(config.GetPortMappings()),
}
if err = c.netPlugin.SetUpPod(podNetwork); err != nil {
return nil, fmt.Errorf("failed to setup network for sandbox %q: %v", id, err)
}
defer func() {
if retErr != nil {
// Teardown network if an error is returned.
if err := c.netPlugin.TearDownPod(podNetwork); err != nil {
glog.Errorf("failed to destroy network for sandbox %q: %v", id, err)
}
}
}()
}
if err = task.Start(ctx); err != nil {
return nil, fmt.Errorf("failed to start sandbox container task %q: %v",
id, err)
@ -205,7 +221,7 @@ func (c *criContainerdService) RunPodSandbox(ctx context.Context, r *runtime.Run
}
func (c *criContainerdService) generateSandboxContainerSpec(id string, config *runtime.PodSandboxConfig,
imageConfig *imagespec.ImageConfig) (*runtimespec.Spec, error) {
imageConfig *imagespec.ImageConfig, nsPath string) (*runtimespec.Spec, error) {
// Creates a spec Generator with the default spec.
// TODO(random-liu): [P1] Compare the default settings with docker and containerd default.
spec, err := containerd.GenerateSpec()
@ -252,15 +268,12 @@ func (c *criContainerdService) generateSandboxContainerSpec(id string, config *r
// Set namespace options.
nsOptions := config.GetLinux().GetSecurityContext().GetNamespaceOptions()
// TODO(random-liu): [P1] Create permanent network namespace, so that we could still cleanup
// network namespace after sandbox container dies unexpectedly.
// By default, all namespaces are enabled for the container, runc will create a new namespace
// for it. By removing the namespace, the container will inherit the namespace of the runtime.
if nsOptions.GetHostNetwork() {
g.RemoveLinuxNamespace(string(runtimespec.NetworkNamespace)) // nolint: errcheck
// TODO(random-liu): [P1] Figure out how to handle UTS namespace.
} else {
//TODO(Abhi): May be move this to containerd spec opts (WithLinuxSpaceOption)
g.AddOrReplaceLinuxNamespace(string(runtimespec.NetworkNamespace), nsPath) // nolint: errcheck
}
if nsOptions.GetHostPid() {
g.RemoveLinuxNamespace(string(runtimespec.PIDNamespace)) // nolint: errcheck
}

View File

@ -68,6 +68,7 @@ func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConf
func TestGenerateSandboxContainerSpec(t *testing.T) {
testID := "test-id"
nsPath := "test-cni"
for desc, test := range map[string]struct {
configChange func(*runtime.PodSandboxConfig)
imageConfigChange func(*imagespec.ImageConfig)
@ -80,6 +81,7 @@ func TestGenerateSandboxContainerSpec(t *testing.T) {
require.NotNil(t, spec.Linux)
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.NetworkNamespace,
Path: nsPath,
})
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
Type: runtimespec.PIDNamespace,
@ -136,7 +138,7 @@ func TestGenerateSandboxContainerSpec(t *testing.T) {
if test.imageConfigChange != nil {
test.imageConfigChange(imageConfig)
}
spec, err := c.generateSandboxContainerSpec(testID, config, imageConfig)
spec, err := c.generateSandboxContainerSpec(testID, config, imageConfig, nsPath)
if test.expectErr {
assert.Error(t, err)
assert.Nil(t, spec)

View File

@ -64,7 +64,7 @@ func (c *criContainerdService) PodSandboxStatus(ctx context.Context, r *runtime.
state = runtime.PodSandboxState_SANDBOX_READY
}
}
ip, err := c.netPlugin.GetPodNetworkStatus(sandbox.NetNS)
ip, err := c.netPlugin.GetPodNetworkStatus(sandbox.NetNSPath)
if err != nil {
// Ignore the error on network status
ip = ""

View File

@ -63,22 +63,29 @@ func (c *criContainerdService) StopPodSandbox(ctx context.Context, r *runtime.St
}
// Teardown network for sandbox.
_, err = c.os.Stat(sandbox.NetNS)
if err == nil {
if !sandbox.Config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostNetwork() {
if teardownErr := c.netPlugin.TearDownPod(ocicni.PodNetwork{
Name: sandbox.Config.GetMetadata().GetName(),
Namespace: sandbox.Config.GetMetadata().GetNamespace(),
ID: id,
NetNS: sandbox.NetNS,
PortMappings: toCNIPortMappings(sandbox.Config.GetPortMappings()),
}); teardownErr != nil {
return nil, fmt.Errorf("failed to destroy network for sandbox %q: %v", id, teardownErr)
}
if sandbox.NetNSPath != "" {
if _, err := os.Stat(sandbox.NetNSPath); err != nil {
return nil, fmt.Errorf("failed to stat network namespace path %s :%v", sandbox.NetNSPath, err)
}
if teardownErr := c.netPlugin.TearDownPod(ocicni.PodNetwork{
Name: sandbox.Config.GetMetadata().GetName(),
Namespace: sandbox.Config.GetMetadata().GetNamespace(),
ID: id,
NetNS: sandbox.NetNSPath,
PortMappings: toCNIPortMappings(sandbox.Config.GetPortMappings()),
}); teardownErr != nil {
return nil, fmt.Errorf("failed to destroy network for sandbox %q: %v", id, teardownErr)
}
/*TODO:It is still possible that cri-containerd crashes after we teardown the network, but before we remove the network namespace.
In that case, we'll not be able to remove the sandbox anymore. The chance is slim, but we should be aware of that.
In the future, once TearDownPod is idempotent, this will be fixed.*/
//Close the sandbox network namespace if it was created
if err = sandbox.NetNS.Remove(); err != nil {
return nil, fmt.Errorf("failed to remove network namespace for sandbox %q: %v", id, err)
}
} else if !os.IsNotExist(err) { // It's ok for sandbox.NetNS to *not* exist
return nil, fmt.Errorf("failed to stat netns path for sandbox %q before tearing down the network: %v", id, err)
}
glog.V(2).Infof("TearDown network for sandbox %q successfully", id)
sandboxRoot := getSandboxRootDir(c.rootDir, id)

View File

@ -51,8 +51,8 @@ type Metadata struct {
CreatedAt int64
// Pid is the process id of the sandbox.
Pid uint32
// NetNS is the network namespace used by the sandbox.
NetNS string
// NetNSPath is the network namespace used by the sandbox.
NetNSPath string
}
// Encode encodes Metadata into bytes in json format.

View File

@ -0,0 +1,64 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sandbox
import (
"fmt"
"sync"
cnins "github.com/containernetworking/plugins/pkg/ns"
)
// NetNS holds network namespace for sandbox
type NetNS struct {
sync.Mutex
ns cnins.NetNS
closed bool
}
// NewNetNS creates a network namespace for the sandbox
func NewNetNS() (*NetNS, error) {
netns, err := cnins.NewNS()
if err != nil {
return nil, fmt.Errorf("failed to setup network namespace %v", err)
}
n := new(NetNS)
n.ns = netns
return n, nil
}
// Remove removes network namepace if it exists and not closed. Remove is idempotent,
// meaning it might be invoked multiple times and provides consistent result.
func (n *NetNS) Remove() error {
n.Lock()
defer n.Unlock()
if !n.closed {
err := n.ns.Close()
if err != nil {
return err
}
n.closed = true
}
return nil
}
// GetPath returns network namespace path for sandbox container
func (n *NetNS) GetPath() string {
n.Lock()
defer n.Unlock()
return n.ns.Path()
}

View File

@ -31,7 +31,8 @@ type Sandbox struct {
Metadata
// Containerd sandbox container
Container containerd.Container
// TODO(random-liu): Add cni network namespace client.
// CNI network namespace client
NetNS *NetNS
}
// Store stores all sandboxes.

View File

@ -42,7 +42,7 @@ func TestSandboxStore(t *testing.T) {
},
CreatedAt: time.Now().UnixNano(),
Pid: 1001,
NetNS: "TestNetNS-1",
NetNSPath: "TestNetNS-1",
},
"2": {
ID: "2",
@ -57,7 +57,7 @@ func TestSandboxStore(t *testing.T) {
},
CreatedAt: time.Now().UnixNano(),
Pid: 1002,
NetNS: "TestNetNS-2",
NetNSPath: "TestNetNS-2",
},
"3": {
ID: "3",
@ -72,7 +72,7 @@ func TestSandboxStore(t *testing.T) {
},
CreatedAt: time.Now().UnixNano(),
Pid: 1003,
NetNS: "TestNetNS-3",
NetNSPath: "TestNetNS-3",
},
}
assert := assertlib.New(t)

View File

@ -3,6 +3,7 @@ github.com/boltdb/bolt v1.3.0-58-ge9cf4fa
github.com/containerd/containerd f05281743e5ac9ad11c6e19a72be7a903eab79f5
github.com/containerd/fifo fbfb6a11ec671efbe94ad1c12c2e98773f19e1e6
github.com/containernetworking/cni v0.6.0
github.com/containernetworking/plugins v0.6.0
github.com/cri-o/ocicni 0f90d35d89e9ab7e972a9edeb36b0aaffa250335
github.com/davecgh/go-spew v1.1.0
github.com/docker/distribution b38e5838b7b2f2ad48e06ec4b500011976080621

201
vendor/github.com/containernetworking/plugins/LICENSE generated vendored Normal file
View File

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {yyyy} {name of copyright owner}
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,25 @@
[![Build Status](https://travis-ci.org/containernetworking/plugins.svg?branch=master)](https://travis-ci.org/containernetworking/plugins)
# plugins
Some CNI network plugins, maintained by the containernetworking team. For more information, see the individual READMEs.
## Plugins supplied:
### Main: interface-creating
* `bridge`: Creates a bridge, adds the host and the container to it.
* `ipvlan`: Adds an [ipvlan](https://www.kernel.org/doc/Documentation/networking/ipvlan.txt) interface in the container
* `loopback`: Creates a loopback interface
* `macvlan`: Creates a new MAC address, forwards all traffic to that to the container
* `ptp`: Creates a veth pair.
* `vlan`: Allocates a vlan device.
### IPAM: IP address allocation
* `dhcp`: Runs a daemon on the host to make DHCP requests on behalf of the container
* `host-local`: maintains a local database of allocated IPs
### Meta: other plugins
* `flannel`: generates an interface corresponding to a flannel config file
* `tuning`: Tweaks sysctl parameters of an existing interface
* `portmap`: An iptables-based portmapping plugin. Maps ports from the host's address space to the container.
### Sample
The sample plugin provides an example for building your own plugin.

View File

@ -0,0 +1,40 @@
### Namespaces, Threads, and Go
On Linux each OS thread can have a different network namespace. Go's thread scheduling model switches goroutines between OS threads based on OS thread load and whether the goroutine would block other goroutines. This can result in a goroutine switching network namespaces without notice and lead to errors in your code.
### Namespace Switching
Switching namespaces with the `ns.Set()` method is not recommended without additional strategies to prevent unexpected namespace changes when your goroutines switch OS threads.
Go provides the `runtime.LockOSThread()` function to ensure a specific goroutine executes on its current OS thread and prevents any other goroutine from running in that thread until the locked one exits. Careful usage of `LockOSThread()` and goroutines can provide good control over which network namespace a given goroutine executes in.
For example, you cannot rely on the `ns.Set()` namespace being the current namespace after the `Set()` call unless you do two things. First, the goroutine calling `Set()` must have previously called `LockOSThread()`. Second, you must ensure `runtime.UnlockOSThread()` is not called somewhere in-between. You also cannot rely on the initial network namespace remaining the current network namespace if any other code in your program switches namespaces, unless you have already called `LockOSThread()` in that goroutine. Note that `LockOSThread()` prevents the Go scheduler from optimally scheduling goroutines for best performance, so `LockOSThread()` should only be used in small, isolated goroutines that release the lock quickly.
### Do() The Recommended Thing
The `ns.Do()` method provides **partial** control over network namespaces for you by implementing these strategies. All code dependent on a particular network namespace (including the root namespace) should be wrapped in the `ns.Do()` method to ensure the correct namespace is selected for the duration of your code. For example:
```go
targetNs, err := ns.NewNS()
if err != nil {
return err
}
err = targetNs.Do(func(hostNs ns.NetNS) error {
dummy := &netlink.Dummy{
LinkAttrs: netlink.LinkAttrs{
Name: "dummy0",
},
}
return netlink.LinkAdd(dummy)
})
```
Note this requirement to wrap every network call is very onerous - any libraries you call might call out to network services such as DNS, and all such calls need to be protected after you call `ns.Do()`. The CNI plugins all exit very soon after calling `ns.Do()` which helps to minimize the problem.
Also: If the runtime spawns a new OS thread, it will inherit the network namespace of the parent thread, which may have been temporarily switched, and thus the new OS thread will be permanently "stuck in the wrong namespace".
In short, **there is no safe way to change network namespaces from within a long-lived, multithreaded Go process**. If your daemon process needs to be namespace aware, consider spawning a separate process (like a CNI plugin) for each namespace.
### Further Reading
- https://github.com/golang/go/wiki/LockOSThread
- http://morsmachine.dk/go-scheduler
- https://github.com/containernetworking/cni/issues/262
- https://golang.org/pkg/runtime/
- https://www.weave.works/blog/linux-namespaces-and-go-don-t-mix

View File

@ -0,0 +1,178 @@
// Copyright 2015 CNI authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ns
import (
"fmt"
"os"
"runtime"
"sync"
"syscall"
)
type NetNS interface {
// Executes the passed closure in this object's network namespace,
// attempting to restore the original namespace before returning.
// However, since each OS thread can have a different network namespace,
// and Go's thread scheduling is highly variable, callers cannot
// guarantee any specific namespace is set unless operations that
// require that namespace are wrapped with Do(). Also, no code called
// from Do() should call runtime.UnlockOSThread(), or the risk
// of executing code in an incorrect namespace will be greater. See
// https://github.com/golang/go/wiki/LockOSThread for further details.
Do(toRun func(NetNS) error) error
// Sets the current network namespace to this object's network namespace.
// Note that since Go's thread scheduling is highly variable, callers
// cannot guarantee the requested namespace will be the current namespace
// after this function is called; to ensure this wrap operations that
// require the namespace with Do() instead.
Set() error
// Returns the filesystem path representing this object's network namespace
Path() string
// Returns a file descriptor representing this object's network namespace
Fd() uintptr
// Cleans up this instance of the network namespace; if this instance
// is the last user the namespace will be destroyed
Close() error
}
type netNS struct {
file *os.File
mounted bool
closed bool
}
// netNS implements the NetNS interface
var _ NetNS = &netNS{}
const (
// https://github.com/torvalds/linux/blob/master/include/uapi/linux/magic.h
NSFS_MAGIC = 0x6e736673
PROCFS_MAGIC = 0x9fa0
)
type NSPathNotExistErr struct{ msg string }
func (e NSPathNotExistErr) Error() string { return e.msg }
type NSPathNotNSErr struct{ msg string }
func (e NSPathNotNSErr) Error() string { return e.msg }
func IsNSorErr(nspath string) error {
stat := syscall.Statfs_t{}
if err := syscall.Statfs(nspath, &stat); err != nil {
if os.IsNotExist(err) {
err = NSPathNotExistErr{msg: fmt.Sprintf("failed to Statfs %q: %v", nspath, err)}
} else {
err = fmt.Errorf("failed to Statfs %q: %v", nspath, err)
}
return err
}
switch stat.Type {
case PROCFS_MAGIC, NSFS_MAGIC:
return nil
default:
return NSPathNotNSErr{msg: fmt.Sprintf("unknown FS magic on %q: %x", nspath, stat.Type)}
}
}
// Returns an object representing the namespace referred to by @path
func GetNS(nspath string) (NetNS, error) {
err := IsNSorErr(nspath)
if err != nil {
return nil, err
}
fd, err := os.Open(nspath)
if err != nil {
return nil, err
}
return &netNS{file: fd}, nil
}
func (ns *netNS) Path() string {
return ns.file.Name()
}
func (ns *netNS) Fd() uintptr {
return ns.file.Fd()
}
func (ns *netNS) errorIfClosed() error {
if ns.closed {
return fmt.Errorf("%q has already been closed", ns.file.Name())
}
return nil
}
func (ns *netNS) Do(toRun func(NetNS) error) error {
if err := ns.errorIfClosed(); err != nil {
return err
}
containedCall := func(hostNS NetNS) error {
threadNS, err := GetCurrentNS()
if err != nil {
return fmt.Errorf("failed to open current netns: %v", err)
}
defer threadNS.Close()
// switch to target namespace
if err = ns.Set(); err != nil {
return fmt.Errorf("error switching to ns %v: %v", ns.file.Name(), err)
}
defer threadNS.Set() // switch back
return toRun(hostNS)
}
// save a handle to current network namespace
hostNS, err := GetCurrentNS()
if err != nil {
return fmt.Errorf("Failed to open current namespace: %v", err)
}
defer hostNS.Close()
var wg sync.WaitGroup
wg.Add(1)
var innerError error
go func() {
defer wg.Done()
runtime.LockOSThread()
innerError = containedCall(hostNS)
}()
wg.Wait()
return innerError
}
// WithNetNSPath executes the passed closure under the given network
// namespace, restoring the original namespace afterwards.
func WithNetNSPath(nspath string, toRun func(NetNS) error) error {
ns, err := GetNS(nspath)
if err != nil {
return err
}
defer ns.Close()
return ns.Do(toRun)
}

View File

@ -0,0 +1,149 @@
// Copyright 2015-2017 CNI authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ns
import (
"crypto/rand"
"fmt"
"os"
"path"
"runtime"
"sync"
"golang.org/x/sys/unix"
)
// Returns an object representing the current OS thread's network namespace
func GetCurrentNS() (NetNS, error) {
return GetNS(getCurrentThreadNetNSPath())
}
func getCurrentThreadNetNSPath() string {
// /proc/self/ns/net returns the namespace of the main thread, not
// of whatever thread this goroutine is running on. Make sure we
// use the thread's net namespace since the thread is switching around
return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
}
// Creates a new persistent network namespace and returns an object
// representing that namespace, without switching to it
func NewNS() (NetNS, error) {
const nsRunDir = "/var/run/netns"
b := make([]byte, 16)
_, err := rand.Reader.Read(b)
if err != nil {
return nil, fmt.Errorf("failed to generate random netns name: %v", err)
}
err = os.MkdirAll(nsRunDir, 0755)
if err != nil {
return nil, err
}
// create an empty file at the mount point
nsName := fmt.Sprintf("cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
nsPath := path.Join(nsRunDir, nsName)
mountPointFd, err := os.Create(nsPath)
if err != nil {
return nil, err
}
mountPointFd.Close()
// Ensure the mount point is cleaned up on errors; if the namespace
// was successfully mounted this will have no effect because the file
// is in-use
defer os.RemoveAll(nsPath)
var wg sync.WaitGroup
wg.Add(1)
// do namespace work in a dedicated goroutine, so that we can safely
// Lock/Unlock OSThread without upsetting the lock/unlock state of
// the caller of this function
var fd *os.File
go (func() {
defer wg.Done()
runtime.LockOSThread()
var origNS NetNS
origNS, err = GetNS(getCurrentThreadNetNSPath())
if err != nil {
return
}
defer origNS.Close()
// create a new netns on the current thread
err = unix.Unshare(unix.CLONE_NEWNET)
if err != nil {
return
}
defer origNS.Set()
// bind mount the new netns from the current thread onto the mount point
err = unix.Mount(getCurrentThreadNetNSPath(), nsPath, "none", unix.MS_BIND, "")
if err != nil {
return
}
fd, err = os.Open(nsPath)
if err != nil {
return
}
})()
wg.Wait()
if err != nil {
unix.Unmount(nsPath, unix.MNT_DETACH)
return nil, fmt.Errorf("failed to create namespace: %v", err)
}
return &netNS{file: fd, mounted: true}, nil
}
func (ns *netNS) Close() error {
if err := ns.errorIfClosed(); err != nil {
return err
}
if err := ns.file.Close(); err != nil {
return fmt.Errorf("Failed to close %q: %v", ns.file.Name(), err)
}
ns.closed = true
if ns.mounted {
if err := unix.Unmount(ns.file.Name(), unix.MNT_DETACH); err != nil {
return fmt.Errorf("Failed to unmount namespace %s: %v", ns.file.Name(), err)
}
if err := os.RemoveAll(ns.file.Name()); err != nil {
return fmt.Errorf("Failed to clean up namespace %s: %v", ns.file.Name(), err)
}
ns.mounted = false
}
return nil
}
func (ns *netNS) Set() error {
if err := ns.errorIfClosed(); err != nil {
return err
}
if err := unix.Setns(int(ns.Fd()), unix.CLONE_NEWNET); err != nil {
return fmt.Errorf("Error switching to ns %v: %v", ns.file.Name(), err)
}
return nil
}

View File

@ -0,0 +1,36 @@
// Copyright 2015-2017 CNI authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build !linux
package ns
import "github.com/containernetworking/cni/pkg/types"
// Returns an object representing the current OS thread's network namespace
func GetCurrentNS() (NetNS, error) {
return nil, types.NotImplementedError
}
func NewNS() (NetNS, error) {
return nil, types.NotImplementedError
}
func (ns *netNS) Close() error {
return types.NotImplementedError
}
func (ns *netNS) Set() error {
return types.NotImplementedError
}