Merge pull request #563 from Random-Liu/disable-dad

Disable IPv6 dad by default.
This commit is contained in:
Lantao Liu 2018-01-22 17:32:54 -08:00 committed by GitHub
commit 1c2fef9f68
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 128 additions and 131 deletions

View File

@ -87,6 +87,9 @@ type PluginConfig struct {
// SkipImageFSUUID skips retrieving imagefs uuid. // SkipImageFSUUID skips retrieving imagefs uuid.
// TODO(random-liu): Remove this after we find a generic way to get imagefs uuid. // TODO(random-liu): Remove this after we find a generic way to get imagefs uuid.
SkipImageFSUUID bool `toml:"skip_imagefs_uuid" json:"skipImageFSUUID,omitempty"` SkipImageFSUUID bool `toml:"skip_imagefs_uuid" json:"skipImageFSUUID,omitempty"`
// EnableIPv6DAD enables IPv6 DAD.
// TODO(random-liu): Use optimistic_dad when it's GA.
EnableIPv6DAD bool `toml:"enable_ipv6_dad" json:"enableIPv6DAD,omitempty"`
} }
// CRIConfig contains toml config related to CRI service. // CRIConfig contains toml config related to CRI service.
@ -189,6 +192,8 @@ func (c *CRIContainerdOptions) AddFlags(fs *pflag.FlagSet) {
defaults.ProfilingAddress, "Profiling address for web interface host:port/debug/pprof/.") defaults.ProfilingAddress, "Profiling address for web interface host:port/debug/pprof/.")
fs.BoolVar(&c.SkipImageFSUUID, "skip-imagefs-uuid", fs.BoolVar(&c.SkipImageFSUUID, "skip-imagefs-uuid",
defaults.SkipImageFSUUID, "Skip retrieval of imagefs uuid. When turned on, kubelet will not be able to get imagefs capacity or perform imagefs disk eviction.") defaults.SkipImageFSUUID, "Skip retrieval of imagefs uuid. When turned on, kubelet will not be able to get imagefs capacity or perform imagefs disk eviction.")
fs.BoolVar(&c.EnableIPv6DAD, "enable-ipv6-dad",
defaults.EnableIPv6DAD, "Enable IPv6 DAD (duplicate address detection) for pod sandbox network. Enabling this will increase pod sandbox start latency by several seconds.")
} }
// InitFlags load configurations from config file, and then overwrite with flags. // InitFlags load configurations from config file, and then overwrite with flags.
@ -257,6 +262,7 @@ func DefaultConfig() Config {
StatsCollectPeriod: 10, StatsCollectPeriod: 10,
SystemdCgroup: false, SystemdCgroup: false,
SkipImageFSUUID: false, SkipImageFSUUID: false,
EnableIPv6DAD: false,
}, },
ContainerdRootDir: "/var/lib/containerd", ContainerdRootDir: "/var/lib/containerd",
ContainerdEndpoint: "/run/containerd/containerd.sock", ContainerdEndpoint: "/run/containerd/containerd.sock",

View File

@ -19,6 +19,7 @@ package server
import ( import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"os/exec"
"path" "path"
"path/filepath" "path/filepath"
"strconv" "strconv"
@ -36,6 +37,7 @@ import (
"github.com/opencontainers/selinux/go-selinux/label" "github.com/opencontainers/selinux/go-selinux/label"
"golang.org/x/net/context" "golang.org/x/net/context"
"k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
"k8s.io/kubernetes/pkg/util/sysctl"
"github.com/containerd/cri-containerd/pkg/store" "github.com/containerd/cri-containerd/pkg/store"
imagestore "github.com/containerd/cri-containerd/pkg/store/image" imagestore "github.com/containerd/cri-containerd/pkg/store/image"
@ -383,3 +385,35 @@ func newSpecGenerator(spec *runtimespec.Spec) generate.Generator {
g.HostSpecific = true g.HostSpecific = true
return g return g
} }
// disableNetNSDAD disables duplicate address detection in the network namespace.
// DAD has a negative affect on sandbox start latency, since we have to wait
// a second or more for the addresses to leave the "tentative" state.
func disableNetNSDAD(ns string) error {
dad := "net/ipv6/conf/default/accept_dad"
sysctlBin, err := exec.LookPath("sysctl")
if err != nil {
return fmt.Errorf("could not find sysctl binary: %v", err)
}
nsenterBin, err := exec.LookPath("nsenter")
if err != nil {
return fmt.Errorf("could not find nsenter binary: %v", err)
}
// If the sysctl doesn't exist, it means ipv6 is disabled.
if _, err := sysctl.New().GetSysctl(dad); err != nil {
return nil
}
output, err := exec.Command(nsenterBin,
fmt.Sprintf("--net=%s", ns), "-F", "--",
sysctlBin, "-w", fmt.Sprintf("%s=%s", dad, "0"),
).CombinedOutput()
if err != nil {
return fmt.Errorf("failed to write sysctl %q - output: %s, error: %s",
dad, output, err)
}
return nil
}

View File

@ -102,6 +102,16 @@ func (c *criContainerdService) RunPodSandbox(ctx context.Context, r *runtime.Run
sandbox.NetNSPath = "" sandbox.NetNSPath = ""
} }
}() }()
if !c.config.EnableIPv6DAD {
// It's a known issue that IPv6 DAD increases sandbox start latency by several seconds.
// Disable it when it's not enabled to avoid the latency.
// See:
// * https://github.com/kubernetes/kubernetes/issues/54651
// * https://www.agwa.name/blog/post/beware_the_ipv6_dad_race_condition
if err := disableNetNSDAD(sandbox.NetNSPath); err != nil {
return nil, fmt.Errorf("failed to disable DAD for sandbox %q: %v", id, err)
}
}
// Setup network for sandbox. // Setup network for sandbox.
podNetwork := ocicni.PodNetwork{ podNetwork := ocicni.PodNetwork{
Name: config.GetMetadata().GetName(), Name: config.GetMetadata().GetName(),

View File

@ -1,60 +0,0 @@
## About
This directory contains a collection of scripts used to build and manage this
repository. If there are any issues regarding the intention of a particular
script (or even part of a certain script), please reach out to us.
It may help us either refine our current scripts, or add on new ones
that are appropriate for a given use case.
## DinD (dind.sh)
DinD is a wrapper script which allows Docker to be run inside a Docker
container. DinD requires the container to
be run with privileged mode enabled.
## Generate Authors (generate-authors.sh)
Generates AUTHORS; a file with all the names and corresponding emails of
individual contributors. AUTHORS can be found in the home directory of
this repository.
## Make
There are two make files, each with different extensions. Neither are supposed
to be called directly; only invoke `make`. Both scripts run inside a Docker
container.
### make.ps1
- The Windows native build script that uses PowerShell semantics; it is limited
unlike `hack\make.sh` since it does not provide support for the full set of
operations provided by the Linux counterpart, `make.sh`. However, `make.ps1`
does provide support for local Windows development and Windows to Windows CI.
More information is found within `make.ps1` by the author, @jhowardmsft
### make.sh
- Referenced via `make test` when running tests on a local machine,
or directly referenced when running tests inside a Docker development container.
- When running on a local machine, `make test` to run all tests found in
`test`, `test-unit`, `test-integration`, and `test-docker-py` on
your local machine. The default timeout is set in `make.sh` to 60 minutes
(`${TIMEOUT:=60m}`), since it currently takes up to an hour to run
all of the tests.
- When running inside a Docker development container, `hack/make.sh` does
not have a single target that runs all the tests. You need to provide a
single command line with multiple targets that performs the same thing.
An example referenced from [Run targets inside a development container](https://docs.docker.com/opensource/project/test-and-docs/#run-targets-inside-a-development-container): `root@5f8630b873fe:/go/src/github.com/moby/moby# hack/make.sh dynbinary binary cross test-unit test-integration test-docker-py`
- For more information related to testing outside the scope of this README,
refer to
[Run tests and test documentation](https://docs.docker.com/opensource/project/test-and-docs/)
## Release (release.sh)
Releases any bundles built by `make` on a public AWS S3 bucket.
For information regarding configuration, please view `release.sh`.
## Vendor (vendor.sh)
A shell script that is a wrapper around Vndr. For information on how to use
this, please refer to [vndr's README](https://github.com/LK4D4/vndr/blob/master/README.md)

View File

@ -1,69 +0,0 @@
# Integration Testing on Swarm
IT on Swarm allows you to execute integration test in parallel across a Docker Swarm cluster
## Architecture
### Master service
- Works as a funker caller
- Calls a worker funker (`-worker-service`) with a chunk of `-check.f` filter strings (passed as a file via `-input` flag, typically `/mnt/input`)
### Worker service
- Works as a funker callee
- Executes an equivalent of `TESTFLAGS=-check.f TestFoo|TestBar|TestBaz ... make test-integration-cli` using the bind-mounted API socket (`docker.sock`)
### Client
- Controls master and workers via `docker stack`
- No need to have a local daemon
Typically, the master and workers are supposed to be running on a cloud environment,
while the client is supposed to be running on a laptop, e.g. Docker for Mac/Windows.
## Requirement
- Docker daemon 1.13 or later
- Private registry for distributed execution with multiple nodes
## Usage
### Step 1: Prepare images
$ make build-integration-cli-on-swarm
Following environment variables are known to work in this step:
- `BUILDFLAGS`
- `DOCKER_INCREMENTAL_BINARY`
Note: during the transition into Moby Project, you might need to create a symbolic link `$GOPATH/src/github.com/docker/docker` to `$GOPATH/src/github.com/moby/moby`.
### Step 2: Execute tests
$ ./hack/integration-cli-on-swarm/integration-cli-on-swarm -replicas 40 -push-worker-image YOUR_REGISTRY.EXAMPLE.COM/integration-cli-worker:latest
Following environment variables are known to work in this step:
- `DOCKER_GRAPHDRIVER`
- `DOCKER_EXPERIMENTAL`
#### Flags
Basic flags:
- `-replicas N`: the number of worker service replicas. i.e. degree of parallelism.
- `-chunks N`: the number of chunks. By default, `chunks` == `replicas`.
- `-push-worker-image REGISTRY/IMAGE:TAG`: push the worker image to the registry. Note that if you have only single node and hence you do not need a private registry, you do not need to specify `-push-worker-image`.
Experimental flags for mitigating makespan nonuniformity:
- `-shuffle`: Shuffle the test filter strings
Flags for debugging IT on Swarm itself:
- `-rand-seed N`: the random seed. This flag is useful for deterministic replaying. By default(0), the timestamp is used.
- `-filters-file FILE`: the file contains `-check.f` strings. By default, the file is automatically generated.
- `-dry-run`: skip the actual workload
- `keep-executor`: do not auto-remove executor containers, which is used for running privileged programs on Swarm

View File

@ -1,2 +0,0 @@
# dependencies specific to worker (i.e. github.com/docker/docker/...) are not vendored here
github.com/bfirsh/funker-go eaa0a2e06f30e72c9a0b7f858951e581e26ef773

78
vendor/k8s.io/kubernetes/pkg/util/sysctl/sysctl.go generated vendored Normal file
View File

@ -0,0 +1,78 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sysctl
import (
"io/ioutil"
"path"
"strconv"
"strings"
)
const (
sysctlBase = "/proc/sys"
VmOvercommitMemory = "vm/overcommit_memory"
VmPanicOnOOM = "vm/panic_on_oom"
KernelPanic = "kernel/panic"
KernelPanicOnOops = "kernel/panic_on_oops"
RootMaxKeys = "kernel/keys/root_maxkeys"
RootMaxBytes = "kernel/keys/root_maxbytes"
VmOvercommitMemoryAlways = 1 // kernel performs no memory over-commit handling
VmPanicOnOOMInvokeOOMKiller = 0 // kernel calls the oom_killer function when OOM occurs
KernelPanicOnOopsAlways = 1 // kernel panics on kernel oops
KernelPanicRebootTimeout = 10 // seconds after a panic for the kernel to reboot
RootMaxKeysSetting = 1000000 // Needed since docker creates a new key per container
RootMaxBytesSetting = RootMaxKeysSetting * 25 // allocate 25 bytes per key * number of MaxKeys
)
// An injectable interface for running sysctl commands.
type Interface interface {
// GetSysctl returns the value for the specified sysctl setting
GetSysctl(sysctl string) (int, error)
// SetSysctl modifies the specified sysctl flag to the new value
SetSysctl(sysctl string, newVal int) error
}
// New returns a new Interface for accessing sysctl
func New() Interface {
return &procSysctl{}
}
// procSysctl implements Interface by reading and writing files under /proc/sys
type procSysctl struct {
}
// GetSysctl returns the value for the specified sysctl setting
func (_ *procSysctl) GetSysctl(sysctl string) (int, error) {
data, err := ioutil.ReadFile(path.Join(sysctlBase, sysctl))
if err != nil {
return -1, err
}
val, err := strconv.Atoi(strings.Trim(string(data), " \n"))
if err != nil {
return -1, err
}
return val, nil
}
// SetSysctl modifies the specified sysctl flag to the new value
func (_ *procSysctl) SetSysctl(sysctl string, newVal int) error {
return ioutil.WriteFile(path.Join(sysctlBase, sysctl), []byte(strconv.Itoa(newVal)), 0640)
}