kubernetes/pkg/kubelet/dockershim/helpers_linux.go
Kubernetes Submit Queue 89e433fca1
Merge pull request #59404 from ohmystack/docker-mem-swap
Automatic merge from submit-queue (batch tested with PRs 50724, 59025, 59710, 59404, 59958). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

dockertools: disable MemorySwap on Linux

In this commit, set `MemorySwap` to the same value as `Memory` to prevent containers from using swap on Linux.

**What this PR does / why we need it**:

In #39731, @pires tried to disable swap on Linux by setting `MemorySwap` to 0.
However, according to [Docker's docs](https://docs.docker.com/config/containers/resource_constraints/#--memory-swap-details), setting `MemorySwap` to 0 is treated as unset, and the [default behavior](https://github.com/moby/moby/blob/v17.05.0-ce/daemon/daemon_unix.go#L266-L269) is then to set it to twice the size of `Memory`, which still allows the container to use swap.

**Which issue(s) this PR fixes** :

This issue was mentioned in this comment: https://github.com/kubernetes/kubernetes/issues/7294#issuecomment-362722637

**Special notes for your reviewer**:

1. For the case on Windows, we can still use the 0 because [Windows does not support `MemorySwap`](https://github.com/moby/moby/blob/v17.05.0-ce/daemon/daemon_windows.go#L185-L187).
2. Another place that uses `DefaultMemorySwap()` is the [sandbox](https://github.com/kubernetes/kubernetes/blob/v1.9.2/pkg/kubelet/dockershim/docker_sandbox.go#L505).
Setting the sandbox's `MemorySwap` to 0 is probably fine, so I didn't change that.

**Release note**:

```release-note
dockertools: disable memory swap on Linux.
```
2018-02-26 21:34:42 -08:00

153 lines
5.2 KiB
Go

// +build linux
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package dockershim
import (
"bytes"
"crypto/md5"
"encoding/json"
"fmt"
"io/ioutil"
"path/filepath"
"strings"
"github.com/blang/semver"
dockertypes "github.com/docker/docker/api/types"
dockercontainer "github.com/docker/docker/api/types/container"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
)
// DefaultMemorySwap returns the default MemorySwap value for this Linux
// build, which is always 0.
func DefaultMemorySwap() int64 {
	const defaultSwap int64 = 0
	return defaultSwap
}
// getSecurityOpts returns the security options for a container. In this
// Linux build only seccomp options are produced: the call is delegated to
// getSeccompSecurityOpts and any failure is wrapped with additional context.
func (ds *dockerService) getSecurityOpts(seccompProfile string, separator rune) ([]string, error) {
	opts, err := getSeccompSecurityOpts(seccompProfile, separator)
	if err == nil {
		return opts, nil
	}
	return nil, fmt.Errorf("failed to generate seccomp security options for container: %v", err)
}
// getSeccompDockerOpts translates a seccomp profile name into docker options.
//
// Recognized values of seccompProfile:
//   - "" or "unconfined": defaultSeccompOpt is returned.
//   - "docker/default": nil is returned so docker loads its default profile.
//   - "localhost/<absolute path>": the file is read, its JSON is compacted,
//     and the result is returned as a single "seccomp" option.
//
// Any other value, a relative path, an unreadable file, or content that
// json.Compact rejects results in an error.
func getSeccompDockerOpts(seccompProfile string) ([]dockerOpt, error) {
	const localhostPrefix = "localhost/"

	switch {
	case seccompProfile == "", seccompProfile == "unconfined":
		// return early the default
		return defaultSeccompOpt, nil
	case seccompProfile == "docker/default":
		// return nil so docker will load the default seccomp profile
		return nil, nil
	case !strings.HasPrefix(seccompProfile, localhostPrefix):
		return nil, fmt.Errorf("unknown seccomp profile option: %s", seccompProfile)
	}

	// Everything after the 'localhost/' prefix is the profile's path.
	profilePath := strings.TrimPrefix(seccompProfile, localhostPrefix)
	if !filepath.IsAbs(profilePath) {
		return nil, fmt.Errorf("seccomp profile path must be absolute, but got relative path %q", profilePath)
	}

	raw, readErr := ioutil.ReadFile(filepath.FromSlash(profilePath))
	if readErr != nil {
		return nil, fmt.Errorf("cannot load seccomp profile %q: %v", profilePath, readErr)
	}

	var compacted bytes.Buffer
	if compactErr := json.Compact(&compacted, raw); compactErr != nil {
		return nil, compactErr
	}

	// Rather than the full profile, just put the filename & md5sum in the event log.
	digest := md5.Sum(raw)
	eventMsg := fmt.Sprintf("%s(md5:%x)", profilePath, digest)
	return []dockerOpt{{"seccomp", compacted.String(), eventMsg}}, nil
}
// getSeccompSecurityOpts gets container seccomp options from container seccomp profile.
// It is an experimental feature and may be promoted to official runtime api in the future.
//
// The profile is resolved with getSeccompDockerOpts and the resulting options
// are rendered to strings by fmtDockerOpts using the given separator. An error
// from profile resolution is returned unchanged.
func getSeccompSecurityOpts(seccompProfile string, separator rune) ([]string, error) {
	dockerOpts, err := getSeccompDockerOpts(seccompProfile)
	if err == nil {
		return fmtDockerOpts(dockerOpts, separator), nil
	}
	return nil, err
}
// updateCreateConfig applies Linux-specific settings to createConfig.
//
// When config carries a Linux section it:
//   - copies the resource limits into HostConfig.Resources, setting Memory and
//     MemorySwap to the same value, and sets HostConfig.OomScoreAdj;
//   - applies the container security context;
//   - applies the PID namespace overrides.
//
// When sandboxConfig carries a Linux section, the cgroup parent produced by
// ds.GenerateExpectedCgroupParent is stored in HostConfig.CgroupParent.
//
// It returns an error if applying the security context or generating the
// cgroup parent fails. The container name in those errors is read through the
// nil-safe generated getters so that a config without Metadata yields an
// error with an empty name instead of a nil-pointer panic.
func (ds *dockerService) updateCreateConfig(
	createConfig *dockertypes.ContainerCreateConfig,
	config *runtimeapi.ContainerConfig,
	sandboxConfig *runtimeapi.PodSandboxConfig,
	podSandboxID string, securityOptSep rune, apiVersion *semver.Version) error {
	// Apply Linux-specific options if applicable.
	if lc := config.GetLinux(); lc != nil {
		// TODO: Check if the units are correct.
		// TODO: Can we assume the defaults are sane?
		rOpts := lc.GetResources()
		if rOpts != nil {
			createConfig.HostConfig.Resources = dockercontainer.Resources{
				// Memory and MemorySwap are set to the same value, this prevents containers from using any swap.
				Memory:     rOpts.MemoryLimitInBytes,
				MemorySwap: rOpts.MemoryLimitInBytes,
				CPUShares:  rOpts.CpuShares,
				CPUQuota:   rOpts.CpuQuota,
				CPUPeriod:  rOpts.CpuPeriod,
			}
			createConfig.HostConfig.OomScoreAdj = int(rOpts.OomScoreAdj)
		}

		// Note: ShmSize is handled in kube_docker_client.go

		// Apply security context.
		if err := applyContainerSecurityContext(lc, podSandboxID, createConfig.Config, createConfig.HostConfig, securityOptSep); err != nil {
			return fmt.Errorf("failed to apply container security context for container %q: %v", config.GetMetadata().GetName(), err)
		}
		modifyContainerPIDNamespaceOverrides(ds.disableSharedPID, apiVersion, createConfig.HostConfig, podSandboxID)
	}

	// Apply cgroupsParent derived from the sandbox config.
	if lc := sandboxConfig.GetLinux(); lc != nil {
		// Apply Cgroup options.
		cgroupParent, err := ds.GenerateExpectedCgroupParent(lc.CgroupParent)
		if err != nil {
			return fmt.Errorf("failed to generate cgroup parent in expected syntax for container %q: %v", config.GetMetadata().GetName(), err)
		}
		createConfig.HostConfig.CgroupParent = cgroupParent
	}

	return nil
}
// determinePodIPBySandboxID always returns the empty string in this Linux
// build; the uid argument is not used.
func (ds *dockerService) determinePodIPBySandboxID(uid string) string {
	const noPodIP = ""
	return noPodIP
}
// getNetworkNamespace returns the network namespace string for container c,
// built by formatting the container's pid with dockerNetNSFmt. It returns an
// error when the reported pid is 0.
func getNetworkNamespace(c *dockertypes.ContainerJSON) (string, error) {
	// Docker reports pid 0 for an exited container, so only a non-zero pid
	// can be turned into a namespace reference.
	if pid := c.State.Pid; pid != 0 {
		return fmt.Sprintf(dockerNetNSFmt, pid), nil
	}
	return "", fmt.Errorf("cannot find network namespace for the terminated container %q", c.ID)
}
// applyExperimentalCreateConfig applies experimental configuration from sandbox annotations.
// In this Linux build the body is empty, so createConfig and annotations are
// left untouched.
func applyExperimentalCreateConfig(createConfig *dockertypes.ContainerCreateConfig, annotations map[string]string) {
}