Fix oom-score-adj policy in kubelet.

Docker daemon and kubelet needs to be protected by setting oom-score-adj to -999.

Signed-off-by: Vishnu kannan <vishnuk@google.com>
This commit is contained in:
Vishnu kannan
2016-09-07 18:32:38 -07:00
parent f59f4aecfa
commit e4acad7afb
9 changed files with 274 additions and 98 deletions

View File

@@ -326,6 +326,7 @@ func (cm *containerManagerImpl) setupNode() error {
systemContainers := []*systemContainer{}
if cm.ContainerRuntime == "docker" {
dockerVersion := getDockerVersion(cm.cadvisorInterface)
if cm.RuntimeCgroupsName != "" {
cont := newSystemCgroups(cm.RuntimeCgroupsName)
var capacity = api.ResourceList{}
@@ -351,13 +352,16 @@ func (cm *containerManagerImpl) setupNode() error {
},
},
}
dockerVersion := getDockerVersion(cm.cadvisorInterface)
cont.ensureStateFunc = func(manager *fs.Manager) error {
return ensureDockerInContainer(dockerVersion, -900, dockerContainer)
return ensureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, dockerContainer)
}
systemContainers = append(systemContainers, cont)
} else {
cm.periodicTasks = append(cm.periodicTasks, func() {
if err := ensureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, nil); err != nil {
glog.Error(err)
return
}
cont, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
if err != nil {
glog.Error(err)
@@ -401,11 +405,15 @@ func (cm *containerManagerImpl) setupNode() error {
},
}
cont.ensureStateFunc = func(_ *fs.Manager) error {
return manager.Apply(os.Getpid())
return ensureProcessInContainerWithOOMScore(os.Getpid(), qos.KubeletOOMScoreAdj, &manager)
}
systemContainers = append(systemContainers, cont)
} else {
cm.periodicTasks = append(cm.periodicTasks, func() {
if err := ensureProcessInContainerWithOOMScore(os.Getpid(), qos.KubeletOOMScoreAdj, nil); err != nil {
glog.Error(err)
return
}
cont, err := getContainer(os.Getpid())
if err != nil {
glog.Errorf("failed to find cgroups of kubelet - %v", err)
@@ -578,7 +586,7 @@ func ensureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, mana
// Move if the pid is not already in the desired container.
for _, pid := range pids {
if err := ensureProcessInContainer(pid, oomScoreAdj, manager); err != nil {
if err := ensureProcessInContainerWithOOMScore(pid, oomScoreAdj, manager); err != nil {
errs = append(errs, fmt.Errorf("errors moving %q pid: %v", proc.name, err))
}
}
@@ -586,12 +594,13 @@ func ensureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, mana
return utilerrors.NewAggregate(errs)
}
func ensureProcessInContainer(pid int, oomScoreAdj int, manager *fs.Manager) error {
func ensureProcessInContainerWithOOMScore(pid int, oomScoreAdj int, manager *fs.Manager) error {
if runningInHost, err := isProcessRunningInHost(pid); err != nil {
// Err on the side of caution. Avoid moving the docker daemon unless we are able to identify its context.
return err
} else if !runningInHost {
// Process is running inside a container. Don't touch that.
glog.V(2).Infof("pid %d is running in the host namespaces", pid)
return nil
}
@@ -601,17 +610,18 @@ func ensureProcessInContainer(pid int, oomScoreAdj int, manager *fs.Manager) err
errs = append(errs, fmt.Errorf("failed to find container of PID %d: %v", pid, err))
}
if cont != manager.Cgroups.Name {
if manager != nil && cont != manager.Cgroups.Name {
err = manager.Apply(pid)
if err != nil {
errs = append(errs, fmt.Errorf("failed to move PID %d (in %q) to %q", pid, cont, manager.Cgroups.Name))
errs = append(errs, fmt.Errorf("failed to move PID %d (in %q) to %q: %v", pid, cont, manager.Cgroups.Name, err))
}
}
// Also apply oom-score-adj to processes
oomAdjuster := oom.NewOOMAdjuster()
if err := oomAdjuster.ApplyOOMScoreAdj(pid, oomScoreAdj); err != nil {
errs = append(errs, fmt.Errorf("failed to apply oom score %d to PID %d", oomScoreAdj, pid))
glog.V(3).Infof("Failed to apply oom_score_adj %d for pid %d: %v", oomScoreAdj, pid, err)
errs = append(errs, fmt.Errorf("failed to apply oom score %d to PID %d: %v", oomScoreAdj, pid, err))
}
return utilerrors.NewAggregate(errs)
}