From 8649628c6cc657d07fbebb9949c38a27ef81a7d6 Mon Sep 17 00:00:00 2001
From: Victor Marmol
Date: Thu, 19 Feb 2015 19:17:44 -0800
Subject: [PATCH] Make POD container last OOM victim.

Setting the oom_score_adj of the PID of the POD container to -100, which
is less than the default of 0. This ensures that this PID is the last
OOM victim chosen by the kernel.

Fixes #3067.
---
 pkg/kubelet/kubelet.go      | 19 ++++++++++++++++++-
 pkg/kubelet/kubelet_test.go | 14 +++++++-------
 pkg/kubelet/runonce_test.go | 18 ++++++++++++++++--
 3 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 80184d02e72..2e40384b4b9 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -56,6 +56,10 @@ const minShares = 2
 const sharesPerCPU = 1024
 const milliCPUToCPU = 1000
 
+// The oom_score_adj of the POD infrastructure container. The default is 0, so
+// any value below that makes it *less* likely to get OOM killed.
+const podOomScoreAdj = -100
+
 // SyncHandler is an interface implemented by Kubelet, for testability
 type SyncHandler interface {
 	SyncPods([]api.BoundPod) error
@@ -938,7 +942,20 @@ func (kl *Kubelet) createPodInfraContainer(pod *api.BoundPod) (dockertools.Docke
 	if ref != nil {
 		record.Eventf(ref, "pulled", "Successfully pulled image %q", container.Image)
 	}
-	return kl.runContainer(pod, container, nil, "", "")
+	id, err := kl.runContainer(pod, container, nil, "", "")
+	if err != nil {
+		return "", err
+	}
+
+	// Set OOM score of POD container to lower than those of the other
+	// containers in the pod. This ensures that it is killed only as a last
+	// resort.
+	containerInfo, err := kl.dockerClient.InspectContainer(string(id))
+	if err != nil {
+		return "", err
+	}
+
+	return id, util.ApplyOomScoreAdj(containerInfo.State.Pid, podOomScoreAdj)
 }
 
 func (kl *Kubelet) pullImage(img string, ref *api.ObjectReference) error {
diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go
index 7c0a2c92cb5..daafb6bcbdf 100644
--- a/pkg/kubelet/kubelet_test.go
+++ b/pkg/kubelet/kubelet_test.go
@@ -437,7 +437,7 @@ func TestSyncPodsWithTerminationLog(t *testing.T) {
 	}
 	kubelet.drainWorkers()
 	verifyCalls(t, fakeDocker, []string{
-		"list", "create", "start", "list", "inspect_container", "inspect_image", "list", "create", "start"})
+		"list", "create", "start", "inspect_container", "list", "inspect_container", "inspect_image", "list", "create", "start"})
 
 	fakeDocker.Lock()
 	parts := strings.Split(fakeDocker.Container.HostConfig.Binds[0], ":")
@@ -497,7 +497,7 @@ func TestSyncPodsCreatesNetAndContainer(t *testing.T) {
 	kubelet.drainWorkers()
 
 	verifyCalls(t, fakeDocker, []string{
-		"list", "create", "start", "list", "inspect_container", "inspect_image", "list", "create", "start"})
+		"list", "create", "start", "inspect_container", "list", "inspect_container", "inspect_image", "list", "create", "start"})
 
 	fakeDocker.Lock()
 
@@ -547,7 +547,7 @@ func TestSyncPodsCreatesNetAndContainerPullsImage(t *testing.T) {
 	kubelet.drainWorkers()
 
 	verifyCalls(t, fakeDocker, []string{
-		"list", "create", "start", "list", "inspect_container", "inspect_image", "list", "create", "start"})
+		"list", "create", "start", "inspect_container", "list", "inspect_container", "inspect_image", "list", "create", "start"})
 
 	fakeDocker.Lock()
 
@@ -563,7 +563,7 @@ func TestSyncPodsCreatesNetAndContainerPullsImage(t *testing.T) {
 	fakeDocker.Unlock()
 }
 
-func TestSyncPodsWithNetCreatesContainer(t *testing.T) {
+func TestSyncPodsWithPodInfraCreatesContainer(t *testing.T) {
 	kubelet, fakeDocker := newTestKubelet(t)
 	fakeDocker.ContainerList = []docker.APIContainers{
 		{
@@ -604,7 +604,7 @@ func TestSyncPodsWithNetCreatesContainer(t *testing.T) {
 	fakeDocker.Unlock()
 }
 
-func TestSyncPodsWithNetCreatesContainerCallsHandler(t *testing.T) {
+func TestSyncPodsWithPodInfraCreatesContainerCallsHandler(t *testing.T) {
 	kubelet, fakeDocker := newTestKubelet(t)
 	fakeHttp := fakeHTTP{}
 	kubelet.httpClient = &fakeHttp
@@ -661,7 +661,7 @@ func TestSyncPodsWithNetCreatesContainerCallsHandler(t *testing.T) {
 	}
 }
 
-func TestSyncPodsDeletesWithNoNetContainer(t *testing.T) {
+func TestSyncPodsDeletesWithNoPodInfraContainer(t *testing.T) {
 	kubelet, fakeDocker := newTestKubelet(t)
 	fakeDocker.ContainerList = []docker.APIContainers{
 		{
@@ -692,7 +692,7 @@ func TestSyncPodsDeletesWithNoNetContainer(t *testing.T) {
 	kubelet.drainWorkers()
 
 	verifyCalls(t, fakeDocker, []string{
-		"list", "stop", "create", "start", "list", "list", "inspect_container", "inspect_image", "list", "create", "start"})
+		"list", "stop", "create", "start", "inspect_container", "list", "list", "inspect_container", "inspect_image", "list", "create", "start"})
 
 	// A map iteration is used to delete containers, so must not depend on
 	// order here.
diff --git a/pkg/kubelet/runonce_test.go b/pkg/kubelet/runonce_test.go
index 8a1f9af9b75..3fde0209a25 100644
--- a/pkg/kubelet/runonce_test.go
+++ b/pkg/kubelet/runonce_test.go
@@ -97,14 +97,28 @@ func TestRunOnce(t *testing.T) {
 			label: "syncPod",
 			container: docker.Container{
 				Config: &docker.Config{Image: "someimage"},
-				State: docker.State{Running: true},
+				State: docker.State{Running: true, Pid: 42},
 			},
 		},
 		{
 			label: "syncPod",
 			container: docker.Container{
 				Config: &docker.Config{Image: "someimage"},
-				State: docker.State{Running: true},
+				State: docker.State{Running: true, Pid: 42},
+			},
+		},
+		{
+			label: "syncPod",
+			container: docker.Container{
+				Config: &docker.Config{Image: "someimage"},
+				State: docker.State{Running: true, Pid: 42},
+			},
+		},
+		{
+			label: "syncPod",
+			container: docker.Container{
+				Config: &docker.Config{Image: "someimage"},
+				State: docker.State{Running: true, Pid: 42},
 			},
 		},
 	},
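
Note: util.ApplyOomScoreAdj is called in the kubelet.go hunk above, but its
implementation is not part of this diff. On Linux, a process's OOM priority
is adjusted by writing an integer in [-1000, 1000] to
/proc/<pid>/oom_score_adj; the lower the value, the less likely the kernel
is to pick that process as an OOM victim. Below is a minimal, hypothetical
sketch of such a helper (the name applyOomScoreAdj, the bounds check, and
the error text are assumptions, not the actual pkg/util code), driven with
the PID 42 that the new runonce_test.go fixtures use:

	package main

	import (
		"fmt"
		"os"
		"path/filepath"
		"strconv"
	)

	// The oom_score_adj applied to the POD infrastructure container in the
	// patch above: below the default of 0, so it is OOM killed last.
	const podOomScoreAdj = -100

	// applyOomScoreAdj writes value to /proc/<pid>/oom_score_adj. It is a
	// hypothetical sketch of util.ApplyOomScoreAdj, which this diff does
	// not show.
	func applyOomScoreAdj(pid int, value int) error {
		if value < -1000 || value > 1000 {
			return fmt.Errorf("oom_score_adj %d out of range [-1000, 1000]", value)
		}
		path := filepath.Join("/proc", strconv.Itoa(pid), "oom_score_adj")
		return os.WriteFile(path, []byte(strconv.Itoa(value)), 0644)
	}

	func main() {
		// Mirror the patch's behavior for a POD container whose PID is 42.
		if err := applyOomScoreAdj(42, podOomScoreAdj); err != nil {
			fmt.Fprintln(os.Stderr, err)
		}
	}

Writing to another process's oom_score_adj requires appropriate privileges;
the kubelet typically runs as root, so it can adjust the score of the PID it
reads back from InspectContainer.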