kubernetes/pkg/kubelet/qos/policy.go
Kubernetes Submit Queue 3f9f7471af Merge pull request #38989 from sjenning/set-qos-field
Automatic merge from submit-queue (batch tested with PRs 39684, 39577, 38989, 39534, 39702)

Set PodStatus QOSClass field

This PR continues the work for https://github.com/kubernetes/kubernetes/pull/37968

It converts all local usage of the `qos` package class types to the new API-level types (first commit) and sets the pod status QOSClass field at pod creation time on the API server in `PrepareForCreate` and in the kubelet in the pod status update path (second commit). This way the pod QOS class is set even if the pod isn't scheduled yet.

Fixes #33255

@ConnorDoyle @derekwaynecarr @vishh
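
For reference, the essence of the status change can be sketched as follows. This is a minimal, hypothetical illustration, not the actual code added by the PR; the package name and the `setQOSClass` helper are made up, while `qos.GetPodQOS` and the `PodStatus.QOSClass` field are the pieces the PR description refers to:

package kubelet

import (
	"k8s.io/kubernetes/pkg/api/v1"
	"k8s.io/kubernetes/pkg/kubelet/qos"
)

// setQOSClass (hypothetical helper) classifies the pod from its spec and
// records the result on the status, so the QOSClass field is populated even
// if the pod has not been scheduled yet.
func setQOSClass(pod *v1.Pod, status *v1.PodStatus) {
	status.QOSClass = qos.GetPodQOS(pod)
}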
2017-01-10 22:24:13 -08:00


/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package qos

import (
	"k8s.io/kubernetes/pkg/api/v1"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
const (
	// PodInfraOOMAdj is very docker specific. For arbitrary runtime, it may not make
	// sense to set sandbox level oom score, e.g. a sandbox could only be a namespace
	// without a process.
	// TODO: Handle infra container oom score adj in a runtime agnostic way.
	// TODO: Should handle critical pod oom score adj with a proper preemption priority.
	// This is the workaround for https://github.com/kubernetes/kubernetes/issues/38322.
	PodInfraOOMAdj        int = -998
	CriticalPodOOMAdj     int = -998
	KubeletOOMScoreAdj    int = -999
	DockerOOMScoreAdj     int = -999
	KubeProxyOOMScoreAdj  int = -999
	guaranteedOOMScoreAdj int = -998
	besteffortOOMScoreAdj int = 1000
)
// GetContainerOOMScoreAdjust returns the amount by which the OOM score of all processes in the
// container should be adjusted.
// The OOM score of a process is the percentage of memory it consumes
// multiplied by 10 (barring exceptional cases) + a configurable quantity which is between -1000
// and 1000. Containers with higher OOM scores are killed if the system runs out of memory.
// See https://lwn.net/Articles/391222/ for more information.
func GetContainerOOMScoreAdjust(pod *v1.Pod, container *v1.Container, memoryCapacity int64) int {
	if kubetypes.IsCriticalPod(pod) {
		return CriticalPodOOMAdj
	}

	switch GetPodQOS(pod) {
	case v1.PodQOSGuaranteed:
		// Guaranteed containers should be the last to get killed.
		return guaranteedOOMScoreAdj
	case v1.PodQOSBestEffort:
		return besteffortOOMScoreAdj
	}

	// Burstable containers are a middle tier, between Guaranteed and Best-Effort. Ideally,
	// we want to protect Burstable containers that consume less memory than requested.
	// The formula below is a heuristic. A container requesting 10% of a system's
	// memory will have an OOM score adjust of 900. If a process in container Y
	// uses over 10% of memory, its OOM score will be 1000. The idea is that containers
	// which use more than their request will have an OOM score of 1000 and will be prime
	// targets for OOM kills.
	// Note that this is a heuristic; it won't work if a container has many small processes.
	memoryRequest := container.Resources.Requests.Memory().Value()
	oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity
	// A guaranteed pod using 100% of memory can have an OOM score of 10. Ensure
	// that burstable pods have a higher OOM score adjustment.
	if int(oomScoreAdjust) < (1000 + guaranteedOOMScoreAdj) {
		return (1000 + guaranteedOOMScoreAdj)
	}
	// Give burstable pods a higher chance of survival over besteffort pods.
	if int(oomScoreAdjust) == besteffortOOMScoreAdj {
		return int(oomScoreAdjust - 1)
	}
	return int(oomScoreAdjust)
}
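
// The burstable heuristic above can be sanity-checked with concrete numbers.
// exampleBurstableOOMScoreAdjust is an illustrative sketch and NOT part of the
// original file: it assumes a hypothetical 10Gi node, where a container
// requesting 1Gi (10% of capacity) computes 1000 - (1000*1Gi)/10Gi = 900, and a
// container requesting nothing computes 1000, which is then nudged to 999 so it
// is still slightly more protected than a best-effort container.
func exampleBurstableOOMScoreAdjust() []int {
	const gi int64 = 1024 * 1024 * 1024
	capacity := 10 * gi
	requests := []int64{1 * gi, 5 * gi, 0}

	adjusts := make([]int, 0, len(requests))
	for _, req := range requests {
		adjust := 1000 - (1000*req)/capacity
		// Same clamping as GetContainerOOMScoreAdjust: keep burstable pods
		// above guaranteed pods and just below best-effort pods.
		if int(adjust) < (1000 + guaranteedOOMScoreAdj) {
			adjust = int64(1000 + guaranteedOOMScoreAdj)
		}
		if int(adjust) == besteffortOOMScoreAdj {
			adjust--
		}
		adjusts = append(adjusts, int(adjust)) // yields 900, 500, 999
	}
	return adjusts
}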