/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package resource

import (
	"fmt"
	"math"
	"strconv"
	"strings"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"

	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)

// PodResourcesOptions controls the behavior of PodRequests and PodLimits.
type PodResourcesOptions struct {
	// Reuse, if provided, will be reused to accumulate resources and returned by the PodRequests or PodLimits
	// functions. All existing values in Reuse will be lost.
	Reuse v1.ResourceList
	// InPlacePodVerticalScalingEnabled indicates that the in-place pod vertical scaling feature gate is enabled.
	InPlacePodVerticalScalingEnabled bool
	// ExcludeOverhead controls whether pod overhead is excluded from the calculation.
	ExcludeOverhead bool
	// ContainerFn is called with the effective resources required for each container within the pod.
	ContainerFn func(res v1.ResourceList, containerType podutil.ContainerType)
	// NonMissingContainerRequests, if provided, replaces any missing container-level requests for the specified
	// resources with the given values. Requests that are explicitly set, even to zero, are not modified.
	NonMissingContainerRequests v1.ResourceList
}

// PodRequests computes the pod requests per the PodResourcesOptions supplied. With a zero-value
// PodResourcesOptions, the returned requests include pod overhead. The computation is part of the
// API and must be reviewed as an API change.
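//
// A minimal usage sketch (pod is assumed to be a populated *v1.Pod; the zero-value
// options sum container requests and add overhead):
//
//	reqs := PodRequests(pod, PodResourcesOptions{})
//	cpuMilli := reqs.Cpu().MilliValue() // aggregate CPU request in millicores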
func PodRequests(pod *v1.Pod, opts PodResourcesOptions) v1.ResourceList {
	// attempt to reuse the maps if passed, or allocate otherwise
	reqs := reuseOrClearResourceList(opts.Reuse)

	var containerStatuses map[string]*v1.ContainerStatus
	if opts.InPlacePodVerticalScalingEnabled {
		// Index container statuses by name so allocated resources can be looked up per container.
		containerStatuses = map[string]*v1.ContainerStatus{}
		for i := range pod.Status.ContainerStatuses {
			containerStatuses[pod.Status.ContainerStatuses[i].Name] = &pod.Status.ContainerStatuses[i]
		}
	}

	for _, container := range pod.Spec.Containers {
		containerReqs := container.Resources.Requests
		if opts.InPlacePodVerticalScalingEnabled {
			cs, found := containerStatuses[container.Name]
			if found {
				if pod.Status.Resize == v1.PodResizeStatusInfeasible {
					// The requested resize cannot be satisfied, so only the allocated resources count.
					containerReqs = cs.ResourcesAllocated
				} else {
					// A resize may be in progress: reserve the larger of desired and allocated.
					containerReqs = max(container.Resources.Requests, cs.ResourcesAllocated)
				}
			}
		}

		if len(opts.NonMissingContainerRequests) > 0 {
			containerReqs = applyNonMissing(containerReqs, opts.NonMissingContainerRequests)
		}

		if opts.ContainerFn != nil {
			opts.ContainerFn(containerReqs, podutil.Containers)
		}

		addResourceList(reqs, containerReqs)
	}

	// Init containers run sequentially, so the largest init container request for each
	// resource acts as a floor on the pod's effective request.
	// Note: In-place resize is not allowed for InitContainers, so no need to check for ResizeStatus value
	for _, container := range pod.Spec.InitContainers {
		containerReqs := container.Resources.Requests
		if len(opts.NonMissingContainerRequests) > 0 {
			containerReqs = applyNonMissing(containerReqs, opts.NonMissingContainerRequests)
		}

		if opts.ContainerFn != nil {
			opts.ContainerFn(containerReqs, podutil.InitContainers)
		}
		maxResourceList(reqs, containerReqs)
	}

	// Add overhead for running a pod to the sum of requests if requested:
	if !opts.ExcludeOverhead && pod.Spec.Overhead != nil {
		addResourceList(reqs, pod.Spec.Overhead)
	}

	return reqs
}

// applyNonMissing returns a copy of reqs with any missing values replaced by the
// corresponding nonMissing values. Explicitly set values, even zero, are preserved.
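//
// For example (values hypothetical): with reqs {cpu: 500m} and
// nonMissing {cpu: 2, memory: 1Gi}, the result is {cpu: 500m, memory: 1Gi}.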
func applyNonMissing(reqs v1.ResourceList, nonMissing v1.ResourceList) v1.ResourceList {
	cp := v1.ResourceList{}
	for k, v := range reqs {
		cp[k] = v.DeepCopy()
	}

	for k, v := range nonMissing {
		if _, found := reqs[k]; !found {
			// cp[k] is the zero Quantity here, so Add effectively copies v in.
			rk := cp[k]
			rk.Add(v)
			cp[k] = rk
		}
	}
	return cp
}

// PodLimits computes the pod limits per the PodResourcesOptions supplied. With a zero-value
// PodResourcesOptions, the returned limits include pod overhead for any non-zero limits.
// The computation is part of the API and must be reviewed as an API change.
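//
// A minimal usage sketch (pod is assumed to be a populated *v1.Pod):
//
//	limits := PodLimits(pod, PodResourcesOptions{})
//	memBytes := limits.Memory().Value() // aggregate memory limit in bytes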
func PodLimits(pod *v1.Pod, opts PodResourcesOptions) v1.ResourceList {
	// attempt to reuse the maps if passed, or allocate otherwise
	limits := reuseOrClearResourceList(opts.Reuse)

	for _, container := range pod.Spec.Containers {
		if opts.ContainerFn != nil {
			opts.ContainerFn(container.Resources.Limits, podutil.Containers)
		}
		addResourceList(limits, container.Resources.Limits)
	}
	// Init containers run sequentially, so the largest init container limit for each
	// resource acts as a floor on the pod's effective limit.
	for _, container := range pod.Spec.InitContainers {
		if opts.ContainerFn != nil {
			opts.ContainerFn(container.Resources.Limits, podutil.InitContainers)
		}
		maxResourceList(limits, container.Resources.Limits)
	}

	// Add overhead to non-zero limits if requested:
	if !opts.ExcludeOverhead && pod.Spec.Overhead != nil {
		for name, quantity := range pod.Spec.Overhead {
			if value, ok := limits[name]; ok && !value.IsZero() {
				value.Add(quantity)
				limits[name] = value
			}
		}
	}

	return limits
}

// addResourceList adds the resources in newList to list.
func addResourceList(list, newList v1.ResourceList) {
	for name, quantity := range newList {
		if value, ok := list[name]; !ok {
			list[name] = quantity.DeepCopy()
		} else {
			value.Add(quantity)
			list[name] = value
		}
	}
}

// maxResourceList sets list to the greater of list/newList for every resource in newList.
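// For example (values hypothetical), with list {cpu: 1} and newList {cpu: 2, memory: 1Gi},
// list becomes {cpu: 2, memory: 1Gi}.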
func maxResourceList(list, newList v1.ResourceList) {
	for name, quantity := range newList {
		if value, ok := list[name]; !ok || quantity.Cmp(value) > 0 {
			list[name] = quantity.DeepCopy()
		}
	}
}

// max returns the result of max(a, b) for each named resource and is only used if we can't
// accumulate into an existing resource list.
func max(a v1.ResourceList, b v1.ResourceList) v1.ResourceList {
	result := v1.ResourceList{}
	for key, value := range a {
		if other, found := b[key]; found {
			if value.Cmp(other) <= 0 {
				result[key] = other.DeepCopy()
				continue
			}
		}
		result[key] = value.DeepCopy()
	}
	for key, value := range b {
		if _, found := result[key]; !found {
			result[key] = value.DeepCopy()
		}
	}
	return result
}

// reuseOrClearResourceList is a helper for avoiding excessive allocations of
// resource lists within the inner loop of resource calculations.
func reuseOrClearResourceList(reuse v1.ResourceList) v1.ResourceList {
	if reuse == nil {
		return make(v1.ResourceList, 4)
	}
	for k := range reuse {
		delete(reuse, k)
	}
	return reuse
}

// GetResourceRequestQuantity finds and returns the request quantity for a specific resource.
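//
// The result is the larger of the summed container requests and the largest init container
// request, plus pod overhead when the total is non-zero. Sketch (value hypothetical):
//
//	cpu := GetResourceRequestQuantity(pod, v1.ResourceCPU)
//	fmt.Println(cpu.String()) // e.g. "500m"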
func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resource.Quantity {
	requestQuantity := resource.Quantity{}

	// Pick the canonical format for the resource: memory-like resources use binary
	// suffixes (Ki, Mi, ...), everything else decimal.
	switch resourceName {
	case v1.ResourceCPU:
		requestQuantity = resource.Quantity{Format: resource.DecimalSI}
	case v1.ResourceMemory, v1.ResourceStorage, v1.ResourceEphemeralStorage:
		requestQuantity = resource.Quantity{Format: resource.BinarySI}
	default:
		requestQuantity = resource.Quantity{Format: resource.DecimalSI}
	}

	for _, container := range pod.Spec.Containers {
		if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
			requestQuantity.Add(rQuantity)
		}
	}

	for _, container := range pod.Spec.InitContainers {
		if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
			if requestQuantity.Cmp(rQuantity) < 0 {
				requestQuantity = rQuantity.DeepCopy()
			}
		}
	}

	// Add overhead for running a pod
	// to the total requests if the resource total is non-zero
	if pod.Spec.Overhead != nil {
		if podOverhead, ok := pod.Spec.Overhead[resourceName]; ok && !requestQuantity.IsZero() {
			requestQuantity.Add(podOverhead)
		}
	}

	return requestQuantity
}

// GetResourceRequest finds and returns the request value for a specific resource.
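//
// CPU is reported in millicores and every other resource in its base unit, e.g.
// (values hypothetical):
//
//	milliCPU := GetResourceRequest(pod, v1.ResourceCPU)    // 500 for a 500m request
//	memBytes := GetResourceRequest(pod, v1.ResourceMemory) // bytes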
func GetResourceRequest(pod *v1.Pod, resource v1.ResourceName) int64 {
	// Every pod counts as exactly one against the "pods" resource.
	if resource == v1.ResourcePods {
		return 1
	}

	requestQuantity := GetResourceRequestQuantity(pod, resource)

	if resource == v1.ResourceCPU {
		return requestQuantity.MilliValue()
	}

	return requestQuantity.Value()
}

// ExtractResourceValueByContainerName extracts the value of a resource
// by providing container name.
func ExtractResourceValueByContainerName(fs *v1.ResourceFieldSelector, pod *v1.Pod, containerName string) (string, error) {
	container, err := findContainerInPod(pod, containerName)
	if err != nil {
		return "", err
	}
	return ExtractContainerResourceValue(fs, container)
}

// ExtractResourceValueByContainerNameAndNodeAllocatable extracts the value of a resource
// by providing container name and node allocatable.
func ExtractResourceValueByContainerNameAndNodeAllocatable(fs *v1.ResourceFieldSelector, pod *v1.Pod, containerName string, nodeAllocatable v1.ResourceList) (string, error) {
	realContainer, err := findContainerInPod(pod, containerName)
	if err != nil {
		return "", err
	}

	// Work on a copy so the merged limits don't mutate the pod spec.
	container := realContainer.DeepCopy()

	MergeContainerResourceLimits(container, nodeAllocatable)

	return ExtractContainerResourceValue(fs, container)
}

// ExtractContainerResourceValue extracts the value of a resource
// in an already known container.
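//
// A hypothetical downward-API style lookup, reporting the CPU request in millicores
// (container is assumed to be a populated *v1.Container):
//
//	fs := &v1.ResourceFieldSelector{Resource: "requests.cpu", Divisor: resource.MustParse("1m")}
//	val, err := ExtractContainerResourceValue(fs, container) // "500" for a 500m request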
func ExtractContainerResourceValue(fs *v1.ResourceFieldSelector, container *v1.Container) (string, error) {
	// An unset divisor (zero Quantity) defaults to "1".
	divisor := resource.Quantity{}
	if divisor.Cmp(fs.Divisor) == 0 {
		divisor = resource.MustParse("1")
	} else {
		divisor = fs.Divisor
	}

	switch fs.Resource {
	case "limits.cpu":
		return convertResourceCPUToString(container.Resources.Limits.Cpu(), divisor)
	case "limits.memory":
		return convertResourceMemoryToString(container.Resources.Limits.Memory(), divisor)
	case "limits.ephemeral-storage":
		return convertResourceEphemeralStorageToString(container.Resources.Limits.StorageEphemeral(), divisor)
	case "requests.cpu":
		return convertResourceCPUToString(container.Resources.Requests.Cpu(), divisor)
	case "requests.memory":
		return convertResourceMemoryToString(container.Resources.Requests.Memory(), divisor)
	case "requests.ephemeral-storage":
		return convertResourceEphemeralStorageToString(container.Resources.Requests.StorageEphemeral(), divisor)
	}
	// handle extended standard resources with dynamic names
	// example: requests.hugepages-<pageSize> or limits.hugepages-<pageSize>
	if strings.HasPrefix(fs.Resource, "requests.") {
		resourceName := v1.ResourceName(strings.TrimPrefix(fs.Resource, "requests."))
		if IsHugePageResourceName(resourceName) {
			return convertResourceHugePagesToString(container.Resources.Requests.Name(resourceName, resource.BinarySI), divisor)
		}
	}
	if strings.HasPrefix(fs.Resource, "limits.") {
		resourceName := v1.ResourceName(strings.TrimPrefix(fs.Resource, "limits."))
		if IsHugePageResourceName(resourceName) {
			return convertResourceHugePagesToString(container.Resources.Limits.Name(resourceName, resource.BinarySI), divisor)
		}
	}
	return "", fmt.Errorf("unsupported container resource: %v", fs.Resource)
}

// convertResourceCPUToString converts cpu value to the format of divisor and returns
// ceiling of the value.
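// For example (values hypothetical), a 2500m CPU quantity with divisor "1" yields
// ceil(2.5) = "3", and with divisor "1m" yields "2500".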
func convertResourceCPUToString(cpu *resource.Quantity, divisor resource.Quantity) (string, error) {
	c := int64(math.Ceil(float64(cpu.MilliValue()) / float64(divisor.MilliValue())))
	return strconv.FormatInt(c, 10), nil
}

// convertResourceMemoryToString converts memory value to the format of divisor and returns
// ceiling of the value.
func convertResourceMemoryToString(memory *resource.Quantity, divisor resource.Quantity) (string, error) {
	m := int64(math.Ceil(float64(memory.Value()) / float64(divisor.Value())))
	return strconv.FormatInt(m, 10), nil
}

// convertResourceHugePagesToString converts hugepages value to the format of divisor and returns
// ceiling of the value.
func convertResourceHugePagesToString(hugePages *resource.Quantity, divisor resource.Quantity) (string, error) {
	m := int64(math.Ceil(float64(hugePages.Value()) / float64(divisor.Value())))
	return strconv.FormatInt(m, 10), nil
}

// convertResourceEphemeralStorageToString converts ephemeral storage value to the format of divisor and returns
// ceiling of the value.
func convertResourceEphemeralStorageToString(ephemeralStorage *resource.Quantity, divisor resource.Quantity) (string, error) {
	m := int64(math.Ceil(float64(ephemeralStorage.Value()) / float64(divisor.Value())))
	return strconv.FormatInt(m, 10), nil
}

// findContainerInPod finds a container by its name in the provided pod.
func findContainerInPod(pod *v1.Pod, containerName string) (*v1.Container, error) {
	for _, container := range pod.Spec.Containers {
		if container.Name == containerName {
			// Returning the loop variable's address is safe: we return before the
			// variable is reused by the next iteration.
			return &container, nil
		}
	}
	for _, container := range pod.Spec.InitContainers {
		if container.Name == containerName {
			return &container, nil
		}
	}
	return nil, fmt.Errorf("container %s not found", containerName)
}

// MergeContainerResourceLimits checks if a limit is applied for
// the container, and if not, it sets the limit to the passed resource list.
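//
// Sketch (assumes pod is a *v1.Pod and node is a *v1.Node; the copy avoids
// mutating the pod spec):
//
//	c := pod.Spec.Containers[0].DeepCopy()
//	MergeContainerResourceLimits(c, node.Status.Allocatable)
//	// c's cpu/memory/ephemeral-storage limits now default to node allocatable.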
func MergeContainerResourceLimits(container *v1.Container,
	allocatable v1.ResourceList) {
	if container.Resources.Limits == nil {
		container.Resources.Limits = make(v1.ResourceList)
	}
	// NOTE: we exclude hugepages-* resources because hugepages are never overcommitted.
	// This means that the container always has a limit specified.
	for _, resource := range []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourceEphemeralStorage} {
		if quantity, exists := container.Resources.Limits[resource]; !exists || quantity.IsZero() {
			if cap, exists := allocatable[resource]; exists {
				container.Resources.Limits[resource] = cap.DeepCopy()
			}
		}
	}
}

// IsHugePageResourceName returns true if the resource name has the huge page
// resource prefix.
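//
// For example, IsHugePageResourceName("hugepages-2Mi") is true and
// IsHugePageResourceName(v1.ResourceCPU) is false.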
func IsHugePageResourceName(name v1.ResourceName) bool {
	return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix)
}