/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package resource

import (
	"fmt"
	"math"
	"strconv"
	"strings"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"

	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)

// PodResourcesOptions controls the behavior of PodRequests and PodLimits.
type PodResourcesOptions struct {
	// Reuse, if provided, will be used to accumulate resources and will be returned by the
	// PodRequests or PodLimits functions. All existing values in Reuse are lost.
	Reuse v1.ResourceList
	// InPlacePodVerticalScalingEnabled indicates that the in-place pod vertical scaling feature gate is enabled.
	InPlacePodVerticalScalingEnabled bool
	// ExcludeOverhead controls whether pod overhead is excluded from the calculation.
	ExcludeOverhead bool
	// ContainerFn is called with the effective resources required for each container within the pod.
	ContainerFn func(res v1.ResourceList, containerType podutil.ContainerType)
	// NonMissingContainerRequests, if provided, replaces any missing container-level requests
	// for the specified resources with the given values. Requests that are explicitly set,
	// even to zero, are not modified.
	NonMissingContainerRequests v1.ResourceList
}
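// examplePodRequestsReuse is a hypothetical usage sketch (not part of the upstream
// API surface; the function and the "scratch" name are illustrative). Callers on hot
// paths can pass a scratch list via Reuse so repeated PodRequests calls do not
// allocate a fresh map each time; ExcludeOverhead computes container-only requests.
func examplePodRequestsReuse(pods []*v1.Pod) {
	scratch := make(v1.ResourceList, 4)
	for _, pod := range pods {
		reqs := PodRequests(pod, PodResourcesOptions{
			Reuse:           scratch,
			ExcludeOverhead: true,
		})
		// reqs aliases scratch; copy anything that must outlive the next iteration.
		fmt.Println(pod.Name, reqs.Cpu().String())
	}
}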
// PodRequests computes the pod requests per the PodResourcesOptions supplied. With a
// zero-valued PodResourcesOptions, the requests are returned including pod overhead.
// The computation is part of the API and must be reviewed as an API change.
func PodRequests(pod *v1.Pod, opts PodResourcesOptions) v1.ResourceList {
	// Attempt to reuse the map if passed, or allocate otherwise.
	reqs := reuseOrClearResourceList(opts.Reuse)

	var containerStatuses map[string]*v1.ContainerStatus
	if opts.InPlacePodVerticalScalingEnabled {
		containerStatuses = map[string]*v1.ContainerStatus{}
		for i := range pod.Status.ContainerStatuses {
			containerStatuses[pod.Status.ContainerStatuses[i].Name] = &pod.Status.ContainerStatuses[i]
		}
	}

	for _, container := range pod.Spec.Containers {
		containerReqs := container.Resources.Requests
		if opts.InPlacePodVerticalScalingEnabled {
			cs, found := containerStatuses[container.Name]
			if found {
				if pod.Status.Resize == v1.PodResizeStatusInfeasible {
					containerReqs = cs.ResourcesAllocated
				} else {
					containerReqs = max(container.Resources.Requests, cs.ResourcesAllocated)
				}
			}
		}

		if len(opts.NonMissingContainerRequests) > 0 {
			containerReqs = applyNonMissing(containerReqs, opts.NonMissingContainerRequests)
		}

		if opts.ContainerFn != nil {
			opts.ContainerFn(containerReqs, podutil.Containers)
		}

		addResourceList(reqs, containerReqs)
	}

	// Init containers define the minimum of any resource.
	// Note: in-place resize is not allowed for init containers, so there is no need
	// to check the resize status here.
	for _, container := range pod.Spec.InitContainers {
		containerReqs := container.Resources.Requests
		if len(opts.NonMissingContainerRequests) > 0 {
			containerReqs = applyNonMissing(containerReqs, opts.NonMissingContainerRequests)
		}

		if opts.ContainerFn != nil {
			opts.ContainerFn(containerReqs, podutil.InitContainers)
		}
		maxResourceList(reqs, containerReqs)
	}

	// Add overhead for running a pod to the sum of requests if requested:
	if !opts.ExcludeOverhead && pod.Spec.Overhead != nil {
		addResourceList(reqs, pod.Spec.Overhead)
	}

	return reqs
}

// applyNonMissing returns a copy of the given resource list with any missing values
// replaced by the nonMissing values.
func applyNonMissing(reqs v1.ResourceList, nonMissing v1.ResourceList) v1.ResourceList {
	cp := v1.ResourceList{}
	for k, v := range reqs {
		cp[k] = v.DeepCopy()
	}

	for k, v := range nonMissing {
		if _, found := reqs[k]; !found {
			rk := cp[k]
			rk.Add(v)
			cp[k] = rk
		}
	}

	return cp
}

// PodLimits computes the pod limits per the PodResourcesOptions supplied. With a
// zero-valued PodResourcesOptions, the limits are returned including pod overhead for
// any non-zero limits. The computation is part of the API and must be reviewed as an
// API change.
func PodLimits(pod *v1.Pod, opts PodResourcesOptions) v1.ResourceList {
	// Attempt to reuse the map if passed, or allocate otherwise.
	limits := reuseOrClearResourceList(opts.Reuse)

	for _, container := range pod.Spec.Containers {
		if opts.ContainerFn != nil {
			opts.ContainerFn(container.Resources.Limits, podutil.Containers)
		}
		addResourceList(limits, container.Resources.Limits)
	}

	// Init containers define the minimum of any resource.
	for _, container := range pod.Spec.InitContainers {
		if opts.ContainerFn != nil {
			opts.ContainerFn(container.Resources.Limits, podutil.InitContainers)
		}
		maxResourceList(limits, container.Resources.Limits)
	}

	// Add overhead to non-zero limits if requested:
	if !opts.ExcludeOverhead && pod.Spec.Overhead != nil {
		for name, quantity := range pod.Spec.Overhead {
			if value, ok := limits[name]; ok && !value.IsZero() {
				value.Add(quantity)
				limits[name] = value
			}
		}
	}

	return limits
}
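// exampleEffectiveRequests is a worked example on a hypothetical pod (the function
// is illustrative, not part of the upstream file), showing the max-of-init-containers
// rule: two app containers requesting 100m and 200m CPU sum to 300m, but a single
// init container requesting 500m raises the effective pod request to 500m, because
// init containers run to completion before the app containers start.
func exampleEffectiveRequests() {
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			InitContainers: []v1.Container{{
				Name: "init",
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m")},
				},
			}},
			Containers: []v1.Container{
				{Name: "a", Resources: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")}}},
				{Name: "b", Resources: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m")}}},
			},
		},
	}
	reqs := PodRequests(pod, PodResourcesOptions{})
	fmt.Println(reqs.Cpu().String()) // prints "500m", not "300m"
}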
// addResourceList adds the resources in newList to list.
func addResourceList(list, newList v1.ResourceList) {
	for name, quantity := range newList {
		if value, ok := list[name]; !ok {
			list[name] = quantity.DeepCopy()
		} else {
			value.Add(quantity)
			list[name] = value
		}
	}
}

// maxResourceList sets list to the greater of list/newList for every resource in newList.
func maxResourceList(list, newList v1.ResourceList) {
	for name, quantity := range newList {
		if value, ok := list[name]; !ok || quantity.Cmp(value) > 0 {
			list[name] = quantity.DeepCopy()
		}
	}
}

// max returns the result of max(a, b) for each named resource and is only used if we
// can't accumulate into an existing resource list.
func max(a v1.ResourceList, b v1.ResourceList) v1.ResourceList {
	result := v1.ResourceList{}
	for key, value := range a {
		if other, found := b[key]; found {
			if value.Cmp(other) <= 0 {
				result[key] = other.DeepCopy()
				continue
			}
		}
		result[key] = value.DeepCopy()
	}
	for key, value := range b {
		if _, found := result[key]; !found {
			result[key] = value.DeepCopy()
		}
	}
	return result
}

// reuseOrClearResourceList is a helper for avoiding excessive allocations of
// resource lists within the inner loop of resource calculations.
func reuseOrClearResourceList(reuse v1.ResourceList) v1.ResourceList {
	if reuse == nil {
		return make(v1.ResourceList, 4)
	}
	for k := range reuse {
		delete(reuse, k)
	}
	return reuse
}

// GetResourceRequestQuantity finds and returns the request quantity for a specific resource.
func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resource.Quantity {
	requestQuantity := resource.Quantity{}

	switch resourceName {
	case v1.ResourceCPU:
		requestQuantity = resource.Quantity{Format: resource.DecimalSI}
	case v1.ResourceMemory, v1.ResourceStorage, v1.ResourceEphemeralStorage:
		requestQuantity = resource.Quantity{Format: resource.BinarySI}
	default:
		requestQuantity = resource.Quantity{Format: resource.DecimalSI}
	}

	for _, container := range pod.Spec.Containers {
		if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
			requestQuantity.Add(rQuantity)
		}
	}

	for _, container := range pod.Spec.InitContainers {
		if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
			if requestQuantity.Cmp(rQuantity) < 0 {
				requestQuantity = rQuantity.DeepCopy()
			}
		}
	}

	// Add overhead for running a pod to the total requests if the resource total is non-zero.
	if pod.Spec.Overhead != nil {
		if podOverhead, ok := pod.Spec.Overhead[resourceName]; ok && !requestQuantity.IsZero() {
			requestQuantity.Add(podOverhead)
		}
	}

	return requestQuantity
}
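// exampleAccumulation is a hypothetical sketch (the function is illustrative, not
// part of the upstream file) of how the accumulation helpers compose: addResourceList
// sums per-container requests, while maxResourceList takes a per-resource maximum,
// which is what gives init containers their "floor" semantics.
func exampleAccumulation() {
	total := v1.ResourceList{}
	addResourceList(total, v1.ResourceList{v1.ResourceMemory: resource.MustParse("64Mi")})
	addResourceList(total, v1.ResourceList{v1.ResourceMemory: resource.MustParse("64Mi")})
	// total is now 128Mi; a 96Mi entry does not raise it because 96Mi < 128Mi.
	maxResourceList(total, v1.ResourceList{v1.ResourceMemory: resource.MustParse("96Mi")})
	fmt.Println(total.Memory().String()) // prints "128Mi"
}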
// GetResourceRequest finds and returns the request value for a specific resource.
func GetResourceRequest(pod *v1.Pod, resource v1.ResourceName) int64 {
	if resource == v1.ResourcePods {
		return 1
	}

	requestQuantity := GetResourceRequestQuantity(pod, resource)

	if resource == v1.ResourceCPU {
		return requestQuantity.MilliValue()
	}

	return requestQuantity.Value()
}

// ExtractResourceValueByContainerName extracts the value of a resource
// by providing the container name.
func ExtractResourceValueByContainerName(fs *v1.ResourceFieldSelector, pod *v1.Pod, containerName string) (string, error) {
	container, err := findContainerInPod(pod, containerName)
	if err != nil {
		return "", err
	}
	return ExtractContainerResourceValue(fs, container)
}

// ExtractResourceValueByContainerNameAndNodeAllocatable extracts the value of a resource
// by providing the container name and node allocatable.
func ExtractResourceValueByContainerNameAndNodeAllocatable(fs *v1.ResourceFieldSelector, pod *v1.Pod, containerName string, nodeAllocatable v1.ResourceList) (string, error) {
	realContainer, err := findContainerInPod(pod, containerName)
	if err != nil {
		return "", err
	}

	container := realContainer.DeepCopy()

	MergeContainerResourceLimits(container, nodeAllocatable)

	return ExtractContainerResourceValue(fs, container)
}

// ExtractContainerResourceValue extracts the value of a resource
// in an already known container.
func ExtractContainerResourceValue(fs *v1.ResourceFieldSelector, container *v1.Container) (string, error) {
	divisor := resource.Quantity{}
	if divisor.Cmp(fs.Divisor) == 0 {
		divisor = resource.MustParse("1")
	} else {
		divisor = fs.Divisor
	}

	switch fs.Resource {
	case "limits.cpu":
		return convertResourceCPUToString(container.Resources.Limits.Cpu(), divisor)
	case "limits.memory":
		return convertResourceMemoryToString(container.Resources.Limits.Memory(), divisor)
	case "limits.ephemeral-storage":
		return convertResourceEphemeralStorageToString(container.Resources.Limits.StorageEphemeral(), divisor)
	case "requests.cpu":
		return convertResourceCPUToString(container.Resources.Requests.Cpu(), divisor)
	case "requests.memory":
		return convertResourceMemoryToString(container.Resources.Requests.Memory(), divisor)
	case "requests.ephemeral-storage":
		return convertResourceEphemeralStorageToString(container.Resources.Requests.StorageEphemeral(), divisor)
	}

	// Handle extended standard resources with dynamic names,
	// for example requests.hugepages-<pageSize> or limits.hugepages-<pageSize>.
	if strings.HasPrefix(fs.Resource, "requests.") {
		resourceName := v1.ResourceName(strings.TrimPrefix(fs.Resource, "requests."))
		if IsHugePageResourceName(resourceName) {
			return convertResourceHugePagesToString(container.Resources.Requests.Name(resourceName, resource.BinarySI), divisor)
		}
	}
	if strings.HasPrefix(fs.Resource, "limits.") {
		resourceName := v1.ResourceName(strings.TrimPrefix(fs.Resource, "limits."))
		if IsHugePageResourceName(resourceName) {
			return convertResourceHugePagesToString(container.Resources.Limits.Name(resourceName, resource.BinarySI), divisor)
		}
	}

	return "", fmt.Errorf("unsupported container resource: %v", fs.Resource)
}

// convertResourceCPUToString converts the cpu value to the format of the divisor and
// returns the ceiling of the value.
func convertResourceCPUToString(cpu *resource.Quantity, divisor resource.Quantity) (string, error) {
	c := int64(math.Ceil(float64(cpu.MilliValue()) / float64(divisor.MilliValue())))
	return strconv.FormatInt(c, 10), nil
}
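// exampleDivisor is a hypothetical sketch (the function is illustrative, not part of
// the upstream file) of the divisor semantics used by the downward API: a 250m CPU
// limit exposed with a divisor of "1" rounds up to whole cores.
func exampleDivisor() {
	c := v1.Container{
		Resources: v1.ResourceRequirements{
			Limits: v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m")},
		},
	}
	fs := &v1.ResourceFieldSelector{Resource: "limits.cpu", Divisor: resource.MustParse("1")}
	val, err := ExtractContainerResourceValue(fs, &c)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(val) // prints "1": 250m CPU rounded up to the next whole core
}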
// convertResourceMemoryToString converts the memory value to the format of the divisor
// and returns the ceiling of the value.
func convertResourceMemoryToString(memory *resource.Quantity, divisor resource.Quantity) (string, error) {
	m := int64(math.Ceil(float64(memory.Value()) / float64(divisor.Value())))
	return strconv.FormatInt(m, 10), nil
}

// convertResourceHugePagesToString converts the hugepages value to the format of the
// divisor and returns the ceiling of the value.
func convertResourceHugePagesToString(hugePages *resource.Quantity, divisor resource.Quantity) (string, error) {
	m := int64(math.Ceil(float64(hugePages.Value()) / float64(divisor.Value())))
	return strconv.FormatInt(m, 10), nil
}

// convertResourceEphemeralStorageToString converts the ephemeral storage value to the
// format of the divisor and returns the ceiling of the value.
func convertResourceEphemeralStorageToString(ephemeralStorage *resource.Quantity, divisor resource.Quantity) (string, error) {
	m := int64(math.Ceil(float64(ephemeralStorage.Value()) / float64(divisor.Value())))
	return strconv.FormatInt(m, 10), nil
}

// findContainerInPod finds a container by its name in the provided pod.
func findContainerInPod(pod *v1.Pod, containerName string) (*v1.Container, error) {
	for i := range pod.Spec.Containers {
		if pod.Spec.Containers[i].Name == containerName {
			return &pod.Spec.Containers[i], nil
		}
	}
	for i := range pod.Spec.InitContainers {
		if pod.Spec.InitContainers[i].Name == containerName {
			return &pod.Spec.InitContainers[i], nil
		}
	}
	return nil, fmt.Errorf("container %s not found", containerName)
}

// MergeContainerResourceLimits checks if a limit is applied for
// the container, and if not, it sets the limit from the passed resource list.
func MergeContainerResourceLimits(container *v1.Container, allocatable v1.ResourceList) {
	if container.Resources.Limits == nil {
		container.Resources.Limits = make(v1.ResourceList)
	}
	// NOTE: we exclude hugepages-* resources because hugepages are never overcommitted.
	// This means that the container always has a limit specified.
	for _, resource := range []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourceEphemeralStorage} {
		if quantity, exists := container.Resources.Limits[resource]; !exists || quantity.IsZero() {
			if cap, exists := allocatable[resource]; exists {
				container.Resources.Limits[resource] = cap.DeepCopy()
			}
		}
	}
}

// IsHugePageResourceName returns true if the resource name has the huge page
// resource prefix.
func IsHugePageResourceName(name v1.ResourceName) bool {
	return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix)
}
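// exampleMergeLimits is a hypothetical sketch (the function is illustrative, not part
// of the upstream file): a container with no memory limit picks up the node's
// allocatable memory as its effective limit for downward-API purposes.
func exampleMergeLimits() {
	c := v1.Container{Name: "app"}
	allocatable := v1.ResourceList{v1.ResourceMemory: resource.MustParse("8Gi")}
	MergeContainerResourceLimits(&c, allocatable)
	fmt.Println(c.Resources.Limits.Memory().String()) // prints "8Gi"
}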