
Currently, the HPA considers unready pods the same as ready pods when looking at their CPU and custom metric usage. However, pods frequently use extra CPU during initialization, so we want to consider them separately. This commit causes the HPA to consider unready pods as having 0 CPU usage when scaling up, and to ignore them when scaling down. If, when scaling up, factoring in the unready pods at 0 CPU would cause a downscale instead, we simply choose not to scale. Otherwise, we scale up by the reduced amount calculated by factoring the pods in at zero CPU usage. The effect is that unready pods make the autoscaler a bit more conservative: large increases in CPU usage can still trigger a scale-up, even with unready pods in the mix, but the scale factors will not be as large, in anticipation of the new pods later becoming ready and handling load. Similarly, pods for which no metrics have been retrieved are treated as having 100% of the requested metric when scaling down, and 0% when scaling up. As above, this cannot change the direction of the scale. This commit also changes the HPA to ignore superfluous metrics: as long as metrics for all ready pods are present, the HPA will make scaling decisions. Currently, this only works for CPU; for custom metrics, we cannot identify which metrics go with which pods when we get superfluous metrics, so we abort the scale.
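
To make the rebalancing rule concrete, here is a small, self-contained walkthrough with hypothetical numbers (not part of the change itself): three ready pods each using 900m of a 1000m CPU request against a 60% target, plus two unready pods. The ready pods alone suggest a scale-up (90% against a 60% target), but counting the unready pods at 0 usage brings utilization to 54%, which would flip the scale direction, so the replica count stays put:

    // sketch of the decision logic described above; all numbers are hypothetical
    package main

    import (
    	"fmt"
    	"math"
    )

    func main() {
    	target := 60.0                         // target utilization, percent
    	readyUsage := []float64{900, 900, 900} // milli-CPU used by each ready pod
    	requestPerPod := 1000.0                // milli-CPU requested by each pod
    	unreadyCount := 2

    	// first pass: unready pods are excluded entirely
    	sum := 0.0
    	for _, u := range readyUsage {
    		sum += u
    	}
    	utilization := 100 * sum / (requestPerPod * float64(len(readyUsage)))
    	usageRatio := utilization / target // 90 / 60 = 1.5, a scale-up candidate

    	// second pass: unready pods count at 0 usage but full request
    	totalPods := len(readyUsage) + unreadyCount
    	newUsageRatio := (100 * sum / (requestPerPod * float64(totalPods))) / target // 54 / 60 = 0.9

    	if usageRatio > 1.0 && newUsageRatio < 1.0 {
    		fmt.Println("rebalancing would flip the direction; keeping current replicas")
    	} else {
    		fmt.Println("proposed replicas:", int(math.Ceil(newUsageRatio*float64(totalPods))))
    	}
    }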
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podautoscaler

import (
	"fmt"
	"math"
	"time"

	"k8s.io/kubernetes/pkg/api"
	unversionedcore "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/internalversion"
	metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
	"k8s.io/kubernetes/pkg/labels"
	"k8s.io/kubernetes/pkg/util/sets"
)

type ReplicaCalculator struct {
	metricsClient metricsclient.MetricsClient
	podsGetter    unversionedcore.PodsGetter
}

func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podsGetter unversionedcore.PodsGetter) *ReplicaCalculator {
	return &ReplicaCalculator{
		metricsClient: metricsClient,
		podsGetter:    podsGetter,
	}
}

// GetResourceReplicas calculates the desired replica count based on a target resource utilization percentage
// of the given resource for pods matching the given selector in the given namespace, and the current replica count
func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUtilization int32, resource api.ResourceName, namespace string, selector labels.Selector) (replicaCount int32, utilization int32, timestamp time.Time, err error) {
	metrics, timestamp, err := c.metricsClient.GetResourceMetric(resource, namespace, selector)
	if err != nil {
		return 0, 0, time.Time{}, fmt.Errorf("unable to get metrics for resource %s: %v", resource, err)
	}

	podList, err := c.podsGetter.Pods(namespace).List(api.ListOptions{LabelSelector: selector})
	if err != nil {
		return 0, 0, time.Time{}, fmt.Errorf("unable to get pods while calculating replica count: %v", err)
	}

	if len(podList.Items) == 0 {
		return 0, 0, time.Time{}, fmt.Errorf("no pods returned by selector while calculating replica count")
	}

	requests := make(map[string]int64, len(podList.Items))
	readyPodCount := 0
	unreadyPods := sets.NewString()
	missingPods := sets.NewString()

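	// sort the pods into three buckets: ready pods are counted normally,
	// unready pods are set aside to be rebalanced on a potential scale-up,
	// and pods with no metrics are set aside to be rebalanced either way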
	for _, pod := range podList.Items {
		podSum := int64(0)
		for _, container := range pod.Spec.Containers {
			if containerRequest, ok := container.Resources.Requests[resource]; ok {
				podSum += containerRequest.MilliValue()
			} else {
				return 0, 0, time.Time{}, fmt.Errorf("missing request for %s on container %s in pod %s/%s", resource, container.Name, namespace, pod.Name)
			}
		}

		requests[pod.Name] = podSum

		if pod.Status.Phase != api.PodRunning || !api.IsPodReady(&pod) {
			// save this pod name for later, but pretend it doesn't exist for now
			unreadyPods.Insert(pod.Name)
			delete(metrics, pod.Name)
			continue
		}

		if _, found := metrics[pod.Name]; !found {
			// save this pod name for later, but pretend it doesn't exist for now
			missingPods.Insert(pod.Name)
			continue
		}

		readyPodCount++
	}

	if len(metrics) == 0 {
		return 0, 0, time.Time{}, fmt.Errorf("did not receive metrics for any ready pods")
	}

	usageRatio, utilization, err := metricsclient.GetResourceUtilizationRatio(metrics, requests, targetUtilization)
	if err != nil {
		return 0, 0, time.Time{}, err
	}

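	// only rebalance unready pods on a potential scale-up; on a scale-down
	// they were already dropped from the metrics map above, so they are
	// simply ignored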
	rebalanceUnready := len(unreadyPods) > 0 && usageRatio > 1.0
	if !rebalanceUnready && len(missingPods) == 0 {
		if math.Abs(1.0-usageRatio) <= tolerance {
			// return the current replicas if the change would be too small
			return currentReplicas, utilization, timestamp, nil
		}

		// if we don't have any unready or missing pods, we can calculate the new replica count now
		return int32(math.Ceil(usageRatio * float64(readyPodCount))), utilization, timestamp, nil
	}

	if len(missingPods) > 0 {
		if usageRatio < 1.0 {
			// on a scale-down, treat missing pods as using 100% of the resource request
			for podName := range missingPods {
				metrics[podName] = requests[podName]
			}
		} else {
			// on a scale-up, treat missing pods as using 0% of the resource request
			for podName := range missingPods {
				metrics[podName] = 0
			}
		}
	}

	if rebalanceUnready {
		// on a scale-up, treat unready pods as using 0% of the resource request
		for podName := range unreadyPods {
			metrics[podName] = 0
		}
	}

	// re-run the utilization calculation with our new numbers
	newUsageRatio, _, err := metricsclient.GetResourceUtilizationRatio(metrics, requests, targetUtilization)
	if err != nil {
		return 0, utilization, time.Time{}, err
	}

	if math.Abs(1.0-newUsageRatio) <= tolerance || (usageRatio < 1.0 && newUsageRatio > 1.0) || (usageRatio > 1.0 && newUsageRatio < 1.0) {
		// return the current replicas if the change would be too small,
		// or if the new usage ratio would cause a change in scale direction
		return currentReplicas, utilization, timestamp, nil
	}

	// return the result, where the number of replicas considered is
	// however many replicas factored into our calculation
	return int32(math.Ceil(newUsageRatio * float64(len(metrics)))), utilization, timestamp, nil
}

// GetMetricReplicas calculates the desired replica count based on a target value for the given
// raw metric for pods matching the given selector in the given namespace, and the current replica count
func (c *ReplicaCalculator) GetMetricReplicas(currentReplicas int32, targetUtilization float64, metricName string, namespace string, selector labels.Selector) (replicaCount int32, utilization float64, timestamp time.Time, err error) {
	metrics, timestamp, err := c.metricsClient.GetRawMetric(metricName, namespace, selector)
	if err != nil {
		return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v", metricName, err)
	}

	podList, err := c.podsGetter.Pods(namespace).List(api.ListOptions{LabelSelector: selector})
	if err != nil {
		return 0, 0, time.Time{}, fmt.Errorf("unable to get pods while calculating replica count: %v", err)
	}

	if len(podList.Items) == 0 {
		return 0, 0, time.Time{}, fmt.Errorf("no pods returned by selector while calculating replica count")
	}

	readyPodCount := 0
	unreadyPods := sets.NewString()
	missingPods := sets.NewString()

	for _, pod := range podList.Items {
		if pod.Status.Phase != api.PodRunning || !api.IsPodReady(&pod) {
			// save this pod name for later, but pretend it doesn't exist for now
			unreadyPods.Insert(pod.Name)
			delete(metrics, pod.Name)
			continue
		}

		if _, found := metrics[pod.Name]; !found {
			// save this pod name for later, but pretend it doesn't exist for now
			missingPods.Insert(pod.Name)
			continue
		}

		readyPodCount++
	}

	if len(metrics) == 0 {
		return 0, 0, time.Time{}, fmt.Errorf("did not receive metrics for any ready pods")
	}

	usageRatio, utilization := metricsclient.GetMetricUtilizationRatio(metrics, targetUtilization)

	rebalanceUnready := len(unreadyPods) > 0 && usageRatio > 1.0

	if !rebalanceUnready && len(missingPods) == 0 {
		if math.Abs(1.0-usageRatio) <= tolerance {
			// return the current replicas if the change would be too small
			return currentReplicas, utilization, timestamp, nil
		}

		// if we don't have any unready or missing pods, we can calculate the new replica count now
		return int32(math.Ceil(usageRatio * float64(readyPodCount))), utilization, timestamp, nil
	}

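	// unlike the resource path above, raw metrics have no per-pod request to
	// fall back on, so a missing pod's "100%" on a scale-down is stood in for
	// by the target value itself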
	if len(missingPods) > 0 {
		if usageRatio < 1.0 {
			// on a scale-down, treat missing pods as producing the target amount of the metric
			for podName := range missingPods {
				metrics[podName] = targetUtilization
			}
		} else {
			// on a scale-up, treat missing pods as producing none of the metric
			for podName := range missingPods {
				metrics[podName] = 0
			}
		}
	}

	if rebalanceUnready {
		// on a scale-up, treat unready pods as producing none of the metric
		for podName := range unreadyPods {
			metrics[podName] = 0
		}
	}

	// re-run the utilization calculation with our new numbers
	newUsageRatio, _ := metricsclient.GetMetricUtilizationRatio(metrics, targetUtilization)

	if math.Abs(1.0-newUsageRatio) <= tolerance || (usageRatio < 1.0 && newUsageRatio > 1.0) || (usageRatio > 1.0 && newUsageRatio < 1.0) {
		// return the current replicas if the change would be too small,
		// or if the new usage ratio would cause a change in scale direction
		return currentReplicas, utilization, timestamp, nil
	}

	// return the result, where the number of replicas considered is
	// however many replicas factored into our calculation
	return int32(math.Ceil(newUsageRatio * float64(len(metrics)))), utilization, timestamp, nil
}
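
For context, here is a minimal sketch of how a controller loop might drive the calculator; metricsClient, kubeClient, currentReplicas, and the app=frontend selector are hypothetical stand-ins for values the HPA controller already holds, not part of this change:

    // a sketch, not part of this change: assumes metricsClient implements
    // metricsclient.MetricsClient and kubeClient is an internalclientset
    // client whose Core() satisfies unversionedcore.PodsGetter
    calc := podautoscaler.NewReplicaCalculator(metricsClient, kubeClient.Core())
    selector := labels.SelectorFromSet(labels.Set{"app": "frontend"})
    desiredReplicas, utilization, timestamp, err := calc.GetResourceReplicas(
    	currentReplicas, 50 /* target CPU utilization, percent */, api.ResourceCPU, "default", selector)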