GetObjectMetricReplicas ignores unready pods

Previously, when `GetObjectMetricReplicas` calculated the desired
replica count, it multiplied the usage ratio by the current number of replicas.
This method caused over-scaling when there were pods that were not ready
for a long period of time. For example, if there were pods A, B, and C,
and only pod A was ready, and the usage ratio was 500%, we would
previously specify 15 pods as the desired replicas (even though really
only one pod was handling the load).

After this change, we now multiple the usage
ratio by the number of ready pods for `GetObjectMetricReplicas`.
In the example above, we'd only desire 5 replica pods.

This change gives `GetObjectMetricReplicas` the same behavior as the
other replica calculator methods. Only `GetExternalMetricReplicas` and
`GetExternalPerPodMetricRepliacs` still allow unready pods to impact the
number of desired replicas. I will fix this issue in the following
commit.
This commit is contained in:
mattjmcnaughton 2018-03-06 09:51:49 -05:00
parent 48a7048d98
commit 7e3bce7b3e
4 changed files with 87 additions and 13 deletions

View File

@ -223,7 +223,7 @@ func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.Hori
switch metricSpec.Type {
case autoscalingv2.ObjectMetricSourceType:
replicaCountProposal, utilizationProposal, timestampProposal, err = a.replicaCalc.GetObjectMetricReplicas(currentReplicas, metricSpec.Object.TargetValue.MilliValue(), metricSpec.Object.MetricName, hpa.Namespace, &metricSpec.Object.Target)
replicaCountProposal, utilizationProposal, timestampProposal, err = a.replicaCalc.GetObjectMetricReplicas(currentReplicas, metricSpec.Object.TargetValue.MilliValue(), metricSpec.Object.MetricName, hpa.Namespace, &metricSpec.Object.Target, selector)
if err != nil {
a.eventRecorder.Event(hpa, v1.EventTypeWarning, "FailedGetObjectMetric", err.Error())
setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetObjectMetric", "the HPA was unable to compute the replica count: %v", err)

View File

@ -246,16 +246,32 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
defer tc.Unlock()
obj := &v1.PodList{}
for i := 0; i < len(tc.reportedCPURequests); i++ {
specifiedCPURequests := tc.reportedCPURequests != nil
numPodsToCreate := int(tc.initialReplicas)
if specifiedCPURequests {
numPodsToCreate = len(tc.reportedCPURequests)
}
for i := 0; i < numPodsToCreate; i++ {
podReadiness := v1.ConditionTrue
if tc.reportedPodReadiness != nil {
podReadiness = tc.reportedPodReadiness[i]
}
podPhase := v1.PodRunning
if tc.reportedPodPhase != nil {
podPhase = tc.reportedPodPhase[i]
}
podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
reportedCPURequest := resource.MustParse("1.0")
if specifiedCPURequests {
reportedCPURequest = tc.reportedCPURequests[i]
}
pod := v1.Pod{
Status: v1.PodStatus{
Phase: podPhase,
@ -273,12 +289,13 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
"name": podNamePrefix,
},
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: tc.reportedCPURequests[i],
v1.ResourceCPU: reportedCPURequest,
},
},
},
@ -1348,13 +1365,14 @@ func TestEmptyMetrics(t *testing.T) {
func TestEmptyCPURequest(t *testing.T) {
tc := testCase{
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 1,
desiredReplicas: 1,
CPUTarget: 100,
reportedLevels: []uint64{200},
useMetricsAPI: true,
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 1,
desiredReplicas: 1,
CPUTarget: 100,
reportedLevels: []uint64{200},
reportedCPURequests: []resource.Quantity{},
useMetricsAPI: true,
expectedConditions: []autoscalingv1.HorizontalPodAutoscalerCondition{
{Type: autoscalingv1.AbleToScale, Status: v1.ConditionTrue, Reason: "SucceededGetScale"},
{Type: autoscalingv1.ScalingActive, Status: v1.ConditionFalse, Reason: "FailedGetResourceMetric"},

View File

@ -276,7 +276,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
// GetObjectMetricReplicas calculates the desired replica count based on a target metric utilization (as a milli-value)
// for the given object in the given namespace, and the current replica count.
func (c *ReplicaCalculator) GetObjectMetricReplicas(currentReplicas int32, targetUtilization int64, metricName string, namespace string, objectRef *autoscaling.CrossVersionObjectReference) (replicaCount int32, utilization int64, timestamp time.Time, err error) {
func (c *ReplicaCalculator) GetObjectMetricReplicas(currentReplicas int32, targetUtilization int64, metricName string, namespace string, objectRef *autoscaling.CrossVersionObjectReference, selector labels.Selector) (replicaCount int32, utilization int64, timestamp time.Time, err error) {
utilization, timestamp, err = c.metricsClient.GetObjectMetric(metricName, namespace, objectRef)
if err != nil {
return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v on %s %s/%s", metricName, objectRef.Kind, namespace, objectRef.Name, err)
@ -287,11 +287,47 @@ func (c *ReplicaCalculator) GetObjectMetricReplicas(currentReplicas int32, targe
// return the current replicas if the change would be too small
return currentReplicas, utilization, timestamp, nil
}
replicaCount = int32(math.Ceil(usageRatio * float64(currentReplicas)))
// Calculate the total number of ready pods, as we want to base the
// replica count off of how many pods are ready, not how many pods
// exist.
readyPodCount, err := c.getReadyPodsCount(namespace, selector)
if err != nil {
return 0, 0, time.Time{}, fmt.Errorf("unable to calculate ready pods: %s", err)
}
replicaCount = int32(math.Ceil(usageRatio * float64(readyPodCount)))
return replicaCount, utilization, timestamp, nil
}
// @TODO(mattjmcnaughton) Many different functions in this module use variations
// of this function. Make this function generic, so we don't repeat the same
// logic in multiple places.
func (c *ReplicaCalculator) getReadyPodsCount(namespace string, selector labels.Selector) (int64, error) {
podList, err := c.podsGetter.Pods(namespace).List(metav1.ListOptions{LabelSelector: selector.String()})
if err != nil {
return 0, fmt.Errorf("unable to get pods while calculating replica count: %v", err)
}
if len(podList.Items) == 0 {
return 0, fmt.Errorf("no pods returned by selector while calculating replica count")
}
readyPodCount := 0
for _, pod := range podList.Items {
if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
continue
}
readyPodCount++
}
return int64(readyPodCount), nil
}
// GetExternalMetricReplicas calculates the desired replica count based on a
// target metric value (as a milli-value) for the external metric in the given
// namespace, and the current replica count.

View File

@ -323,7 +323,7 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
var outTimestamp time.Time
var err error
if tc.metric.singleObject != nil {
outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetObjectMetricReplicas(tc.currentReplicas, tc.metric.targetUtilization, tc.metric.name, testNamespace, tc.metric.singleObject)
outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetObjectMetricReplicas(tc.currentReplicas, tc.metric.targetUtilization, tc.metric.name, testNamespace, tc.metric.singleObject, selector)
} else if tc.metric.selector != nil {
if tc.metric.targetUtilization > 0 {
outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetExternalMetricReplicas(tc.currentReplicas, tc.metric.targetUtilization, tc.metric.name, testNamespace, tc.metric.selector)
@ -497,6 +497,26 @@ func TestReplicaCalcScaleUpCMObject(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcScaleUpCMObjectIgnoreUnreadyPods(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,
expectedReplicas: 5, // If we did not ignore unready pods, we'd expect 15 replicas.
podReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse},
metric: &metricInfo{
name: "qps",
levels: []int64{50000},
targetUtilization: 10000,
expectedUtilization: 50000,
singleObject: &autoscalingv2.CrossVersionObjectReference{
Kind: "Deployment",
APIVersion: "extensions/v1beta1",
Name: "some-deployment",
},
},
}
tc.runTest(t)
}
func TestReplicaCalcScaleUpCMExternal(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 1,