Improve HPA sample sanitization

After my previous changes the HPA misbehaved in the following
situation:

- Pods use a lot of CPU during initialization and become ready right after they initialize,
- Scale-up triggers,
- When the new pods become ready, the HPA counts their CPU usage (even though it is not related to any work that needs doing),
- Another scale-up follows, even though the existing pods could have handled the work just fine.
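
The fix keys off each pod's start time rather than readiness alone: CPU samples from pods that started very recently are treated as "hot" and dropped, so initialization spikes no longer feed the average. Below is a minimal sketch of that rule, assuming a fixed 3-minute cutoff (the gap the test helpers hotCpuCreationTime and coolCpuCreationTime encode); podSample, usableSamples, and hotCpuWindow are illustrative names for this sketch, not the controller's actual code.

// hot_cpu_sketch.go — a simplified illustration, not HPA controller code.
package main

import (
	"fmt"
	"time"
)

// hotCpuWindow is an assumed cutoff for this sketch; the test helpers
// treat a pod started "now" as hot and one started 3 minutes ago as
// cool, so 3 minutes is used here.
const hotCpuWindow = 3 * time.Minute

// podSample pairs a pod's start time with its reported CPU usage in
// millicores (a hypothetical type for this sketch).
type podSample struct {
	name      string
	startTime time.Time
	cpuMilli  int64
}

// usableSamples drops samples from pods that started too recently:
// their CPU is dominated by initialization work rather than real load,
// so counting them would trigger another unnecessary scale-up.
func usableSamples(samples []podSample, now time.Time) []podSample {
	var usable []podSample
	for _, s := range samples {
		if now.Sub(s.startTime) < hotCpuWindow {
			continue // still "hot": skip this sample
		}
		usable = append(usable, s)
	}
	return usable
}

func main() {
	now := time.Now()
	// Mirrors TestScaleUpHotCpuLessScale below: one hot pod, two cool pods.
	samples := []podSample{
		{"pod-0", now, 300},                       // hot: ignored
		{"pod-1", now.Add(-3 * time.Minute), 500}, // cool: counted
		{"pod-2", now.Add(-3 * time.Minute), 700}, // cool: counted
	}
	usable := usableSamples(samples, now)
	total := int64(0)
	for _, s := range usable {
		total += s.cpuMilli
	}
	// (500 + 700) / 2 = 600m, i.e. 60% of a 1-CPU request — the
	// CPUCurrent that TestScaleUpHotCpuLessScale expects.
	fmt.Printf("average over usable pods: %dm\n", total/int64(len(usable)))
}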
Joachim Bartosik
2018-08-08 15:00:17 +02:00
parent 816f2a4868
commit 7d6676eab1
6 changed files with 467 additions and 156 deletions


@@ -103,6 +103,7 @@ type testCase struct {
 	reportedLevels          []uint64
 	reportedCPURequests     []resource.Quantity
 	reportedPodReadiness    []v1.ConditionStatus
+	reportedPodStartTime    []metav1.Time
 	reportedPodPhase        []v1.PodPhase
 	scaleUpdated            bool
 	statusUpdated           bool
@@ -261,6 +262,10 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
 		if tc.reportedPodReadiness != nil {
 			podReadiness = tc.reportedPodReadiness[i]
 		}
+		var podStartTime metav1.Time
+		if tc.reportedPodStartTime != nil {
+			podStartTime = tc.reportedPodStartTime[i]
+		}
 		podPhase := v1.PodRunning
 		if tc.reportedPodPhase != nil {
 			podPhase = tc.reportedPodPhase[i]
@@ -283,6 +288,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
 					Status: podReadiness,
 				},
 			},
+			StartTime: &podStartTime,
 		},
 		ObjectMeta: metav1.ObjectMeta{
 			Name: podName,
@@ -660,6 +666,14 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform
 	return hpaController, informerFactory
 }
 
+func hotCpuCreationTime() metav1.Time {
+	return metav1.Time{Time: time.Now()}
+}
+
+func coolCpuCreationTime() metav1.Time {
+	return metav1.Time{Time: time.Now().Add(-3 * time.Minute)}
+}
+
 func (tc *testCase) runTestWithController(t *testing.T, hpaController *HorizontalController, informerFactory informers.SharedInformerFactory) {
 	stop := make(chan struct{})
 	defer close(stop)
@@ -716,6 +730,23 @@ func TestScaleUpUnreadyLessScale(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestScaleUpHotCpuLessScale(t *testing.T) {
+	tc := testCase{
+		minReplicas:             2,
+		maxReplicas:             6,
+		initialReplicas:         3,
+		expectedDesiredReplicas: 4,
+		CPUTarget:               30,
+		CPUCurrent:              60,
+		verifyCPUCurrent:        true,
+		reportedLevels:          []uint64{300, 500, 700},
+		reportedCPURequests:     []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+		reportedPodStartTime:    []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime()},
+		useMetricsAPI:           true,
+	}
+	tc.runTest(t)
+}
+
 func TestScaleUpUnreadyNoScale(t *testing.T) {
 	tc := testCase{
 		minReplicas:             2,
@@ -738,6 +769,29 @@ func TestScaleUpUnreadyNoScale(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestScaleUpHotCpuNoScale(t *testing.T) {
+	tc := testCase{
+		minReplicas:             2,
+		maxReplicas:             6,
+		initialReplicas:         3,
+		expectedDesiredReplicas: 3,
+		CPUTarget:               30,
+		CPUCurrent:              40,
+		verifyCPUCurrent:        true,
+		reportedLevels:          []uint64{400, 500, 700},
+		reportedCPURequests:     []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+		reportedPodReadiness:    []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+		reportedPodStartTime:    []metav1.Time{coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()},
+		useMetricsAPI:           true,
+		expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
+			Type:   autoscalingv2.AbleToScale,
+			Status: v1.ConditionTrue,
+			Reason: "ReadyForNewScale",
+		}),
+	}
+	tc.runTest(t)
+}
+
 func TestScaleUpIgnoresFailedPods(t *testing.T) {
 	tc := testCase{
 		minReplicas:             2,
@@ -818,12 +872,12 @@ func TestScaleUpCM(t *testing.T) {
 	tc.runTest(t)
 }
 
-func TestScaleUpCMUnreadyLessScale(t *testing.T) {
+func TestScaleUpCMUnreadyAndHotCpuNoLessScale(t *testing.T) {
 	tc := testCase{
 		minReplicas:             2,
 		maxReplicas:             6,
 		initialReplicas:         3,
-		expectedDesiredReplicas: 4,
+		expectedDesiredReplicas: 6,
 		CPUTarget:               0,
 		metricsTarget: []autoscalingv2.MetricSpec{
 			{
@@ -836,17 +890,18 @@ func TestScaleUpCMUnreadyLessScale(t *testing.T) {
 		},
 		reportedLevels:       []uint64{50000, 10000, 30000},
 		reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse},
+		reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()},
 		reportedCPURequests:  []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
 	}
 	tc.runTest(t)
 }
 
-func TestScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) {
+func TestScaleUpCMUnreadyandCpuHot(t *testing.T) {
 	tc := testCase{
 		minReplicas:             2,
 		maxReplicas:             6,
 		initialReplicas:         3,
-		expectedDesiredReplicas: 3,
+		expectedDesiredReplicas: 6,
 		CPUTarget:               0,
 		metricsTarget: []autoscalingv2.MetricSpec{
 			{
@@ -859,11 +914,48 @@ func TestScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) {
 		},
 		reportedLevels:       []uint64{50000, 15000, 30000},
 		reportedPodReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse},
+		reportedPodStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()},
 		reportedCPURequests:  []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
 		expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
 			Type:   autoscalingv2.AbleToScale,
 			Status: v1.ConditionTrue,
-			Reason: "ReadyForNewScale",
+			Reason: "SucceededRescale",
+		}, autoscalingv2.HorizontalPodAutoscalerCondition{
+			Type:   autoscalingv2.ScalingLimited,
+			Status: v1.ConditionTrue,
+			Reason: "TooManyReplicas",
 		}),
 	}
 	tc.runTest(t)
 }
+
+func TestScaleUpHotCpuNoScaleWouldScaleDown(t *testing.T) {
+	tc := testCase{
+		minReplicas:             2,
+		maxReplicas:             6,
+		initialReplicas:         3,
+		expectedDesiredReplicas: 6,
+		CPUTarget:               0,
+		metricsTarget: []autoscalingv2.MetricSpec{
+			{
+				Type: autoscalingv2.PodsMetricSourceType,
+				Pods: &autoscalingv2.PodsMetricSource{
+					MetricName:         "qps",
+					TargetAverageValue: resource.MustParse("15.0"),
+				},
+			},
+		},
+		reportedLevels:       []uint64{50000, 15000, 30000},
+		reportedCPURequests:  []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+		reportedPodStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()},
+		expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
+			Type:   autoscalingv2.AbleToScale,
+			Status: v1.ConditionTrue,
+			Reason: "SucceededRescale",
+		}, autoscalingv2.HorizontalPodAutoscalerCondition{
+			Type:   autoscalingv2.ScalingLimited,
+			Status: v1.ConditionTrue,
+			Reason: "TooManyReplicas",
+		}),
+	}
+	tc.runTest(t)
+}
@@ -1043,7 +1135,7 @@ func TestScaleDownPerPodCMExternal(t *testing.T) {
 	tc.runTest(t)
 }
 
-func TestScaleDownIgnoresUnreadyPods(t *testing.T) {
+func TestScaleDownIncludeUnreadyPods(t *testing.T) {
 	tc := testCase{
 		minReplicas:             2,
 		maxReplicas:             6,
@@ -1060,6 +1152,23 @@ func TestScaleDownIgnoresUnreadyPods(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestScaleDownIgnoreHotCpuPods(t *testing.T) {
+	tc := testCase{
+		minReplicas:             2,
+		maxReplicas:             6,
+		initialReplicas:         5,
+		expectedDesiredReplicas: 2,
+		CPUTarget:               50,
+		CPUCurrent:              30,
+		verifyCPUCurrent:        true,
+		reportedLevels:          []uint64{100, 300, 500, 250, 250},
+		reportedCPURequests:     []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+		useMetricsAPI:           true,
+		reportedPodStartTime:    []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()},
+	}
+	tc.runTest(t)
+}
+
 func TestScaleDownIgnoresFailedPods(t *testing.T) {
 	tc := testCase{
 		minReplicas:             2,
@@ -1975,7 +2084,7 @@ func TestAvoidUncessaryUpdates(t *testing.T) {
 		verifyCPUCurrent:     true,
 		reportedLevels:       []uint64{400, 500, 700},
 		reportedCPURequests:  []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
-		reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+		reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()},
 		useMetricsAPI:        true,
 	}
 	testClient, _, _, _, _ := tc.prepareTestClient(t)