Merge pull request #68068 from krzysztof-jastrzebski/hpas2

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md.

Change CPU sample sanitization in HPA.

**What this PR does / why we need it**:
Change CPU sample sanitization in HPA.
    Ignore samples (see the sketch below) if:
    - the pod is still being initialized (within 5 minutes of start, as defined by flag) and:
        - the pod is unready, or
        - the pod is ready but a full window of the metric hasn't been collected since
        the last readiness transition;
    - the pod is past initialization (5 minutes from start, as defined by flag) and:
        - the pod has never been ready after the initial readiness delay.
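
    Roughly, the rule above boils down to the following standalone Go sketch. It paraphrases the new `groupPods` logic in the diff below; `podSample` and `ignoreCPUSample` are illustrative names, not the controller's actual types, and the real code additionally ignores pods with no Ready condition or no start time.
    ```go
    package sanitize

    import "time"

    // podSample carries only the fields the rule needs; the real pod status
    // and PodMetric objects hold more.
    type podSample struct {
    	startTime          time.Time     // pod.Status.StartTime
    	ready              bool          // Ready condition status
    	lastTransitionTime time.Time     // Ready condition LastTransitionTime
    	metricTimestamp    time.Time     // when the CPU sample was taken
    	metricWindow       time.Duration // length of the sample's averaging window
    }

    // ignoreCPUSample reports whether the pod's CPU sample should be dropped.
    func ignoreCPUSample(p podSample, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration, now time.Time) bool {
    	if p.startTime.Add(cpuInitializationPeriod).After(now) {
    		// Pod may still be initializing: drop the sample if the pod is
    		// unready, or if a full metric window hasn't elapsed since the last
    		// readiness transition (the sample would mix pre- and
    		// post-transition CPU usage).
    		return !p.ready || p.metricTimestamp.Before(p.lastTransitionTime.Add(p.metricWindow))
    	}
    	// Pod is past the initialization period: drop the sample only if the
    	// pod is unready and has never been ready (its only readiness
    	// transition happened within the initial readiness delay after start).
    	return !p.ready && p.startTime.Add(delayOfInitialReadinessStatus).After(p.lastTransitionTime)
    }
    ```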

**Release notes:**
```release-note
Improve CPU sample sanitization in HPA by taking metric's freshness into account.
```
Authored by Kubernetes Submit Queue on 2018-08-31 10:17:44 -07:00, committed by GitHub.
23 changed files with 382 additions and 119 deletions


@@ -32,12 +32,10 @@ import (
)
const (
// TODO(jbartosik): use actual value.
cpuSampleWindow = time.Minute
// defaultTestingTolerance is default value for calculating when to
// scale up/scale down.
defaultTestingTolerance = 0.1
defaultTestingCpuTaintAfterStart = 2 * time.Minute
defaultTestingCpuInitializationPeriod = 2 * time.Minute
defaultTestingDelayOfInitialReadinessStatus = 10 * time.Second
)
@@ -45,16 +43,16 @@ type ReplicaCalculator struct {
metricsClient metricsclient.MetricsClient
podsGetter v1coreclient.PodsGetter
tolerance float64
cpuTaintAfterStart time.Duration
cpuInitializationPeriod time.Duration
delayOfInitialReadinessStatus time.Duration
}
func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podsGetter v1coreclient.PodsGetter, tolerance float64, cpuTaintAfterStart, delayOfInitialReadinessStatus time.Duration) *ReplicaCalculator {
func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podsGetter v1coreclient.PodsGetter, tolerance float64, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) *ReplicaCalculator {
return &ReplicaCalculator{
metricsClient: metricsClient,
podsGetter: podsGetter,
tolerance: tolerance,
cpuTaintAfterStart: cpuTaintAfterStart,
cpuInitializationPeriod: cpuInitializationPeriod,
delayOfInitialReadinessStatus: delayOfInitialReadinessStatus,
}
}
@@ -77,7 +75,7 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
return 0, 0, 0, time.Time{}, fmt.Errorf("no pods returned by selector while calculating replica count")
}
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuTaintAfterStart, c.delayOfInitialReadinessStatus)
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuInitializationPeriod, c.delayOfInitialReadinessStatus)
removeMetricsForPods(metrics, ignoredPods)
requests, err := calculatePodRequests(podList.Items, resource)
if err != nil {
@@ -108,12 +106,12 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
if usageRatio < 1.0 {
// on a scale-down, treat missing pods as using 100% of the resource request
for podName := range missingPods {
metrics[podName] = requests[podName]
metrics[podName] = metricsclient.PodMetric{Value: requests[podName]}
}
} else if usageRatio > 1.0 {
// on a scale-up, treat missing pods as using 0% of the resource request
for podName := range missingPods {
metrics[podName] = 0
metrics[podName] = metricsclient.PodMetric{Value: 0}
}
}
}
@@ -121,7 +119,7 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
if rebalanceIgnored {
// on a scale-up, treat unready pods as using 0% of the resource request
for podName := range ignoredPods {
metrics[podName] = 0
metrics[podName] = metricsclient.PodMetric{Value: 0}
}
}
@@ -178,7 +176,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
return 0, 0, fmt.Errorf("no pods returned by selector while calculating replica count")
}
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuTaintAfterStart, c.delayOfInitialReadinessStatus)
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuInitializationPeriod, c.delayOfInitialReadinessStatus)
removeMetricsForPods(metrics, ignoredPods)
if len(metrics) == 0 {
@@ -203,12 +201,12 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
if usageRatio < 1.0 {
// on a scale-down, treat missing pods as using 100% of the resource request
for podName := range missingPods {
metrics[podName] = targetUtilization
metrics[podName] = metricsclient.PodMetric{Value: targetUtilization}
}
} else {
// on a scale-up, treat missing pods as using 0% of the resource request
for podName := range missingPods {
metrics[podName] = 0
metrics[podName] = metricsclient.PodMetric{Value: 0}
}
}
}
@@ -216,7 +214,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
if rebalanceIgnored {
// on a scale-up, treat unready pods as using 0% of the resource request
for podName := range ignoredPods {
metrics[podName] = 0
metrics[podName] = metricsclient.PodMetric{Value: 0}
}
}
@@ -342,14 +340,15 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3
return replicaCount, utilization, timestamp, nil
}
func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName, cpuTaintAfterStart, delayOfInitialReadinessStatus time.Duration) (readyPodCount int, ignoredPods sets.String, missingPods sets.String) {
func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) (readyPodCount int, ignoredPods sets.String, missingPods sets.String) {
missingPods = sets.NewString()
ignoredPods = sets.NewString()
for _, pod := range pods {
if pod.DeletionTimestamp != nil || pod.Status.Phase == v1.PodFailed {
continue
}
if _, found := metrics[pod.Name]; !found {
metric, found := metrics[pod.Name]
if !found {
missingPods.Insert(pod.Name)
continue
}
@@ -359,10 +358,13 @@ func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.
if condition == nil || pod.Status.StartTime == nil {
ignorePod = true
} else {
if condition.Status == v1.ConditionTrue {
ignorePod = pod.Status.StartTime.Add(cpuTaintAfterStart + cpuSampleWindow).After(time.Now())
// Pod still within possible initialisation period.
if pod.Status.StartTime.Add(cpuInitializationPeriod).After(time.Now()) {
// Ignore sample if pod is unready or one window of metric wasn't collected since last state transition.
ignorePod = condition.Status == v1.ConditionFalse || metric.Timestamp.Before(condition.LastTransitionTime.Time.Add(metric.Window))
} else {
ignorePod = pod.Status.StartTime.Add(delayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time)
// Ignore metric if pod is unready and it has never been ready.
ignorePod = condition.Status == v1.ConditionFalse && pod.Status.StartTime.Add(delayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time)
}
}
if ignorePod {
@@ -370,7 +372,6 @@ func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.
continue
}
}
readyPodCount++
}
return
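
The freshness check relies on a companion change in the metrics client (not shown in this file): the `PodMetricsInfo` map values go from bare milli-values to a small struct that also carries the sample's timestamp and window, which is why the diff writes `metricsclient.PodMetric{Value: ...}` and reads `metric.Timestamp` and `metric.Window`. A minimal sketch of the assumed shape, with the field names inferred from those usages:

```go
package metricsclient

import "time"

// PodMetric is the assumed per-pod sample shape after this change: the value
// plus the metadata groupPods needs for the freshness check.
type PodMetric struct {
	Timestamp time.Time     // when the sample was taken
	Window    time.Duration // averaging window (cpuSampleWindow for CPU)
	Value     int64         // metric value, milli-units for CPU
}

// PodMetricsInfo maps pod name to its latest sample.
type PodMetricsInfo map[string]PodMetric

// fresh reports whether a sample covers only time after the pod's last
// readiness transition, i.e. a full window has elapsed since the transition.
// This mirrors the condition negated in groupPods above.
func fresh(m PodMetric, lastTransition time.Time) bool {
	return !m.Timestamp.Before(lastTransition.Add(m.Window))
}
```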