Add pod disruption conditions for kubelet-initiated failures

commit 52cd6755eb
parent c519bc02e8
Author: Michal Wozniak
Date:   2022-10-10 13:58:40 +02:00

17 changed files with 883 additions and 21 deletions
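The diff below (one of the 17 changed files) extends the node e2e eviction tests so that a pod evicted by the kubelet is checked for a disruption condition in its status. For orientation, a minimal sketch of the condition the tests look for, assuming the eviction manager fills in Reason and Message (the values shown are illustrative, not taken from this commit):

	// Approximate shape of the status condition appended by the kubelet's
	// eviction manager when the PodDisruptionConditions feature gate is on.
	cond := v1.PodCondition{
		Type:   v1.AlphaNoCompatGuaranteeDisruptionTarget, // i.e. "DisruptionTarget"
		Status: v1.ConditionTrue,
		Reason: "TerminationByKubelet", // illustrative value
	}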


@@ -31,6 +31,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
 	kubeletstatsv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
+	"k8s.io/kubernetes/pkg/features"
 	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 	"k8s.io/kubernetes/pkg/kubelet/eviction"
 	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
@@ -500,6 +501,28 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering [Slow] [Serial] [Disruptive][No
 		specs[2].pod.Spec.PriorityClassName = highPriorityClassName
 		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logPidMetrics, specs)
 	})
+
+	ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition)+"; PodDisruptionConditions enabled [NodeFeature:PodDisruptionConditions]", func() {
+		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
+			pidsConsumed := int64(10000)
+			summary := eventuallyGetSummary()
+			availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
+			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
+			initialConfig.EvictionMinimumReclaim = map[string]string{}
+			initialConfig.FeatureGates = map[string]bool{
+				string(features.PodDisruptionConditions): true,
+			}
+		})
+		disruptionTarget := v1.AlphaNoCompatGuaranteeDisruptionTarget
+		specs := []podEvictSpec{
+			{
+				evictionPriority:           1,
+				pod:                        pidConsumingPod("fork-bomb-container", 30000),
+				wantPodDisruptionCondition: &disruptionTarget,
+			},
+		}
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logPidMetrics, specs)
+	})
 })
 
 // Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods
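The new context reuses the PID-pressure recipe from the surrounding tests: the hard eviction threshold for pid.available (the string behind evictionapi.SignalPIDAvailable) is set 10,000 PIDs below what the node currently has free, so a pod that forks 30,000 processes is guaranteed to trip it. With hypothetical numbers:

	// If the node currently has 4,000,000 PIDs free, EvictionHard becomes
	// {"pid.available": "3990000"}; the fork bomb then drives pid.available
	// below that threshold and the kubelet evicts the pod.
	availablePids := int64(4000000) // hypothetical reading from the summary API
	threshold := availablePids - 10000 // 3990000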
@@ -507,8 +530,9 @@ type podEvictSpec struct {
 	// P0 should never be evicted, P1 shouldn't evict before P2, etc.
 	// If two are ranked at P1, either is permitted to fail before the other.
 	// The test ends when all pods other than p0 have been evicted
-	evictionPriority int
-	pod              *v1.Pod
+	evictionPriority           int
+	pod                        *v1.Pod
+	wantPodDisruptionCondition *v1.PodConditionType
 }
 
 // runEvictionTest sets up a testing environment given the provided pods, and checks a few things:
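wantPodDisruptionCondition is a pointer so its nil zero value keeps the check opt-in: existing specs are untouched, and only specs that set it are examined by verifyPodConditions. A minimal opt-in, mirroring the test context above:

	// Sketch of a spec that opts into the disruption-condition check;
	// pidConsumingPod is the helper the diff itself uses.
	target := v1.AlphaNoCompatGuaranteeDisruptionTarget
	spec := podEvictSpec{
		evictionPriority:           1,
		pod:                        pidConsumingPod("fork-bomb-container", 30000),
		wantPodDisruptionCondition: &target,
	}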
@@ -560,6 +584,9 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 			return verifyEvictionOrdering(f, testSpecs)
 		}, pressureTimeout, evictionPollInterval).Should(gomega.BeNil())
 
+		ginkgo.By("checking for the expected pod conditions for evicted pods")
+		verifyPodConditions(f, testSpecs)
+
 		// We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
 		// This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
 		// evicts a pod, and when we observe the pressure by querying the API server. Add a delay here to account for this delay
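The condition check runs only after verifyEvictionOrdering has settled, so the evicted pods' final status should already be visible through the API server. If the single-shot check ever proved racy, a polling variant along these lines (hypothetical, not part of this commit) would be the natural hardening:

	// Retry the lookup until the condition shows up or the timeout expires.
	gomega.Eventually(func() *v1.PodCondition {
		p, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(context.TODO(), spec.pod.Name, metav1.GetOptions{})
		framework.ExpectNoError(err)
		return e2epod.FindPodConditionByType(&p.Status, cType)
	}, pressureTimeout, evictionPollInterval).ShouldNot(gomega.BeNil())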
@@ -725,6 +752,21 @@ func verifyEvictionOrdering(f *framework.Framework, testSpecs []podEvictSpec) er
 	return fmt.Errorf("pods that should be evicted are still running: %#v", pendingPods)
 }
 
+func verifyPodConditions(f *framework.Framework, testSpecs []podEvictSpec) {
+	for _, spec := range testSpecs {
+		if spec.wantPodDisruptionCondition != nil {
+			pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(context.TODO(), spec.pod.Name, metav1.GetOptions{})
+			framework.ExpectNoError(err, "Failed to get the recent pod object for name: %q", pod.Name)
+
+			cType := *spec.wantPodDisruptionCondition
+			podDisruptionCondition := e2epod.FindPodConditionByType(&pod.Status, cType)
+			if podDisruptionCondition == nil {
+				framework.Failf("pod %q should have the condition: %q, pod status: %v", pod.Name, cType, pod.Status)
+			}
+		}
+	}
+}
+
 func verifyEvictionEvents(f *framework.Framework, testSpecs []podEvictSpec, expectedStarvedResource v1.ResourceName) {
 	for _, spec := range testSpecs {
 		pod := spec.pod
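Note that verifyPodConditions only asserts that a condition of the wanted type exists; it does not inspect the condition's Status or Reason. A stricter variant (not part of this commit) could pin the status as well:

	// Hypothetical tightening of the check inside verifyPodConditions.
	cond := e2epod.FindPodConditionByType(&pod.Status, cType)
	if cond == nil || cond.Status != v1.ConditionTrue {
		framework.Failf("pod %q lacks a true %q condition, pod status: %v", pod.Name, cType, pod.Status)
	}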