fix stateful set pod recreation and event spam (#123809)
* fix pods tracking and internal error checking in statefulset tests * fix stateful set pod recreation and event spam - do not emit events when pod reaches terminal phase - do not try to recreate pod until the old pod has been removed from etcd storage * fix conflict race in statefulset rest update statefulset controller does less requests per sync now and thus can reconcile status faster, thus resulting in a higher chance for conflicts
This commit is contained in:
@@ -60,8 +60,7 @@ func setupController(client clientset.Interface) (*fakeObjectManager, *fakeState
|
||||
om := newFakeObjectManager(informerFactory)
|
||||
spc := NewStatefulPodControlFromManager(om, &noopRecorder{})
|
||||
ssu := newFakeStatefulSetStatusUpdater(informerFactory.Apps().V1().StatefulSets())
|
||||
recorder := &noopRecorder{}
|
||||
ssc := NewDefaultStatefulSetControl(spc, ssu, history.NewFakeHistory(informerFactory.Apps().V1().ControllerRevisions()), recorder)
|
||||
ssc := NewDefaultStatefulSetControl(spc, ssu, history.NewFakeHistory(informerFactory.Apps().V1().ControllerRevisions()))
|
||||
|
||||
// The informer is not started. The tests here manipulate the local cache (indexers) directly, and there is no waiting
|
||||
// for client state to sync. In fact, because the client is not updated during tests, informer updates will break tests
|
||||
@@ -171,10 +170,11 @@ func TestStatefulSetControl(t *testing.T) {
|
||||
{ReplacesPods, largeSetFn},
|
||||
{RecreatesFailedPod, simpleSetFn},
|
||||
{RecreatesSucceededPod, simpleSetFn},
|
||||
{RecreatesFailedPodWithDeleteFailure, simpleSetFn},
|
||||
{RecreatesSucceededPodWithDeleteFailure, simpleSetFn},
|
||||
{CreatePodFailure, simpleSetFn},
|
||||
{UpdatePodFailure, simpleSetFn},
|
||||
{UpdateSetStatusFailure, simpleSetFn},
|
||||
{PodRecreateDeleteFailure, simpleSetFn},
|
||||
{NewRevisionDeletePodFailure, simpleSetFn},
|
||||
{RecreatesPVCForPendingPod, simpleSetFn},
|
||||
}
|
||||
@@ -398,9 +398,10 @@ func ReplacesPods(t *testing.T, set *apps.StatefulSet, invariants invariantFunc)
|
||||
}
|
||||
}
|
||||
|
||||
func recreatesPod(t *testing.T, set *apps.StatefulSet, invariants invariantFunc, phase v1.PodPhase) {
|
||||
func recreatesPod(t *testing.T, set *apps.StatefulSet, invariants invariantFunc, terminalPhase v1.PodPhase, testDeletePodFailure bool) {
|
||||
client := fake.NewSimpleClientset()
|
||||
om, _, ssc := setupController(client)
|
||||
expectedNumOfDeleteRequests := 0
|
||||
selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
@@ -415,34 +416,105 @@ func recreatesPod(t *testing.T, set *apps.StatefulSet, invariants invariantFunc,
|
||||
if err := invariants(set, om); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
pods, err = om.podsLister.Pods(set.Namespace).List(selector)
|
||||
if err != nil {
|
||||
if om.deletePodTracker.requests != expectedNumOfDeleteRequests {
|
||||
t.Errorf("Found unexpected number of delete calls, got %v, expected 1", om.deletePodTracker.requests)
|
||||
}
|
||||
if pods, err = om.podsLister.Pods(set.Namespace).List(selector); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
pods[0].Status.Phase = phase
|
||||
om.podsIndexer.Update(pods[0])
|
||||
|
||||
terminalPodOrdinal := -1
|
||||
for i, pod := range pods {
|
||||
// Set at least Pending phase to acknowledge the creation of pods
|
||||
newPhase := v1.PodPending
|
||||
if i == 0 {
|
||||
// Set terminal phase for the first pod
|
||||
newPhase = terminalPhase
|
||||
terminalPodOrdinal = getOrdinal(pod)
|
||||
}
|
||||
pod.Status.Phase = newPhase
|
||||
if err = om.podsIndexer.Update(pod); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}
|
||||
if pods, err = om.podsLister.Pods(set.Namespace).List(selector); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if testDeletePodFailure {
|
||||
// Expect pod deletion failure
|
||||
om.SetDeleteStatefulPodError(apierrors.NewInternalError(errors.New("API server failed")), 0)
|
||||
expectedNumOfDeleteRequests++
|
||||
if _, err = ssc.UpdateStatefulSet(context.TODO(), set, pods); !isOrHasInternalError(err) {
|
||||
t.Errorf("StatefulSetControl did not return InternalError, found %s", err)
|
||||
}
|
||||
if err := invariants(set, om); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if om.deletePodTracker.requests != expectedNumOfDeleteRequests {
|
||||
t.Errorf("Found unexpected number of delete calls, got %v, expected %v", om.deletePodTracker.requests, expectedNumOfDeleteRequests)
|
||||
}
|
||||
if pods, err = om.podsLister.Pods(set.Namespace).List(selector); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Expect pod deletion
|
||||
expectedNumOfDeleteRequests++
|
||||
if _, err := ssc.UpdateStatefulSet(context.TODO(), set, pods); err != nil {
|
||||
t.Errorf("Error updating StatefulSet %s", err)
|
||||
}
|
||||
if err := invariants(set, om); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
pods, err = om.podsLister.Pods(set.Namespace).List(selector)
|
||||
if err != nil {
|
||||
if om.deletePodTracker.requests != expectedNumOfDeleteRequests {
|
||||
t.Errorf("Found unexpected number of delete calls, got %v, expected %v", om.deletePodTracker.requests, expectedNumOfDeleteRequests)
|
||||
}
|
||||
if pods, err = om.podsLister.Pods(set.Namespace).List(selector); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if isCreated(pods[0]) {
|
||||
|
||||
// Expect no additional delete calls and expect pod creation
|
||||
if _, err := ssc.UpdateStatefulSet(context.TODO(), set, pods); err != nil {
|
||||
t.Errorf("Error updating StatefulSet %s", err)
|
||||
}
|
||||
if err := invariants(set, om); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if om.deletePodTracker.requests != expectedNumOfDeleteRequests {
|
||||
t.Errorf("Found unexpected number of delete calls, got %v, expected %v", om.deletePodTracker.requests, expectedNumOfDeleteRequests)
|
||||
}
|
||||
if pods, err = om.podsLister.Pods(set.Namespace).List(selector); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
recreatedPod := findPodByOrdinal(pods, terminalPodOrdinal)
|
||||
// new recreated pod should have empty phase
|
||||
if recreatedPod == nil || isCreated(recreatedPod) {
|
||||
t.Error("StatefulSet did not recreate failed Pod")
|
||||
}
|
||||
expectedNumberOfCreateRequests := 2
|
||||
if monotonic := !allowsBurst(set); !monotonic {
|
||||
expectedNumberOfCreateRequests = int(*set.Spec.Replicas + 1)
|
||||
}
|
||||
if om.createPodTracker.requests != expectedNumberOfCreateRequests {
|
||||
t.Errorf("Found unexpected number of create calls, got %v, expected %v", om.deletePodTracker.requests, expectedNumberOfCreateRequests)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func RecreatesFailedPod(t *testing.T, set *apps.StatefulSet, invariants invariantFunc) {
|
||||
recreatesPod(t, set, invariants, v1.PodFailed)
|
||||
recreatesPod(t, set, invariants, v1.PodFailed, false)
|
||||
}
|
||||
|
||||
func RecreatesSucceededPod(t *testing.T, set *apps.StatefulSet, invariants invariantFunc) {
|
||||
recreatesPod(t, set, invariants, v1.PodSucceeded)
|
||||
recreatesPod(t, set, invariants, v1.PodSucceeded, false)
|
||||
}
|
||||
|
||||
func RecreatesFailedPodWithDeleteFailure(t *testing.T, set *apps.StatefulSet, invariants invariantFunc) {
|
||||
recreatesPod(t, set, invariants, v1.PodFailed, true)
|
||||
}
|
||||
|
||||
func RecreatesSucceededPodWithDeleteFailure(t *testing.T, set *apps.StatefulSet, invariants invariantFunc) {
|
||||
recreatesPod(t, set, invariants, v1.PodSucceeded, true)
|
||||
}
|
||||
|
||||
func CreatePodFailure(t *testing.T, set *apps.StatefulSet, invariants invariantFunc) {
|
||||
@@ -450,8 +522,8 @@ func CreatePodFailure(t *testing.T, set *apps.StatefulSet, invariants invariantF
|
||||
om, _, ssc := setupController(client)
|
||||
om.SetCreateStatefulPodError(apierrors.NewInternalError(errors.New("API server failed")), 2)
|
||||
|
||||
if err := scaleUpStatefulSetControl(set, ssc, om, invariants); err != nil && isOrHasInternalError(err) {
|
||||
t.Errorf("StatefulSetControl did not return InternalError found %s", err)
|
||||
if err := scaleUpStatefulSetControl(set, ssc, om, invariants); !isOrHasInternalError(err) {
|
||||
t.Errorf("StatefulSetControl did not return InternalError, found %s", err)
|
||||
}
|
||||
// Update so set.Status is set for the next scaleUpStatefulSetControl call.
|
||||
var err error
|
||||
@@ -514,8 +586,8 @@ func UpdatePodFailure(t *testing.T, set *apps.StatefulSet, invariants invariantF
|
||||
om.podsIndexer.Update(pods[0])
|
||||
|
||||
// now it should fail
|
||||
if _, err := ssc.UpdateStatefulSet(context.TODO(), set, pods); err != nil && isOrHasInternalError(err) {
|
||||
t.Errorf("StatefulSetControl did not return InternalError found %s", err)
|
||||
if _, err := ssc.UpdateStatefulSet(context.TODO(), set, pods); !isOrHasInternalError(err) {
|
||||
t.Errorf("StatefulSetControl did not return InternalError, found %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -524,8 +596,8 @@ func UpdateSetStatusFailure(t *testing.T, set *apps.StatefulSet, invariants inva
|
||||
om, ssu, ssc := setupController(client)
|
||||
ssu.SetUpdateStatefulSetStatusError(apierrors.NewInternalError(errors.New("API server failed")), 2)
|
||||
|
||||
if err := scaleUpStatefulSetControl(set, ssc, om, invariants); err != nil && isOrHasInternalError(err) {
|
||||
t.Errorf("StatefulSetControl did not return InternalError found %s", err)
|
||||
if err := scaleUpStatefulSetControl(set, ssc, om, invariants); !isOrHasInternalError(err) {
|
||||
t.Errorf("StatefulSetControl did not return InternalError, found %s", err)
|
||||
}
|
||||
// Update so set.Status is set for the next scaleUpStatefulSetControl call.
|
||||
var err error
|
||||
@@ -551,52 +623,6 @@ func UpdateSetStatusFailure(t *testing.T, set *apps.StatefulSet, invariants inva
|
||||
}
|
||||
}
|
||||
|
||||
func PodRecreateDeleteFailure(t *testing.T, set *apps.StatefulSet, invariants invariantFunc) {
|
||||
client := fake.NewSimpleClientset(set)
|
||||
om, _, ssc := setupController(client)
|
||||
|
||||
selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
pods, err := om.podsLister.Pods(set.Namespace).List(selector)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if _, err := ssc.UpdateStatefulSet(context.TODO(), set, pods); err != nil {
|
||||
t.Errorf("Error updating StatefulSet %s", err)
|
||||
}
|
||||
if err := invariants(set, om); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
pods, err = om.podsLister.Pods(set.Namespace).List(selector)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
pods[0].Status.Phase = v1.PodFailed
|
||||
om.podsIndexer.Update(pods[0])
|
||||
om.SetDeleteStatefulPodError(apierrors.NewInternalError(errors.New("API server failed")), 0)
|
||||
if _, err := ssc.UpdateStatefulSet(context.TODO(), set, pods); err != nil && isOrHasInternalError(err) {
|
||||
t.Errorf("StatefulSet failed to %s", err)
|
||||
}
|
||||
if err := invariants(set, om); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if _, err := ssc.UpdateStatefulSet(context.TODO(), set, pods); err != nil {
|
||||
t.Errorf("Error updating StatefulSet %s", err)
|
||||
}
|
||||
if err := invariants(set, om); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
pods, err = om.podsLister.Pods(set.Namespace).List(selector)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if isCreated(pods[0]) {
|
||||
t.Error("StatefulSet did not recreate failed Pod")
|
||||
}
|
||||
}
|
||||
|
||||
func NewRevisionDeletePodFailure(t *testing.T, set *apps.StatefulSet, invariants invariantFunc) {
|
||||
client := fake.NewSimpleClientset(set)
|
||||
om, _, ssc := setupController(client)
|
||||
@@ -792,7 +818,7 @@ func TestStatefulSetControlScaleDownDeleteError(t *testing.T) {
|
||||
}
|
||||
*set.Spec.Replicas = 0
|
||||
om.SetDeleteStatefulPodError(apierrors.NewInternalError(errors.New("API server failed")), 2)
|
||||
if err := scaleDownStatefulSetControl(set, ssc, om, invariants); err != nil && isOrHasInternalError(err) {
|
||||
if err := scaleDownStatefulSetControl(set, ssc, om, invariants); !isOrHasInternalError(err) {
|
||||
t.Errorf("StatefulSetControl failed to throw error on delete %s", err)
|
||||
}
|
||||
set, err = om.setsLister.StatefulSets(set.Namespace).Get(set.Name)
|
||||
@@ -834,8 +860,7 @@ func TestStatefulSetControl_getSetRevisions(t *testing.T) {
|
||||
informerFactory := informers.NewSharedInformerFactory(client, controller.NoResyncPeriodFunc())
|
||||
spc := NewStatefulPodControlFromManager(newFakeObjectManager(informerFactory), &noopRecorder{})
|
||||
ssu := newFakeStatefulSetStatusUpdater(informerFactory.Apps().V1().StatefulSets())
|
||||
recorder := &noopRecorder{}
|
||||
ssc := defaultStatefulSetControl{spc, ssu, history.NewFakeHistory(informerFactory.Apps().V1().ControllerRevisions()), recorder}
|
||||
ssc := defaultStatefulSetControl{spc, ssu, history.NewFakeHistory(informerFactory.Apps().V1().ControllerRevisions())}
|
||||
|
||||
stop := make(chan struct{})
|
||||
defer close(stop)
|
||||
@@ -2501,6 +2526,11 @@ func (om *fakeObjectManager) GetPod(namespace, podName string) (*v1.Pod, error)
|
||||
}
|
||||
|
||||
func (om *fakeObjectManager) UpdatePod(pod *v1.Pod) error {
|
||||
defer om.updatePodTracker.inc()
|
||||
if om.updatePodTracker.errorReady() {
|
||||
defer om.updatePodTracker.reset()
|
||||
return om.updatePodTracker.getErr()
|
||||
}
|
||||
return om.podsIndexer.Update(pod)
|
||||
}
|
||||
|
||||
@@ -3356,6 +3386,16 @@ func newRevisionOrDie(set *apps.StatefulSet, revision int64) *apps.ControllerRev
|
||||
}
|
||||
|
||||
func isOrHasInternalError(err error) bool {
|
||||
agg, ok := err.(utilerrors.Aggregate)
|
||||
return !ok && !apierrors.IsInternalError(err) || ok && len(agg.Errors()) > 0 && !apierrors.IsInternalError(agg.Errors()[0])
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
var agg utilerrors.Aggregate
|
||||
if errors.As(err, &agg) {
|
||||
for _, e := range agg.Errors() {
|
||||
if apierrors.IsInternalError(e) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return apierrors.IsInternalError(err)
|
||||
}
|
||||
|
Reference in New Issue
Block a user