Merge pull request #76301 from xiuqiaoli/master
Remove FIFO scheduling queue and old pod backoff logic
This commit is contained in:
		| @@ -21,7 +21,6 @@ go_library( | ||||
|         "//pkg/scheduler/internal/queue:go_default_library", | ||||
|         "//pkg/scheduler/plugins:go_default_library", | ||||
|         "//pkg/scheduler/plugins/v1alpha1:go_default_library", | ||||
|         "//pkg/scheduler/util:go_default_library", | ||||
|         "//pkg/scheduler/volumebinder:go_default_library", | ||||
|         "//staging/src/k8s.io/api/core/v1:go_default_library", | ||||
|         "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library", | ||||
| @@ -68,6 +67,7 @@ go_test( | ||||
|         "//staging/src/k8s.io/api/core/v1:go_default_library", | ||||
|         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", | ||||
|         "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library", | ||||
|         "//staging/src/k8s.io/apimachinery/pkg/util/clock:go_default_library", | ||||
|         "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library", | ||||
|         "//staging/src/k8s.io/client-go/informers:go_default_library", | ||||
|         "//staging/src/k8s.io/client-go/kubernetes:go_default_library", | ||||
|   | ||||
| @@ -56,7 +56,6 @@ import ( | ||||
| 	internalqueue "k8s.io/kubernetes/pkg/scheduler/internal/queue" | ||||
| 	"k8s.io/kubernetes/pkg/scheduler/plugins" | ||||
| 	pluginsv1alpha1 "k8s.io/kubernetes/pkg/scheduler/plugins/v1alpha1" | ||||
| 	"k8s.io/kubernetes/pkg/scheduler/util" | ||||
| 	"k8s.io/kubernetes/pkg/scheduler/volumebinder" | ||||
| ) | ||||
|  | ||||
| @@ -456,7 +455,6 @@ func (c *configFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String, | ||||
| 		c.percentageOfNodesToScore, | ||||
| 	) | ||||
|  | ||||
| 	podBackoff := internalqueue.NewPodBackoffMap(1*time.Second, 60*time.Second) | ||||
| 	return &Config{ | ||||
| 		SchedulerCache: c.schedulerCache, | ||||
| 		// The scheduler only needs to consider schedulable nodes. | ||||
| @@ -470,7 +468,7 @@ func (c *configFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String, | ||||
| 			return cache.WaitForCacheSync(c.StopEverything, c.scheduledPodsHasSynced) | ||||
| 		}, | ||||
| 		NextPod:         internalqueue.MakeNextPodFunc(c.podQueue), | ||||
| 		Error:           MakeDefaultErrorFunc(c.client, podBackoff, c.podQueue, c.schedulerCache, c.StopEverything), | ||||
| 		Error:           MakeDefaultErrorFunc(c.client, c.podQueue, c.schedulerCache, c.StopEverything), | ||||
| 		StopEverything:  c.StopEverything, | ||||
| 		VolumeBinder:    c.volumeBinder, | ||||
| 		SchedulingQueue: c.podQueue, | ||||
| @@ -639,7 +637,7 @@ func NewPodInformer(client clientset.Interface, resyncPeriod time.Duration) core | ||||
| } | ||||
|  | ||||
| // MakeDefaultErrorFunc construct a function to handle pod scheduler error | ||||
| func MakeDefaultErrorFunc(client clientset.Interface, backoff *internalqueue.PodBackoffMap, podQueue internalqueue.SchedulingQueue, schedulerCache internalcache.Cache, stopEverything <-chan struct{}) func(pod *v1.Pod, err error) { | ||||
| func MakeDefaultErrorFunc(client clientset.Interface, podQueue internalqueue.SchedulingQueue, schedulerCache internalcache.Cache, stopEverything <-chan struct{}) func(pod *v1.Pod, err error) { | ||||
| 	return func(pod *v1.Pod, err error) { | ||||
| 		if err == core.ErrNoNodesAvailable { | ||||
| 			klog.V(4).Infof("Unable to schedule %v/%v: no nodes are registered to the cluster; waiting", pod.Namespace, pod.Name) | ||||
| @@ -662,7 +660,6 @@ func MakeDefaultErrorFunc(client clientset.Interface, backoff *internalqueue.Pod | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		backoff.CleanupPodsCompletesBackingoff() | ||||
| 		podSchedulingCycle := podQueue.SchedulingCycle() | ||||
| 		// Retry asynchronously. | ||||
| 		// Note that this is extremely rudimentary and we need a more real error handling path. | ||||
| @@ -673,16 +670,9 @@ func MakeDefaultErrorFunc(client clientset.Interface, backoff *internalqueue.Pod | ||||
| 				Name:      pod.Name, | ||||
| 			} | ||||
|  | ||||
| 			// When pod priority is enabled, we would like to place an unschedulable | ||||
| 			// pod in the unschedulable queue. This ensures that if the pod is nominated | ||||
| 			// to run on a node, scheduler takes the pod into account when running | ||||
| 			// predicates for the node. | ||||
| 			if !util.PodPriorityEnabled() { | ||||
| 				if !backoff.TryBackoffAndWait(podID, stopEverything) { | ||||
| 					klog.Warningf("Request for pod %v already in flight, abandoning", podID) | ||||
| 					return | ||||
| 				} | ||||
| 			} | ||||
| 			// An unschedulable pod will be placed in the unschedulable queue. | ||||
| 			// This ensures that if the pod is nominated to run on a node, | ||||
| 			// scheduler takes the pod into account when running predicates for the node. | ||||
| 			// Get the pod again; it may have changed/been scheduled already. | ||||
| 			getBackoff := initialGetBackoff | ||||
| 			for { | ||||
|   | ||||
| @@ -26,6 +26,7 @@ import ( | ||||
| 	"k8s.io/api/core/v1" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/runtime" | ||||
| 	"k8s.io/apimachinery/pkg/util/clock" | ||||
| 	"k8s.io/apimachinery/pkg/util/sets" | ||||
| 	"k8s.io/client-go/informers" | ||||
| 	clientset "k8s.io/client-go/kubernetes" | ||||
| @@ -252,48 +253,125 @@ func TestDefaultErrorFunc(t *testing.T) { | ||||
| 	client := fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testPod}}) | ||||
| 	stopCh := make(chan struct{}) | ||||
| 	defer close(stopCh) | ||||
| 	queue := &internalqueue.FIFO{FIFO: cache.NewFIFO(cache.MetaNamespaceKeyFunc)} | ||||
| 	schedulerCache := internalcache.New(30*time.Second, stopCh) | ||||
| 	podBackoff := internalqueue.NewPodBackoffMap(1*time.Second, 60*time.Second) | ||||
| 	errFunc := MakeDefaultErrorFunc(client, podBackoff, queue, schedulerCache, stopCh) | ||||
|  | ||||
| 	timestamp := time.Now() | ||||
| 	queue := internalqueue.NewPriorityQueueWithClock(nil, clock.NewFakeClock(timestamp)) | ||||
| 	schedulerCache := internalcache.New(30*time.Second, stopCh) | ||||
| 	errFunc := MakeDefaultErrorFunc(client, queue, schedulerCache, stopCh) | ||||
|  | ||||
| 	// Trigger error handling again to put the pod in unschedulable queue | ||||
| 	errFunc(testPod, nil) | ||||
|  | ||||
| 	for { | ||||
| 		// This is a terrible way to do this but I plan on replacing this | ||||
| 		// whole error handling system in the future. The test will time | ||||
| 		// out if something doesn't work. | ||||
| 		time.Sleep(10 * time.Millisecond) | ||||
| 		got, exists, _ := queue.Get(testPod) | ||||
| 		if !exists { | ||||
| 	// Try up to a minute to retrieve the error pod from priority queue | ||||
| 	foundPodFlag := false | ||||
| 	maxIterations := 10 * 60 | ||||
| 	for i := 0; i < maxIterations; i++ { | ||||
| 		time.Sleep(100 * time.Millisecond) | ||||
| 		got := getPodfromPriorityQueue(queue, testPod) | ||||
| 		if got == nil { | ||||
| 			continue | ||||
| 		} | ||||
| 		requestReceived := false | ||||
| 		actions := client.Actions() | ||||
| 		for _, a := range actions { | ||||
| 			if a.GetVerb() == "get" { | ||||
| 				getAction, ok := a.(clienttesting.GetAction) | ||||
| 				if !ok { | ||||
| 					t.Errorf("Can't cast action object to GetAction interface") | ||||
| 					break | ||||
| 				} | ||||
| 				name := getAction.GetName() | ||||
| 				ns := a.GetNamespace() | ||||
| 				if name != "foo" || ns != "bar" { | ||||
| 					t.Errorf("Expected name %s namespace %s, got %s %s", | ||||
| 						"foo", "bar", name, ns) | ||||
| 				} | ||||
| 				requestReceived = true | ||||
| 			} | ||||
| 		} | ||||
| 		if !requestReceived { | ||||
| 			t.Errorf("Get pod request not received") | ||||
| 		} | ||||
|  | ||||
| 		testClientGetPodRequest(client, t, testPod.Namespace, testPod.Name) | ||||
|  | ||||
| 		if e, a := testPod, got; !reflect.DeepEqual(e, a) { | ||||
| 			t.Errorf("Expected %v, got %v", e, a) | ||||
| 		} | ||||
|  | ||||
| 		foundPodFlag = true | ||||
| 		break | ||||
| 	} | ||||
|  | ||||
| 	if !foundPodFlag { | ||||
| 		t.Errorf("Failed to get pod from the unschedulable queue after waiting for a minute: %v", testPod) | ||||
| 	} | ||||
|  | ||||
| 	// Remove the pod from priority queue to test putting error | ||||
| 	// pod in backoff queue. | ||||
| 	queue.Delete(testPod) | ||||
|  | ||||
| 	// Trigger a move request | ||||
| 	queue.MoveAllToActiveQueue() | ||||
|  | ||||
| 	// Trigger error handling again to put the pod in backoff queue | ||||
| 	errFunc(testPod, nil) | ||||
|  | ||||
| 	foundPodFlag = false | ||||
| 	for i := 0; i < maxIterations; i++ { | ||||
| 		time.Sleep(100 * time.Millisecond) | ||||
| 		// The pod should be found from backoff queue at this time | ||||
| 		got := getPodfromPriorityQueue(queue, testPod) | ||||
| 		if got == nil { | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		testClientGetPodRequest(client, t, testPod.Namespace, testPod.Name) | ||||
|  | ||||
| 		if e, a := testPod, got; !reflect.DeepEqual(e, a) { | ||||
| 			t.Errorf("Expected %v, got %v", e, a) | ||||
| 		} | ||||
|  | ||||
| 		foundPodFlag = true | ||||
| 		break | ||||
| 	} | ||||
|  | ||||
| 	if !foundPodFlag { | ||||
| 		t.Errorf("Failed to get pod from the backoff queue after waiting for a minute: %v", testPod) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // getPodfromPriorityQueue is the function used in the TestDefaultErrorFunc test to get | ||||
| // the specific pod from the given priority queue. It returns the found pod in the priority queue. | ||||
| func getPodfromPriorityQueue(queue *internalqueue.PriorityQueue, pod *v1.Pod) *v1.Pod { | ||||
| 	podList := queue.PendingPods() | ||||
| 	if len(podList) == 0 { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	queryPodKey, err := cache.MetaNamespaceKeyFunc(pod) | ||||
| 	if err != nil { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	for _, foundPod := range podList { | ||||
| 		foundPodKey, err := cache.MetaNamespaceKeyFunc(foundPod) | ||||
| 		if err != nil { | ||||
| 			return nil | ||||
| 		} | ||||
|  | ||||
| 		if foundPodKey == queryPodKey { | ||||
| 			return foundPod | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // testClientGetPodRequest function provides a routine used by TestDefaultErrorFunc test. | ||||
| // It tests whether the fake client can receive request and correctly "get" the namespace | ||||
| // and name of the error pod. | ||||
| func testClientGetPodRequest(client *fake.Clientset, t *testing.T, podNs string, podName string) { | ||||
| 	requestReceived := false | ||||
| 	actions := client.Actions() | ||||
| 	for _, a := range actions { | ||||
| 		if a.GetVerb() == "get" { | ||||
| 			getAction, ok := a.(clienttesting.GetAction) | ||||
| 			if !ok { | ||||
| 				t.Errorf("Can't cast action object to GetAction interface") | ||||
| 				break | ||||
| 			} | ||||
| 			name := getAction.GetName() | ||||
| 			ns := a.GetNamespace() | ||||
| 			if name != podName || ns != podNs { | ||||
| 				t.Errorf("Expected name %s namespace %s, got %s %s", | ||||
| 					podName, podNs, name, ns) | ||||
| 			} | ||||
| 			requestReceived = true | ||||
| 		} | ||||
| 	} | ||||
| 	if !requestReceived { | ||||
| 		t.Errorf("Get pod request not received") | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestBind(t *testing.T) { | ||||
|   | ||||
| @@ -60,20 +60,6 @@ func (pbm *PodBackoffMap) GetBackoffTime(nsPod ktypes.NamespacedName) (time.Time | ||||
| 	return backoffTime, true | ||||
| } | ||||
|  | ||||
| // TryBackoffAndWait tries to perform backoff for a non-preempting pod. | ||||
| // it is invoked from factory.go if util.PodPriorityEnabled() returns false. | ||||
| func (pbm *PodBackoffMap) TryBackoffAndWait(nsPod ktypes.NamespacedName, stop <-chan struct{}) bool { | ||||
| 	pbm.lock.RLock() | ||||
| 	defer pbm.lock.RUnlock() | ||||
| 	backoffDuration := pbm.calculateBackoffDuration(nsPod) | ||||
| 	select { | ||||
| 	case <-time.After(backoffDuration): | ||||
| 		return true | ||||
| 	case <-stop: | ||||
| 		return false | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // calculateBackoffDuration is a helper function for calculating the backoffDuration | ||||
| // based on the number of attempts the pod has made. | ||||
| func (pbm *PodBackoffMap) calculateBackoffDuration(nsPod ktypes.NamespacedName) time.Duration { | ||||
|   | ||||
| @@ -92,18 +92,3 @@ func TestClearPodBackoff(t *testing.T) { | ||||
| 		t.Errorf("Expected backoff of 1s for pod %s, got %s", podID, duration.String()) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestTryBackoffAndWait(t *testing.T) { | ||||
| 	bpm := NewPodBackoffMap(1*time.Second, 60*time.Second) | ||||
|  | ||||
| 	stopCh := make(chan struct{}) | ||||
| 	podID := ktypes.NamespacedName{Namespace: "ns", Name: "pod"} | ||||
| 	if !bpm.TryBackoffAndWait(podID, stopCh) { | ||||
| 		t.Error("Expected TryBackoffAndWait success for new pod, got failure.") | ||||
| 	} | ||||
|  | ||||
| 	close(stopCh) | ||||
| 	if bpm.TryBackoffAndWait(podID, stopCh) { | ||||
| 		t.Error("Expected TryBackoffAndWait failure with closed stopCh, got success.") | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -15,14 +15,11 @@ limitations under the License. | ||||
| */ | ||||
|  | ||||
| // This file contains structures that implement scheduling queue types. | ||||
| // Scheduling queues hold pods waiting to be scheduled. This file has two types | ||||
| // of scheduling queue: 1) a FIFO, which is mostly the same as cache.FIFO, 2) a | ||||
| // Scheduling queues hold pods waiting to be scheduled. This file implements a | ||||
| // priority queue which has two sub queues. One sub-queue holds pods that are | ||||
| // being considered for scheduling. This is called activeQ. Another queue holds | ||||
| // pods that are already tried and are determined to be unschedulable. The latter | ||||
| // is called unschedulableQ. | ||||
| // FIFO is here for flag-gating purposes and allows us to use the traditional | ||||
| // scheduling queue when util.PodPriorityEnabled() returns false. | ||||
|  | ||||
| package queue | ||||
|  | ||||
| @@ -89,113 +86,9 @@ type SchedulingQueue interface { | ||||
| 	NumUnschedulablePods() int | ||||
| } | ||||
|  | ||||
| // NewSchedulingQueue initializes a new scheduling queue. If pod priority is | ||||
| // enabled a priority queue is returned. If it is disabled, a FIFO is returned. | ||||
| // NewSchedulingQueue initializes a priority queue as a new scheduling queue. | ||||
| func NewSchedulingQueue(stop <-chan struct{}) SchedulingQueue { | ||||
| 	if util.PodPriorityEnabled() { | ||||
| 		return NewPriorityQueue(stop) | ||||
| 	} | ||||
| 	return NewFIFO() | ||||
| } | ||||
|  | ||||
| // FIFO is basically a simple wrapper around cache.FIFO to make it compatible | ||||
| // with the SchedulingQueue interface. | ||||
| type FIFO struct { | ||||
| 	*cache.FIFO | ||||
| } | ||||
|  | ||||
| var _ = SchedulingQueue(&FIFO{}) // Making sure that FIFO implements SchedulingQueue. | ||||
|  | ||||
| // Add adds a pod to the FIFO. | ||||
| func (f *FIFO) Add(pod *v1.Pod) error { | ||||
| 	return f.FIFO.Add(pod) | ||||
| } | ||||
|  | ||||
| // AddIfNotPresent adds a pod to the FIFO if it is absent in the FIFO. | ||||
| func (f *FIFO) AddIfNotPresent(pod *v1.Pod) error { | ||||
| 	return f.FIFO.AddIfNotPresent(pod) | ||||
| } | ||||
|  | ||||
| // AddUnschedulableIfNotPresent adds an unschedulable pod back to the queue. In | ||||
| // FIFO it is added to the end of the queue. | ||||
| func (f *FIFO) AddUnschedulableIfNotPresent(pod *v1.Pod, podSchedulingCycle int64) error { | ||||
| 	return f.FIFO.AddIfNotPresent(pod) | ||||
| } | ||||
|  | ||||
| // SchedulingCycle implements SchedulingQueue.SchedulingCycle interface. | ||||
| func (f *FIFO) SchedulingCycle() int64 { | ||||
| 	return 0 | ||||
| } | ||||
|  | ||||
| // Update updates a pod in the FIFO. | ||||
| func (f *FIFO) Update(oldPod, newPod *v1.Pod) error { | ||||
| 	return f.FIFO.Update(newPod) | ||||
| } | ||||
|  | ||||
| // Delete deletes a pod in the FIFO. | ||||
| func (f *FIFO) Delete(pod *v1.Pod) error { | ||||
| 	return f.FIFO.Delete(pod) | ||||
| } | ||||
|  | ||||
| // Pop removes the head of FIFO and returns it. | ||||
| // This is just a copy/paste of cache.Pop(queue Queue) from fifo.go that scheduler | ||||
| // has always been using. There is a comment in that file saying that this method | ||||
| // shouldn't be used in production code, but scheduler has always been using it. | ||||
| // This function does minimal error checking. | ||||
| func (f *FIFO) Pop() (*v1.Pod, error) { | ||||
| 	result, err := f.FIFO.Pop(func(obj interface{}) error { return nil }) | ||||
| 	if err == cache.FIFOClosedError { | ||||
| 		return nil, fmt.Errorf(queueClosed) | ||||
| 	} | ||||
| 	return result.(*v1.Pod), err | ||||
| } | ||||
|  | ||||
| // PendingPods returns all the pods in the queue. | ||||
| func (f *FIFO) PendingPods() []*v1.Pod { | ||||
| 	result := []*v1.Pod{} | ||||
| 	for _, pod := range f.FIFO.List() { | ||||
| 		result = append(result, pod.(*v1.Pod)) | ||||
| 	} | ||||
| 	return result | ||||
| } | ||||
|  | ||||
| // FIFO does not need to react to events, as all pods are always in the active | ||||
| // scheduling queue anyway. | ||||
|  | ||||
| // AssignedPodAdded does nothing here. | ||||
| func (f *FIFO) AssignedPodAdded(pod *v1.Pod) {} | ||||
|  | ||||
| // AssignedPodUpdated does nothing here. | ||||
| func (f *FIFO) AssignedPodUpdated(pod *v1.Pod) {} | ||||
|  | ||||
| // MoveAllToActiveQueue does nothing in FIFO as all pods are always in the active queue. | ||||
| func (f *FIFO) MoveAllToActiveQueue() {} | ||||
|  | ||||
| // NominatedPodsForNode returns pods that are nominated to run on the given node, | ||||
| // but FIFO does not support it. | ||||
| func (f *FIFO) NominatedPodsForNode(nodeName string) []*v1.Pod { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Close closes the FIFO queue. | ||||
| func (f *FIFO) Close() { | ||||
| 	f.FIFO.Close() | ||||
| } | ||||
|  | ||||
| // DeleteNominatedPodIfExists does nothing in FIFO. | ||||
| func (f *FIFO) DeleteNominatedPodIfExists(pod *v1.Pod) {} | ||||
|  | ||||
| // UpdateNominatedPodForNode does nothing in FIFO. | ||||
| func (f *FIFO) UpdateNominatedPodForNode(pod *v1.Pod, nodeName string) {} | ||||
|  | ||||
| // NumUnschedulablePods returns the number of unschedulable pods exist in the SchedulingQueue. | ||||
| func (f *FIFO) NumUnschedulablePods() int { | ||||
| 	return 0 | ||||
| } | ||||
|  | ||||
| // NewFIFO creates a FIFO object. | ||||
| func NewFIFO() *FIFO { | ||||
| 	return &FIFO{FIFO: cache.NewFIFO(cache.MetaNamespaceKeyFunc)} | ||||
| 	return NewPriorityQueue(stop) | ||||
| } | ||||
|  | ||||
| // NominatedNodeName returns nominated node name of a Pod. | ||||
| @@ -203,7 +96,7 @@ func NominatedNodeName(pod *v1.Pod) string { | ||||
| 	return pod.Status.NominatedNodeName | ||||
| } | ||||
|  | ||||
| // PriorityQueue implements a scheduling queue. It is an alternative to FIFO. | ||||
| // PriorityQueue implements a scheduling queue. | ||||
| // The head of PriorityQueue is the highest priority pending pod. This structure | ||||
| // has three sub queues. One sub-queue holds pods that are being considered for | ||||
| // scheduling. This is called activeQ and is a Heap. Another queue holds | ||||
|   | ||||
| @@ -688,11 +688,6 @@ func TestSchedulingQueue_Close(t *testing.T) { | ||||
| 		q           SchedulingQueue | ||||
| 		expectedErr error | ||||
| 	}{ | ||||
| 		{ | ||||
| 			name:        "FIFO close", | ||||
| 			q:           NewFIFO(), | ||||
| 			expectedErr: fmt.Errorf(queueClosed), | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:        "PriorityQueue close", | ||||
| 			q:           NewPriorityQueue(nil), | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Prow Robot
					Kubernetes Prow Robot