Scheduler uses TTLStore for assumed pods

2015-03-30 11:17:16 -07:00
parent 43949b41d4
commit a7864aa230
10 changed files with 772 additions and 145 deletions
--- a/plugin/pkg/scheduler/modeler.go
+++ b/plugin/pkg/scheduler/modeler.go
@@ -20,6 +20,7 @@ import (
 	"fmt"
 	"strings"
 	"sync"
+	"time"

 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
@@ -95,7 +96,9 @@ func NewSimpleModeler(queuedPods, scheduledPods ExtendedPodLister) *SimpleModele
 	return &SimpleModeler{
 		queuedPods:    queuedPods,
 		scheduledPods: scheduledPods,
-		assumedPods:   &cache.StoreToPodLister{cache.NewStore(cache.MetaNamespaceKeyFunc)},
+		assumedPods: &cache.StoreToPodLister{
+			cache.NewTTLStore(cache.MetaNamespaceKeyFunc, 30*time.Second),
+		},
 	}
 }

@@ -124,10 +127,6 @@ func (s *SimpleModeler) listPods(selector labels.Selector) (pods []api.Pod, err
 	// Since the assumed list will be short, just check every one.
 	// Goal here is to stop making assumptions about a pod once it shows
 	// up in one of these other lists.
-	// TODO: there's a possibility that a pod could get deleted at the
-	//       exact wrong time and linger in assumedPods forever. So we
-	//       need go through that periodically and check for deleted
-	//       pods.
 	for _, pod := range assumed {
 		qExist, err := s.queuedPods.Exists(&pod)
 		if err != nil {
@@ -151,7 +150,7 @@ func (s *SimpleModeler) listPods(selector labels.Selector) (pods []api.Pod, err
 	if err != nil {
 		return nil, err
 	}
-	// re-get in case we deleted any.
+	// Re-list in case we deleted any entries; listing also purges the ttl cache.
 	assumed, err = s.assumedPods.List(selector)
 	if err != nil {
 		return nil, err
--- a/plugin/pkg/scheduler/scheduler_test.go
+++ b/plugin/pkg/scheduler/scheduler_test.go
@@ -18,11 +18,14 @@ package scheduler

 import (
 	"errors"
+	"math/rand"
 	"reflect"
 	"testing"
+	"time"

 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/scheduler"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
@@ -43,6 +46,14 @@ func podWithID(id, desiredHost string) *api.Pod {
 	}
 }

+func podWithPort(id, desiredHost string, port int) *api.Pod {
+	pod := podWithID(id, desiredHost)
+	pod.Spec.Containers = []api.Container{
+		{Name: "ctr", Ports: []api.ContainerPort{{HostPort: port}}},
+	}
+	return pod
+}
+
 type mockScheduler struct {
 	machine string
 	err     error
@@ -144,3 +155,142 @@ func TestScheduler(t *testing.T) {
 		events.Stop()
 	}
 }
+
+func TestSchedulerForgetAssumedPodAfterDelete(t *testing.T) {
+	eventBroadcaster := record.NewBroadcaster()
+	defer eventBroadcaster.StartLogging(t.Logf).Stop()
+
+	// Set up the modeler so we control the contents of all 3 stores: assumed,
+	// scheduled and queued.
+	scheduledPodStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
+	scheduledPodLister := &cache.StoreToPodLister{scheduledPodStore}
+
+	queuedPodStore := cache.NewFIFO(cache.MetaNamespaceKeyFunc)
+	queuedPodLister := &cache.StoreToPodLister{queuedPodStore}
+
+	modeler := NewSimpleModeler(queuedPodLister, scheduledPodLister)
+
+	// Create a fake clock used to timestamp entries and calculate ttl. Nothing
+	// will expire until the clock is advanced past the ttl, at which point all
+	// entries inserted at fakeTime will expire.
+	ttl := 30 * time.Second
+	fakeTime := time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)
+	fakeClock := &util.FakeClock{fakeTime}
+	ttlPolicy := &cache.TTLPolicy{ttl, fakeClock}
+	assumedPodsStore := cache.NewFakeExpirationStore(
+		cache.MetaNamespaceKeyFunc, nil, ttlPolicy, fakeClock)
+	modeler.assumedPods = &cache.StoreToPodLister{assumedPodsStore}
+
+	// Port is the easiest way to cause a fit predicate failure
+	podPort := 8080
+	firstPod := podWithPort("foo", "", podPort)
+
+	// Create the scheduler config
+	algo := scheduler.NewGenericScheduler(
+		map[string]scheduler.FitPredicate{"PodFitsPorts": scheduler.PodFitsPorts},
+		[]scheduler.PriorityConfig{},
+		modeler.PodLister(),
+		rand.New(rand.NewSource(time.Now().UnixNano())))
+
+	var gotBinding *api.Binding
+	c := &Config{
+		Modeler: modeler,
+		MinionLister: scheduler.FakeMinionLister(
+			api.NodeList{Items: []api.Node{{ObjectMeta: api.ObjectMeta{Name: "machine1"}}}},
+		),
+		Algorithm: algo,
+		Binder: fakeBinder{func(b *api.Binding) error {
+			scheduledPodStore.Add(podWithPort(b.Name, b.Target.Name, podPort))
+			gotBinding = b
+			return nil
+		}},
+		NextPod: func() *api.Pod {
+			return queuedPodStore.Pop().(*api.Pod)
+		},
+		Error: func(p *api.Pod, err error) {
+			t.Errorf("Unexpected error when scheduling pod %+v: %v", p, err)
+		},
+		Recorder: eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"}),
+	}
+
+	// First scheduling pass should schedule the pod
+	s := New(c)
+	called := make(chan struct{})
+	events := eventBroadcaster.StartEventWatcher(func(e *api.Event) {
+		if e, a := "scheduled", e.Reason; e != a {
+			t.Errorf("expected %v, got %v", e, a)
+		}
+		close(called)
+	})
+
+	queuedPodStore.Add(firstPod)
+	// queuedPodStore: [foo:8080]
+	// scheduledPodStore: []
+	// assumedPods: []
+
+	s.scheduleOne()
+	// queuedPodStore: []
+	// scheduledPodStore: [foo:8080]
+	// assumedPods: [foo:8080]
+
+	pod, exists, _ := scheduledPodStore.GetByKey("foo")
+	if !exists {
+		t.Errorf("Expected scheduled pod store to contain pod")
+	}
+	pod, exists, _ = queuedPodStore.GetByKey("foo")
+	if exists {
+		t.Errorf("Did not expect a queued pod, found %+v", pod)
+	}
+	pod, exists, _ = assumedPodsStore.GetByKey("foo")
+	if !exists {
+		t.Errorf("Assumed pod store should contain stale pod")
+	}
+
+	expectBind := &api.Binding{
+		ObjectMeta: api.ObjectMeta{Name: "foo"},
+		Target:     api.ObjectReference{Kind: "Node", Name: "machine1"},
+	}
+	if ex, ac := expectBind, gotBinding; !reflect.DeepEqual(ex, ac) {
+		t.Errorf("Expected exact match on binding: %s", util.ObjectDiff(ex, ac))
+	}
+
+	<-called
+	events.Stop()
+
+	scheduledPodStore.Delete(pod)
+	_, exists, _ = assumedPodsStore.Get(pod)
+	if !exists {
+		t.Errorf("Expected pod %#v in assumed pod store", pod)
+	}
+
+	secondPod := podWithPort("bar", "", podPort)
+	queuedPodStore.Add(secondPod)
+	// queuedPodStore: [bar:8080]
+	// scheduledPodStore: []
+	// assumedPods: [foo:8080]
+
+	// Second scheduling pass will fail to schedule if the store hasn't expired
+	// the deleted pod. This would normally happen with a timeout; here we
+	// advance the fake clock past the ttl instead of waiting.
+	fakeClock.Time = fakeClock.Time.Add(ttl + 1)
+
+	called = make(chan struct{})
+	events = eventBroadcaster.StartEventWatcher(func(e *api.Event) {
+		if e, a := "scheduled", e.Reason; e != a {
+			t.Errorf("expected %v, got %v", e, a)
+		}
+		close(called)
+	})
+
+	s.scheduleOne()
+
+	expectBind = &api.Binding{
+		ObjectMeta: api.ObjectMeta{Name: "bar"},
+		Target:     api.ObjectReference{Kind: "Node", Name: "machine1"},
+	}
+	if ex, ac := expectBind, gotBinding; !reflect.DeepEqual(ex, ac) {
+		t.Errorf("Expected exact match on binding: %s", util.ObjectDiff(ex, ac))
+	}
+	<-called
+	events.Stop()
+}