Files
kubernetes/contrib/mesos/pkg/scheduler/operations/schedulerloop.go
2015-11-12 01:33:50 +01:00

147 lines
4.9 KiB
Go

/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package operations
import (
"net/http"
"time"
log "github.com/golang/glog"
"k8s.io/kubernetes/contrib/mesos/pkg/backoff"
"k8s.io/kubernetes/contrib/mesos/pkg/queue"
"k8s.io/kubernetes/contrib/mesos/pkg/runtime"
"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/config"
"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/podtask"
"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/queuer"
types "k8s.io/kubernetes/contrib/mesos/pkg/scheduler/types"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/client/cache"
"k8s.io/kubernetes/pkg/client/record"
client "k8s.io/kubernetes/pkg/client/unversioned"
)
const (
recoveryDelay = 100 * time.Millisecond // delay after scheduler plugin crashes, before we resume scheduling
FailedScheduling = "FailedScheduling"
Scheduled = "Scheduled"
)
type SchedulerLoopInterface interface {
Run(<-chan struct{})
}
type SchedulerLoop struct {
algorithm *SchedulerAlgorithm
binder *Binder
nextPod func() *api.Pod
error func(*api.Pod, error)
recorder record.EventRecorder
client *client.Client
started chan<- struct{} // startup latch
}
func NewScheduler(c *config.Config, fw types.Framework, client *client.Client, recorder record.EventRecorder,
terminate <-chan struct{}, mux *http.ServeMux, podsWatcher *cache.ListWatch) (SchedulerLoopInterface, *PodReconciler) {
// Watch and queue pods that need scheduling.
updates := make(chan queue.Entry, c.UpdatesBacklog)
podUpdates := &podStoreAdapter{queue.NewHistorical(updates)}
reflector := cache.NewReflector(podsWatcher, &api.Pod{}, podUpdates, 0)
// lock that guards critial sections that involve transferring pods from
// the store (cache) to the scheduling queue; its purpose is to maintain
// an ordering (vs interleaving) of operations that's easier to reason about.
q := queuer.New(podUpdates)
podDeleter := NewDeleter(fw, q)
podReconciler := NewPodReconciler(fw, client, q, podDeleter)
startLatch := make(chan struct{})
eventBroadcaster := record.NewBroadcaster()
runtime.On(startLatch, func() {
eventBroadcaster.StartRecordingToSink(client.Events(""))
reflector.Run() // TODO(jdef) should listen for termination
podDeleter.Run(updates, terminate)
q.Run(terminate)
q.InstallDebugHandlers(mux)
podtask.InstallDebugHandlers(fw.Tasks(), mux)
})
return NewSchedulerLoop(c, fw, client, recorder, podUpdates, q, startLatch), podReconciler
}
func NewSchedulerLoop(c *config.Config, fw types.Framework, client *client.Client,
recorder record.EventRecorder, podUpdates queue.FIFO, q *queuer.Queuer,
started chan<- struct{}) *SchedulerLoop {
bo := backoff.New(c.InitialPodBackoff.Duration, c.MaxPodBackoff.Duration)
return &SchedulerLoop{
algorithm: NewSchedulerAlgorithm(fw, podUpdates),
binder: NewBinder(fw),
nextPod: q.Yield,
error: NewErrorHandler(fw, bo, q).Error,
recorder: recorder,
client: client,
started: started,
}
}
func (s *SchedulerLoop) Run(done <-chan struct{}) {
defer close(s.started)
go runtime.Until(s.scheduleOne, recoveryDelay, done)
}
// hacked from GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/scheduler.go,
// with the Modeler stuff removed since we don't use it because we have mesos.
func (s *SchedulerLoop) scheduleOne() {
pod := s.nextPod()
// pods which are pre-scheduled (i.e. NodeName is set) are deleted by the kubelet
// in upstream. Not so in Mesos because the kubelet hasn't see that pod yet. Hence,
// the scheduler has to take care of this:
if pod.Spec.NodeName != "" && pod.DeletionTimestamp != nil {
log.V(3).Infof("deleting pre-scheduled, not yet running pod: %s/%s", pod.Namespace, pod.Name)
s.client.Pods(pod.Namespace).Delete(pod.Name, api.NewDeleteOptions(0))
return
}
log.V(3).Infof("Attempting to schedule: %+v", pod)
dest, err := s.algorithm.Schedule(pod)
if err != nil {
log.V(1).Infof("Failed to schedule: %+v", pod)
s.recorder.Eventf(pod, FailedScheduling, "Error scheduling: %v", err)
s.error(pod, err)
return
}
b := &api.Binding{
ObjectMeta: api.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name},
Target: api.ObjectReference{
Kind: "Node",
Name: dest,
},
}
if err := s.binder.Bind(b); err != nil {
log.V(1).Infof("Failed to bind pod: %+v", err)
s.recorder.Eventf(pod, FailedScheduling, "Binding rejected: %v", err)
s.error(pod, err)
return
}
s.recorder.Eventf(pod, Scheduled, "Successfully assigned %v to %v", pod.Name, dest)
}