Kubernetes Mesos integration

This commit includes the fundamental components of the Kubernetes Mesos
integration:

* Kubernetes-Mesos scheduler
* Kubernetes-Mesos executor
* Supporting libs

Dependencies and upstream changes are included in a separate commit for easy
review.

After this initial upstream merge, two follow-up PRs will be submitted:

* km (hypercube) and k8sm-controller-manager #9265
* Static pods support #9077

Fixes applied:

- Precise metrics subsystem definitions
  - mesosphere/kubernetes-mesos#331
  - https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion_r31875232
  - https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion_r31875240
- Improve comments and add clarifications
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875208
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875226
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875227
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875228
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875239
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875243
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875234
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875256
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875255
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875251
- Clarify which Schedule function is actually called
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875246
Author: James DeFelice
Date:   2015-06-10 20:58:22 +00:00
Parent: 7d66559725
Commit: 932c58a497
105 changed files with 15162 additions and 0 deletions

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package main implements the executable Kubernetes-Mesos executor.
package main

View File

@@ -0,0 +1,47 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"fmt"
"os"
"runtime"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/service"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/version/verflag"
"github.com/spf13/pflag"
)
func main() {
runtime.GOMAXPROCS(runtime.NumCPU())
s := service.NewKubeletExecutorServer()
s.AddStandaloneFlags(pflag.CommandLine)
util.InitFlags()
util.InitLogs()
defer util.FlushLogs()
verflag.PrintAndExitIfRequested()
if err := s.Run(hyperkube.Nil(), pflag.CommandLine.Args()); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}

View File

@@ -0,0 +1,21 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package main implements k8sm-redirfd, a small helper used for testing the redirfd package.
// Inspired by http://skarnet.org/software/execline/redirfd.html.
// Usage:
//   k8sm-redirfd [-n] [-b] {mode} {fd} {file} {prog...}
package main

View File

@@ -0,0 +1,105 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"flag"
"fmt"
"os"
"os/exec"
"syscall"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/redirfd"
)
func main() {
nonblock := flag.Bool("n", false, "open file in non-blocking mode")
changemode := flag.Bool("b", false, "change mode of file after opening it: to non-blocking mode if the -n option was not given, to blocking mode if it was")
flag.Parse()
args := flag.Args()
if len(args) < 4 {
fmt.Fprintf(os.Stderr, "expected {mode} {fd} {file} {prog...} instead of: %v\n", args)
os.Exit(1)
}
var mode redirfd.RedirectMode
switch m := args[0]; m {
case "r":
mode = redirfd.Read
case "w":
mode = redirfd.Write
case "u":
mode = redirfd.Update
case "a":
mode = redirfd.Append
case "c":
mode = redirfd.AppendExisting
case "x":
mode = redirfd.WriteNew
default:
fmt.Fprintf(os.Stderr, "unrecognized mode %q\n", m)
os.Exit(1)
}
fd, err := redirfd.ParseFileDescriptor(args[1])
if err != nil {
fmt.Fprintf(os.Stderr, "failed to parse file descriptor: %v\n", err)
os.Exit(1)
}
file := args[2]
f, err := mode.Redirect(*nonblock, *changemode, fd, file)
if err != nil {
fmt.Fprintf(os.Stderr, "redirect failed: %q, %v\n", args[1], err)
os.Exit(1)
}
var pargs []string
if len(args) > 4 {
pargs = args[4:]
}
cmd := exec.Command(args[3], pargs...)
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
switch fd {
case redirfd.Stdin:
cmd.Stdin = f
case redirfd.Stdout:
cmd.Stdout = f
case redirfd.Stderr:
cmd.Stderr = f
default:
cmd.ExtraFiles = []*os.File{f}
}
defer f.Close()
if err = cmd.Run(); err != nil {
// If the child exited abnormally, propagate its exit status or signal;
// for any other failure (e.g. the program could not be started), exit 3.
if exiterr, ok := err.(*exec.ExitError); ok && exiterr.ProcessState != nil {
if waitStatus, ok := exiterr.ProcessState.Sys().(syscall.WaitStatus); ok {
if waitStatus.Signaled() {
os.Exit(256 + int(waitStatus.Signal()))
}
os.Exit(waitStatus.ExitStatus())
}
}
os.Exit(3)
}
}

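For reference, the same redirection can be performed from Go code by using the redirfd package directly. The sketch below is illustrative only: it relies on the Redirect call shown in the file above, and the file path and target descriptor are made-up examples, not part of this commit.

package main

import (
	"fmt"
	"os"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/redirfd"
)

func main() {
	// Open /tmp/out.log in write ("w") mode for the stdout descriptor slot,
	// blocking, without changing the mode afterwards (all illustrative).
	mode := redirfd.Write
	f, err := mode.Redirect(false, false, redirfd.Stdout, "/tmp/out.log")
	if err != nil {
		fmt.Fprintf(os.Stderr, "redirect failed: %v\n", err)
		os.Exit(1)
	}
	defer f.Close()

	// Writes to the returned *os.File land in /tmp/out.log.
	fmt.Fprintln(f, "hello from redirfd")
}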
View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package main implements the executable Kubernetes-Mesos scheduler.
package main

View File

@@ -0,0 +1,46 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"fmt"
"os"
"runtime"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/service"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/version/verflag"
"github.com/spf13/pflag"
)
func main() {
runtime.GOMAXPROCS(runtime.NumCPU())
s := service.NewSchedulerServer()
s.AddStandaloneFlags(pflag.CommandLine)
util.InitFlags()
util.InitLogs()
defer util.FlushLogs()
verflag.PrintAndExitIfRequested()
if err := s.Run(hyperkube.Nil(), pflag.CommandLine.Args()); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}

View File

@@ -0,0 +1,43 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package assert
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// EventuallyTrue asserts that the given predicate becomes true within the given timeout,
// polling it every 100ms.
func EventuallyTrue(t *testing.T, timeout time.Duration, fn func() bool, msgAndArgs ...interface{}) bool {
start := time.Now()
for {
if fn() {
return true
}
if time.Since(start) > timeout {
if len(msgAndArgs) > 0 {
return assert.Fail(t, msgAndArgs[0].(string), msgAndArgs[1:]...)
} else {
return assert.Fail(t, "predicate fn has not been true after %v", timeout.String())
}
}
time.Sleep(100 * time.Millisecond)
}
}

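A typical call site for this helper might look like the sketch below. The polled condition and timeouts are illustrative; the import alias matches the one used by the executor tests later in this commit.

package example_test

import (
	"testing"
	"time"

	assertext "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/assert"
)

func TestWorkEventuallyFinishes(t *testing.T) {
	done := make(chan struct{})
	go func() {
		time.Sleep(200 * time.Millisecond) // simulated asynchronous work
		close(done)
	}()

	// Polls every 100ms until the channel is closed, failing the test
	// if that does not happen within 2 seconds.
	assertext.EventuallyTrue(t, 2*time.Second, func() bool {
		select {
		case <-done:
			return true
		default:
			return false
		}
	}, "asynchronous work never finished")
}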
View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package assert is a utility package containing reusable testing functionality
// extending github.com/stretchr/testify/assert
package assert

View File

@@ -0,0 +1,96 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package backoff
import (
"math/rand"
"sync"
"time"
log "github.com/golang/glog"
)
type clock interface {
Now() time.Time
}
type realClock struct{}
func (realClock) Now() time.Time {
return time.Now()
}
type backoffEntry struct {
backoff time.Duration
lastUpdate time.Time
}
type Backoff struct {
perItemBackoff map[string]*backoffEntry
lock sync.Mutex
clock clock
defaultDuration time.Duration
maxDuration time.Duration
}
func New(initial, max time.Duration) *Backoff {
return &Backoff{
perItemBackoff: map[string]*backoffEntry{},
clock: realClock{},
defaultDuration: initial,
maxDuration: max,
}
}
func (p *Backoff) getEntry(id string) *backoffEntry {
p.lock.Lock()
defer p.lock.Unlock()
entry, ok := p.perItemBackoff[id]
if !ok {
entry = &backoffEntry{backoff: p.defaultDuration}
p.perItemBackoff[id] = entry
}
entry.lastUpdate = p.clock.Now()
return entry
}
func (p *Backoff) Get(id string) time.Duration {
entry := p.getEntry(id)
duration := entry.backoff
entry.backoff *= 2
if entry.backoff > p.maxDuration {
entry.backoff = p.maxDuration
}
//TODO(jdef) parameterize use of jitter?
// add jitter, get better backoff distribution
duration = time.Duration(rand.Int63n(int64(duration)))
log.V(3).Infof("Backing off %v for pod %s", duration, id)
return duration
}
// GC garbage collects records that have aged past maxDuration. Backoff users are
// expected to invoke this periodically.
func (p *Backoff) GC() {
p.lock.Lock()
defer p.lock.Unlock()
now := p.clock.Now()
for id, entry := range p.perItemBackoff {
if now.Sub(entry.lastUpdate) > p.maxDuration {
delete(p.perItemBackoff, id)
}
}
}

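A minimal sketch of how a caller might drive this type; the key and durations below are illustrative, not values used by this commit.

package main

import (
	"fmt"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/backoff"
)

func main() {
	// Per-item backoff starting at 1s and capped at 32s.
	b := backoff.New(1*time.Second, 32*time.Second)

	for attempt := 0; attempt < 5; attempt++ {
		// Each Get doubles the stored backoff for "pod-foo" (up to the cap)
		// and returns a jittered duration to wait before the next retry.
		d := b.Get("pod-foo")
		fmt.Printf("attempt %d: backing off %v\n", attempt, d)
	}

	// Callers are expected to garbage collect aged entries periodically.
	b.GC()
}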
View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package backoff provides backoff functionality with a simple API.
// Originally copied from Kubernetes: plugin/pkg/scheduler/factory/factory.go
package backoff

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package election provides interfaces used for master election.
package election

View File

@@ -0,0 +1,185 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"fmt"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/tools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
"github.com/coreos/go-etcd/etcd"
"github.com/golang/glog"
)
// Master is used to announce the current elected master.
type Master string
// IsAnAPIObject is used solely so we can work with the watch package.
// TODO(k8s): Either fix watch so this isn't necessary, or make this a real API Object.
// TODO(k8s): when it becomes clear how this package will be used, move these declarations
// to the proper place.
func (Master) IsAnAPIObject() {}
// NewEtcdMasterElector returns an implementation of election.MasterElector backed by etcd.
func NewEtcdMasterElector(h tools.EtcdGetSet) MasterElector {
return &etcdMasterElector{etcd: h}
}
type empty struct{}
// internal implementation struct
type etcdMasterElector struct {
etcd tools.EtcdGetSet
done chan empty
events chan watch.Event
}
// Elect implements the election.MasterElector interface.
func (e *etcdMasterElector) Elect(path, id string) watch.Interface {
e.done = make(chan empty)
e.events = make(chan watch.Event)
go util.Forever(func() { e.run(path, id) }, time.Second*5)
return e
}
func (e *etcdMasterElector) run(path, id string) {
masters := make(chan string)
errors := make(chan error)
go e.master(path, id, 30, masters, errors, e.done) // TODO(jdef) extract constant
for {
select {
case m := <-masters:
e.events <- watch.Event{
Type: watch.Modified,
Object: Master(m),
}
case err := <-errors:
glog.Errorf("error in election: %v", err)
}
}
}
// ResultChan implements the watch.Interface interface.
func (e *etcdMasterElector) ResultChan() <-chan watch.Event {
return e.events
}
// extendMaster attempts to extend ownership of a master lock for TTL seconds.
// returns "", nil if extension failed
// returns id, nil if extension succeeded
// returns "", err if an error occurred
func (e *etcdMasterElector) extendMaster(path, id string, ttl uint64, res *etcd.Response) (string, error) {
// If it matches the passed in id, extend the lease by writing a new entry.
// Uses compare and swap, so that if we TTL out in the meantime, the write will fail.
// We don't handle the TTL delete w/o a write case here, it's handled in the next loop
// iteration.
_, err := e.etcd.CompareAndSwap(path, id, ttl, "", res.Node.ModifiedIndex)
if err != nil && !tools.IsEtcdTestFailed(err) {
return "", err
}
if err != nil && tools.IsEtcdTestFailed(err) {
return "", nil
}
return id, nil
}
// becomeMaster attempts to become the master for this lock.
// returns "", nil if the attempt failed
// returns id, nil if the attempt succeeded
// returns "", err if an error occurred
func (e *etcdMasterElector) becomeMaster(path, id string, ttl uint64) (string, error) {
_, err := e.etcd.Create(path, id, ttl)
if err != nil && !tools.IsEtcdNodeExist(err) {
// unexpected error
return "", err
}
if err != nil && tools.IsEtcdNodeExist(err) {
return "", nil
}
return id, nil
}
// handleMaster performs one loop of master locking.
// on success it returns <master>, nil
// on error it returns "", err
// in situations where you should try again due to concurrent state changes (e.g. another actor simultaneously acquiring the lock)
// it returns "", nil
func (e *etcdMasterElector) handleMaster(path, id string, ttl uint64) (string, error) {
res, err := e.etcd.Get(path, false, false)
// Unexpected error, bail out
if err != nil && !tools.IsEtcdNotFound(err) {
return "", err
}
// There is no master, try to become the master.
if err != nil && tools.IsEtcdNotFound(err) {
return e.becomeMaster(path, id, ttl)
}
// This should never happen.
if res.Node == nil {
return "", fmt.Errorf("unexpected response: %#v", res)
}
// We're not the master, just return the current value
if res.Node.Value != id {
return res.Node.Value, nil
}
// We are the master, try to extend our lease
return e.extendMaster(path, id, ttl, res)
}
// master provides a distributed master-election lock; it maintains the lock until failure, or until someone sends something on the done channel.
// The basic algorithm is:
// while !done
// Get the current master
// If there is no current master
// Try to become the master
// Otherwise
// If we are the master, extend the lease
// If the master is different than the last time through the loop, report the master
// Sleep 80% of TTL
func (e *etcdMasterElector) master(path, id string, ttl uint64, masters chan<- string, errors chan<- error, done <-chan empty) {
lastMaster := ""
for {
master, err := e.handleMaster(path, id, ttl)
if err != nil {
errors <- err
} else if len(master) == 0 {
continue
} else if master != lastMaster {
lastMaster = master
masters <- master
}
// TODO(k8s): Add Watch here, skip the polling for faster reactions
// If done is closed, break out.
select {
case <-done:
return
case <-time.After(time.Duration((ttl*8)/10) * time.Second):
}
}
}
// Stop implements the watch.Interface interface.
func (e *etcdMasterElector) Stop() {
close(e.done)
}

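A hedged usage sketch, mirroring the tests below: the etcd endpoint, lock path, and id are illustrative, the election import path is assumed to sit alongside the other contrib/mesos packages, and it is assumed that the go-etcd client satisfies tools.EtcdGetSet (as it does elsewhere in Kubernetes).

package main

import (
	"fmt"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/election"
	"github.com/coreos/go-etcd/etcd"
)

func main() {
	// Illustrative local etcd endpoint.
	client := etcd.NewClient([]string{"http://127.0.0.1:4001"})

	elector := election.NewEtcdMasterElector(client)
	w := elector.Elect("/mesos/master-lock", "scheduler-1")
	defer w.Stop()

	// Each Modified event carries the currently elected Master.
	for event := range w.ResultChan() {
		fmt.Printf("current master: %v\n", event.Object.(election.Master))
	}
}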
View File

@@ -0,0 +1,98 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"testing"
"github.com/GoogleCloudPlatform/kubernetes/pkg/tools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
"github.com/coreos/go-etcd/etcd"
)
func TestEtcdMasterOther(t *testing.T) {
path := "foo"
etcd := tools.NewFakeEtcdClient(t)
etcd.Set(path, "baz", 0)
master := NewEtcdMasterElector(etcd)
w := master.Elect(path, "bar")
result := <-w.ResultChan()
if result.Type != watch.Modified || result.Object.(Master) != "baz" {
t.Errorf("unexpected event: %#v", result)
}
w.Stop()
}
func TestEtcdMasterNoOther(t *testing.T) {
path := "foo"
e := tools.NewFakeEtcdClient(t)
e.TestIndex = true
e.Data["foo"] = tools.EtcdResponseWithError{
R: &etcd.Response{
Node: nil,
},
E: &etcd.EtcdError{
ErrorCode: tools.EtcdErrorCodeNotFound,
},
}
master := NewEtcdMasterElector(e)
w := master.Elect(path, "bar")
result := <-w.ResultChan()
if result.Type != watch.Modified || result.Object.(Master) != "bar" {
t.Errorf("unexpected event: %#v", result)
}
w.Stop()
}
func TestEtcdMasterNoOtherThenConflict(t *testing.T) {
path := "foo"
e := tools.NewFakeEtcdClient(t)
e.TestIndex = true
// Ok, so we set up a chain of responses from etcd:
// 1) Nothing there
// 2) conflict (someone else wrote)
// 3) new value (the data they wrote)
empty := tools.EtcdResponseWithError{
R: &etcd.Response{
Node: nil,
},
E: &etcd.EtcdError{
ErrorCode: tools.EtcdErrorCodeNotFound,
},
}
empty.N = &tools.EtcdResponseWithError{
R: &etcd.Response{},
E: &etcd.EtcdError{
ErrorCode: tools.EtcdErrorCodeNodeExist,
},
}
empty.N.N = &tools.EtcdResponseWithError{
R: &etcd.Response{
Node: &etcd.Node{
Value: "baz",
},
},
}
e.Data["foo"] = empty
master := NewEtcdMasterElector(e)
w := master.Elect(path, "bar")
result := <-w.ResultChan()
if result.Type != watch.Modified || result.Object.(Master) != "bar" {
t.Errorf("unexpected event: %#v", result)
}
w.Stop()
}

View File

@@ -0,0 +1,53 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"sync"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
)
// Fake allows for testing of anything consuming a MasterElector.
type Fake struct {
mux *watch.Broadcaster
currentMaster Master
lock sync.Mutex // Protect access of currentMaster
}
// NewFake makes a new fake MasterElector.
func NewFake() *Fake {
// 0 means block for clients.
return &Fake{mux: watch.NewBroadcaster(0, watch.WaitIfChannelFull)}
}
func (f *Fake) ChangeMaster(newMaster Master) {
f.lock.Lock()
defer f.lock.Unlock()
f.mux.Action(watch.Modified, newMaster)
f.currentMaster = newMaster
}
func (f *Fake) Elect(path, id string) watch.Interface {
f.lock.Lock()
defer f.lock.Unlock()
w := f.mux.Watch()
if f.currentMaster != "" {
f.mux.Action(watch.Modified, f.currentMaster)
}
return w
}

View File

@@ -0,0 +1,134 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"sync"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
"github.com/golang/glog"
)
// MasterElector is an interface for services that can elect masters.
// Important note: MasterElector implementations are not interoperable; all participants in an
// election must use the same underlying implementation of this interface for correct behavior.
type MasterElector interface {
// Elect makes the caller, represented by 'id', enter into a master election for the
// distributed lock defined by 'path'.
// The returned watch.Interface provides a stream of Master objects which
// contain the current master.
// Calling Stop on the returned interface relinquishes ownership (if currently possessed)
// and removes the caller from the election.
Elect(path, id string) watch.Interface
}
// Service represents anything that can start and stop on demand.
type Service interface {
Validate(desired, current Master)
Start()
Stop()
}
type notifier struct {
lock sync.Mutex
cond *sync.Cond
// desired is updated with every change, current is updated after
// Start()/Stop() finishes. 'cond' is used to signal that a change
// might be needed. This handles the case where mastership flops
// around without calling Start()/Stop() excessively.
desired, current Master
// for comparison, to see if we are master.
id Master
service Service
}
// Notify runs Elect() on m, and calls Start()/Stop() on s when the
// elected master starts/stops matching 'id'. Never returns.
func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
n := &notifier{id: Master(id), service: s}
n.cond = sync.NewCond(&n.lock)
finished := runtime.After(func() {
runtime.Until(func() {
for {
w := m.Elect(path, id)
eventLoop:
for {
select {
case <-abort:
return
case event, open := <-w.ResultChan():
if !open {
// the result channel closed; re-run the election
break eventLoop
}
if event.Type != watch.Modified {
continue
}
electedMaster, ok := event.Object.(Master)
if !ok {
glog.Errorf("Unexpected object from election channel: %v", event.Object)
continue
}
func() {
n.lock.Lock()
defer n.lock.Unlock()
n.desired = electedMaster
if n.desired != n.current {
n.cond.Signal()
}
}()
}
}
}
}, 0, abort)
})
runtime.Until(func() { n.serviceLoop(finished) }, 0, abort)
}
// serviceLoop waits for changes, and calls Start()/Stop() as needed.
func (n *notifier) serviceLoop(abort <-chan struct{}) {
n.lock.Lock()
defer n.lock.Unlock()
for {
select {
case <-abort:
return
default:
for n.desired == n.current {
ch := runtime.After(n.cond.Wait)
select {
case <-abort:
n.cond.Signal() // ensure that Wait() returns
<-ch
return
case <-ch:
// we were notified and have the lock, proceed..
}
}
if n.current != n.id && n.desired == n.id {
n.service.Validate(n.desired, n.current)
n.service.Start()
} else if n.current == n.id && n.desired != n.id {
n.service.Stop()
}
n.current = n.desired
}
}
}

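Before the test below, here is a minimal sketch of a Service implementation driven through Notify. The service type, lock path, and id are illustrative, and the Fake elector from the previous file stands in for a real one.

package main

import (
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/election"
	"github.com/golang/glog"
)

// loggingService just logs leadership transitions (illustrative only).
type loggingService struct{}

func (s *loggingService) Validate(desired, current election.Master) {
	glog.Infof("leadership moving from %q to %q", current, desired)
}
func (s *loggingService) Start() { glog.Info("won the election: starting") }
func (s *loggingService) Stop()  { glog.Info("lost the election: stopping") }

func main() {
	elector := election.NewFake()
	abort := make(chan struct{})

	// Simulate this process winning the election.
	go elector.ChangeMaster(election.Master("scheduler-1"))

	// Notify blocks, calling Start()/Stop() on the service as "scheduler-1"
	// gains or loses mastership on the given path.
	election.Notify(elector, "/mesos/master-lock", "scheduler-1", &loggingService{}, abort)
}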
View File

@@ -0,0 +1,98 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"testing"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
)
type slowService struct {
t *testing.T
on bool
// We explicitly have no lock to prove that
// Start and Stop are not called concurrently.
changes chan<- bool
done <-chan struct{}
}
func (s *slowService) Validate(d, c Master) {
// noop
}
func (s *slowService) Start() {
select {
case <-s.done:
return // avoid writing to closed changes chan
default:
}
if s.on {
s.t.Errorf("Start() called on a service that is already on")
}
time.Sleep(2 * time.Millisecond)
s.on = true
s.changes <- true
}
func (s *slowService) Stop() {
select {
case <-s.done:
return // avoid writing to closed changes chan
default:
}
if !s.on {
s.t.Errorf("Stop() called on a service that is already off")
}
time.Sleep(2 * time.Millisecond)
s.on = false
s.changes <- false
}
func Test(t *testing.T) {
m := NewFake()
changes := make(chan bool, 1500)
done := make(chan struct{})
s := &slowService{t: t, changes: changes, done: done}
notifyDone := runtime.After(func() { Notify(m, "", "me", s, done) })
go func() {
defer close(done)
for i := 0; i < 500; i++ {
for _, key := range []string{"me", "notme", "alsonotme"} {
m.ChangeMaster(Master(key))
}
}
}()
<-notifyDone
close(changes)
changeList := []bool{}
for {
change, ok := <-changes
if !ok {
break
}
changeList = append(changeList, change)
}
if len(changeList) > 1000 {
t.Errorf("unexpected number of changes: %v", len(changeList))
}
}

View File

@@ -0,0 +1,29 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"time"
)
// default values to use when constructing mesos ExecutorInfo messages
const (
DefaultInfoID = "k8sm-executor"
DefaultInfoSource = "kubernetes"
DefaultInfoName = "Kubelet-Executor"
DefaultSuicideTimeout = 20 * time.Minute
)

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package config contains executor configuration constants.
package config

View File

@@ -0,0 +1,21 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Package executor includes a Mesos executor, which embeds
a kubelet to manage containers.
*/
package executor

View File

@@ -0,0 +1,846 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"encoding/json"
"fmt"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
"github.com/fsouza/go-dockerclient"
"github.com/gogo/protobuf/proto"
log "github.com/golang/glog"
bindings "github.com/mesos/mesos-go/executor"
mesos "github.com/mesos/mesos-go/mesosproto"
mutil "github.com/mesos/mesos-go/mesosutil"
)
const (
containerPollTime = 300 * time.Millisecond
launchGracePeriod = 5 * time.Minute
)
type stateType int32
const (
disconnectedState stateType = iota
connectedState
suicidalState
terminalState
)
func (s *stateType) get() stateType {
return stateType(atomic.LoadInt32((*int32)(s)))
}
func (s *stateType) transition(from, to stateType) bool {
return atomic.CompareAndSwapInt32((*int32)(s), int32(from), int32(to))
}
func (s *stateType) transitionTo(to stateType, unless ...stateType) bool {
if len(unless) == 0 {
atomic.StoreInt32((*int32)(s), int32(to))
return true
}
for {
state := s.get()
for _, x := range unless {
if state == x {
return false
}
}
if s.transition(state, to) {
return true
}
}
}
type kuberTask struct {
mesosTaskInfo *mesos.TaskInfo
podName string
}
// func that attempts suicide
type jumper func(bindings.ExecutorDriver, <-chan struct{})
type suicideWatcher interface {
Next(time.Duration, bindings.ExecutorDriver, jumper) suicideWatcher
Reset(time.Duration) bool
Stop() bool
}
type podStatusFunc func() (*api.PodStatus, error)
// KubernetesExecutor is a Mesos executor that runs pods
// on a minion machine.
type KubernetesExecutor struct {
kl *kubelet.Kubelet // the kubelet instance.
updateChan chan<- interface{} // to send pod config updates to the kubelet
state stateType
tasks map[string]*kuberTask
pods map[string]*api.Pod
lock sync.RWMutex
sourcename string
client *client.Client
events <-chan watch.Event
done chan struct{} // signals shutdown
outgoing chan func() (mesos.Status, error) // outgoing queue to the mesos driver
dockerClient dockertools.DockerInterface
suicideWatch suicideWatcher
suicideTimeout time.Duration
shutdownAlert func() // invoked just prior to executor shutdown
kubeletFinished <-chan struct{} // signals that kubelet Run() died
initialRegistration sync.Once
exitFunc func(int)
podStatusFunc func(*kubelet.Kubelet, *api.Pod) (*api.PodStatus, error)
}
type Config struct {
Kubelet *kubelet.Kubelet
Updates chan<- interface{} // to send pod config updates to the kubelet
SourceName string
APIClient *client.Client
Watch watch.Interface
Docker dockertools.DockerInterface
ShutdownAlert func()
SuicideTimeout time.Duration
KubeletFinished <-chan struct{} // signals that kubelet Run() died
ExitFunc func(int)
PodStatusFunc func(*kubelet.Kubelet, *api.Pod) (*api.PodStatus, error)
}
func (k *KubernetesExecutor) isConnected() bool {
return connectedState == (&k.state).get()
}
// New creates a new kubernetes executor.
func New(config Config) *KubernetesExecutor {
k := &KubernetesExecutor{
kl: config.Kubelet,
updateChan: config.Updates,
state: disconnectedState,
tasks: make(map[string]*kuberTask),
pods: make(map[string]*api.Pod),
sourcename: config.SourceName,
client: config.APIClient,
done: make(chan struct{}),
outgoing: make(chan func() (mesos.Status, error), 1024),
dockerClient: config.Docker,
suicideTimeout: config.SuicideTimeout,
kubeletFinished: config.KubeletFinished,
suicideWatch: &suicideTimer{},
shutdownAlert: config.ShutdownAlert,
exitFunc: config.ExitFunc,
podStatusFunc: config.PodStatusFunc,
}
//TODO(jdef) do something real with these events..
if config.Watch != nil {
events := config.Watch.ResultChan()
if events != nil {
go func() {
for e := range events {
// e ~= watch.Event { ADDED, *api.Event }
log.V(1).Info(e)
}
}()
k.events = events
}
}
return k
}
func (k *KubernetesExecutor) Init(driver bindings.ExecutorDriver) {
k.killKubeletContainers()
k.resetSuicideWatch(driver)
go k.sendLoop()
//TODO(jdef) monitor kubeletFinished and shutdown if it happens
}
func (k *KubernetesExecutor) Done() <-chan struct{} {
return k.done
}
func (k *KubernetesExecutor) isDone() bool {
select {
case <-k.done:
return true
default:
return false
}
}
// Registered is called when the executor is successfully registered with the slave.
func (k *KubernetesExecutor) Registered(driver bindings.ExecutorDriver,
executorInfo *mesos.ExecutorInfo, frameworkInfo *mesos.FrameworkInfo, slaveInfo *mesos.SlaveInfo) {
if k.isDone() {
return
}
log.Infof("Executor %v of framework %v registered with slave %v\n",
executorInfo, frameworkInfo, slaveInfo)
if !(&k.state).transition(disconnectedState, connectedState) {
log.Errorf("failed to register/transition to a connected state")
}
k.initialRegistration.Do(k.onInitialRegistration)
}
// Reregistered is called when the executor is successfully re-registered with the slave.
// This can happen when the slave fails over.
func (k *KubernetesExecutor) Reregistered(driver bindings.ExecutorDriver, slaveInfo *mesos.SlaveInfo) {
if k.isDone() {
return
}
log.Infof("Reregistered with slave %v\n", slaveInfo)
if !(&k.state).transition(disconnectedState, connectedState) {
log.Errorf("failed to reregister/transition to a connected state")
}
k.initialRegistration.Do(k.onInitialRegistration)
}
func (k *KubernetesExecutor) onInitialRegistration() {
// emit an empty update to allow the mesos "source" to be marked as seen
k.updateChan <- kubelet.PodUpdate{
Pods: []*api.Pod{},
Op: kubelet.SET,
Source: k.sourcename,
}
}
// Disconnected is called when the executor is disconnected from the slave.
func (k *KubernetesExecutor) Disconnected(driver bindings.ExecutorDriver) {
if k.isDone() {
return
}
log.Infof("Slave is disconnected\n")
if !(&k.state).transition(connectedState, disconnectedState) {
log.Errorf("failed to disconnect/transition to a disconnected state")
}
}
// LaunchTask is called when the executor receives a request to launch a task.
// This happens when the k8sm scheduler has decided to schedule the pod
// (which corresponds to a Mesos Task) onto the node where this executor
// is running, but the binding is not recorded in the Kubernetes store yet.
// This function is invoked to tell the executor to record the binding in the
// Kubernetes store and start the pod via the Kubelet.
func (k *KubernetesExecutor) LaunchTask(driver bindings.ExecutorDriver, taskInfo *mesos.TaskInfo) {
if k.isDone() {
return
}
log.Infof("Launch task %v\n", taskInfo)
if !k.isConnected() {
log.Errorf("Ignore launch task because the executor is disconnected\n")
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
messages.ExecutorUnregistered))
return
}
obj, err := api.Codec.Decode(taskInfo.GetData())
if err != nil {
log.Errorf("failed to extract yaml data from the taskInfo.data %v", err)
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
messages.UnmarshalTaskDataFailure))
return
}
pod, ok := obj.(*api.Pod)
if !ok {
log.Errorf("expected *api.Pod instead of %T: %+v", obj, obj)
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
messages.UnmarshalTaskDataFailure))
return
}
k.lock.Lock()
defer k.lock.Unlock()
taskId := taskInfo.GetTaskId().GetValue()
if _, found := k.tasks[taskId]; found {
log.Errorf("task already launched\n")
// Do not send back TASK_RUNNING here; this may be a duplicate message
// or a duplicate task id.
return
}
// remember this task so that:
// (a) we ignore future launches for it
// (b) we have a record of it so that we can kill it if needed
// (c) we're leaving podName == "" for now, indicates we don't need to delete containers
k.tasks[taskId] = &kuberTask{
mesosTaskInfo: taskInfo,
}
k.resetSuicideWatch(driver)
go k.launchTask(driver, taskId, pod)
}
// TODO(jdef) add metrics for this?
type suicideTimer struct {
timer *time.Timer
}
func (w *suicideTimer) Next(d time.Duration, driver bindings.ExecutorDriver, f jumper) suicideWatcher {
return &suicideTimer{
timer: time.AfterFunc(d, func() {
log.Warningf("Suicide timeout (%v) expired", d)
f(driver, nil)
}),
}
}
func (w *suicideTimer) Stop() (result bool) {
if w != nil && w.timer != nil {
log.Infoln("stopping suicide watch") //TODO(jdef) debug
result = w.timer.Stop()
}
return
}
// return true if the timer was successfully reset
func (w *suicideTimer) Reset(d time.Duration) bool {
if w != nil && w.timer != nil {
log.Infoln("resetting suicide watch") //TODO(jdef) debug
w.timer.Reset(d)
return true
}
return false
}
// determine whether we need to start a suicide countdown. if so, then start
// a timer that, upon expiration, causes this executor to commit suicide.
// this implementation runs asynchronously. callers that wish to wait for the
// reset to complete may wait for the returned signal chan to close.
func (k *KubernetesExecutor) resetSuicideWatch(driver bindings.ExecutorDriver) <-chan struct{} {
ch := make(chan struct{})
go func() {
defer close(ch)
k.lock.Lock()
defer k.lock.Unlock()
if k.suicideTimeout < 1 {
return
}
if k.suicideWatch != nil {
if len(k.tasks) > 0 {
k.suicideWatch.Stop()
return
}
if k.suicideWatch.Reset(k.suicideTimeout) {
// valid timer, reset was successful
return
}
}
//TODO(jdef) reduce verbosity here once we're convinced that suicide watch is working properly
log.Infof("resetting suicide watch timer for %v", k.suicideTimeout)
k.suicideWatch = k.suicideWatch.Next(k.suicideTimeout, driver, jumper(k.attemptSuicide))
}()
return ch
}
func (k *KubernetesExecutor) attemptSuicide(driver bindings.ExecutorDriver, abort <-chan struct{}) {
k.lock.Lock()
defer k.lock.Unlock()
// this attempt may have been queued and since been aborted
select {
case <-abort:
//TODO(jdef) reduce verbosity once suicide watch is working properly
log.Infof("aborting suicide attempt since watch was cancelled")
return
default: // continue
}
// fail-safe, will abort kamikaze attempts if there are tasks
if len(k.tasks) > 0 {
ids := []string{}
for taskid := range k.tasks {
ids = append(ids, taskid)
}
log.Errorf("suicide attempt failed, there are still running tasks: %v", ids)
return
}
log.Infoln("Attempting suicide")
if (&k.state).transitionTo(suicidalState, suicidalState, terminalState) {
//TODO(jdef) let the scheduler know?
//TODO(jdef) is suicide more graceful than slave-demanded shutdown?
k.doShutdown(driver)
}
}
// async continuation of LaunchTask
func (k *KubernetesExecutor) launchTask(driver bindings.ExecutorDriver, taskId string, pod *api.Pod) {
//HACK(jdef): cloned binding construction from k8s plugin/pkg/scheduler/scheduler.go
binding := &api.Binding{
ObjectMeta: api.ObjectMeta{
Namespace: pod.Namespace,
Name: pod.Name,
Annotations: make(map[string]string),
},
Target: api.ObjectReference{
Kind: "Node",
Name: pod.Annotations[meta.BindingHostKey],
},
}
// forward the annotations that the scheduler wants to apply
for k, v := range pod.Annotations {
binding.Annotations[k] = v
}
deleteTask := func() {
k.lock.Lock()
defer k.lock.Unlock()
delete(k.tasks, taskId)
k.resetSuicideWatch(driver)
}
log.Infof("Binding '%v/%v' to '%v' with annotations %+v...", pod.Namespace, pod.Name, binding.Target.Name, binding.Annotations)
ctx := api.WithNamespace(api.NewContext(), binding.Namespace)
// TODO(k8s): use Pods interface for binding once clusters are upgraded
// return b.Pods(binding.Namespace).Bind(binding)
err := k.client.Post().Namespace(api.NamespaceValue(ctx)).Resource("bindings").Body(binding).Do().Error()
if err != nil {
deleteTask()
k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED,
messages.CreateBindingFailure))
return
}
podFullName := container.GetPodFullName(pod)
// allow a recently failed-over scheduler the chance to recover the task/pod binding:
// it may have failed and recovered before the apiserver is able to report the updated
// binding information. replays of this status event will signal to the scheduler that
// the apiserver should be up-to-date.
data, err := json.Marshal(api.PodStatusResult{
ObjectMeta: api.ObjectMeta{
Name: podFullName,
SelfLink: "/podstatusresult",
},
})
if err != nil {
deleteTask()
log.Errorf("failed to marshal pod status result: %v", err)
k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED,
err.Error()))
return
}
k.lock.Lock()
defer k.lock.Unlock()
// Add the task.
task, found := k.tasks[taskId]
if !found {
log.V(1).Infof("task %v not found, probably killed: aborting launch, reporting lost", taskId)
k.reportLostTask(driver, taskId, messages.LaunchTaskFailed)
return
}
//TODO(jdef) check for duplicate pod name, if found send TASK_ERROR
// from here on, we need to delete containers associated with the task
// upon it going into a terminal state
task.podName = podFullName
k.pods[podFullName] = pod
// send the latest snapshot of the set of pods to the kubelet via the pod update channel
update := kubelet.PodUpdate{Op: kubelet.SET}
for _, p := range k.pods {
update.Pods = append(update.Pods, p)
}
k.updateChan <- update
statusUpdate := &mesos.TaskStatus{
TaskId: mutil.NewTaskID(taskId),
State: mesos.TaskState_TASK_STARTING.Enum(),
Message: proto.String(messages.CreateBindingSuccess),
Data: data,
}
k.sendStatus(driver, statusUpdate)
// Delay reporting 'task running' until container is up.
psf := podStatusFunc(func() (*api.PodStatus, error) {
return k.podStatusFunc(k.kl, pod)
})
go k._launchTask(driver, taskId, podFullName, psf)
}
func (k *KubernetesExecutor) _launchTask(driver bindings.ExecutorDriver, taskId, podFullName string, psf podStatusFunc) {
expired := make(chan struct{})
time.AfterFunc(launchGracePeriod, func() { close(expired) })
getMarshalledInfo := func() (data []byte, cancel bool) {
// potentially long call..
if podStatus, err := psf(); err == nil && podStatus != nil {
select {
case <-expired:
cancel = true
default:
k.lock.Lock()
defer k.lock.Unlock()
if _, found := k.tasks[taskId]; !found {
// don't bother with the pod status if the task is already gone
cancel = true
break
} else if podStatus.Phase != api.PodRunning {
// avoid sending back a running status before it's really running
break
}
log.V(2).Infof("Found pod status: '%v'", podStatus)
result := api.PodStatusResult{
ObjectMeta: api.ObjectMeta{
Name: podFullName,
SelfLink: "/podstatusresult",
},
Status: *podStatus,
}
if data, err = json.Marshal(result); err != nil {
log.Errorf("failed to marshal pod status result: %v", err)
}
}
}
return
}
waitForRunningPod:
for {
select {
case <-expired:
log.Warningf("Launch expired grace period of '%v'", launchGracePeriod)
break waitForRunningPod
case <-time.After(containerPollTime):
if data, cancel := getMarshalledInfo(); cancel {
break waitForRunningPod
} else if data == nil {
continue waitForRunningPod
} else {
k.lock.Lock()
defer k.lock.Unlock()
if _, found := k.tasks[taskId]; !found {
goto reportLost
}
statusUpdate := &mesos.TaskStatus{
TaskId: mutil.NewTaskID(taskId),
State: mesos.TaskState_TASK_RUNNING.Enum(),
Message: proto.String(fmt.Sprintf("pod-running:%s", podFullName)),
Data: data,
}
k.sendStatus(driver, statusUpdate)
// continue to monitor the health of the pod
go k.__launchTask(driver, taskId, podFullName, psf)
return
}
}
}
k.lock.Lock()
defer k.lock.Unlock()
reportLost:
k.reportLostTask(driver, taskId, messages.LaunchTaskFailed)
}
func (k *KubernetesExecutor) __launchTask(driver bindings.ExecutorDriver, taskId, podFullName string, psf podStatusFunc) {
// TODO(nnielsen): Monitor health of pod and report if lost.
// Should we also allow this to fail a couple of times before reporting lost?
// What if the docker daemon is restarting and we can't connect, but it's
// going to bring the pods back online as soon as it restarts?
knownPod := func() bool {
_, err := psf()
return err == nil
}
// Wait for the pod to go away and stop monitoring once it does
// TODO (jdefelice) replace with an /events watch?
for {
time.Sleep(containerPollTime)
if k.checkForLostPodTask(driver, taskId, knownPod) {
return
}
}
}
// Intended to be executed as part of the pod monitoring loop, this fn (ultimately) checks with Docker
// whether the pod is running. It will only return false if the task is still registered and the pod is
// registered in Docker. Otherwise it returns true. If there's still a task record on file, but no pod
// in Docker, then we'll also send a TASK_LOST event.
func (k *KubernetesExecutor) checkForLostPodTask(driver bindings.ExecutorDriver, taskId string, isKnownPod func() bool) bool {
// TODO (jdefelice) don't send false alarms for deleted pods (KILLED tasks)
k.lock.Lock()
defer k.lock.Unlock()
// TODO(jdef) we should really consider k.pods here, along with what docker is reporting, since the
// kubelet may constantly attempt to instantiate a pod as long as it's in the pod state that we're
// handing to it. otherwise, we're probably reporting a TASK_LOST prematurely. Should probably
// consult RestartPolicy to determine appropriate behavior. Should probably also gracefully handle
// docker daemon restarts.
if _, ok := k.tasks[taskId]; ok {
if isKnownPod() {
return false
} else {
log.Warningf("Detected lost pod, reporting lost task %v", taskId)
k.reportLostTask(driver, taskId, messages.ContainersDisappeared)
}
} else {
log.V(2).Infof("Task %v no longer registered, stop monitoring for lost pods", taskId)
}
return true
}
// KillTask is called when the executor receives a request to kill a task.
func (k *KubernetesExecutor) KillTask(driver bindings.ExecutorDriver, taskId *mesos.TaskID) {
if k.isDone() {
return
}
log.Infof("Kill task %v\n", taskId)
if !k.isConnected() {
//TODO(jdefelice) send TASK_LOST here?
log.Warningf("Ignore kill task because the executor is disconnected\n")
return
}
k.lock.Lock()
defer k.lock.Unlock()
k.removePodTask(driver, taskId.GetValue(), messages.TaskKilled, mesos.TaskState_TASK_KILLED)
}
// Reports a lost task to the slave and updates internal task and pod tracking state.
// Assumes that the caller is locking around pod and task state.
func (k *KubernetesExecutor) reportLostTask(driver bindings.ExecutorDriver, tid, reason string) {
k.removePodTask(driver, tid, reason, mesos.TaskState_TASK_LOST)
}
// deletes the pod and task associated with the task identified by tid and sends a task
// status update to mesos. also attempts to reset the suicide watch.
// Assumes that the caller is locking around pod and task state.
func (k *KubernetesExecutor) removePodTask(driver bindings.ExecutorDriver, tid, reason string, state mesos.TaskState) {
task, ok := k.tasks[tid]
if !ok {
log.V(1).Infof("Failed to remove task, unknown task %v\n", tid)
return
}
delete(k.tasks, tid)
k.resetSuicideWatch(driver)
pid := task.podName
if _, found := k.pods[pid]; !found {
log.Warningf("Cannot remove unknown pod %v for task %v", pid, tid)
} else {
log.V(2).Infof("deleting pod %v for task %v", pid, tid)
delete(k.pods, pid)
// Send the pod updates to the channel.
update := kubelet.PodUpdate{Op: kubelet.SET}
for _, p := range k.pods {
update.Pods = append(update.Pods, p)
}
k.updateChan <- update
}
// TODO(jdef): ensure that the update propagates, perhaps return a signal chan?
k.sendStatus(driver, newStatus(mutil.NewTaskID(tid), state, reason))
}
// FrameworkMessage is called when the framework sends some message to the executor
func (k *KubernetesExecutor) FrameworkMessage(driver bindings.ExecutorDriver, message string) {
if k.isDone() {
return
}
if !k.isConnected() {
log.Warningf("Ignore framework message because the executor is disconnected\n")
return
}
log.Infof("Received message from framework: %v\n", message)
//TODO(jdef) master reported a lost task, reconcile this! @see scheduler.go:handleTaskLost
if strings.HasPrefix(message, "task-lost:") && len(message) > 10 {
taskId := message[10:]
if taskId != "" {
// clean up pod state
k.lock.Lock()
defer k.lock.Unlock()
k.reportLostTask(driver, taskId, messages.TaskLostAck)
}
}
switch message {
case messages.Kamikaze:
k.attemptSuicide(driver, nil)
}
}
// Shutdown is called when the executor receives a shutdown request.
func (k *KubernetesExecutor) Shutdown(driver bindings.ExecutorDriver) {
k.lock.Lock()
defer k.lock.Unlock()
k.doShutdown(driver)
}
// assumes that caller has obtained state lock
func (k *KubernetesExecutor) doShutdown(driver bindings.ExecutorDriver) {
defer func() {
if r := recover(); r != nil {
log.Errorf("exiting with unclean shutdown: %v", r)
}
if k.exitFunc != nil {
k.exitFunc(1)
}
}()
(&k.state).transitionTo(terminalState)
// signal to all listeners that this KubeletExecutor is done!
close(k.done)
if k.shutdownAlert != nil {
func() {
defer util.HandleCrash()
k.shutdownAlert()
}()
}
log.Infoln("Stopping executor driver")
_, err := driver.Stop()
if err != nil {
log.Warningf("failed to stop executor driver: %v", err)
}
log.Infoln("Shutdown the executor")
// according to docs, mesos will generate TASK_LOST updates for us
// if needed, so don't take extra time to do that here.
k.tasks = map[string]*kuberTask{}
select {
// the main Run() func may still be running... wait for it to finish: it will
// clear the pod configuration cleanly, telling k8s "there are no pods" and
// clean up resources (pods, volumes, etc).
case <-k.kubeletFinished:
//TODO(jdef) attempt to wait for events to propagate to API server?
// TODO(jdef) extract constant, should be smaller than whatever the
// slave graceful shutdown timeout period is.
case <-time.After(15 * time.Second):
log.Errorf("timed out waiting for kubelet Run() to die")
}
log.Infoln("exiting")
if k.exitFunc != nil {
k.exitFunc(0)
}
}
// Destroy existing k8s containers
func (k *KubernetesExecutor) killKubeletContainers() {
if containers, err := dockertools.GetKubeletDockerContainers(k.dockerClient, true); err == nil {
opts := docker.RemoveContainerOptions{
RemoveVolumes: true,
Force: true,
}
for _, container := range containers {
opts.ID = container.ID
log.V(2).Infof("Removing container: %v", opts.ID)
if err := k.dockerClient.RemoveContainer(opts); err != nil {
log.Warning(err)
}
}
} else {
log.Warningf("Failed to list kubelet docker containers: %v", err)
}
}
// Error is called when some error happens.
func (k *KubernetesExecutor) Error(driver bindings.ExecutorDriver, message string) {
log.Errorln(message)
}
func newStatus(taskId *mesos.TaskID, state mesos.TaskState, message string) *mesos.TaskStatus {
return &mesos.TaskStatus{
TaskId: taskId,
State: &state,
Message: proto.String(message),
}
}
func (k *KubernetesExecutor) sendStatus(driver bindings.ExecutorDriver, status *mesos.TaskStatus) {
select {
case <-k.done:
default:
k.outgoing <- func() (mesos.Status, error) { return driver.SendStatusUpdate(status) }
}
}
func (k *KubernetesExecutor) sendFrameworkMessage(driver bindings.ExecutorDriver, msg string) {
select {
case <-k.done:
default:
k.outgoing <- func() (mesos.Status, error) { return driver.SendFrameworkMessage(msg) }
}
}
func (k *KubernetesExecutor) sendLoop() {
defer log.V(1).Info("sender loop exiting")
for {
select {
case <-k.done:
return
default:
if !k.isConnected() {
select {
case <-k.done:
case <-time.After(1 * time.Second):
}
continue
}
sender, ok := <-k.outgoing
if !ok {
// programming error
panic("someone closed the outgoing channel")
}
if status, err := sender(); err == nil {
continue
} else {
log.Error(err)
if status == mesos.Status_DRIVER_ABORTED {
return
}
}
// attempt to re-queue the sender
select {
case <-k.done:
case k.outgoing <- sender:
}
}
}
}

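For orientation, a sketch of constructing the executor from the Config above. Only a subset of fields is shown; the literal values mirror the defaults in the executor config package, and the remaining dependencies (Kubelet, APIClient, Docker, Watch, ...) are wired up by the executor service package in practice.

package main

import (
	"os"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor"
)

func main() {
	updates := make(chan interface{}, 1) // pod config updates destined for the kubelet
	kubeletDone := make(chan struct{})   // closed once the kubelet's Run() returns

	exec := executor.New(executor.Config{
		Updates:         updates,
		SourceName:      "kubernetes",     // cf. config.DefaultInfoSource
		SuicideTimeout:  20 * time.Minute, // cf. config.DefaultSuicideTimeout
		KubeletFinished: kubeletDone,
		ExitFunc:        os.Exit,
	})

	// In the real binary the executor is handed to a mesos-go ExecutorDriver,
	// which then invokes Registered/LaunchTask/KillTask/etc. on it.
	_ = exec
}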
View File

@@ -0,0 +1,618 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"fmt"
"net/http"
"net/http/httptest"
"reflect"
"sync"
"sync/atomic"
"testing"
"time"
assertext "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/assert"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
kmruntime "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
"github.com/golang/glog"
bindings "github.com/mesos/mesos-go/executor"
"github.com/mesos/mesos-go/mesosproto"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
)
type suicideTracker struct {
suicideWatcher
stops uint32
resets uint32
timers uint32
jumps *uint32
}
func (t *suicideTracker) Reset(d time.Duration) bool {
defer func() { t.resets++ }()
return t.suicideWatcher.Reset(d)
}
func (t *suicideTracker) Stop() bool {
defer func() { t.stops++ }()
return t.suicideWatcher.Stop()
}
func (t *suicideTracker) Next(d time.Duration, driver bindings.ExecutorDriver, f jumper) suicideWatcher {
tracker := &suicideTracker{
stops: t.stops,
resets: t.resets,
jumps: t.jumps,
timers: t.timers + 1,
}
jumper := tracker.makeJumper(f)
tracker.suicideWatcher = t.suicideWatcher.Next(d, driver, jumper)
return tracker
}
func (t *suicideTracker) makeJumper(_ jumper) jumper {
return jumper(func(driver bindings.ExecutorDriver, cancel <-chan struct{}) {
glog.Warningln("jumping?!")
if t.jumps != nil {
atomic.AddUint32(t.jumps, 1)
}
})
}
func TestSuicide_zeroTimeout(t *testing.T) {
defer glog.Flush()
k := New(Config{})
tracker := &suicideTracker{suicideWatcher: k.suicideWatch}
k.suicideWatch = tracker
ch := k.resetSuicideWatch(nil)
select {
case <-ch:
case <-time.After(2 * time.Second):
t.Fatalf("timeout waiting for reset of suicide watch")
}
if tracker.stops != 0 {
t.Fatalf("expected no stops since suicideWatchTimeout was never set")
}
if tracker.resets != 0 {
t.Fatalf("expected no resets since suicideWatchTimeout was never set")
}
if tracker.timers != 0 {
t.Fatalf("expected no timers since suicideWatchTimeout was never set")
}
}
func TestSuicide_WithTasks(t *testing.T) {
defer glog.Flush()
k := New(Config{
SuicideTimeout: 50 * time.Millisecond,
})
jumps := uint32(0)
tracker := &suicideTracker{suicideWatcher: k.suicideWatch, jumps: &jumps}
k.suicideWatch = tracker
k.tasks["foo"] = &kuberTask{} // prevent suicide attempts from succeeding
// call reset with a nil timer
glog.Infoln("resetting suicide watch with 1 task")
select {
case <-k.resetSuicideWatch(nil):
tracker = k.suicideWatch.(*suicideTracker)
if tracker.stops != 1 {
t.Fatalf("expected suicide attempt to Stop() since there are registered tasks")
}
if tracker.resets != 0 {
t.Fatalf("expected no resets since")
}
if tracker.timers != 0 {
t.Fatalf("expected no timers since")
}
case <-time.After(1 * time.Second):
t.Fatalf("initial suicide watch setup failed")
}
delete(k.tasks, "foo") // zero remaining tasks
k.suicideTimeout = 1500 * time.Millisecond
suicideStart := time.Now()
// reset the suicide watch, which should actually start a timer now
glog.Infoln("resetting suicide watch with 0 tasks")
select {
case <-k.resetSuicideWatch(nil):
tracker = k.suicideWatch.(*suicideTracker)
if tracker.stops != 1 {
t.Fatalf("did not expect suicide attempt to Stop() since there are no registered tasks")
}
if tracker.resets != 1 {
t.Fatalf("expected 1 resets instead of %d", tracker.resets)
}
if tracker.timers != 1 {
t.Fatalf("expected 1 timers instead of %d", tracker.timers)
}
case <-time.After(1 * time.Second):
t.Fatalf("2nd suicide watch setup failed")
}
k.lock.Lock()
k.tasks["foo"] = &kuberTask{} // prevent suicide attempts from succeeding
k.lock.Unlock()
// reset the suicide watch, which should stop the existing timer
glog.Infoln("resetting suicide watch with 1 task")
select {
case <-k.resetSuicideWatch(nil):
tracker = k.suicideWatch.(*suicideTracker)
if tracker.stops != 2 {
t.Fatalf("expected 2 stops instead of %d since there are registered tasks", tracker.stops)
}
if tracker.resets != 1 {
t.Fatalf("expected 1 resets instead of %d", tracker.resets)
}
if tracker.timers != 1 {
t.Fatalf("expected 1 timers instead of %d", tracker.timers)
}
case <-time.After(1 * time.Second):
t.Fatalf("3rd suicide watch setup failed")
}
k.lock.Lock()
delete(k.tasks, "foo") // allow suicide attempts to schedule
k.lock.Unlock()
// reset the suicide watch, which should reset a stopped timer
glog.Infoln("resetting suicide watch with 0 tasks")
select {
case <-k.resetSuicideWatch(nil):
tracker = k.suicideWatch.(*suicideTracker)
if tracker.stops != 2 {
t.Fatalf("expected 2 stops instead of %d since there are no registered tasks", tracker.stops)
}
if tracker.resets != 2 {
t.Fatalf("expected 2 resets instead of %d", tracker.resets)
}
if tracker.timers != 1 {
t.Fatalf("expected 1 timers instead of %d", tracker.timers)
}
case <-time.After(1 * time.Second):
t.Fatalf("4th suicide watch setup failed")
}
sinceWatch := time.Since(suicideStart)
time.Sleep(3*time.Second - sinceWatch) // give the first timer a chance to misfire (it shouldn't, since Stop() was called)
if j := atomic.LoadUint32(&jumps); j != 1 {
t.Fatalf("expected 1 jumps instead of %d since stop was called", j)
} else {
glog.Infoln("jumps verified") // glog so we get a timestamp
}
}
// TestExecutorRegister ensures that the executor thinks it is connected
// after Register is called.
func TestExecutorRegister(t *testing.T) {
mockDriver := &MockExecutorDriver{}
updates := make(chan interface{}, 1024)
executor := New(Config{
Docker: dockertools.ConnectToDockerOrDie("fake://"),
Updates: updates,
SourceName: "executor_test",
})
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
initialPodUpdate := kubelet.PodUpdate{
Pods: []*api.Pod{},
Op: kubelet.SET,
Source: executor.sourcename,
}
receivedInitialPodUpdate := false
select {
case m := <-updates:
update, ok := m.(kubelet.PodUpdate)
if ok {
if reflect.DeepEqual(initialPodUpdate, update) {
receivedInitialPodUpdate = true
}
}
case <-time.After(time.Second):
}
assert.Equal(t, true, receivedInitialPodUpdate,
"executor should have sent an initial PodUpdate "+
"to the updates chan upon registration")
assert.Equal(t, true, executor.isConnected(), "executor should be connected")
mockDriver.AssertExpectations(t)
}
// TestExecutorDisconnect ensures that the executor thinks that it is not
// connected after a call to Disconnected has occurred.
func TestExecutorDisconnect(t *testing.T) {
mockDriver := &MockExecutorDriver{}
executor := NewTestKubernetesExecutor()
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
executor.Disconnected(mockDriver)
assert.Equal(t, false, executor.isConnected(),
"executor should not be connected after Disconnected")
mockDriver.AssertExpectations(t)
}
// TestExecutorReregister ensures that the executor thinks it is connected
// after a connection problem happens, followed by a call to Reregistered.
func TestExecutorReregister(t *testing.T) {
mockDriver := &MockExecutorDriver{}
executor := NewTestKubernetesExecutor()
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
executor.Disconnected(mockDriver)
executor.Reregistered(mockDriver, nil)
assert.Equal(t, true, executor.isConnected(), "executor should be connected")
mockDriver.AssertExpectations(t)
}
// TestExecutorLaunchAndKillTask ensures that the executor is able to launch
// and kill tasks while properly bookkeeping its tasks.
func TestExecutorLaunchAndKillTask(t *testing.T) {
// create a fake pod list/watch that backs the fake apiserver below
podListWatch := NewMockPodsListWatch(api.PodList{})
// create fake apiserver
testApiServer := NewTestServer(t, api.NamespaceDefault, &podListWatch.list)
defer testApiServer.server.Close()
mockDriver := &MockExecutorDriver{}
updates := make(chan interface{}, 1024)
config := Config{
Docker: dockertools.ConnectToDockerOrDie("fake://"),
Updates: updates,
APIClient: client.NewOrDie(&client.Config{
Host: testApiServer.server.URL,
Version: testapi.Version(),
}),
Kubelet: &kubelet.Kubelet{},
PodStatusFunc: func(kl *kubelet.Kubelet, pod *api.Pod) (*api.PodStatus, error) {
return &api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
Name: "foo",
State: api.ContainerState{
Running: &api.ContainerStateRunning{},
},
},
},
Phase: api.PodRunning,
}, nil
},
}
executor := New(config)
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
select {
case <-updates:
case <-time.After(time.Second):
t.Fatalf("Executor should send an intial update on Registration")
}
pod := NewTestPod(1)
podTask, err := podtask.New(api.NewDefaultContext(), "",
*pod, &mesosproto.ExecutorInfo{})
assert.Equal(t, nil, err, "must be able to create a task from a pod")
taskInfo := podTask.BuildTaskInfo()
data, err := testapi.Codec().Encode(pod)
assert.Equal(t, nil, err, "must be able to encode a pod's spec data")
taskInfo.Data = data
var statusUpdateCalls sync.WaitGroup
statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }
statusUpdateCalls.Add(1)
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_STARTING,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()
statusUpdateCalls.Add(1)
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_RUNNING,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()
executor.LaunchTask(mockDriver, taskInfo)
assertext.EventuallyTrue(t, 5*time.Second, func() bool {
executor.lock.Lock()
defer executor.lock.Unlock()
return len(executor.tasks) == 1 && len(executor.pods) == 1
}, "executor must be able to create a task and a pod")
gotPodUpdate := false
select {
case m := <-updates:
update, ok := m.(kubelet.PodUpdate)
if ok && len(update.Pods) == 1 {
gotPodUpdate = true
}
case <-time.After(time.Second):
}
assert.Equal(t, true, gotPodUpdate,
"the executor should send an update about a new pod to "+
"the updates chan when creating a new one.")
// Allow some time for asynchronous requests to the driver.
finished := kmruntime.After(statusUpdateCalls.Wait)
select {
case <-finished:
case <-time.After(5 * time.Second):
t.Fatalf("timed out waiting for status update calls to finish")
}
statusUpdateCalls.Add(1)
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_KILLED,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()
executor.KillTask(mockDriver, taskInfo.TaskId)
assertext.EventuallyTrue(t, 5*time.Second, func() bool {
executor.lock.Lock()
defer executor.lock.Unlock()
return len(executor.tasks) == 0 && len(executor.pods) == 0
}, "executor must be able to kill a created task and pod")
// Allow some time for asynchronous requests to the driver.
finished = kmruntime.After(statusUpdateCalls.Wait)
select {
case <-finished:
case <-time.After(5 * time.Second):
t.Fatalf("timed out waiting for status update calls to finish")
}
mockDriver.AssertExpectations(t)
}
// TestExecutorFrameworkMessage ensures that the executor is able to
// handle messages from the framework, specifically about lost tasks
// and Kamikaze. When a task is lost, the executor needs to clean up
// its state. When a Kamikaze message is received, the executor should
// attempt suicide.
func TestExecutorFrameworkMessage(t *testing.T) {
mockDriver := &MockExecutorDriver{}
kubeletFinished := make(chan struct{})
config := Config{
Docker: dockertools.ConnectToDockerOrDie("fake://"),
Updates: make(chan interface{}, 1024),
APIClient: client.NewOrDie(&client.Config{
Host: "fakehost",
Version: testapi.Version(),
}),
ShutdownAlert: func() {
close(kubeletFinished)
},
KubeletFinished: kubeletFinished,
}
executor := New(config)
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
executor.FrameworkMessage(mockDriver, "test framework message")
// set up a pod to then lose
pod := NewTestPod(1)
podTask, _ := podtask.New(api.NewDefaultContext(), "foo",
*pod, &mesosproto.ExecutorInfo{})
taskInfo := podTask.BuildTaskInfo()
data, _ := testapi.Codec().Encode(pod)
taskInfo.Data = data
executor.LaunchTask(mockDriver, taskInfo)
// send task-lost message for it
called := make(chan struct{})
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_LOST,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()
executor.FrameworkMessage(mockDriver, "task-lost:foo")
assertext.EventuallyTrue(t, 5*time.Second, func() bool {
executor.lock.Lock()
defer executor.lock.Unlock()
return len(executor.tasks) == 0 && len(executor.pods) == 0
}, "executor must be able to kill a created task and pod")
select {
case <-called:
case <-time.After(5 * time.Second):
t.Fatalf("timed out waiting for SendStatusUpdate")
}
mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()
executor.FrameworkMessage(mockDriver, messages.Kamikaze)
assert.Equal(t, true, executor.isDone(),
"executor should have shut down after receiving a Kamikaze message")
mockDriver.AssertExpectations(t)
}
// Create a pod with a given index, requiring one port
func NewTestPod(i int) *api.Pod {
name := fmt.Sprintf("pod%d", i)
return &api.Pod{
TypeMeta: api.TypeMeta{APIVersion: testapi.Version()},
ObjectMeta: api.ObjectMeta{
Name: name,
Namespace: api.NamespaceDefault,
SelfLink: testapi.SelfLink("pods", name), // string(i) would produce a rune, not the pod name
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Ports: []api.ContainerPort{
{
ContainerPort: 8000 + i,
Protocol: api.ProtocolTCP,
},
},
},
},
},
Status: api.PodStatus{
Conditions: []api.PodCondition{
{
Type: api.PodReady,
Status: api.ConditionTrue,
},
},
},
}
}
// Create a mock of the pods ListWatch that would normally watch the apiserver pods endpoint
type MockPodsListWatch struct {
ListWatch cache.ListWatch
fakeWatcher *watch.FakeWatcher
list api.PodList
}
// An apiserver mock which partially mocks the pods API
type TestServer struct {
server *httptest.Server
Stats map[string]uint
lock sync.Mutex
}
func NewTestServer(t *testing.T, namespace string, pods *api.PodList) *TestServer {
ts := TestServer{
Stats: map[string]uint{},
}
mux := http.NewServeMux()
mux.HandleFunc(testapi.ResourcePath("bindings", namespace, ""), func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
})
ts.server = httptest.NewServer(mux)
return &ts
}
func NewMockPodsListWatch(initialPodList api.PodList) *MockPodsListWatch {
lw := MockPodsListWatch{
fakeWatcher: watch.NewFake(),
list: initialPodList,
}
lw.ListWatch = cache.ListWatch{
WatchFunc: func(resourceVersion string) (watch.Interface, error) {
return lw.fakeWatcher, nil
},
ListFunc: func() (runtime.Object, error) {
return &lw.list, nil
},
}
return &lw
}
// TestExecutorShutdown ensures that the executor properly shuts down
// when Shutdown is called.
func TestExecutorShutdown(t *testing.T) {
mockDriver := &MockExecutorDriver{}
kubeletFinished := make(chan struct{})
var exitCalled int32 = 0
config := Config{
Docker: dockertools.ConnectToDockerOrDie("fake://"),
Updates: make(chan interface{}, 1024),
ShutdownAlert: func() {
close(kubeletFinished)
},
KubeletFinished: kubeletFinished,
ExitFunc: func(_ int) {
atomic.AddInt32(&exitCalled, 1)
},
}
executor := New(config)
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()
executor.Shutdown(mockDriver)
assert.Equal(t, false, executor.isConnected(),
"executor should not be connected after Shutdown")
assert.Equal(t, true, executor.isDone(),
"executor should be in Done state after Shutdown")
select {
case <-executor.Done():
default:
t.Fatal("done channel should be closed after shutdown")
}
assert.Equal(t, true, atomic.LoadInt32(&exitCalled) > 0,
"the executor should call its ExitFunc when it is ready to close down")
mockDriver.AssertExpectations(t)
}
func TestExecutorsendFrameworkMessage(t *testing.T) {
mockDriver := &MockExecutorDriver{}
executor := NewTestKubernetesExecutor()
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
called := make(chan struct{})
mockDriver.On(
"SendFrameworkMessage",
"foo bar baz",
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()
executor.sendFrameworkMessage(mockDriver, "foo bar baz")
// guard against data race in mock driver between AssertExpectations and Called
select {
case <-called: // expected
case <-time.After(5 * time.Second):
t.Fatalf("expected call to SendFrameworkMessage")
}
mockDriver.AssertExpectations(t)
}

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package messages exposes executor event/message names as constants.
package messages

View File

@@ -0,0 +1,32 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package messages
// messages that ship with TaskStatus objects
const (
ContainersDisappeared = "containers-disappeared"
CreateBindingFailure = "create-binding-failure"
CreateBindingSuccess = "create-binding-success"
ExecutorUnregistered = "executor-unregistered"
ExecutorShutdown = "executor-shutdown"
LaunchTaskFailed = "launch-task-failed"
TaskKilled = "task-killed"
UnmarshalTaskDataFailure = "unmarshal-task-data-failure"
TaskLostAck = "task-lost-ack" // executor acknowledgement of forwarded TASK_LOST framework message
Kamikaze = "kamikaze"
)
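These constants travel in the Message field of Mesos task status updates, as the executor's newStatus helper earlier in this commit shows. Below is an illustrative sketch (not part of this commit) of building such a status with mesos-go; stringPtr stands in for proto.String to keep the sketch dependency-light, and the task id value is a placeholder.

package main

import (
	"fmt"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
	mesos "github.com/mesos/mesos-go/mesosproto"
)

// stringPtr stands in for proto.String so the sketch needs no extra imports.
func stringPtr(s string) *string { return &s }

// newKilledStatus mirrors the executor's newStatus helper shown earlier in
// this commit: the message constant rides along with the terminal task state.
func newKilledStatus(taskId *mesos.TaskID) *mesos.TaskStatus {
	state := mesos.TaskState_TASK_KILLED
	return &mesos.TaskStatus{
		TaskId:  taskId,
		State:   &state,
		Message: stringPtr(messages.TaskKilled),
	}
}

func main() {
	status := newKilledStatus(&mesos.TaskID{Value: stringPtr("pod.abc123")})
	fmt.Println(status.GetState(), status.GetMessage())
}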

View File

@@ -0,0 +1,81 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"testing"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
"github.com/mesos/mesos-go/mesosproto"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
)
type MockExecutorDriver struct {
mock.Mock
}
func (m *MockExecutorDriver) Start() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) Stop() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) Abort() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) Join() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) Run() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) SendStatusUpdate(taskStatus *mesosproto.TaskStatus) (mesosproto.Status, error) {
args := m.Called(*taskStatus.State)
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) SendFrameworkMessage(msg string) (mesosproto.Status, error) {
args := m.Called(msg)
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func NewTestKubernetesExecutor() *KubernetesExecutor {
return New(Config{
Docker: dockertools.ConnectToDockerOrDie("fake://"),
Updates: make(chan interface{}, 1024),
})
}
func TestExecutorNew(t *testing.T) {
mockDriver := &MockExecutorDriver{}
executor := NewTestKubernetesExecutor()
executor.Init(mockDriver)
assert.Equal(t, executor.isDone(), false, "executor should not be in Done state on initialization")
assert.Equal(t, executor.isConnected(), false, "executor should not be connected on initialization")
}
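Condensed from the tests above, the following is an illustrative sketch (not part of this commit, reusing this file's imports, with a hypothetical test name): because the mock's SendStatusUpdate records m.Called(*taskStatus.State), expectations are keyed on the task state alone, which is why the earlier tests register them as mockDriver.On("SendStatusUpdate", <state>).

// TestMockDriverStateMatching is an illustrative sketch demonstrating how
// expectations on the mock driver are matched against the dereferenced task
// state rather than the full TaskStatus value.
func TestMockDriverStateMatching(t *testing.T) {
	mockDriver := &MockExecutorDriver{}
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Once()

	state := mesosproto.TaskState_TASK_RUNNING
	status, err := mockDriver.SendStatusUpdate(&mesosproto.TaskStatus{State: &state})
	assert.NoError(t, err)
	assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, status)
	mockDriver.AssertExpectations(t)
}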

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package service contains the cmd/k8sm-executor glue code.
package service

View File

@@ -0,0 +1,600 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"bufio"
"fmt"
"io"
"math/rand"
"net"
"net/http"
"os"
"os/exec"
"strconv"
"strings"
"sync"
"time"
"github.com/GoogleCloudPlatform/kubernetes/cmd/kubelet/app"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/config"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/redirfd"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/credentialprovider"
"github.com/GoogleCloudPlatform/kubernetes/pkg/healthz"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor"
kconfig "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/config"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util/mount"
log "github.com/golang/glog"
"github.com/kardianos/osext"
bindings "github.com/mesos/mesos-go/executor"
"github.com/spf13/pflag"
)
const (
// if we don't use this source then the kubelet will treat these as static pods and create mirror pods for them.
// @see ConfigSourceAnnotationKey
MESOS_CFG_SOURCE = kubelet.ApiserverSource
)
type KubeletExecutorServer struct {
*app.KubeletServer
RunProxy bool
ProxyLogV int
ProxyExec string
ProxyLogfile string
ProxyBindall bool
SuicideTimeout time.Duration
ShutdownFD int
ShutdownFIFO string
}
func NewKubeletExecutorServer() *KubeletExecutorServer {
k := &KubeletExecutorServer{
KubeletServer: app.NewKubeletServer(),
RunProxy: true,
ProxyExec: "./kube-proxy",
ProxyLogfile: "./proxy-log",
SuicideTimeout: config.DefaultSuicideTimeout,
}
if pwd, err := os.Getwd(); err != nil {
log.Warningf("failed to determine current directory: %v", err)
} else {
k.RootDirectory = pwd // mesos sandbox dir
}
k.Address = util.IP(net.ParseIP(defaultBindingAddress()))
k.ShutdownFD = -1 // indicates unspecified FD
return k
}
func NewHyperKubeletExecutorServer() *KubeletExecutorServer {
s := NewKubeletExecutorServer()
// cache this for later use
binary, err := osext.Executable()
if err != nil {
log.Fatalf("failed to determine currently running executable: %v", err)
}
s.ProxyExec = binary
return s
}
func (s *KubeletExecutorServer) addCoreFlags(fs *pflag.FlagSet) {
s.KubeletServer.AddFlags(fs)
fs.BoolVar(&s.RunProxy, "run-proxy", s.RunProxy, "Maintain a running kube-proxy instance as a child proc of this kubelet-executor.")
fs.IntVar(&s.ProxyLogV, "proxy-logv", s.ProxyLogV, "Log verbosity of the child kube-proxy.")
fs.StringVar(&s.ProxyLogfile, "proxy-logfile", s.ProxyLogfile, "Path to the kube-proxy log file.")
fs.BoolVar(&s.ProxyBindall, "proxy-bindall", s.ProxyBindall, "When true will cause kube-proxy to bind to 0.0.0.0.")
fs.DurationVar(&s.SuicideTimeout, "suicide-timeout", s.SuicideTimeout, "Self-terminate after this period of inactivity. Zero disables suicide watch.")
fs.IntVar(&s.ShutdownFD, "shutdown-fd", s.ShutdownFD, "File descriptor used to signal shutdown to external watchers, requires shutdown-fifo flag")
fs.StringVar(&s.ShutdownFIFO, "shutdown-fifo", s.ShutdownFIFO, "FIFO used to signal shutdown to external watchers, requires shutdown-fd flag")
}
func (s *KubeletExecutorServer) AddStandaloneFlags(fs *pflag.FlagSet) {
s.addCoreFlags(fs)
fs.StringVar(&s.ProxyExec, "proxy-exec", s.ProxyExec, "Path to the kube-proxy executable.")
}
func (s *KubeletExecutorServer) AddHyperkubeFlags(fs *pflag.FlagSet) {
s.addCoreFlags(fs)
}
// returns a Closer that should be closed to signal impending shutdown, but only if ShutdownFD
// and ShutdownFIFO were specified. if they are specified, then this func blocks until there's
// a reader on the FIFO stream.
func (s *KubeletExecutorServer) syncExternalShutdownWatcher() (io.Closer, error) {
if s.ShutdownFD == -1 || s.ShutdownFIFO == "" {
return nil, nil
}
// redirfd -w n fifo ... # (blocks until the fifo is read)
log.Infof("blocked, waiting for shutdown reader for FD %d FIFO at %s", s.ShutdownFD, s.ShutdownFIFO)
return redirfd.Write.Redirect(true, false, redirfd.FileDescriptor(s.ShutdownFD), s.ShutdownFIFO)
}
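// exampleShutdownWatcher is an illustrative sketch, not part of this commit
// (the function name is hypothetical; it reuses this file's imports). It plays
// the role of the external watcher on the other end of the FIFO: opening the
// FIFO for reading unblocks syncExternalShutdownWatcher above, and EOF on the
// stream later signals that the executor has begun shutting down.
func exampleShutdownWatcher(fifoPath string) error {
	f, err := os.Open(fifoPath) // blocks until the executor opens its write end
	if err != nil {
		return err
	}
	defer f.Close()
	buf := make([]byte, 1)
	for {
		if _, err := f.Read(buf); err == io.EOF {
			log.Infof("executor signalled shutdown via %s", fifoPath)
			return nil
		} else if err != nil {
			return err
		}
	}
}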
// Run runs the specified KubeletExecutorServer.
func (s *KubeletExecutorServer) Run(hks hyperkube.Interface, _ []string) error {
rand.Seed(time.Now().UTC().UnixNano())
if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
log.Info(err)
}
var apiclient *client.Client
clientConfig, err := s.CreateAPIServerClientConfig()
if err == nil {
apiclient, err = client.New(clientConfig)
}
if err != nil {
// required for k8sm since we need to send api.Binding information
// back to the apiserver
log.Fatalf("No API client: %v", err)
}
log.Infof("Using root directory: %v", s.RootDirectory)
credentialprovider.SetPreferredDockercfgPath(s.RootDirectory)
shutdownCloser, err := s.syncExternalShutdownWatcher()
if err != nil {
return err
}
cadvisorInterface, err := cadvisor.New(s.CadvisorPort)
if err != nil {
return err
}
imageGCPolicy := kubelet.ImageGCPolicy{
HighThresholdPercent: s.ImageGCHighThresholdPercent,
LowThresholdPercent: s.ImageGCLowThresholdPercent,
}
diskSpacePolicy := kubelet.DiskSpacePolicy{
DockerFreeDiskMB: s.LowDiskSpaceThresholdMB,
RootFreeDiskMB: s.LowDiskSpaceThresholdMB,
}
//TODO(jdef) intentionally NOT initializing a cloud provider here since:
//(a) the kubelet doesn't actually use it
//(b) we don't need to create N-kubelet connections to zookeeper for no good reason
//cloud := cloudprovider.InitCloudProvider(s.CloudProvider, s.CloudConfigFile)
//log.Infof("Successfully initialized cloud provider: %q from the config file: %q\n", s.CloudProvider, s.CloudConfigFile)
hostNetworkSources, err := kubelet.GetValidatedSources(strings.Split(s.HostNetworkSources, ","))
if err != nil {
return err
}
tlsOptions, err := s.InitializeTLS()
if err != nil {
return err
}
mounter := mount.New()
if s.Containerized {
log.V(2).Info("Running kubelet in containerized mode (experimental)")
mounter = &mount.NsenterMounter{}
}
var dockerExecHandler dockertools.ExecHandler
switch s.DockerExecHandlerName {
case "native":
dockerExecHandler = &dockertools.NativeExecHandler{}
case "nsenter":
dockerExecHandler = &dockertools.NsenterExecHandler{}
default:
log.Warningf("Unknown Docker exec handler %q; defaulting to native", s.DockerExecHandlerName)
dockerExecHandler = &dockertools.NativeExecHandler{}
}
kcfg := app.KubeletConfig{
Address: s.Address,
AllowPrivileged: s.AllowPrivileged,
HostNetworkSources: hostNetworkSources,
HostnameOverride: s.HostnameOverride,
RootDirectory: s.RootDirectory,
// ConfigFile: ""
// ManifestURL: ""
// FileCheckFrequency
// HTTPCheckFrequency
PodInfraContainerImage: s.PodInfraContainerImage,
SyncFrequency: s.SyncFrequency,
RegistryPullQPS: s.RegistryPullQPS,
RegistryBurst: s.RegistryBurst,
MinimumGCAge: s.MinimumGCAge,
MaxPerPodContainerCount: s.MaxPerPodContainerCount,
MaxContainerCount: s.MaxContainerCount,
RegisterNode: s.RegisterNode,
ClusterDomain: s.ClusterDomain,
ClusterDNS: s.ClusterDNS,
Runonce: s.RunOnce,
Port: s.Port,
ReadOnlyPort: s.ReadOnlyPort,
CadvisorInterface: cadvisorInterface,
EnableServer: s.EnableServer,
EnableDebuggingHandlers: s.EnableDebuggingHandlers,
DockerClient: dockertools.ConnectToDockerOrDie(s.DockerEndpoint),
KubeClient: apiclient,
MasterServiceNamespace: s.MasterServiceNamespace,
VolumePlugins: app.ProbeVolumePlugins(),
NetworkPlugins: app.ProbeNetworkPlugins(),
NetworkPluginName: s.NetworkPluginName,
StreamingConnectionIdleTimeout: s.StreamingConnectionIdleTimeout,
TLSOptions: tlsOptions,
ImageGCPolicy: imageGCPolicy,
DiskSpacePolicy: diskSpacePolicy,
Cloud: nil, // TODO(jdef) Cloud, specifying null here because we don't want all kubelets polling mesos-master; need to account for this in the cloudprovider impl
NodeStatusUpdateFrequency: s.NodeStatusUpdateFrequency,
ResourceContainer: s.ResourceContainer,
CgroupRoot: s.CgroupRoot,
ContainerRuntime: s.ContainerRuntime,
Mounter: mounter,
DockerDaemonContainer: s.DockerDaemonContainer,
SystemContainer: s.SystemContainer,
ConfigureCBR0: s.ConfigureCBR0,
MaxPods: s.MaxPods,
DockerExecHandler: dockerExecHandler,
}
err = app.RunKubelet(&kcfg, app.KubeletBuilder(func(kc *app.KubeletConfig) (app.KubeletBootstrap, *kconfig.PodConfig, error) {
return s.createAndInitKubelet(kc, hks, clientConfig, shutdownCloser)
}))
if err != nil {
return err
}
if s.HealthzPort > 0 {
healthz.DefaultHealthz()
go util.Forever(func() {
err := http.ListenAndServe(net.JoinHostPort(s.HealthzBindAddress.String(), strconv.Itoa(s.HealthzPort)), nil)
if err != nil {
log.Errorf("Starting health server failed: %v", err)
}
}, 5*time.Second)
}
// block until executor is shut down or commits shutdown
select {}
}
func defaultBindingAddress() string {
libProcessIP := os.Getenv("LIBPROCESS_IP")
if libProcessIP == "" {
return "0.0.0.0"
} else {
return libProcessIP
}
}
func (ks *KubeletExecutorServer) createAndInitKubelet(
kc *app.KubeletConfig,
hks hyperkube.Interface,
clientConfig *client.Config,
shutdownCloser io.Closer,
) (app.KubeletBootstrap, *kconfig.PodConfig, error) {
// TODO(k8s): block until all sources have delivered at least one update to the channel, or break the sync loop
// up into "per source" synchronizations
// TODO(k8s): KubeletConfig.KubeClient should be a client interface, but client interface misses certain methods
// used by kubelet. Since NewMainKubelet expects a client interface, we need to make sure we are not passing
// a nil pointer to it when what we really want is a nil interface.
var kubeClient client.Interface
if kc.KubeClient == nil {
kubeClient = nil
} else {
kubeClient = kc.KubeClient
}
gcPolicy := kubelet.ContainerGCPolicy{
MinAge: kc.MinimumGCAge,
MaxPerPodContainer: kc.MaxPerPodContainerCount,
MaxContainers: kc.MaxContainerCount,
}
pc := kconfig.NewPodConfig(kconfig.PodConfigNotificationSnapshotAndUpdates, kc.Recorder)
updates := pc.Channel(MESOS_CFG_SOURCE)
klet, err := kubelet.NewMainKubelet(
kc.Hostname,
kc.DockerClient,
kubeClient,
kc.RootDirectory,
kc.PodInfraContainerImage,
kc.SyncFrequency,
float32(kc.RegistryPullQPS),
kc.RegistryBurst,
gcPolicy,
pc.SeenAllSources,
kc.RegisterNode,
kc.ClusterDomain,
net.IP(kc.ClusterDNS),
kc.MasterServiceNamespace,
kc.VolumePlugins,
kc.NetworkPlugins,
kc.NetworkPluginName,
kc.StreamingConnectionIdleTimeout,
kc.Recorder,
kc.CadvisorInterface,
kc.ImageGCPolicy,
kc.DiskSpacePolicy,
kc.Cloud,
kc.NodeStatusUpdateFrequency,
kc.ResourceContainer,
kc.OSInterface,
kc.CgroupRoot,
kc.ContainerRuntime,
kc.Mounter,
kc.DockerDaemonContainer,
kc.SystemContainer,
kc.ConfigureCBR0,
kc.MaxPods,
kc.DockerExecHandler,
)
if err != nil {
return nil, nil, err
}
//TODO(jdef) either configure Watch here with something useful, or else
// get rid of it from executor.Config
kubeletFinished := make(chan struct{})
exec := executor.New(executor.Config{
Kubelet: klet,
Updates: updates,
SourceName: MESOS_CFG_SOURCE,
APIClient: kc.KubeClient,
Docker: kc.DockerClient,
SuicideTimeout: ks.SuicideTimeout,
KubeletFinished: kubeletFinished,
ShutdownAlert: func() {
if shutdownCloser != nil {
if e := shutdownCloser.Close(); e != nil {
log.Warningf("failed to signal shutdown to external watcher: %v", e)
}
}
},
ExitFunc: os.Exit,
PodStatusFunc: func(kl *kubelet.Kubelet, pod *api.Pod) (*api.PodStatus, error) {
return kl.GetRuntime().GetPodStatus(pod)
},
})
k := &kubeletExecutor{
Kubelet: klet,
runProxy: ks.RunProxy,
proxyLogV: ks.ProxyLogV,
proxyExec: ks.ProxyExec,
proxyLogfile: ks.ProxyLogfile,
proxyBindall: ks.ProxyBindall,
address: ks.Address,
dockerClient: kc.DockerClient,
hks: hks,
kubeletFinished: kubeletFinished,
executorDone: exec.Done(),
clientConfig: clientConfig,
}
dconfig := bindings.DriverConfig{
Executor: exec,
HostnameOverride: ks.HostnameOverride,
BindingAddress: net.IP(ks.Address),
}
if driver, err := bindings.NewMesosExecutorDriver(dconfig); err != nil {
log.Fatalf("failed to create executor driver: %v", err)
} else {
k.driver = driver
}
log.V(2).Infof("Initialize executor driver...")
k.BirthCry()
exec.Init(k.driver)
k.StartGarbageCollection()
return k, pc, nil
}
// kubelet decorator
type kubeletExecutor struct {
*kubelet.Kubelet
initialize sync.Once
driver bindings.ExecutorDriver
runProxy bool
proxyLogV int
proxyExec string
proxyLogfile string
proxyBindall bool
address util.IP
dockerClient dockertools.DockerInterface
hks hyperkube.Interface
kubeletFinished chan struct{} // closed once kubelet.Run() returns
executorDone <-chan struct{} // from KubeletExecutor.Done()
clientConfig *client.Config
}
func (kl *kubeletExecutor) ListenAndServe(address net.IP, port uint, tlsOptions *kubelet.TLSOptions, enableDebuggingHandlers bool) {
// this func could be called many times, depending how often the HTTP server crashes,
// so only execute certain initialization procs once
kl.initialize.Do(func() {
if kl.runProxy {
go runtime.Until(kl.runProxyService, 5*time.Second, kl.executorDone)
}
go func() {
if _, err := kl.driver.Run(); err != nil {
log.Fatalf("executor driver failed: %v", err)
}
log.Info("executor Run completed")
}()
})
log.Infof("Starting kubelet server...")
kubelet.ListenAndServeKubeletServer(kl, address, port, tlsOptions, enableDebuggingHandlers)
}
// this function blocks as long as the proxy service is running; intended to be
// executed asynchronously.
func (kl *kubeletExecutor) runProxyService() {
log.Infof("Starting proxy process...")
const KM_PROXY = "proxy" //TODO(jdef) constant should be shared with km package
args := []string{}
if kl.hks.FindServer(KM_PROXY) {
args = append(args, KM_PROXY)
log.V(1).Infof("attempting to using km proxy service")
} else if _, err := os.Stat(kl.proxyExec); os.IsNotExist(err) {
log.Errorf("failed to locate proxy executable at '%v' and km not present: %v", kl.proxyExec, err)
return
}
bindAddress := "0.0.0.0"
if !kl.proxyBindall {
bindAddress = kl.address.String()
}
args = append(args,
fmt.Sprintf("--bind-address=%s", bindAddress),
fmt.Sprintf("--v=%d", kl.proxyLogV),
"--logtostderr=true",
)
// add client.Config args here. proxy still calls client.BindClientConfigFlags
appendStringArg := func(name, value string) {
if value != "" {
args = append(args, fmt.Sprintf("--%s=%s", name, value))
}
}
appendStringArg("master", kl.clientConfig.Host)
/* TODO(jdef) move these flags to a config file pointed to by --kubeconfig
appendStringArg("api-version", kl.clientConfig.Version)
appendStringArg("client-certificate", kl.clientConfig.CertFile)
appendStringArg("client-key", kl.clientConfig.KeyFile)
appendStringArg("certificate-authority", kl.clientConfig.CAFile)
args = append(args, fmt.Sprintf("--insecure-skip-tls-verify=%t", kl.clientConfig.Insecure))
*/
log.Infof("Spawning process executable %s with args '%+v'", kl.proxyExec, args)
cmd := exec.Command(kl.proxyExec, args...)
if _, err := cmd.StdoutPipe(); err != nil {
log.Fatal(err)
}
proxylogs, err := cmd.StderrPipe()
if err != nil {
log.Fatal(err)
}
//TODO(jdef) append instead of truncate? what if the disk is full?
logfile, err := os.Create(kl.proxyLogfile)
if err != nil {
log.Fatal(err)
}
defer logfile.Close()
ch := make(chan struct{})
go func() {
defer func() {
select {
case <-ch:
log.Infof("killing proxy process..")
if err = cmd.Process.Kill(); err != nil {
log.Errorf("failed to kill proxy process: %v", err)
}
default:
}
}()
writer := bufio.NewWriter(logfile)
defer writer.Flush()
<-ch
written, err := io.Copy(writer, proxylogs)
if err != nil {
log.Errorf("error writing data to proxy log: %v", err)
}
log.Infof("wrote %d bytes to proxy log", written)
}()
// if the proxy fails to start then we exit the executor, otherwise
// wait for the proxy process to end (and release resources after).
if err := cmd.Start(); err != nil {
log.Fatal(err)
}
close(ch)
if err := cmd.Wait(); err != nil {
log.Error(err)
}
}
// runs the main kubelet loop, closing the kubeletFinished chan when the loop exits.
// never returns.
func (kl *kubeletExecutor) Run(updates <-chan kubelet.PodUpdate) {
defer func() {
close(kl.kubeletFinished)
util.HandleCrash()
log.Infoln("kubelet run terminated") //TODO(jdef) turn down verbosity
// important: never return! this is in our contract
select {}
}()
// push updates through a closable pipe. when the executor indicates shutdown
// via Done() we want to stop the Kubelet from processing updates.
pipe := make(chan kubelet.PodUpdate)
go func() {
// closing pipe will cause our patched kubelet's syncLoop() to exit
defer close(pipe)
pipeLoop:
for {
select {
case <-kl.executorDone:
break pipeLoop
default:
select {
case u := <-updates:
select {
case pipe <- u: // noop
case <-kl.executorDone:
break pipeLoop
}
case <-kl.executorDone:
break pipeLoop
}
}
}
}()
// we expect that Run() will complete after the pipe is closed and the
// kubelet's syncLoop() has finished processing its backlog, which hopefully
// will not take very long. Peeking into the future (current k8s master) it
// seems that the backlog has grown from 1 to 50 -- this may negatively impact
// us going forward, time will tell.
util.Until(func() { kl.Kubelet.Run(pipe) }, 0, kl.executorDone)
//TODO(jdef) revisit this if/when executor failover lands
err := kl.SyncPods([]*api.Pod{}, nil, nil, time.Now())
if err != nil {
log.Errorf("failed to cleanly remove all pods and associated state: %v", err)
}
}
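kubeletExecutor.Run above forwards pod updates through a closable pipe so that executor shutdown (executorDone) cleanly terminates the kubelet's sync loop. The following self-contained sketch isolates that forwarding pattern with hypothetical names (forwardUntilDone, string updates); it is illustrative and not part of this commit.

package main

import (
	"fmt"
	"time"
)

// forwardUntilDone forwards updates to a closable pipe until done is closed;
// closing the pipe is what tells the consumer to stop, mirroring the pattern
// in kubeletExecutor.Run above.
func forwardUntilDone(updates <-chan string, done <-chan struct{}) <-chan string {
	pipe := make(chan string)
	go func() {
		defer close(pipe) // closing the pipe ends the consumer's loop
		for {
			select {
			case <-done:
				return
			case u := <-updates:
				select {
				case pipe <- u:
				case <-done:
					return
				}
			}
		}
	}()
	return pipe
}

func main() {
	updates := make(chan string, 2)
	done := make(chan struct{})
	updates <- "pod update 1"
	updates <- "pod update 2"

	pipe := forwardUntilDone(updates, done)
	go func() {
		time.Sleep(50 * time.Millisecond)
		close(done)
	}()
	for u := range pipe { // ranges until the pipe is closed
		fmt.Println("consumed:", u)
	}
	fmt.Println("pipe closed, consumer exits")
}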

View File

@@ -0,0 +1,21 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package hyperkube facilitates the combination of multiple
// kubernetes-mesos components into a single binary form, providing a
// simple mechanism for intra-component discovery as per the original
// Kubernetes hyperkube package.
package hyperkube

View File

@@ -0,0 +1,54 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package hyperkube
import (
"github.com/spf13/pflag"
)
var (
nilKube = &nilKubeType{}
)
type Interface interface {
// FindServer will find a specific server named name.
FindServer(name string) bool
// The executable name, used for help and soft-link invocation
Name() string
// Flags returns a flagset for "global" flags.
Flags() *pflag.FlagSet
}
type nilKubeType struct{}
func (n *nilKubeType) FindServer(_ string) bool {
return false
}
func (n *nilKubeType) Name() string {
return ""
}
func (n *nilKubeType) Flags() *pflag.FlagSet {
return nil
}
func Nil() Interface {
return nilKube
}
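The nil implementation above exists so callers can always depend on a hyperkube.Interface, even outside a combined binary. Below is a small, self-contained sketch (hypothetical fakeHyperkube and proxyInvocation names; the Interface is mirrored locally for self-containment) of how runProxyService earlier in this commit uses FindServer to decide between the embedded proxy server and a standalone kube-proxy executable.

package main

import (
	"fmt"

	"github.com/spf13/pflag"
)

// Interface mirrors hyperkube.Interface as defined above.
type Interface interface {
	FindServer(name string) bool
	Name() string
	Flags() *pflag.FlagSet
}

// fakeHyperkube is a hypothetical implementation used only for this sketch.
type fakeHyperkube struct {
	servers map[string]bool
}

func (f *fakeHyperkube) FindServer(name string) bool { return f.servers[name] }
func (f *fakeHyperkube) Name() string                { return "km" }
func (f *fakeHyperkube) Flags() *pflag.FlagSet       { return pflag.NewFlagSet("km", pflag.ContinueOnError) }

// proxyInvocation mimics the decision made in runProxyService above: when the
// hyperkube binary provides a "proxy" server, proxyExec is that binary and the
// "proxy" sub-command is appended; otherwise a standalone kube-proxy is used.
func proxyInvocation(hks Interface, proxyExec string) []string {
	if hks.FindServer("proxy") {
		return []string{proxyExec, "proxy"}
	}
	return []string{proxyExec}
}

func main() {
	withProxy := &fakeHyperkube{servers: map[string]bool{"proxy": true}}
	fmt.Println(proxyInvocation(withProxy, "./km"))
	fmt.Println(proxyInvocation(&fakeHyperkube{servers: map[string]bool{}}, "./kube-proxy"))
}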

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package offers contains code that manages Mesos offers.
package offers

View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package metrics defines and exposes instrumentation metrics related to
// Mesos offers.
package metrics

View File

@@ -0,0 +1,89 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
)
const (
offerSubsystem = "mesos_offers"
)
type OfferDeclinedReason string
const (
OfferExpired = OfferDeclinedReason("expired")
OfferRescinded = OfferDeclinedReason("rescinded")
OfferCompat = OfferDeclinedReason("compat")
)
var (
OffersReceived = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: offerSubsystem,
Name: "received",
Help: "Counter of offers received from Mesos broken out by slave host.",
},
[]string{"hostname"},
)
OffersDeclined = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: offerSubsystem,
Name: "declined",
Help: "Counter of offers declined by the framework broken out by slave host.",
},
[]string{"hostname", "reason"},
)
OffersAcquired = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: offerSubsystem,
Name: "acquired",
Help: "Counter of offers acquired for task launch broken out by slave host.",
},
[]string{"hostname"},
)
OffersReleased = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: offerSubsystem,
Name: "released",
Help: "Counter of previously-acquired offers later released, broken out by slave host.",
},
[]string{"hostname"},
)
)
var registerMetrics sync.Once
func Register() {
registerMetrics.Do(func() {
prometheus.MustRegister(OffersReceived)
prometheus.MustRegister(OffersDeclined)
prometheus.MustRegister(OffersAcquired)
prometheus.MustRegister(OffersReleased)
})
}
func InMicroseconds(d time.Duration) float64 {
return float64(d.Nanoseconds() / time.Microsecond.Nanoseconds())
}
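A brief usage sketch, not part of this commit: register the collectors once, then bump them with the label values they declare, as offerStorage does later in this commit. The hostname shown is a placeholder.

package main

import (
	"fmt"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers/metrics"
)

func main() {
	metrics.Register() // safe to call repeatedly thanks to sync.Once

	// bump counters with the labels declared above
	metrics.OffersReceived.WithLabelValues("slave-1.example.com").Inc()
	metrics.OffersDeclined.WithLabelValues("slave-1.example.com", string(metrics.OfferCompat)).Inc()

	// InMicroseconds converts a duration into the float64 unit these
	// subsystems report in.
	fmt.Println(metrics.InMicroseconds(3 * time.Millisecond)) // 3000
}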

View File

@@ -0,0 +1,570 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package offers
import (
"fmt"
"reflect"
"sync"
"sync/atomic"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers/metrics"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
)
const (
offerListenerMaxAge = 12 // max number of times we'll attempt to fit an offer to a listener before requiring them to re-register themselves
offerIdCacheTTL = 1 * time.Second // determines expiration of cached offer ids, used in listener notification
deferredDeclineTtlFactor = 2 // this factor, multiplied by the offer ttl, determines how long to wait before attempting to decline previously claimed offers that were subsequently deleted, then released. see offerStorage.Delete
notifyListenersDelay = 0 // delay between offer listener notification attempts
)
type Filter func(*mesos.Offer) bool
type Registry interface {
// Initialize the instance, spawning necessary housekeeping go routines.
Init(<-chan struct{})
// Add offers to this registry, rejecting those that are deemed incompatible.
Add([]*mesos.Offer)
// Listen for arriving offers that are acceptable to the filter, sending
// a signal on (by closing) the returned channel. A listener will only
// ever be notified once, if at all.
Listen(id string, f Filter) <-chan struct{}
// invoked when offers are rescinded or expired
Delete(string, metrics.OfferDeclinedReason)
// Get returns the offer registered for the given ID; the bool reports whether it was found
Get(offerId string) (Perishable, bool)
// iterate through non-expired offers in this registry
Walk(Walker) error
// invalidate one or all (when offerId="") offers; offers are not declined,
// but are simply flagged as expired in the offer history
Invalidate(offerId string)
// invalidate all offers associated with the slave identified by slaveId.
InvalidateForSlave(slaveId string)
}
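// exampleRegistryUsage is an illustrative sketch, not part of the original
// commit (the function name and field values are placeholders; it reuses this
// file's imports). It wires up a Registry with a stub decline callback, feeds
// it offers, and walks the live ones.
func exampleRegistryUsage(offers []*mesos.Offer, done <-chan struct{}) {
	registry := CreateRegistry(RegistryConfig{
		DeclineOffer: func(offerId string) <-chan error {
			errOnce := make(chan error, 1)
			close(errOnce) // pretend the decline always succeeds
			return errOnce
		},
		Compat:        func(_ *mesos.Offer) bool { return true },
		TTL:           30 * time.Second,
		LingerTTL:     2 * time.Minute,
		ListenerDelay: 1 * time.Second,
	})
	registry.Init(done)
	registry.Add(offers)
	_ = registry.Walk(func(offer Perishable) (bool, error) {
		log.Infof("live offer %v on host %v", offer.Id(), offer.Host())
		return false, nil // keep walking
	})
}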
// callback that is invoked during a walk through a series of live offers,
// returning with stop=true (or err != nil) if the walk should stop prematurely.
type Walker func(offer Perishable) (stop bool, err error)
type RegistryConfig struct {
DeclineOffer func(offerId string) <-chan error // tell Mesos that we're declining the offer
Compat func(*mesos.Offer) bool // returns true if offer is compatible; incompatible offers are declined
TTL time.Duration // determines a perishable offer's expiration deadline: now+ttl
LingerTTL time.Duration // if zero, offers will not linger in the FIFO past their expiration deadline
ListenerDelay time.Duration // specifies the sleep time between offer listener notifications
}
type offerStorage struct {
RegistryConfig
offers *cache.FIFO // collection of Perishable, both live and expired
listeners *queue.DelayFIFO // collection of *offerListener
delayed *queue.DelayQueue // deadline-oriented offer-event queue
slaves *slaveStorage // slave to offer mappings
}
type liveOffer struct {
*mesos.Offer
expiration time.Time
acquired int32 // 1 = acquired, 0 = free
}
type expiredOffer struct {
offerSpec
deadline time.Time
}
// subset of the mesos offer data useful for recordkeeping
type offerSpec struct {
id string
hostname string
}
// offers that may perish (all of them?) implement this interface.
// callers may expect to access these funcs concurrently so implementations
// must provide their own form of synchronization around mutable state.
type Perishable interface {
// returns true if this offer has expired
HasExpired() bool
// if not yet expired, return mesos offer details; otherwise nil
Details() *mesos.Offer
// mark this offer as acquired, returning true if it was previously unacquired. thread-safe.
Acquire() bool
// mark this offer as un-acquired. thread-safe.
Release()
// expire or delete this offer from storage
age(s *offerStorage)
// return a unique identifier for this offer
Id() string
// return the slave host for this offer
Host() string
addTo(*queue.DelayQueue)
}
func (e *expiredOffer) addTo(q *queue.DelayQueue) {
q.Add(e)
}
func (e *expiredOffer) Id() string {
return e.id
}
func (e *expiredOffer) Host() string {
return e.hostname
}
func (e *expiredOffer) HasExpired() bool {
return true
}
func (e *expiredOffer) Details() *mesos.Offer {
return nil
}
func (e *expiredOffer) Acquire() bool {
return false
}
func (e *expiredOffer) Release() {}
func (e *expiredOffer) age(s *offerStorage) {
log.V(3).Infof("Delete lingering offer: %v", e.id)
s.offers.Delete(e)
s.slaves.deleteOffer(e.id)
}
// return the time left to linger
func (e *expiredOffer) GetDelay() time.Duration {
return e.deadline.Sub(time.Now())
}
func (to *liveOffer) HasExpired() bool {
return time.Now().After(to.expiration)
}
func (to *liveOffer) Details() *mesos.Offer {
return to.Offer
}
func (to *liveOffer) Acquire() (acquired bool) {
if acquired = atomic.CompareAndSwapInt32(&to.acquired, 0, 1); acquired {
metrics.OffersAcquired.WithLabelValues(to.Host()).Inc()
}
return
}
func (to *liveOffer) Release() {
if released := atomic.CompareAndSwapInt32(&to.acquired, 1, 0); released {
metrics.OffersReleased.WithLabelValues(to.Host()).Inc()
}
}
func (to *liveOffer) age(s *offerStorage) {
s.Delete(to.Id(), metrics.OfferExpired)
}
func (to *liveOffer) Id() string {
return to.Offer.Id.GetValue()
}
func (to *liveOffer) Host() string {
return to.Offer.GetHostname()
}
func (to *liveOffer) addTo(q *queue.DelayQueue) {
q.Add(to)
}
// return the time remaining before the offer expires
func (to *liveOffer) GetDelay() time.Duration {
return to.expiration.Sub(time.Now())
}
func CreateRegistry(c RegistryConfig) Registry {
metrics.Register()
return &offerStorage{
RegistryConfig: c,
offers: cache.NewFIFO(cache.KeyFunc(func(v interface{}) (string, error) {
if perishable, ok := v.(Perishable); !ok {
return "", fmt.Errorf("expected perishable offer, not '%+v'", v)
} else {
return perishable.Id(), nil
}
})),
listeners: queue.NewDelayFIFO(),
delayed: queue.NewDelayQueue(),
slaves: newSlaveStorage(),
}
}
func (s *offerStorage) declineOffer(offerId, hostname string, reason metrics.OfferDeclinedReason) {
//TODO(jdef) might be nice to spec an abort chan here
runtime.Signal(proc.OnError(s.DeclineOffer(offerId), func(err error) {
log.Warningf("decline failed for offer id %v: %v", offerId, err)
}, nil)).Then(func() {
metrics.OffersDeclined.WithLabelValues(hostname, string(reason)).Inc()
})
}
func (s *offerStorage) Add(offers []*mesos.Offer) {
now := time.Now()
for _, offer := range offers {
if !s.Compat(offer) {
//TODO(jdef) would be nice to batch these up
offerId := offer.Id.GetValue()
log.V(3).Infof("Declining incompatible offer %v", offerId)
s.declineOffer(offerId, offer.GetHostname(), metrics.OfferCompat)
continue // decline this offer, but keep processing the rest of the batch
}
timed := &liveOffer{
Offer: offer,
expiration: now.Add(s.TTL),
acquired: 0,
}
log.V(3).Infof("Receiving offer %v", timed.Id())
s.offers.Add(timed)
s.delayed.Add(timed)
s.slaves.add(offer.SlaveId.GetValue(), timed.Id())
metrics.OffersReceived.WithLabelValues(timed.Host()).Inc()
}
}
// delete an offer from storage, implicitly expires the offer
func (s *offerStorage) Delete(offerId string, reason metrics.OfferDeclinedReason) {
if offer, ok := s.Get(offerId); ok {
log.V(3).Infof("Deleting offer %v", offerId)
// attempt to block others from consuming the offer. if it's already been
// claimed and is not yet lingering then don't decline it - just mark it as
// expired in the history: allow a prior claimant to attempt to launch with it
notYetClaimed := offer.Acquire()
if offer.Details() != nil {
if notYetClaimed {
log.V(3).Infof("Declining offer %v", offerId)
s.declineOffer(offerId, offer.Host(), reason)
} else {
// some pod has acquired this and may attempt to launch a task with it
// failed schedule/launch attempts are required to Release() any claims on the offer
// TODO(jdef): not sure what a good value is here. the goal is to provide a
// launchTasks (driver) operation enough time to complete so that we don't end
// up declining an offer that we're actually attempting to use.
time.AfterFunc(deferredDeclineTtlFactor*s.TTL, func() {
// at this point the offer is in one of five states:
// a) permanently deleted: expired due to timeout
// b) permanently deleted: expired due to having been rescinded
// c) lingering: expired due to timeout
// d) lingering: expired due to having been rescinded
// e) claimed: a task was launched and is using resources from this offer
// we want to **avoid** declining an offer that's claimed: attempt to acquire
if offer.Acquire() {
// previously claimed offer was released, perhaps due to a launch
// failure, so we should attempt to decline
log.V(3).Infof("attempting to decline (previously claimed) offer %v", offerId)
s.declineOffer(offerId, offer.Host(), reason)
}
})
}
}
s.expireOffer(offer)
} // else, ignore offers not in the history
}
func (s *offerStorage) InvalidateForSlave(slaveId string) {
offerIds := s.slaves.deleteSlave(slaveId)
for oid := range offerIds {
s.invalidateOne(oid)
}
}
// if offerId == "" then expire all known, live offers, otherwise only the offer indicated
func (s *offerStorage) Invalidate(offerId string) {
if offerId != "" {
s.invalidateOne(offerId)
return
}
obj := s.offers.List()
for _, o := range obj {
offer, ok := o.(Perishable)
if !ok {
log.Errorf("Expected perishable offer, not %v", o)
continue
}
offer.Acquire() // attempt to block others from using it
s.expireOffer(offer)
// don't decline, we already know that it's an invalid offer
}
}
func (s *offerStorage) invalidateOne(offerId string) {
if offer, ok := s.Get(offerId); ok {
offer.Acquire() // attempt to block others from using it
s.expireOffer(offer)
// don't decline, we already know that it's an invalid offer
}
}
// Walk the collection of offers. The walk stops either as indicated by the
// Walker or when the end of the offer list is reached. Expired offers are
// never passed to a Walker.
func (s *offerStorage) Walk(w Walker) error {
for _, v := range s.offers.List() {
offer, ok := v.(Perishable)
if !ok {
// offer disappeared...
continue
}
if offer.HasExpired() {
// never pass expired offers to walkers
continue
}
if stop, err := w(offer); err != nil {
return err
} else if stop {
return nil
}
}
return nil
}
func Expired(offerId, hostname string, ttl time.Duration) *expiredOffer {
return &expiredOffer{offerSpec{id: offerId, hostname: hostname}, time.Now().Add(ttl)}
}
func (s *offerStorage) expireOffer(offer Perishable) {
// the offer may or may not be expired due to TTL so check for details
// since that's a more reliable determinant of lingering status
if details := offer.Details(); details != nil {
// recently expired, should linger
offerId := details.Id.GetValue()
log.V(3).Infof("Expiring offer %v", offerId)
if s.LingerTTL > 0 {
log.V(3).Infof("offer will linger: %v", offerId)
expired := Expired(offerId, offer.Host(), s.LingerTTL)
s.offers.Update(expired)
s.delayed.Add(expired)
} else {
log.V(3).Infof("Permanently deleting offer %v", offerId)
s.offers.Delete(offerId)
s.slaves.deleteOffer(offerId)
}
} // else, it's still lingering...
}
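// Get returns the offer (live or lingering) stored under the given id; ok is false if no such offer is known.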
func (s *offerStorage) Get(id string) (Perishable, bool) {
if obj, ok, _ := s.offers.GetByKey(id); !ok {
return nil, false
} else {
to, ok := obj.(Perishable)
if !ok {
log.Errorf("invalid offer object in fifo '%v'", obj)
}
return to, ok
}
}
type offerListener struct {
id string
accepts Filter
notify chan<- struct{}
age int
deadline time.Time
sawVersion uint64
}
func (l *offerListener) GetUID() string {
return l.id
}
func (l *offerListener) Deadline() (time.Time, bool) {
return l.deadline, true
}
// register a listener for new offers; the listener is notified when a matching offer arrives.
// notification is delivered by closing the returned channel; nothing is ever sent on it.
func (s *offerStorage) Listen(id string, f Filter) <-chan struct{} {
if f == nil {
return nil
}
ch := make(chan struct{})
listen := &offerListener{
id: id,
accepts: f,
notify: ch,
deadline: time.Now().Add(s.ListenerDelay),
}
log.V(3).Infof("Registering offer listener %s", listen.id)
s.listeners.Offer(listen, queue.ReplaceExisting)
return ch
}
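// ageOffers pops the next item from the delay queue: live offers that have not yet expired are re-queued, everything else is aged into the lingering or deleted state.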
func (s *offerStorage) ageOffers() {
offer, ok := s.delayed.Pop().(Perishable)
if !ok {
log.Errorf("Expected Perishable, not %v", offer)
return
}
if details := offer.Details(); details != nil && !offer.HasExpired() {
// live offer has not expired yet: timed out early
// FWIW: early timeouts are more frequent when GOMAXPROCS is > 1
offer.addTo(s.delayed)
} else {
offer.age(s)
}
}
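// nextListener blocks until a registered listener becomes available and returns it.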
func (s *offerStorage) nextListener() *offerListener {
obj := s.listeners.Pop()
if listen, ok := obj.(*offerListener); !ok {
//programming error
panic(fmt.Sprintf("unexpected listener object %v", obj))
} else {
return listen
}
}
// notify listeners if we find an acceptable offer for them. listeners
// are garbage collected after a certain age (see offerListenerMaxAge).
// ids lists offer IDs that are retrievable from offer storage.
func (s *offerStorage) notifyListeners(ids func() (util.StringSet, uint64)) {
listener := s.nextListener() // blocking
offerIds, version := ids()
if listener.sawVersion == version {
// no changes to offer list, avoid growing older - just wait for new offers to arrive
listener.deadline = time.Now().Add(s.ListenerDelay)
s.listeners.Offer(listener, queue.KeepExisting)
return
}
listener.sawVersion = version
// notify if we find an acceptable offer
for id := range offerIds {
if offer, ok := s.Get(id); !ok || offer.HasExpired() {
continue
} else if listener.accepts(offer.Details()) {
log.V(3).Infof("Notifying offer listener %s", listener.id)
close(listener.notify)
return
}
}
// no interesting offers found, re-queue the listener
listener.age++
if listener.age < offerListenerMaxAge {
listener.deadline = time.Now().Add(s.ListenerDelay)
s.listeners.Offer(listener, queue.KeepExisting)
} else {
// garbage collection is as simple as not re-adding the listener to the queue
log.V(3).Infof("garbage collecting offer listener %s", listener.id)
}
}
func (s *offerStorage) Init(done <-chan struct{}) {
// zero delay, reap offers as soon as they expire
go runtime.Until(s.ageOffers, 0, done)
// cached offer ids for the purposes of listener notification
idCache := &stringsCache{
refill: func() util.StringSet {
result := util.NewStringSet()
for _, v := range s.offers.List() {
if offer, ok := v.(Perishable); ok {
result.Insert(offer.Id())
}
}
return result
},
ttl: offerIdCacheTTL,
}
go runtime.Until(func() { s.notifyListeners(idCache.Strings) }, notifyListenersDelay, done)
}
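// stringsCache lazily refreshes a cached string set every ttl and bumps its version whenever the refreshed set differs from the previous one.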
type stringsCache struct {
expiresAt time.Time
cached util.StringSet
ttl time.Duration
refill func() util.StringSet
version uint64
}
// not thread-safe
func (c *stringsCache) Strings() (util.StringSet, uint64) {
now := time.Now()
if c.expiresAt.Before(now) {
old := c.cached
c.cached = c.refill()
c.expiresAt = now.Add(c.ttl)
if !reflect.DeepEqual(old, c.cached) {
c.version++
}
}
return c.cached, c.version
}
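// slaveStorage maintains a mutex-protected mapping from offer ID to slave ID.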
type slaveStorage struct {
sync.Mutex
index map[string]string // map offerId to slaveId
}
func newSlaveStorage() *slaveStorage {
return &slaveStorage{
index: make(map[string]string),
}
}
// create a mapping between a slave and an offer
func (self *slaveStorage) add(slaveId, offerId string) {
self.Lock()
defer self.Unlock()
self.index[offerId] = slaveId
}
// delete the slave-offer mappings for slaveId, returns the IDs of the offers that were unmapped
func (self *slaveStorage) deleteSlave(slaveId string) util.StringSet {
offerIds := util.NewStringSet()
self.Lock()
defer self.Unlock()
for oid, sid := range self.index {
if sid == slaveId {
offerIds.Insert(oid)
delete(self.index, oid)
}
}
return offerIds
}
// delete the slave-offer mappings for offerId
func (self *slaveStorage) deleteOffer(offerId string) {
self.Lock()
defer self.Unlock()
delete(self.index, offerId)
}

View File

@@ -0,0 +1,391 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package offers
import (
"errors"
"sync/atomic"
"testing"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
mesos "github.com/mesos/mesos-go/mesosproto"
util "github.com/mesos/mesos-go/mesosutil"
)
func TestExpiredOffer(t *testing.T) {
t.Parallel()
ttl := 2 * time.Second
o := Expired("test", "testhost", ttl)
if o.Id() != "test" {
t.Error("expiredOffer does not return its Id")
}
if o.Host() != "testhost" {
t.Error("expiredOffer does not return its hostname")
}
if o.HasExpired() != true {
t.Error("expiredOffer is not expired")
}
if o.Details() != nil {
t.Error("expiredOffer does not return nil Details")
}
if o.Acquire() != false {
t.Error("expiredOffer must not be able to be acquired")
}
if delay := o.GetDelay(); !(0 < delay && delay <= ttl) {
t.Error("expiredOffer does not return a valid deadline")
}
} // TestExpiredOffer
func TestTimedOffer(t *testing.T) {
t.Parallel()
ttl := 2 * time.Second
now := time.Now()
o := &liveOffer{nil, now.Add(ttl), 0}
if o.HasExpired() {
t.Errorf("offer ttl was %v and should not have expired yet", ttl)
}
if !o.Acquire() {
t.Fatal("1st acquisition of offer failed")
}
o.Release()
if !o.Acquire() {
t.Fatal("2nd acquisition of offer failed")
}
if o.Acquire() {
t.Fatal("3rd acquisition of offer passed but prior claim was not released")
}
o.Release()
if !o.Acquire() {
t.Fatal("4th acquisition of offer failed")
}
o.Release()
time.Sleep(ttl)
if !o.HasExpired() {
t.Fatal("offer not expired after ttl passed")
}
if !o.Acquire() {
t.Fatal("5th acquisition of offer failed; should not be tied to expiration")
}
if o.Acquire() {
t.Fatal("6th acquisition of offer succeeded; should already be acquired")
}
} // TestTimedOffer
func TestOfferStorage(t *testing.T) {
ttl := time.Second / 4
var declinedNum int32
getDeclinedNum := func() int32 { return atomic.LoadInt32(&declinedNum) }
config := RegistryConfig{
DeclineOffer: func(offerId string) <-chan error {
atomic.AddInt32(&declinedNum, 1)
return proc.ErrorChan(nil)
},
Compat: func(o *mesos.Offer) bool {
return o.Hostname == nil || *o.Hostname != "incompatiblehost"
},
TTL: ttl,
LingerTTL: 2 * ttl,
}
storage := CreateRegistry(config)
done := make(chan struct{})
storage.Init(done)
// Add offer
id := util.NewOfferID("foo")
o := &mesos.Offer{Id: id}
storage.Add([]*mesos.Offer{o})
// Added offer should be in the storage
if obj, ok := storage.Get(id.GetValue()); obj == nil || !ok {
t.Error("offer not added")
}
if obj, _ := storage.Get(id.GetValue()); obj.Details() != o {
t.Error("added offer differs from returned offer")
}
// Not-added offer is not in storage
if obj, ok := storage.Get("bar"); obj != nil || ok {
t.Error("offer bar should not exist in storage")
}
// Deleted offer lingers in storage, is acquired and declined
offer, _ := storage.Get(id.GetValue())
declinedNumBefore := getDeclinedNum()
storage.Delete(id.GetValue(), "deleted for test")
if obj, _ := storage.Get(id.GetValue()); obj == nil {
t.Error("deleted offer is not lingering")
}
if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
t.Error("deleted offer is no expired")
}
if ok := offer.Acquire(); ok {
t.Error("deleted offer can be acquired")
}
if getDeclinedNum() <= declinedNumBefore {
t.Error("deleted offer was not declined")
}
// Acquired offer is only declined after 2*ttl
id = util.NewOfferID("foo2")
o = &mesos.Offer{Id: id}
storage.Add([]*mesos.Offer{o})
offer, _ = storage.Get(id.GetValue())
declinedNumBefore = getDeclinedNum()
offer.Acquire()
storage.Delete(id.GetValue(), "deleted for test")
if getDeclinedNum() > declinedNumBefore {
t.Error("acquired offer is declined")
}
offer.Release()
time.Sleep(3 * ttl)
if getDeclinedNum() <= declinedNumBefore {
t.Error("released offer is not declined after 2*ttl")
}
// Added offer should be expired after ttl, but lingering
id = util.NewOfferID("foo3")
o = &mesos.Offer{Id: id}
storage.Add([]*mesos.Offer{o})
time.Sleep(2 * ttl)
obj, ok := storage.Get(id.GetValue())
if obj == nil || !ok {
t.Error("offer not lingering after ttl")
}
if !obj.HasExpired() {
t.Error("offer is not expired after ttl")
}
// Should be deleted when waiting longer than LingerTTL
time.Sleep(2 * ttl)
if obj, ok := storage.Get(id.GetValue()); obj != nil || ok {
t.Error("offer not deleted after LingerTTL")
}
// Incompatible offer is declined
id = util.NewOfferID("foo4")
incompatibleHostname := "incompatiblehost"
o = &mesos.Offer{Id: id, Hostname: &incompatibleHostname}
declinedNumBefore = getDeclinedNum()
storage.Add([]*mesos.Offer{o})
if obj, ok := storage.Get(id.GetValue()); obj != nil || ok {
t.Error("incompatible offer not rejected")
}
if getDeclinedNum() <= declinedNumBefore {
t.Error("incompatible offer is not declined")
}
// Invalidated offers are not declined, but expired
id = util.NewOfferID("foo5")
o = &mesos.Offer{Id: id}
storage.Add([]*mesos.Offer{o})
offer, _ = storage.Get(id.GetValue())
declinedNumBefore = getDeclinedNum()
storage.Invalidate(id.GetValue())
if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
t.Error("invalidated offer is not expired")
}
if getDeclinedNum() > declinedNumBefore {
t.Error("invalidated offer is declined")
}
if ok := offer.Acquire(); ok {
t.Error("invalidated offer can be acquired")
}
// Invalidate "" will invalidate all offers
id = util.NewOfferID("foo6")
o = &mesos.Offer{Id: id}
storage.Add([]*mesos.Offer{o})
id2 := util.NewOfferID("foo7")
o2 := &mesos.Offer{Id: id2}
storage.Add([]*mesos.Offer{o2})
storage.Invalidate("")
if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
t.Error("invalidated offer is not expired")
}
if obj2, _ := storage.Get(id2.GetValue()); !obj2.HasExpired() {
t.Error("invalidated offer is not expired")
}
// InvalidateForSlave invalidates all offers for that slave, and only those
id = util.NewOfferID("foo8")
slaveId := util.NewSlaveID("test-slave")
o = &mesos.Offer{Id: id, SlaveId: slaveId}
storage.Add([]*mesos.Offer{o})
id2 = util.NewOfferID("foo9")
o2 = &mesos.Offer{Id: id2}
storage.Add([]*mesos.Offer{o2})
storage.InvalidateForSlave(slaveId.GetValue())
if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
t.Error("invalidated offer for test-slave is not expired")
}
if obj2, _ := storage.Get(id2.GetValue()); obj2.HasExpired() {
t.Error("invalidated offer another slave is expired")
}
close(done)
} // TestOfferStorage
func TestListen(t *testing.T) {
ttl := time.Second / 4
config := RegistryConfig{
DeclineOffer: func(offerId string) <-chan error {
return proc.ErrorChan(nil)
},
Compat: func(o *mesos.Offer) bool {
return true
},
TTL: ttl,
ListenerDelay: ttl / 2,
}
storage := CreateRegistry(config)
done := make(chan struct{})
storage.Init(done)
// Create two listeners with a hostname filter
hostname1 := "hostname1"
hostname2 := "hostname2"
listener1 := storage.Listen("listener1", func(offer *mesos.Offer) bool {
return offer.GetHostname() == hostname1
})
listener2 := storage.Listen("listener2", func(offer *mesos.Offer) bool {
return offer.GetHostname() == hostname2
})
// Add hostname1 offer
id := util.NewOfferID("foo")
o := &mesos.Offer{Id: id, Hostname: &hostname1}
storage.Add([]*mesos.Offer{o})
// listener1 is notified by closing channel
select {
case _, more := <-listener1:
if more {
t.Error("listener1 is not closed")
}
}
// listener2 is not notified within ttl
select {
case <-listener2:
t.Error("listener2 is notified")
case <-time.After(ttl):
}
close(done)
} // TestListen
func TestWalk(t *testing.T) {
t.Parallel()
config := RegistryConfig{
DeclineOffer: func(offerId string) <-chan error {
return proc.ErrorChan(nil)
},
TTL: 0 * time.Second,
LingerTTL: 0 * time.Second,
ListenerDelay: 0 * time.Second,
}
storage := CreateRegistry(config)
acceptedOfferId := ""
walked := 0
walker1 := func(p Perishable) (bool, error) {
walked++
if p.Acquire() {
acceptedOfferId = p.Details().Id.GetValue()
return true, nil
}
return false, nil
}
// sanity check
err := storage.Walk(walker1)
if err != nil {
t.Fatalf("received impossible error %v", err)
}
if walked != 0 {
t.Fatal("walked empty storage")
}
if acceptedOfferId != "" {
t.Fatal("somehow found an offer when registry was empty")
}
impl, ok := storage.(*offerStorage)
if !ok {
t.Fatal("unexpected offer storage impl")
}
// single offer
ttl := 2 * time.Second
now := time.Now()
o := &liveOffer{&mesos.Offer{Id: util.NewOfferID("foo")}, now.Add(ttl), 0}
impl.offers.Add(o)
err = storage.Walk(walker1)
if err != nil {
t.Fatalf("received impossible error %v", err)
}
if walked != 1 {
t.Fatalf("walk count %d", walked)
}
if acceptedOfferId != "foo" {
t.Fatalf("found offer %v", acceptedOfferId)
}
acceptedOfferId = ""
err = storage.Walk(walker1)
if err != nil {
t.Fatalf("received impossible error %v", err)
}
if walked != 2 {
t.Fatalf("walk count %d", walked)
}
if acceptedOfferId != "" {
t.Fatalf("found offer %v", acceptedOfferId)
}
walker2 := func(p Perishable) (bool, error) {
walked++
return true, nil
}
err = storage.Walk(walker2)
if err != nil {
t.Fatalf("received impossible error %v", err)
}
if walked != 3 {
t.Fatalf("walk count %d", walked)
}
if acceptedOfferId != "" {
t.Fatalf("found offer %v", acceptedOfferId)
}
walker3 := func(p Perishable) (bool, error) {
walked++
return true, errors.New("baz")
}
err = storage.Walk(walker3)
if err == nil {
t.Fatal("expected error")
}
if walked != 4 {
t.Fatalf("walk count %d", walked)
}
}

View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package proc provides opinionated utilities for processing background
// operations and future errors, somewhat inspired by libprocess.
package proc

View File

@@ -0,0 +1,34 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package proc
import (
"errors"
)
var (
errProcessTerminated = errors.New("cannot execute action because process has terminated")
errIllegalState = errors.New("illegal state, cannot execute action")
)
func IsProcessTerminated(err error) bool {
return err == errProcessTerminated
}
func IsIllegalState(err error) bool {
return err == errIllegalState
}

View File

@@ -0,0 +1,377 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package proc
import (
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
log "github.com/golang/glog"
)
const (
// if the action processor crashes (if some Action panics) then we
// wait this long before spinning up the action processor again.
defaultActionHandlerCrashDelay = 100 * time.Millisecond
// how many actions we can store in the backlog
defaultActionQueueDepth = 1024
)
type procImpl struct {
Config
backlog chan Action // action queue
terminate chan struct{} // signaled via close()
wg sync.WaitGroup // End() terminates when the wait is over
done runtime.Signal
state *stateType
pid uint32
writeLock sync.Mutex // avoid data race between write and close of backlog
changed *sync.Cond // wait/signal for backlog changes
engine DoerFunc // isolated this for easier unit testing later on
running chan struct{} // closes once event loop processing starts
dead chan struct{} // closes upon completion of process termination
}
type Config struct {
// cooldown period in between deferred action crashes
actionHandlerCrashDelay time.Duration
// determines the size of the deferred action backlog
actionQueueDepth uint32
}
var (
defaultConfig = Config{
actionHandlerCrashDelay: defaultActionHandlerCrashDelay,
actionQueueDepth: defaultActionQueueDepth,
}
pid uint32
closedErrChan <-chan error
)
func init() {
ch := make(chan error)
close(ch)
closedErrChan = ch
}
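// New returns a Process backed by the default configuration.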
func New() Process {
return newConfigured(defaultConfig)
}
func newConfigured(config Config) Process {
state := stateNew
pi := &procImpl{
Config: config,
backlog: make(chan Action, config.actionQueueDepth),
terminate: make(chan struct{}),
state: &state,
pid: atomic.AddUint32(&pid, 1),
running: make(chan struct{}),
dead: make(chan struct{}),
}
pi.engine = DoerFunc(pi.doLater)
pi.changed = sync.NewCond(&pi.writeLock)
pi.wg.Add(1) // symmetrical to wg.Done() in End()
pi.done = pi.begin()
return pi
}
// returns a chan that closes upon termination of the action processing loop
func (self *procImpl) Done() <-chan struct{} {
return self.done
}
func (self *procImpl) Running() <-chan struct{} {
return self.running
}
func (self *procImpl) begin() runtime.Signal {
if !self.state.transition(stateNew, stateRunning) {
panic(fmt.Errorf("failed to transition from New to Idle state"))
}
defer log.V(2).Infof("started process %d", self.pid)
var entered runtime.Latch
// execute actions on the backlog chan
return runtime.After(func() {
runtime.Until(func() {
if entered.Acquire() {
close(self.running)
self.wg.Add(1)
}
for action := range self.backlog {
select {
case <-self.terminate:
return
default:
// signal to indicate there's room in the backlog now
self.changed.Broadcast()
// rely on Until to handle action panics
action()
}
}
}, self.actionHandlerCrashDelay, self.terminate)
}).Then(func() {
log.V(2).Infof("finished processing action backlog for process %d", self.pid)
if !entered.Acquire() {
self.wg.Done()
}
})
}
// execute some action in the context of the current process. Actions
// executed via this func are to be executed in a concurrency-safe manner:
// no two actions should execute at the same time. invocations of this func
// should not block for very long, unless the action backlog is full or the
// process is terminating.
// returns errProcessTerminated if the process already ended.
func (self *procImpl) doLater(deferredAction Action) (err <-chan error) {
a := Action(func() {
self.wg.Add(1)
defer self.wg.Done()
deferredAction()
})
scheduled := false
self.writeLock.Lock()
defer self.writeLock.Unlock()
for err == nil && !scheduled {
switch s := self.state.get(); s {
case stateRunning:
select {
case self.backlog <- a:
scheduled = true
default:
self.changed.Wait()
}
case stateTerminal:
err = ErrorChan(errProcessTerminated)
default:
err = ErrorChan(errIllegalState)
}
}
return
}
// implementation of Doer interface, schedules some action to be executed via
// the current execution engine
func (self *procImpl) Do(a Action) <-chan error {
return self.engine(a)
}
// spawn a goroutine that waits for an error. if a non-nil error is read from the
// channel then the handler func is invoked, otherwise (nil error or closed chan)
// the handler is skipped. if a nil handler is specified then it's not invoked.
// the signal chan that's returned closes once the error process logic (and handler,
// if any) has completed.
func OnError(ch <-chan error, f func(error), abort <-chan struct{}) <-chan struct{} {
return runtime.After(func() {
if ch == nil {
return
}
select {
case err, ok := <-ch:
if ok && err != nil && f != nil {
f(err)
}
case <-abort:
if f != nil {
f(errProcessTerminated)
}
}
})
}
func (self *procImpl) OnError(ch <-chan error, f func(error)) <-chan struct{} {
return OnError(ch, f, self.Done())
}
func (self *procImpl) flush() {
log.V(2).Infof("flushing action backlog for process %d", self.pid)
i := 0
//TODO: replace with `for range self.backlog` once Go 1.3 support is dropped
for {
_, open := <-self.backlog
if !open {
break
}
i++
}
log.V(2).Infof("flushed %d backlog actions for process %d", i, self.pid)
}
func (self *procImpl) End() <-chan struct{} {
if self.state.transitionTo(stateTerminal, stateTerminal) {
go func() {
defer close(self.dead)
self.writeLock.Lock()
defer self.writeLock.Unlock()
log.V(2).Infof("terminating process %d", self.pid)
close(self.backlog)
close(self.terminate)
self.wg.Done()
self.changed.Broadcast()
log.V(2).Infof("waiting for deferred actions to complete")
// wait for all pending actions to complete, then flush the backlog
self.wg.Wait()
self.flush()
}()
}
return self.dead
}
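// errorOnce delivers at most one error via Err(); any subsequent reports are silently dropped.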
type errorOnce struct {
once sync.Once
err chan error
abort <-chan struct{}
}
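// NewErrorOnce returns an ErrorOnce whose forwarding gives up, reporting errProcessTerminated, once the abort chan closes.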
func NewErrorOnce(abort <-chan struct{}) ErrorOnce {
return &errorOnce{
err: make(chan error, 1),
abort: abort,
}
}
func (b *errorOnce) Err() <-chan error {
return b.err
}
func (b *errorOnce) Reportf(msg string, args ...interface{}) {
b.Report(fmt.Errorf(msg, args...))
}
func (b *errorOnce) Report(err error) {
b.once.Do(func() {
select {
case b.err <- err:
default:
}
})
}
func (b *errorOnce) Send(errIn <-chan error) ErrorOnce {
go b.forward(errIn)
return b
}
func (b *errorOnce) forward(errIn <-chan error) {
if errIn == nil {
b.Report(nil)
return
}
select {
case err, _ := <-errIn:
b.Report(err)
case <-b.abort:
b.Report(errProcessTerminated)
}
}
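// processAdapter executes actions of a delegate Doer within the execution context of a parent Process; see DoWith.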
type processAdapter struct {
parent Process
delegate Doer
}
func (p *processAdapter) Do(a Action) <-chan error {
if p == nil || p.parent == nil || p.delegate == nil {
return ErrorChan(errIllegalState)
}
errCh := NewErrorOnce(p.Done())
go func() {
errOuter := p.parent.Do(func() {
errInner := p.delegate.Do(a)
errCh.forward(errInner)
})
// if the outer err is !nil then either the parent failed to schedule the
// action, or else it backgrounded the scheduling task.
if errOuter != nil {
errCh.forward(errOuter)
}
}()
return errCh.Err()
}
func (p *processAdapter) End() <-chan struct{} {
if p != nil && p.parent != nil {
return p.parent.End()
}
return nil
}
func (p *processAdapter) Done() <-chan struct{} {
if p != nil && p.parent != nil {
return p.parent.Done()
}
return nil
}
func (p *processAdapter) Running() <-chan struct{} {
if p != nil && p.parent != nil {
return p.parent.Running()
}
return nil
}
func (p *processAdapter) OnError(ch <-chan error, f func(error)) <-chan struct{} {
if p != nil && p.parent != nil {
return p.parent.OnError(ch, f)
}
return nil
}
// returns a process that, within its execution context, delegates to the specified Doer.
// if the given Doer instance is nil, a valid Process is still returned though calls to its
// Do() implementation will always return errIllegalState.
// if the given Process instance is nil then in addition to the behavior in the prior sentence,
// calls to End() and Done() are effectively noops.
func DoWith(other Process, d Doer) Process {
return &processAdapter{
parent: other,
delegate: d,
}
}
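// A minimal usage sketch of DoWith (mirrors the TestProc_doWith test): every action
// submitted to the decorated process is routed through the extra DoerFunc before it runs.
//
//   p := New()
//   decorated := DoWith(p, DoerFunc(func(a Action) <-chan error {
//       a() // run the delegated action inline
//       return nil
//   }))
//   errCh := decorated.Do(func() { /* work runs in the process context */ })
//   decorated.OnError(errCh, func(err error) { /* handle scheduling errors */ })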
func ErrorChanf(msg string, args ...interface{}) <-chan error {
return ErrorChan(fmt.Errorf(msg, args...))
}
func ErrorChan(err error) <-chan error {
if err == nil {
return closedErrChan
}
ch := make(chan error, 1)
ch <- err
return ch
}
// invoke f on action a. returns an illegal state error if f is nil.
func (f DoerFunc) Do(a Action) <-chan error {
if f != nil {
return f(a)
}
return ErrorChan(errIllegalState)
}

View File

@@ -0,0 +1,373 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package proc
import (
"fmt"
"sync"
"testing"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
log "github.com/golang/glog"
)
// calls t.Fatalf if the elapsed time d passes before the signal chan done is closed
func fatalAfter(t *testing.T, done <-chan struct{}, d time.Duration, msg string, args ...interface{}) {
select {
case <-done:
case <-time.After(d):
t.Fatalf(msg, args...)
}
}
func errorAfter(errOnce ErrorOnce, done <-chan struct{}, d time.Duration, msg string, args ...interface{}) {
select {
case <-done:
case <-time.After(d):
errOnce.Reportf(msg, args...)
}
}
// calls t.Fatalf if the signal chan done closes before the elapsed time d passes
func fatalOn(t *testing.T, done <-chan struct{}, d time.Duration, msg string, args ...interface{}) {
select {
case <-done:
t.Fatalf(msg, args...)
case <-time.After(d):
}
}
func TestProc_manyEndings(t *testing.T) {
p := New()
const COUNT = 20
var wg sync.WaitGroup
wg.Add(COUNT)
for i := 0; i < COUNT; i++ {
runtime.On(p.End(), wg.Done)
}
fatalAfter(t, runtime.After(wg.Wait), 5*time.Second, "timed out waiting for loose End()s")
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_singleAction(t *testing.T) {
p := New()
scheduled := make(chan struct{})
called := make(chan struct{})
go func() {
log.Infof("do'ing deferred action")
defer close(scheduled)
err := p.Do(func() {
defer close(called)
log.Infof("deferred action invoked")
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}()
fatalAfter(t, scheduled, 5*time.Second, "timed out waiting for deferred action to be scheduled")
fatalAfter(t, called, 5*time.Second, "timed out waiting for deferred action to be invoked")
p.End()
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_singleActionEnd(t *testing.T) {
p := New()
scheduled := make(chan struct{})
called := make(chan struct{})
go func() {
log.Infof("do'ing deferred action")
defer close(scheduled)
err := p.Do(func() {
defer close(called)
log.Infof("deferred action invoked")
p.End()
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}()
fatalAfter(t, scheduled, 5*time.Second, "timed out waiting for deferred action to be scheduled")
fatalAfter(t, called, 5*time.Second, "timed out waiting for deferred action to be invoked")
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_multiAction(t *testing.T) {
p := New()
const COUNT = 10
var called sync.WaitGroup
called.Add(COUNT)
// test FIFO property
next := 0
for i := 0; i < COUNT; i++ {
log.Infof("do'ing deferred action %d", i)
idx := i
err := p.Do(func() {
defer called.Done()
log.Infof("deferred action invoked")
if next != idx {
t.Fatalf("expected index %d instead of %d", idx, next)
}
next++
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}
fatalAfter(t, runtime.After(called.Wait), 2*time.Second, "timed out waiting for deferred actions to be invoked")
p.End()
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_goodLifecycle(t *testing.T) {
p := New()
p.End()
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_doWithDeadProc(t *testing.T) {
p := New()
p.End()
time.Sleep(100 * time.Millisecond)
errUnexpected := fmt.Errorf("unexpected execution of delegated action")
decorated := DoWith(p, DoerFunc(func(_ Action) <-chan error {
return ErrorChan(errUnexpected)
}))
decorated.Do(func() {})
fatalAfter(t, decorated.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_doWith(t *testing.T) {
p := New()
delegated := false
decorated := DoWith(p, DoerFunc(func(a Action) <-chan error {
delegated = true
a()
return nil
}))
executed := make(chan struct{})
err := decorated.Do(func() {
defer close(executed)
if !delegated {
t.Fatalf("expected delegated execution")
}
})
if err == nil {
t.Fatalf("expected !nil error chan")
}
fatalAfter(t, executed, 5*time.Second, "timed out waiting for deferred execution")
fatalAfter(t, decorated.OnError(err, func(e error) {
t.Fatalf("unexpected error: %v", err)
}), 1*time.Second, "timed out waiting for doer result")
decorated.End()
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_doWithNestedTwice(t *testing.T) {
p := New()
delegated := false
decorated := DoWith(p, DoerFunc(func(a Action) <-chan error {
a()
return nil
}))
decorated2 := DoWith(decorated, DoerFunc(func(a Action) <-chan error {
delegated = true
a()
return nil
}))
executed := make(chan struct{})
err := decorated2.Do(func() {
defer close(executed)
if !delegated {
t.Fatalf("expected delegated execution")
}
})
if err == nil {
t.Fatalf("expected !nil error chan")
}
fatalAfter(t, executed, 5*time.Second, "timed out waiting for deferred execution")
fatalAfter(t, decorated2.OnError(err, func(e error) {
t.Fatalf("unexpected error: %v", err)
}), 1*time.Second, "timed out waiting for doer result")
decorated2.End()
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_doWithNestedErrorPropagation(t *testing.T) {
p := New()
delegated := false
decorated := DoWith(p, DoerFunc(func(a Action) <-chan error {
a()
return nil
}))
expectedErr := fmt.Errorf("expecting this")
errOnce := NewErrorOnce(p.Done())
decorated2 := DoWith(decorated, DoerFunc(func(a Action) <-chan error {
delegated = true
a()
errOnce.Reportf("unexpected error in decorator2")
return ErrorChanf("another unexpected error in decorator2")
}))
executed := make(chan struct{})
err := decorated2.Do(func() {
defer close(executed)
if !delegated {
t.Fatalf("expected delegated execution")
}
errOnce.Report(expectedErr)
})
if err == nil {
t.Fatalf("expected !nil error chan")
}
errOnce.Send(err)
foundError := false
fatalAfter(t, executed, 1*time.Second, "timed out waiting for deferred execution")
fatalAfter(t, decorated2.OnError(errOnce.Err(), func(e error) {
if e != expectedErr {
t.Fatalf("unexpected error: %v", err)
} else {
foundError = true
}
}), 1*time.Second, "timed out waiting for doer result")
if !foundError {
t.Fatalf("expected a propagated error")
}
decorated2.End()
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func runDelegationTest(t *testing.T, p Process, name string, errOnce ErrorOnce) {
defer func() {
t.Logf("runDelegationTest finished at " + time.Now().String())
}()
var decorated Process
decorated = p
const DEPTH = 100
var wg sync.WaitGroup
wg.Add(DEPTH)
y := 0
for x := 1; x <= DEPTH; x++ {
x := x
nextp := DoWith(decorated, DoerFunc(func(a Action) <-chan error {
if x == 1 {
t.Logf("delegate chain invoked for " + name)
}
y++
if y != x {
return ErrorChanf("out of order delegated execution")
}
defer wg.Done()
a()
return nil
}))
decorated = nextp
}
executed := make(chan struct{})
errCh := decorated.Do(func() {
defer close(executed)
if y != DEPTH {
errOnce.Reportf("expected delegated execution")
}
t.Logf("executing deferred action: " + name + " at " + time.Now().String())
errOnce.Send(nil) // we completed without error, let the listener know
})
if errCh == nil {
t.Fatalf("expected !nil error chan")
}
// forward any scheduling errors to the listener; NOTHING else should attempt to read
// from errCh after this point
errOnce.Send(errCh)
errorAfter(errOnce, executed, 5*time.Second, "timed out waiting for deferred execution")
t.Logf("runDelegationTest received executed signal at " + time.Now().String())
}
func TestProc_doWithNestedX(t *testing.T) {
t.Logf("starting test case at " + time.Now().String())
p := New()
errOnce := NewErrorOnce(p.Done())
runDelegationTest(t, p, "nested", errOnce)
<-p.End()
select {
case err := <-errOnce.Err():
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
case <-time.After(5 * time.Second):
t.Fatalf("timed out waiting for doer result")
}
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
// intended to be run with -race
func TestProc_doWithNestedXConcurrent(t *testing.T) {
p := New()
errOnce := NewErrorOnce(p.Done())
var wg sync.WaitGroup
const CONC = 20
wg.Add(CONC)
for i := 0; i < CONC; i++ {
i := i
runtime.After(func() { runDelegationTest(t, p, fmt.Sprintf("nested%d", i), errOnce) }).Then(wg.Done)
}
ch := runtime.After(wg.Wait)
fatalAfter(t, ch, 10*time.Second, "timed out waiting for concurrent delegates")
<-p.End()
select {
case err := <-errOnce.Err():
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
case <-time.After(5 * time.Second):
t.Fatalf("timed out waiting for doer result")
}
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}

View File

@@ -0,0 +1,55 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package proc
import (
"sync/atomic"
)
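// stateType models the process lifecycle state and is only manipulated via atomic operations.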
type stateType int32
const (
stateNew stateType = iota
stateRunning
stateTerminal
)
func (s *stateType) get() stateType {
return stateType(atomic.LoadInt32((*int32)(s)))
}
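// transition atomically swaps the state from 'from' to 'to', returning true if the swap succeeded.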
func (s *stateType) transition(from, to stateType) bool {
return atomic.CompareAndSwapInt32((*int32)(s), int32(from), int32(to))
}
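// transitionTo atomically moves the state to 'to' unless the current state matches one of the 'unless' states; it returns true once the transition has happened.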
func (s *stateType) transitionTo(to stateType, unless ...stateType) bool {
if len(unless) == 0 {
atomic.StoreInt32((*int32)(s), int32(to))
return true
}
for {
state := s.get()
for _, x := range unless {
if state == x {
return false
}
}
if s.transition(state, to) {
return true
}
}
}

View File

@@ -0,0 +1,71 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package proc
// something that executes in the context of a process
type Action func()
type Context interface {
// end (terminate) the execution context
End() <-chan struct{}
// return a signal chan that will close upon the termination of this process
Done() <-chan struct{}
}
type Doer interface {
// execute some action in some context. actions are to be executed in a
// concurrency-safe manner: no two actions should execute at the same time.
// errors are generated if the action cannot be executed (not by the execution
// of the action) and should be testable with the error API of this package,
// for example, IsProcessTerminated.
Do(Action) <-chan error
}
// adapter func for Doer interface
type DoerFunc func(Action) <-chan error
type Process interface {
Context
Doer
// see top level OnError func. this implementation will terminate upon the arrival of
// an error (and subsequently invoke the error handler, if given) or else the termination
// of the process (testable via IsProcessTerminated).
OnError(<-chan error, func(error)) <-chan struct{}
// return a signal chan that will close once the process is ready to run actions
Running() <-chan struct{}
}
// this is an error promise. if we ever start building out support for other promise types it will probably
// make sense to group them in some sort of "promises" package.
type ErrorOnce interface {
// return a chan that only ever sends one error, either obtained via Report() or Forward()
Err() <-chan error
// reports the given error via Err(), but only if no other errors have been reported or forwarded
Report(error)
Reportf(string, ...interface{})
// waits for an error on the incoming chan, the result of which is later obtained via Err() (if no
// other errors have been reported or forwarded)
forward(<-chan error)
// non-blocking, spins up a goroutine that reports an error (if any) that occurs on the error chan.
Send(<-chan error) ErrorOnce
}

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package profile contains reusable code for profiling Go programs with pprof.
package profile

View File

@@ -0,0 +1,27 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package profile
import "net/http"
import "net/http/pprof"
func InstallHandler(m *http.ServeMux) {
// register similar endpoints as net/http/pprof.init() does
m.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index))
m.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile))
m.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol))
}

View File

@@ -0,0 +1,373 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
import (
"container/heap"
"sync"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
)
type qitem struct {
value interface{}
priority Priority
index int
readd func(item *qitem) // re-add the value of the item to the queue
}
// A priorityQueue implements heap.Interface and holds qitems.
type priorityQueue []*qitem
func (pq priorityQueue) Len() int { return len(pq) }
func (pq priorityQueue) Less(i, j int) bool {
return pq[i].priority.ts.Before(pq[j].priority.ts)
}
func (pq priorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
pq[i].index = i
pq[j].index = j
}
func (pq *priorityQueue) Push(x interface{}) {
n := len(*pq)
item := x.(*qitem)
item.index = n
*pq = append(*pq, item)
}
func (pq *priorityQueue) Pop() interface{} {
old := *pq
n := len(old)
item := old[n-1]
item.index = -1 // for safety
*pq = old[0 : n-1]
return item
}
// concurrency-safe, deadline-oriented queue that returns items after their
// delay period has expired.
type DelayQueue struct {
queue priorityQueue
lock sync.RWMutex
cond sync.Cond
}
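// NewDelayQueue returns an empty DelayQueue ready for use.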
func NewDelayQueue() *DelayQueue {
q := &DelayQueue{}
q.cond.L = &q.lock
return q
}
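// Add enqueues a Delayed item; the deadline is computed once from the item's delay and is reused if the item is ever re-added internally.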
func (q *DelayQueue) Add(d Delayed) {
deadline := extractFromDelayed(d)
q.lock.Lock()
defer q.lock.Unlock()
// readd using the original deadline computed from the original delay
var readd func(*qitem)
readd = func(qp *qitem) {
q.lock.Lock()
defer q.lock.Unlock()
heap.Push(&q.queue, &qitem{
value: d,
priority: deadline,
readd: readd,
})
q.cond.Broadcast()
}
heap.Push(&q.queue, &qitem{
value: d,
priority: deadline,
readd: readd,
})
q.cond.Broadcast()
}
// If there's a deadline reported by d.Deadline() then `d` is added to the
// queue and this func returns true.
func (q *DelayQueue) Offer(d Deadlined) bool {
deadline, ok := extractFromDeadlined(d)
if ok {
q.lock.Lock()
defer q.lock.Unlock()
heap.Push(&q.queue, &qitem{
value: d,
priority: deadline,
readd: func(qp *qitem) {
q.Offer(qp.value.(Deadlined))
},
})
q.cond.Broadcast()
}
return ok
}
// wait for the delay of the next item in the queue to expire, blocking if
// there are no items in the queue. does not guarantee first-come-first-serve
// ordering with respect to clients.
func (q *DelayQueue) Pop() interface{} {
// doesn't implement cancellation, will always return a non-nil value
return q.pop(func() *qitem {
q.lock.Lock()
defer q.lock.Unlock()
for q.queue.Len() == 0 {
q.cond.Wait()
}
x := heap.Pop(&q.queue)
item := x.(*qitem)
return item
}, nil)
}
// returns a non-nil value from the queue, or else nil if/when cancelled; if cancel
// is nil then cancellation is disabled and this func must return a non-nil value.
func (q *DelayQueue) pop(next func() *qitem, cancel <-chan struct{}) interface{} {
var ch chan struct{}
for {
item := next()
if item == nil {
// cancelled
return nil
}
x := item.value
waitingPeriod := item.priority.ts.Sub(time.Now())
if waitingPeriod >= 0 {
// listen for calls to Add() while we're waiting for the deadline
if ch == nil {
ch = make(chan struct{}, 1)
}
go func() {
q.lock.Lock()
defer q.lock.Unlock()
q.cond.Wait()
ch <- struct{}{}
}()
select {
case <-cancel:
item.readd(item)
return nil
case <-ch:
// we may no longer have the earliest deadline, re-try
item.readd(item)
continue
case <-time.After(waitingPeriod):
// noop
case <-item.priority.notify:
// noop
}
}
return x
}
}
// If multiple adds/updates of a single item happen while an item is in the
// queue before it has been processed, it will only be processed once, and
// when it is processed, the most recent version will be processed. Items are
// popped in order of their priority, currently controlled by a delay or
// deadline assigned to each item in the queue.
type DelayFIFO struct {
// internal deadline-based priority queue
delegate *DelayQueue
// We depend on the property that items in the set are in the queue and vice versa.
items map[string]*qitem
deadlinePolicy DeadlinePolicy
}
func (q *DelayFIFO) lock() {
q.delegate.lock.Lock()
}
func (q *DelayFIFO) unlock() {
q.delegate.lock.Unlock()
}
func (q *DelayFIFO) rlock() {
q.delegate.lock.RLock()
}
func (q *DelayFIFO) runlock() {
q.delegate.lock.RUnlock()
}
func (q *DelayFIFO) queue() *priorityQueue {
return &q.delegate.queue
}
func (q *DelayFIFO) cond() *sync.Cond {
return &q.delegate.cond
}
// Add inserts an item, and puts it in the queue. The item is only enqueued
// if it doesn't already exist in the set.
func (q *DelayFIFO) Add(d UniqueDelayed, rp ReplacementPolicy) {
deadline := extractFromDelayed(d)
id := d.GetUID()
var adder func(*qitem)
adder = func(*qitem) {
q.add(id, deadline, d, KeepExisting, adder)
}
q.add(id, deadline, d, rp, adder)
}
func (q *DelayFIFO) Offer(d UniqueDeadlined, rp ReplacementPolicy) bool {
if deadline, ok := extractFromDeadlined(d); ok {
id := d.GetUID()
q.add(id, deadline, d, rp, func(qp *qitem) { q.Offer(qp.value.(UniqueDeadlined), KeepExisting) })
return true
}
return false
}
func (q *DelayFIFO) add(id string, deadline Priority, value interface{}, rp ReplacementPolicy, adder func(*qitem)) {
q.lock()
defer q.unlock()
if item, exists := q.items[id]; !exists {
item = &qitem{
value: value,
priority: deadline,
readd: adder,
}
heap.Push(q.queue(), item)
q.items[id] = item
} else {
// this is an update of an existing item
item.value = rp.replacementValue(item.value, value)
item.priority = q.deadlinePolicy.nextDeadline(item.priority, deadline)
heap.Fix(q.queue(), item.index)
}
q.cond().Broadcast()
}
// Delete removes an item. It doesn't add it to the queue, because
// this implementation assumes the consumer only cares about the objects,
// not their priority order.
func (f *DelayFIFO) Delete(id string) {
f.lock()
defer f.unlock()
delete(f.items, id)
}
// List returns a list of all the items.
func (f *DelayFIFO) List() []UniqueID {
f.rlock()
defer f.runlock()
list := make([]UniqueID, 0, len(f.items))
for _, item := range f.items {
list = append(list, item.value.(UniqueDelayed))
}
return list
}
// ContainedIDs returns a util.StringSet containing all IDs of the stored items.
// This is a snapshot of a moment in time, and one should keep in mind that
// other goroutines can add or remove items after you call this.
func (c *DelayFIFO) ContainedIDs() util.StringSet {
c.rlock()
defer c.runlock()
set := util.StringSet{}
for id := range c.items {
set.Insert(id)
}
return set
}
// Get returns the requested item, or sets exists=false.
func (f *DelayFIFO) Get(id string) (UniqueID, bool) {
f.rlock()
defer f.runlock()
if item, exists := f.items[id]; exists {
return item.value.(UniqueID), true
}
return nil, false
}
// Variant of DelayQueue.Pop() for UniqueDelayed items
func (q *DelayFIFO) Await(timeout time.Duration) UniqueID {
cancel := make(chan struct{})
ch := make(chan interface{}, 1)
go func() { ch <- q.pop(cancel) }()
var x interface{}
select {
case <-time.After(timeout):
close(cancel)
x = <-ch
case x = <-ch:
// noop
}
if x != nil {
return x.(UniqueID)
}
return nil
}
// Variant of DelayQueue.Pop() for UniqueDelayed items
func (q *DelayFIFO) Pop() UniqueID {
return q.pop(nil).(UniqueID)
}
// variant of DelayQueue.Pop that implements optional cancellation
func (q *DelayFIFO) pop(cancel chan struct{}) interface{} {
next := func() *qitem {
q.lock()
defer q.unlock()
for {
for q.queue().Len() == 0 {
signal := make(chan struct{})
go func() {
defer close(signal)
q.cond().Wait()
}()
select {
case <-cancel:
// we may not have the lock yet, so
// broadcast to abort Wait, then
// return after lock re-acquisition
q.cond().Broadcast()
<-signal
return nil
case <-signal:
// we have the lock, re-check
// the queue for data...
}
}
x := heap.Pop(q.queue())
item := x.(*qitem)
unique := item.value.(UniqueID)
uid := unique.GetUID()
if _, ok := q.items[uid]; !ok {
// item was deleted, keep looking
continue
}
delete(q.items, uid)
return item
}
}
return q.delegate.pop(next, cancel)
}
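// NewDelayFIFO returns an empty DelayFIFO backed by a fresh DelayQueue.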
func NewDelayFIFO() *DelayFIFO {
f := &DelayFIFO{
delegate: NewDelayQueue(),
items: map[string]*qitem{},
}
return f
}

View File

@@ -0,0 +1,406 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
import (
"sync/atomic"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
const (
tolerance = 100 * time.Millisecond // go time delays aren't perfect, this is our tolerance for errors WRT expected timeouts
)
func timedPriority(t time.Time) Priority {
return Priority{ts: t}
}
func TestPQ(t *testing.T) {
t.Parallel()
var pq priorityQueue
if pq.Len() != 0 {
t.Fatalf("pq should be empty")
}
now := timedPriority(time.Now())
now2 := timedPriority(now.ts.Add(2 * time.Second))
pq.Push(&qitem{priority: now2})
if pq.Len() != 1 {
t.Fatalf("pq.len should be 1")
}
x := pq.Pop()
if x == nil {
t.Fatalf("x is nil")
}
if pq.Len() != 0 {
t.Fatalf("pq should be empty")
}
item := x.(*qitem)
if !item.priority.Equal(now2) {
t.Fatalf("item.priority != now2")
}
pq.Push(&qitem{priority: now2})
pq.Push(&qitem{priority: now2})
pq.Push(&qitem{priority: now2})
pq.Push(&qitem{priority: now2})
pq.Push(&qitem{priority: now2})
pq.Pop()
pq.Pop()
pq.Pop()
pq.Pop()
pq.Pop()
if pq.Len() != 0 {
t.Fatalf("pq should be empty")
}
now4 := timedPriority(now.ts.Add(4 * time.Second))
now6 := timedPriority(now.ts.Add(6 * time.Second))
pq.Push(&qitem{priority: now2})
pq.Push(&qitem{priority: now4})
pq.Push(&qitem{priority: now6})
pq.Swap(0, 2)
if !pq[0].priority.Equal(now6) || !pq[2].priority.Equal(now2) {
t.Fatalf("swap failed")
}
if pq.Less(1, 2) {
t.Fatalf("now4 < now2")
}
}
func TestPopEmptyPQ(t *testing.T) {
t.Parallel()
defer func() {
if r := recover(); r == nil {
t.Fatalf("Expected panic from popping an empty PQ")
}
}()
var pq priorityQueue
pq.Pop()
}
type testjob struct {
d time.Duration
t time.Time
deadline *time.Time
uid string
instance int
}
func (j *testjob) GetDelay() time.Duration {
return j.d
}
func (j testjob) GetUID() string {
return j.uid
}
func (td *testjob) Deadline() (deadline time.Time, ok bool) {
if td.deadline != nil {
return *td.deadline, true
} else {
return time.Now(), false
}
}
func TestDQ_sanity_check(t *testing.T) {
t.Parallel()
dq := NewDelayQueue()
delay := 2 * time.Second
dq.Add(&testjob{d: delay})
before := time.Now()
x := dq.Pop()
now := time.Now()
waitPeriod := now.Sub(before)
if waitPeriod+tolerance < delay {
t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
}
if x == nil {
t.Fatalf("x is nil")
}
item := x.(*testjob)
if item.d != delay {
t.Fatalf("d != delay")
}
}
func TestDQ_Offer(t *testing.T) {
t.Parallel()
assert := assert.New(t)
dq := NewDelayQueue()
delay := time.Second
added := dq.Offer(&testjob{})
if added {
t.Fatalf("DelayQueue should not add offered job without deadline")
}
deadline := time.Now().Add(delay)
added = dq.Offer(&testjob{deadline: &deadline})
if !added {
t.Fatalf("DelayQueue should add offered job with deadline")
}
before := time.Now()
x := dq.Pop()
now := time.Now()
waitPeriod := now.Sub(before)
if waitPeriod+tolerance < delay {
t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
}
assert.NotNil(x)
assert.Equal(x.(*testjob).deadline, &deadline)
}
func TestDQ_ordered_add_pop(t *testing.T) {
t.Parallel()
dq := NewDelayQueue()
dq.Add(&testjob{d: 2 * time.Second})
dq.Add(&testjob{d: 1 * time.Second})
dq.Add(&testjob{d: 3 * time.Second})
var finished [3]*testjob
before := time.Now()
idx := int32(-1)
ch := make(chan bool, 3)
//TODO: replace with `for range finished` once Go 1.3 support is dropped
for n := 0; n < len(finished); n++ {
go func() {
var ok bool
x := dq.Pop()
i := atomic.AddInt32(&idx, 1)
if finished[i], ok = x.(*testjob); !ok {
t.Fatalf("expected a *testjob, not %v", x)
}
finished[i].t = time.Now()
ch <- true
}()
}
<-ch
<-ch
<-ch
after := time.Now()
totalDelay := after.Sub(before)
if totalDelay+tolerance < (3 * time.Second) {
t.Fatalf("totalDelay < 3s: %v", totalDelay)
}
for i, v := range finished {
if v == nil {
t.Fatalf("task %d was nil", i)
}
expected := time.Duration(i+1) * time.Second
if v.d != expected {
t.Fatalf("task %d had delay-priority %v, expected %v", i, v.d, expected)
}
actualDelay := v.t.Sub(before)
if actualDelay+tolerance < v.d {
t.Fatalf("task %d had actual-delay %v < expected delay %v", i, actualDelay, v.d)
}
}
}
func TestDQ_always_pop_earliest_deadline(t *testing.T) {
t.Parallel()
// add a testjob with delay of 2s
// spawn a func f1 that attempts to Pop() and wait for f1 to begin
// add a testjob with a delay of 1s
// check that the func f1 actually popped the 1s task (not the 2s task)
dq := NewDelayQueue()
dq.Add(&testjob{d: 2 * time.Second})
ch := make(chan *testjob)
started := make(chan bool)
go func() {
started <- true
x := dq.Pop()
job := x.(*testjob)
job.t = time.Now()
ch <- job
}()
<-started
time.Sleep(500 * time.Millisecond) // give plenty of time for Pop() to enter
expected := 1 * time.Second
dq.Add(&testjob{d: expected})
job := <-ch
if expected != job.d {
t.Fatalf("Expected delay-prority of %v got instead got %v", expected, job.d)
}
job = dq.Pop().(*testjob)
expected = 2 * time.Second
if expected != job.d {
t.Fatalf("Expected delay-prority of %v got instead got %v", expected, job.d)
}
}
func TestDQ_always_pop_earliest_deadline_multi(t *testing.T) {
t.Parallel()
dq := NewDelayQueue()
dq.Add(&testjob{d: 2 * time.Second})
ch := make(chan *testjob)
multi := 10
started := make(chan bool, multi)
go func() {
started <- true
for i := 0; i < multi; i++ {
x := dq.Pop()
job := x.(*testjob)
job.t = time.Now()
ch <- job
}
}()
<-started
time.Sleep(500 * time.Millisecond) // give plenty of time for Pop() to enter
expected := 1 * time.Second
for i := 0; i < multi; i++ {
dq.Add(&testjob{d: expected})
}
for i := 0; i < multi; i++ {
job := <-ch
if expected != job.d {
t.Fatalf("Expected delay-prority of %v got instead got %v", expected, job.d)
}
}
job := dq.Pop().(*testjob)
expected = 2 * time.Second
if expected != job.d {
t.Fatalf("Expected delay-prority of %v got instead got %v", expected, job.d)
}
}
func TestDQ_negative_delay(t *testing.T) {
t.Parallel()
dq := NewDelayQueue()
delay := -2 * time.Second
dq.Add(&testjob{d: delay})
before := time.Now()
x := dq.Pop()
now := time.Now()
waitPeriod := now.Sub(before)
if waitPeriod > tolerance {
t.Fatalf("delay too long: %v, expected something less than: %v", waitPeriod, tolerance)
}
if x == nil {
t.Fatalf("x is nil")
}
item := x.(*testjob)
if item.d != delay {
t.Fatalf("d != delay")
}
}
func TestDFIFO_sanity_check(t *testing.T) {
t.Parallel()
assert := assert.New(t)
df := NewDelayFIFO()
delay := 2 * time.Second
df.Add(&testjob{d: delay, uid: "a", instance: 1}, ReplaceExisting)
assert.True(df.ContainedIDs().Has("a"))
// re-add by ReplaceExisting
df.Add(&testjob{d: delay, uid: "a", instance: 2}, ReplaceExisting)
assert.True(df.ContainedIDs().Has("a"))
a, ok := df.Get("a")
assert.True(ok)
assert.Equal(a.(*testjob).instance, 2)
// re-add by KeepExisting
df.Add(&testjob{d: delay, uid: "a", instance: 3}, KeepExisting)
assert.True(df.ContainedIDs().Has("a"))
a, ok = df.Get("a")
assert.True(ok)
assert.Equal(a.(*testjob).instance, 2)
// pop last
before := time.Now()
x := df.Pop()
assert.Equal(a.(*testjob).instance, 2)
now := time.Now()
waitPeriod := now.Sub(before)
if waitPeriod+tolerance < delay {
t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
}
if x == nil {
t.Fatalf("x is nil")
}
item := x.(*testjob)
if item.d != delay {
t.Fatalf("d != delay")
}
}
func TestDFIFO_Offer(t *testing.T) {
t.Parallel()
assert := assert.New(t)
dq := NewDelayFIFO()
delay := time.Second
added := dq.Offer(&testjob{instance: 1}, ReplaceExisting)
if added {
t.Fatalf("DelayFIFO should not add offered job without deadline")
}
deadline := time.Now().Add(delay)
added = dq.Offer(&testjob{deadline: &deadline, instance: 2}, ReplaceExisting)
if !added {
t.Fatalf("DelayFIFO should add offered job with deadline")
}
before := time.Now()
x := dq.Pop()
now := time.Now()
waitPeriod := now.Sub(before)
if waitPeriod+tolerance < delay {
t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
}
assert.NotNil(x)
assert.Equal(x.(*testjob).instance, 2)
}
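
As a rough consumer-side illustration of the API exercised above, the sketch below defines a standalone Delayed item and pushes it through a DelayQueue. It is not part of this commit; the import path and the assumption that Add accepts any value implementing Delayed are both hedged, not verified here.

package main

import (
	"fmt"
	"time"

	// assumed import path for the queue package introduced in this commit
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue"
)

// delayedNote is a hypothetical item; GetDelay satisfies queue.Delayed.
type delayedNote struct {
	text  string
	delay time.Duration
}

func (n *delayedNote) GetDelay() time.Duration { return n.delay }

func main() {
	dq := queue.NewDelayQueue()
	dq.Add(&delayedNote{text: "fires second", delay: 2 * time.Second})
	dq.Add(&delayedNote{text: "fires first", delay: 1 * time.Second})

	// Pop blocks until the earliest deadline passes, then returns that item.
	for i := 0; i < 2; i++ {
		fmt.Println(dq.Pop().(*delayedNote).text)
	}
}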

View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package queue provides several queue implementations, originally
// inspired by Kubernetes pkg/client/cache/fifo.
package queue

View File

@@ -0,0 +1,403 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
import (
"fmt"
"reflect"
"sync"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
)
type entry struct {
value UniqueCopyable
event EventType
}
type deletedEntry struct {
*entry
expiration time.Time
}
func (e *entry) Value() UniqueCopyable {
return e.value
}
func (e *entry) Copy() Copyable {
if e == nil {
return nil
}
return &entry{e.value.Copy().(UniqueCopyable), e.event}
}
func (e *entry) Is(types EventType) bool {
return types&e.event != 0
}
func (e *deletedEntry) Copy() Copyable {
if e == nil {
return nil
}
return &deletedEntry{e.entry.Copy().(*entry), e.expiration}
}
// deliver a message
type pigeon func(msg Entry)
func dead(msg Entry) {
// intentionally blank
}
// HistoricalFIFO receives adds and updates from a Reflector, and puts them in a queue for
// FIFO order processing. If multiple adds/updates of a single item happen while
// an item is in the queue before it has been processed, it will only be
// processed once, and when it is processed, the most recent version will be
// processed. This can't be done with a channel.
type HistoricalFIFO struct {
lock sync.RWMutex
cond sync.Cond
items map[string]Entry // We depend on the property that items in the queue are in the set.
queue []string
carrier pigeon // may be dead, but never nil
gcc int
lingerTTL time.Duration
}
// panics if obj doesn't implement UniqueCopyable; otherwise returns the same, typecast object
func checkType(obj interface{}) UniqueCopyable {
if v, ok := obj.(UniqueCopyable); !ok {
panic(fmt.Sprintf("Illegal object type, expected UniqueCopyable: %T", obj))
} else {
return v
}
}
// Add inserts an item, and puts it in the queue. The item is only enqueued
// if it doesn't already exist in the set.
func (f *HistoricalFIFO) Add(v interface{}) error {
obj := checkType(v)
notifications := []Entry(nil)
defer func() {
for _, e := range notifications {
f.carrier(e)
}
}()
f.lock.Lock()
defer f.lock.Unlock()
id := obj.GetUID()
if entry, exists := f.items[id]; !exists {
f.queue = append(f.queue, id)
} else {
if entry.Is(DELETE_EVENT | POP_EVENT) {
f.queue = append(f.queue, id)
}
}
notifications = f.merge(id, obj)
f.cond.Broadcast()
return nil
}
// Update is the same as Add in this implementation.
func (f *HistoricalFIFO) Update(obj interface{}) error {
return f.Add(obj)
}
// Delete removes an item. It doesn't add it to the queue, because
// this implementation assumes the consumer only cares about the objects,
// not the order in which they were created/added.
func (f *HistoricalFIFO) Delete(v interface{}) error {
obj := checkType(v)
deleteEvent := (Entry)(nil)
defer func() {
f.carrier(deleteEvent)
}()
f.lock.Lock()
defer f.lock.Unlock()
id := obj.GetUID()
item, exists := f.items[id]
if exists && !item.Is(DELETE_EVENT) {
e := item.(*entry)
e.event = DELETE_EVENT
deleteEvent = &deletedEntry{e, time.Now().Add(f.lingerTTL)}
f.items[id] = deleteEvent
}
return nil
}
// List returns a list of all the items.
func (f *HistoricalFIFO) List() []interface{} {
f.lock.RLock()
defer f.lock.RUnlock()
// TODO(jdef): slightly overallocates b/c of deleted items
list := make([]interface{}, 0, len(f.queue))
for _, entry := range f.items {
if entry.Is(DELETE_EVENT | POP_EVENT) {
continue
}
list = append(list, entry.Value().Copy())
}
return list
}
// ListKeys returns a list of the keys of all the items.
func (f *HistoricalFIFO) ListKeys() []string {
f.lock.RLock()
defer f.lock.RUnlock()
// TODO(jdef): slightly overallocates b/c of deleted items
list := make([]string, 0, len(f.queue))
for key, entry := range f.items {
if entry.Is(DELETE_EVENT | POP_EVENT) {
continue
}
list = append(list, key)
}
return list
}
// ContainedIDs returns a util.StringSet containing all IDs of the stored items.
// This is a snapshot of a moment in time, and one should keep in mind that
// other go routines can add or remove items after you call this.
func (c *HistoricalFIFO) ContainedIDs() util.StringSet {
c.lock.RLock()
defer c.lock.RUnlock()
set := util.StringSet{}
for id, entry := range c.items {
if entry.Is(DELETE_EVENT | POP_EVENT) {
continue
}
set.Insert(id)
}
return set
}
// Get returns the requested item, or sets exists=false.
func (f *HistoricalFIFO) Get(v interface{}) (interface{}, bool, error) {
obj := checkType(v)
return f.GetByKey(obj.GetUID())
}
// GetByKey returns the requested item, or sets exists=false.
func (f *HistoricalFIFO) GetByKey(id string) (interface{}, bool, error) {
f.lock.RLock()
defer f.lock.RUnlock()
entry, exists := f.items[id]
if exists && !entry.Is(DELETE_EVENT|POP_EVENT) {
return entry.Value().Copy(), true, nil
}
return nil, false, nil
}
// Poll returns true if an entry exists for the given id and matches the event mask.
func (f *HistoricalFIFO) Poll(id string, t EventType) bool {
f.lock.RLock()
defer f.lock.RUnlock()
entry, exists := f.items[id]
return exists && entry.Is(t)
}
// Await attempts to Pop within the given timeout; upon success the non-nil item is returned, otherwise nil
func (q *HistoricalFIFO) Await(timeout time.Duration) interface{} {
cancel := make(chan struct{})
ch := make(chan interface{}, 1)
go func() { ch <- q.pop(cancel) }()
select {
case <-time.After(timeout):
close(cancel)
return <-ch
case x := <-ch:
return x
}
}
func (f *HistoricalFIFO) Pop() interface{} {
return f.pop(nil)
}
func (f *HistoricalFIFO) pop(cancel chan struct{}) interface{} {
popEvent := (Entry)(nil)
defer func() {
f.carrier(popEvent)
}()
f.lock.Lock()
defer f.lock.Unlock()
for {
for len(f.queue) == 0 {
signal := make(chan struct{})
go func() {
defer close(signal)
f.cond.Wait()
}()
select {
case <-cancel:
// we may not have the lock yet, so
// broadcast to abort Wait, then
// return after lock re-acquisition
f.cond.Broadcast()
<-signal
return nil
case <-signal:
// we have the lock, re-check
// the queue for data...
}
}
id := f.queue[0]
f.queue = f.queue[1:]
item, ok := f.items[id]
if !ok || item.Is(DELETE_EVENT|POP_EVENT) {
// Item may have been deleted subsequently.
continue
}
value := item.Value()
popEvent = &entry{value, POP_EVENT}
f.items[id] = popEvent
return value.Copy()
}
}
func (f *HistoricalFIFO) Replace(objs []interface{}) error {
notifications := make([]Entry, 0, len(objs))
defer func() {
for _, e := range notifications {
f.carrier(e)
}
}()
idToObj := make(map[string]interface{})
for _, v := range objs {
obj := checkType(v)
idToObj[obj.GetUID()] = v
}
f.lock.Lock()
defer f.lock.Unlock()
f.queue = f.queue[:0]
now := time.Now()
for id, v := range f.items {
if _, exists := idToObj[id]; !exists && !v.Is(DELETE_EVENT) {
// a non-deleted entry in the items list that doesn't show up in the
// new list: mark it as deleted
ent := v.(*entry)
ent.event = DELETE_EVENT
e := &deletedEntry{ent, now.Add(f.lingerTTL)}
f.items[id] = e
notifications = append(notifications, e)
}
}
for id, v := range idToObj {
obj := checkType(v)
f.queue = append(f.queue, id)
n := f.merge(id, obj)
notifications = append(notifications, n...)
}
if len(f.queue) > 0 {
f.cond.Broadcast()
}
return nil
}
// garbage collect DELETEd items whose TTL has expired; the IDs of such items are removed
// from the queue. This impl assumes that caller has acquired state lock.
func (f *HistoricalFIFO) gc() {
now := time.Now()
deleted := make(map[string]struct{})
for id, v := range f.items {
if v.Is(DELETE_EVENT) {
ent := v.(*deletedEntry)
if ent.expiration.Before(now) {
delete(f.items, id)
deleted[id] = struct{}{}
}
}
}
// remove deleted items from the queue, will likely (slightly) overallocate here
queue := make([]string, 0, len(f.queue))
for _, id := range f.queue {
if _, exists := deleted[id]; !exists {
queue = append(queue, id)
}
}
f.queue = queue
}
// Assumes that the caller has acquired the state lock.
func (f *HistoricalFIFO) merge(id string, obj UniqueCopyable) (notifications []Entry) {
item, exists := f.items[id]
now := time.Now()
if !exists {
e := &entry{obj.Copy().(UniqueCopyable), ADD_EVENT}
f.items[id] = e
notifications = append(notifications, e)
} else {
if !item.Is(DELETE_EVENT) && item.Value().GetUID() != obj.GetUID() {
// hidden DELETE!
// (1) append a DELETE
// (2) append an ADD
// .. and notify listeners in that order
ent := item.(*entry)
ent.event = DELETE_EVENT
e1 := &deletedEntry{ent, now.Add(f.lingerTTL)}
e2 := &entry{obj.Copy().(UniqueCopyable), ADD_EVENT}
f.items[id] = e2
notifications = append(notifications, e1, e2)
} else if !reflect.DeepEqual(obj, item.Value()) {
//TODO(jdef): it would be nice if we could rely on resource versions
//instead of doing a DeepEqual. Maybe someday we'll be able to.
e := &entry{obj.Copy().(UniqueCopyable), UPDATE_EVENT}
f.items[id] = e
notifications = append(notifications, e)
}
}
// check for garbage collection
f.gcc++
if f.gcc%256 == 0 { //TODO(jdef): extract constant
f.gcc = 0
f.gc()
}
return
}
// NewHistorical returns a Store which can be used to queue up items to
// process. If a non-nil channel is provided, then modifications to the
// FIFO are delivered on it as Entry events.
func NewHistorical(ch chan<- Entry) FIFO {
carrier := dead
if ch != nil {
carrier = func(msg Entry) {
if msg != nil {
ch <- msg.Copy().(Entry)
}
}
}
f := &HistoricalFIFO{
items: map[string]Entry{},
queue: []string{},
carrier: carrier,
lingerTTL: 5 * time.Minute, // TODO(jdef): extract constant
}
f.cond.L = &f.lock
return f
}
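
The event channel accepted by NewHistorical is not exercised by the tests in the next file (they pass nil), so here is a minimal in-package sketch of how a consumer could observe ADD/UPDATE/POP notifications. It is illustrative only; the note type and the example function are hypothetical and would live in a _test.go file.

package queue

import "fmt"

// note is a hypothetical UniqueCopyable used only for this illustration.
type note struct {
	id   string
	body string
}

func (n *note) Copy() Copyable {
	c := *n
	return &c
}

func (n *note) GetUID() string { return n.id }

func ExampleNewHistorical_events() {
	events := make(chan Entry, 16) // buffered so the carrier never blocks here
	f := NewHistorical(events)

	f.Add(&note{id: "a", body: "v1"})    // emits an ADD_EVENT
	f.Update(&note{id: "a", body: "v2"}) // emits an UPDATE_EVENT (value changed)
	f.Pop()                              // emits a POP_EVENT

	close(events) // illustration only; a long-lived producer keeps the channel open
	for e := range events {
		fmt.Println(e.Value().GetUID(), e.Is(ADD_EVENT), e.Is(UPDATE_EVENT), e.Is(POP_EVENT))
	}
}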

View File

@@ -0,0 +1,191 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
import (
"fmt"
"testing"
"time"
)
type _int int
type _uint uint
func (i _int) Copy() Copyable {
return i
}
func (i _int) GetUID() string {
return fmt.Sprintf("INT%d", int(i))
}
func (i _uint) Copy() Copyable {
return i
}
func (i _uint) GetUID() string {
return fmt.Sprintf("UINT%d", uint64(i))
}
type testObj struct {
id string
value int
}
func (i *testObj) Copy() Copyable {
if i == nil {
return nil
} else {
return &testObj{i.id, i.value}
}
}
func (i *testObj) GetUID() string {
return i.id
}
func TestFIFO_basic(t *testing.T) {
f := NewHistorical(nil)
const amount = 500
go func() {
for i := 0; i < amount; i++ {
f.Add(_int(i + 1))
}
}()
go func() {
for u := uint(0); u < amount; u++ {
f.Add(_uint(u + 1))
}
}()
lastInt := _int(0)
lastUint := _uint(0)
for i := 0; i < amount*2; i++ {
switch obj := f.Pop().(type) {
case _int:
if obj <= lastInt {
t.Errorf("got %v (int) out of order, last was %v", obj, lastInt)
}
lastInt = obj
case _uint:
if obj <= lastUint {
t.Errorf("got %v (uint) out of order, last was %v", obj, lastUint)
} else {
lastUint = obj
}
default:
t.Fatalf("unexpected type %#v", obj)
}
}
}
func TestFIFO_addUpdate(t *testing.T) {
f := NewHistorical(nil)
f.Add(&testObj{"foo", 10})
f.Update(&testObj{"foo", 15})
got := make(chan *testObj, 2)
go func() {
for {
got <- f.Pop().(*testObj)
}
}()
first := <-got
if e, a := 15, first.value; e != a {
t.Errorf("Didn't get updated value (%v), got %v", e, a)
}
select {
case unexpected := <-got:
t.Errorf("Got second value %v", unexpected)
case <-time.After(50 * time.Millisecond):
}
_, exists, _ := f.GetByKey("foo")
if exists {
t.Errorf("item did not get removed")
}
}
func TestFIFO_addReplace(t *testing.T) {
f := NewHistorical(nil)
f.Add(&testObj{"foo", 10})
f.Replace([]interface{}{&testObj{"foo", 15}})
got := make(chan *testObj, 2)
go func() {
for {
got <- f.Pop().(*testObj)
}
}()
first := <-got
if e, a := 15, first.value; e != a {
t.Errorf("Didn't get updated value (%v), got %v", e, a)
}
select {
case unexpected := <-got:
t.Errorf("Got second value %v", unexpected)
case <-time.After(50 * time.Millisecond):
}
_, exists, _ := f.GetByKey("foo")
if exists {
t.Errorf("item did not get removed")
}
}
func TestFIFO_detectLineJumpers(t *testing.T) {
f := NewHistorical(nil)
f.Add(&testObj{"foo", 10})
f.Add(&testObj{"bar", 1})
f.Add(&testObj{"foo", 11})
f.Add(&testObj{"foo", 13})
f.Add(&testObj{"zab", 30})
err := error(nil)
done := make(chan struct{})
go func() {
defer close(done)
if e, a := 13, f.Pop().(*testObj).value; a != e {
err = fmt.Errorf("expected %d, got %d", e, a)
return
}
f.Add(&testObj{"foo", 14}) // ensure foo doesn't jump back in line
if e, a := 1, f.Pop().(*testObj).value; a != e {
err = fmt.Errorf("expected %d, got %d", e, a)
return
}
if e, a := 30, f.Pop().(*testObj).value; a != e {
err = fmt.Errorf("expected %d, got %d", e, a)
return
}
if e, a := 14, f.Pop().(*testObj).value; a != e {
err = fmt.Errorf("expected %d, got %d", e, a)
return
}
}()
select {
case <-done:
if err != nil {
t.Fatal(err)
}
case <-time.After(1 * time.Second):
t.Fatal("Deadlocked unit test")
}
}

View File

@@ -0,0 +1,103 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
import (
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
)
type EventType int
const (
ADD_EVENT EventType = 1 << iota
UPDATE_EVENT
DELETE_EVENT
POP_EVENT
)
type Entry interface {
Copyable
Value() UniqueCopyable
// types is a logically OR'd combination of EventType, e.g. ADD_EVENT|UPDATE_EVENT
Is(types EventType) bool
}
type Copyable interface {
// return an independent copy (deep clone) of the current object
Copy() Copyable
}
type UniqueID interface {
GetUID() string
}
type UniqueCopyable interface {
Copyable
UniqueID
}
type FIFO interface {
cache.Store
// Pop waits until an item is ready and returns it. If multiple items are
// ready, they are returned in the order in which they were added/updated.
// The item is removed from the queue (and the store) before it is returned,
// so if you don't successfully process it, you need to add it back with Add().
Pop() interface{}
// Await attempts to Pop within the given interval; upon success the non-nil
// item is returned, otherwise nil
Await(timeout time.Duration) interface{}
// Is there an entry for the id that matches the event mask?
Poll(id string, types EventType) bool
}
type Delayed interface {
// return the remaining delay; a non-positive value indicates no delay
GetDelay() time.Duration
}
type Deadlined interface {
// when ok, returns the time when this object should be activated/executed/evaluated
Deadline() (deadline time.Time, ok bool)
}
// No objects are ever expected to be sent over this channel. References to BreakChan
// instances may be nil (always blocking). Signalling over this channel is performed by
// closing the channel. As such there can only ever be a single signal sent over the
// lifetime of the channel.
type BreakChan <-chan struct{}
// an optional interface to be implemented by Delayed objects; returning a nil
// channel from Breaker() results in waiting the full delay duration
type Breakout interface {
// return a channel that signals early departure from a blocking delay
Breaker() BreakChan
}
type UniqueDelayed interface {
UniqueID
Delayed
}
type UniqueDeadlined interface {
UniqueID
Deadlined
}
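
The Breakout/BreakChan contract above (signal by closing the channel, at most once) is not demonstrated elsewhere in this section, so the following in-package sketch shows one way a Delayed item could expose a breaker that cuts its wait short. The cancellableDelay type is hypothetical, and whether the delay-queue implementation honors the breaker for a waiting Pop is an assumption here, not something shown by this file.

package queue

import "time"

// cancellableDelay is a hypothetical Delayed item whose wait can be aborted early.
type cancellableDelay struct {
	delay  time.Duration
	cancel chan struct{} // closed (at most once) to signal early departure
}

func (c *cancellableDelay) GetDelay() time.Duration { return c.delay }

// Breaker satisfies Breakout; returning a nil channel would mean
// "always wait out the full delay".
func (c *cancellableDelay) Breaker() BreakChan { return BreakChan(c.cancel) }

// Usage sketch: a goroutine blocked waiting on this item's delay would be
// released as soon as close(item.cancel) is called.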

View File

@@ -0,0 +1,70 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
// Decide whether a pre-existing deadline for an item in a delay-queue should be
// updated if an attempt is made to offer/add a new deadline for said item. Whether
// the deadline changes or not has zero impact on the data blob associated with the
// entry in the queue.
type DeadlinePolicy int
const (
PreferLatest DeadlinePolicy = iota
PreferEarliest
)
// Decide whether a pre-existing data blob in a delay-queue should be replaced if an
// attempt is made to add/offer a new data blob in its place. Whether the data is
// replaced has no bearing on the deadline (priority) of the item in the queue.
type ReplacementPolicy int
const (
KeepExisting ReplacementPolicy = iota
ReplaceExisting
)
func (rp ReplacementPolicy) replacementValue(original, replacement interface{}) (result interface{}) {
switch rp {
case KeepExisting:
result = original
case ReplaceExisting:
fallthrough
default:
result = replacement
}
return
}
func (dp DeadlinePolicy) nextDeadline(a, b Priority) (result Priority) {
switch dp {
case PreferEarliest:
if a.ts.Before(b.ts) {
result = a
} else {
result = b
}
case PreferLatest:
fallthrough
default:
if a.ts.After(b.ts) {
result = a
} else {
result = b
}
}
return
}
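
For a concrete feel of how the two policies resolve a collision, here is a small in-package sketch (illustrative only; illustrateMerge is not part of this commit) that merges an existing entry's deadline and payload with a newly offered one.

package queue

import (
	"fmt"
	"time"
)

// illustrateMerge shows which deadline and which payload would win when an
// existing entry (2s out, "old payload") collides with a new offer (1s out,
// "new payload") under the given policies.
func illustrateMerge(dp DeadlinePolicy, rp ReplacementPolicy) {
	now := time.Now()
	existing := Priority{ts: now.Add(2 * time.Second)}
	offered := Priority{ts: now.Add(1 * time.Second)}

	deadline := dp.nextDeadline(existing, offered)
	payload := rp.replacementValue("old payload", "new payload")

	fmt.Printf("deadline %v from now, payload %q\n", deadline.ts.Sub(now), payload)
}

// illustrateMerge(PreferEarliest, ReplaceExisting) reports the 1s deadline and
// "new payload"; illustrateMerge(PreferLatest, KeepExisting) keeps both originals.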

View File

@@ -0,0 +1,56 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
import (
"time"
)
type Priority struct {
ts time.Time // timestamp
notify BreakChan // notification channel
}
func (p Priority) Equal(other Priority) bool {
return p.ts.Equal(other.ts) && p.notify == other.notify
}
func extractFromDelayed(d Delayed) Priority {
deadline := time.Now().Add(d.GetDelay())
breaker := BreakChan(nil)
if breakout, good := d.(Breakout); good {
breaker = breakout.Breaker()
}
return Priority{
ts: deadline,
notify: breaker,
}
}
func extractFromDeadlined(d Deadlined) (Priority, bool) {
if ts, ok := d.Deadline(); ok {
breaker := BreakChan(nil)
if breakout, good := d.(Breakout); good {
breaker = breakout.Breaker()
}
return Priority{
ts: ts,
notify: breaker,
}, true
}
return Priority{}, false
}

View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Some file descriptor manipulation funcs (Unix-Only), inspired by
// https://github.com/skarnet/execline/blob/master/src/execline/redirfd.c
package redirfd

View File

@@ -0,0 +1,41 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package redirfd
import (
"fmt"
"strconv"
)
// FileDescriptor mirrors unix-specific indexes for cross-platform use
type FileDescriptor int
const (
InvalidFD FileDescriptor = -1
Stdin FileDescriptor = 0
Stdout FileDescriptor = 1
Stderr FileDescriptor = 2
)
// ParseFileDescriptor parses a string formatted file descriptor
func ParseFileDescriptor(fdstr string) (FileDescriptor, error) {
fdint, err := strconv.Atoi(fdstr)
if err != nil {
return InvalidFD, fmt.Errorf("file descriptor must be an integer: %q", fdstr)
}
return FileDescriptor(fdint), nil
}

View File

@@ -0,0 +1,54 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package redirfd
import (
"testing"
. "github.com/onsi/gomega"
)
func TestParseFileDescriptor(t *testing.T) {
RegisterTestingT(t)
valid := map[string]FileDescriptor{
"-1": InvalidFD,
"0": Stdin,
"1": Stdout,
"2": Stderr,
"3": FileDescriptor(3),
}
for input, expected := range valid {
fd, err := ParseFileDescriptor(input)
Expect(err).ToNot(HaveOccurred(), "Input: '%s'", input)
Expect(fd).To(Equal(expected), "Input: '%s'", input)
}
invalid := []string{
"a",
" 1",
"blue",
"stderr",
"STDERR",
}
for _, input := range invalid {
_, err := ParseFileDescriptor(input)
Expect(err).To(HaveOccurred(), "Input: '%s'", input)
}
}

View File

@@ -0,0 +1,208 @@
// +build !windows
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package redirfd
import (
"fmt"
"os"
"syscall"
)
type RedirectMode int
const (
Read RedirectMode = iota // open file for reading
Write // open file for writing, truncating if it exists
Update // open file for read & write
Append // open file for append, create if it does not exist
AppendExisting // open file for append, do not create if it does not already exist
WriteNew // open file for writing, creating it, failing if it already exists
)
// see https://github.com/skarnet/execline/blob/master/src/execline/redirfd.c
func (mode RedirectMode) Redirect(nonblock, changemode bool, fd FileDescriptor, name string) (*os.File, error) {
flags := 0
what := -1
switch mode {
case Read:
what = syscall.O_RDONLY
flags &= ^(syscall.O_APPEND | syscall.O_CREAT | syscall.O_TRUNC | syscall.O_EXCL)
case Write:
what = syscall.O_WRONLY
flags |= syscall.O_CREAT | syscall.O_TRUNC
flags &= ^(syscall.O_APPEND | syscall.O_EXCL)
case Update:
what = syscall.O_RDWR
flags &= ^(syscall.O_APPEND | syscall.O_CREAT | syscall.O_TRUNC | syscall.O_EXCL)
case Append:
what = syscall.O_WRONLY
flags |= syscall.O_CREAT | syscall.O_APPEND
flags &= ^(syscall.O_TRUNC | syscall.O_EXCL)
case AppendExisting:
what = syscall.O_WRONLY
flags |= syscall.O_APPEND
flags &= ^(syscall.O_CREAT | syscall.O_TRUNC | syscall.O_EXCL)
case WriteNew:
what = syscall.O_WRONLY
flags |= syscall.O_CREAT | syscall.O_EXCL
flags &= ^(syscall.O_APPEND | syscall.O_TRUNC)
default:
return nil, fmt.Errorf("unexpected mode %d", mode)
}
if nonblock {
flags |= syscall.O_NONBLOCK
}
flags |= what
fd2, e := open(name, flags, 0666)
if (what == syscall.O_WRONLY) && (e == syscall.ENXIO) {
// The write-only open failed with ENXIO (e.g. a FIFO with no reader yet): open the
// file read-only and non-blocking first, retry the original open, then drop the temporary read fd.
fdr, e2 := open(name, syscall.O_RDONLY|syscall.O_NONBLOCK, 0)
if e2 != nil {
return nil, &os.PathError{"open_read", name, e2}
}
fd2, e = open(name, flags, 0666)
fd_close(fdr)
}
if e != nil {
return nil, &os.PathError{"open", name, e}
}
if e = fd_move(fd, fd2); e != nil {
return nil, &os.PathError{"fd_move", name, e}
}
if changemode {
if nonblock {
e = ndelay_off(fd)
} else {
e = ndelay_on(fd)
}
if e != nil {
return nil, &os.PathError{"ndelay", name, e}
}
}
return os.NewFile(uintptr(fd2), name), nil
}
// proxy to return a FileDescriptor
func open(path string, openmode int, perm uint32) (FileDescriptor, error) {
fdint, err := syscall.Open(path, openmode, perm)
return FileDescriptor(fdint), err
}
// see https://github.com/skarnet/skalibs/blob/master/src/libstddjb/fd_move.c
func fd_move(to, from FileDescriptor) (err error) {
if to == from {
return
}
for {
_, _, e1 := syscall.RawSyscall(syscall.SYS_DUP2, uintptr(from), uintptr(to), 0)
if e1 != syscall.EINTR {
if e1 != 0 {
err = e1
}
break
}
}
if err != nil {
err = fd_close(from)
}
return
/*
do
r = dup2(from, to) ;
while ((r == -1) && (errno == EINTR)) ;
return (r == -1) ? -1 : fd_close(from) ;
*/
}
// see https://github.com/skarnet/skalibs/blob/master/src/libstddjb/fd_close.c
func fd_close(fd FileDescriptor) (err error) {
i := 0
var e error
for {
if e = syscall.Close(int(fd)); e == nil {
return nil
}
i++
if e != syscall.EINTR {
break
}
}
if e == syscall.EBADF && i > 1 {
return nil
}
return e
}
/*
int fd_close (int fd)
{
register unsigned int i = 0 ;
doit:
if (!close(fd)) return 0 ;
i++ ;
if (errno == EINTR) goto doit ;
return ((errno == EBADF) && (i > 1)) ? 0 : -1 ;
}
*/
// see https://github.com/skarnet/skalibs/blob/master/src/libstddjb/ndelay_on.c
func ndelay_on(fd FileDescriptor) error {
// 32-bit will likely break because it needs SYS_FCNTL64
got, _, e := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), uintptr(syscall.F_GETFL), 0)
if e != 0 {
return e
}
_, _, e = syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), uintptr(syscall.F_SETFL), uintptr(got|syscall.O_NONBLOCK))
if e != 0 {
return e
}
return nil
}
/*
int ndelay_on (int fd)
{
register int got = fcntl(fd, F_GETFL) ;
return (got == -1) ? -1 : fcntl(fd, F_SETFL, got | O_NONBLOCK) ;
}
*/
// see https://github.com/skarnet/skalibs/blob/master/src/libstddjb/ndelay_off.c
func ndelay_off(fd FileDescriptor) error {
// 32-bit will likely break because it needs SYS_FCNTL64
got, _, e := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), uintptr(syscall.F_GETFL), 0)
if e != 0 {
return e
}
_, _, e = syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), uintptr(syscall.F_SETFL), uintptr(int(got) & ^syscall.O_NONBLOCK))
if e != 0 {
return e
}
return nil
}
/*
int ndelay_off (int fd)
{
register int got = fcntl(fd, F_GETFL) ;
return (got == -1) ? -1 : fcntl(fd, F_SETFL, got & ^O_NONBLOCK) ;
}
*/
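
As a usage sketch for the Unix implementation above (not part of this commit; the import path and log file path are assumptions): append the current process's stderr to a log file using the Append mode.

package main

import (
	"fmt"
	"os"

	// assumed import path for the redirfd package introduced in this commit
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/redirfd"
)

func main() {
	// redirect fd 2 so that writes to stderr append to the named file,
	// creating it if necessary; blocking mode, no fcntl flag changes afterwards.
	f, err := redirfd.Append.Redirect(false, false, redirfd.Stderr, "/tmp/executor-stderr.log")
	if err != nil {
		fmt.Println("redirect failed:", err)
		os.Exit(1)
	}
	defer f.Close()

	fmt.Fprintln(os.Stderr, "this line lands in the log file")
}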

View File

@@ -0,0 +1,39 @@
// +build windows
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package redirfd
import (
"fmt"
"os"
)
type RedirectMode int
const (
Read RedirectMode = iota // open file for reading
Write // open file for writing, truncating if it exists
Update // open file for read & write
Append // open file for append, create if it does not exist
AppendExisting // open file for append, do not create if it does not already exist
WriteNew // open file for writing, creating it, failing if it already exists
)
func (mode RedirectMode) Redirect(nonblock, changemode bool, fd FileDescriptor, name string) (*os.File, error) {
return nil, fmt.Errorf("Redirect(%s, %s, %d, \"%s\") not supported on windows", nonblock, changemode, fd, name)
}

View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package runtime provides utilities for semaphores (chan struct{}),
// a simple Latch implementation, and metrics for reporting handled panics.
package runtime

View File

@@ -0,0 +1,35 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"sync/atomic"
)
type Latch struct {
int32
}
// Acquire returns true if this latch was successfully acquired. It is concurrency safe and
// returns true only upon the first invocation; all subsequent invocations return false,
// as does any invocation on a nil receiver.
func (self *Latch) Acquire() bool {
if self == nil {
return false
}
return atomic.CompareAndSwapInt32(&self.int32, 0, 1)
}

View File

@@ -0,0 +1,61 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"sync"
"sync/atomic"
"testing"
"time"
)
func Test_LatchAcquireBasic(t *testing.T) {
var x Latch
if !x.Acquire() {
t.Fatalf("expected first acquire to succeed")
}
if x.Acquire() {
t.Fatalf("expected second acquire to fail")
}
if x.Acquire() {
t.Fatalf("expected third acquire to fail")
}
}
func Test_LatchAcquireConcurrent(t *testing.T) {
var x Latch
const NUM = 10
ch := make(chan struct{})
var success int32
var wg sync.WaitGroup
wg.Add(NUM)
for i := 0; i < NUM; i++ {
go func() {
defer wg.Done()
<-ch
if x.Acquire() {
atomic.AddInt32(&success, 1)
}
}()
}
time.Sleep(200 * time.Millisecond)
close(ch)
wg.Wait()
if success != 1 {
t.Fatalf("expected single acquire to succeed instead of %d", success)
}
}

View File

@@ -0,0 +1,47 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"sync"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/prometheus/client_golang/prometheus"
)
const (
runtimeSubsystem = "runtime"
)
var (
panicCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Subsystem: runtimeSubsystem,
Name: "panics",
Help: "Counter of panics handled by the internal crash handler.",
},
)
)
var registerMetrics sync.Once
func Register() {
registerMetrics.Do(func() {
prometheus.MustRegister(panicCounter)
util.PanicHandlers = append(util.PanicHandlers, func(interface{}) { panicCounter.Inc() })
})
}
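
A brief sketch of how this registration is meant to be used (import path assumed): call Register once at startup so that panics recovered by util.HandleCrash are counted and exported via Prometheus.

package main

import (
	// assumed import path for the runtime package introduced in this commit
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
)

func main() {
	// Register is idempotent (guarded by sync.Once); call it before starting
	// any goroutines that rely on util.HandleCrash.
	runtime.Register()
	// ... start scheduler/executor components here ...
}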

View File

@@ -0,0 +1,122 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"os"
"sync"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
)
type Signal <-chan struct{}
// return a func that will close the signal chan.
// multiple invocations of the returned func will not generate a panic.
// two funcs from separate invocations of Closer() (on the same sig chan) will cause a panic if both are invoked.
// for example:
// // good
// x := runtime.After(func() { ... })
// f := x.Closer()
// f()
// f()
//
// // bad
// x := runtime.After(func() { ... })
// f := x.Closer()
// g := x.Closer()
// f()
// g() // this will panic
func Closer(sig chan<- struct{}) func() {
var once sync.Once
return func() {
once.Do(func() { close(sig) })
}
}
// upon receiving signal sig invoke function f and immediately return a signal
// that indicates f's completion. used to chain handler funcs, for example:
// On(job.Done(), response.Send).Then(wg.Done)
func (sig Signal) Then(f func()) Signal {
if sig == nil {
return nil
}
return On(sig, f)
}
// execute a callback function after the specified signal chan closes.
// immediately returns a signal that indicates f's completion.
func On(sig <-chan struct{}, f func()) Signal {
if sig == nil {
return nil
}
return After(func() {
<-sig
if f != nil {
f()
}
})
}
func OnOSSignal(sig <-chan os.Signal, f func(os.Signal)) Signal {
if sig == nil {
return nil
}
return After(func() {
if s, ok := <-sig; ok && f != nil {
f(s)
}
})
}
// spawn a goroutine to execute a func, immediately returns a chan that closes
// upon completion of the func. returns a nil signal chan if the given func is nil.
func After(f func()) Signal {
ch := make(chan struct{})
go func() {
defer close(ch)
defer util.HandleCrash()
if f != nil {
f()
}
}()
return Signal(ch)
}
// periodically execute the given function, stopping once stopCh is closed.
// this func blocks until stopCh is closed; it is intended to be run as a goroutine.
func Until(f func(), period time.Duration, stopCh <-chan struct{}) {
if f == nil {
return
}
for {
select {
case <-stopCh:
return
default:
}
func() {
defer util.HandleCrash()
f()
}()
select {
case <-stopCh:
case <-time.After(period):
}
}
}
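
OnOSSignal is the one helper in this file that the test below does not exercise, so here is an illustrative sketch (import path assumed, cleanup step hypothetical) that traps SIGTERM and chains a cleanup function with Then.

package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"

	// assumed import path for the runtime package introduced in this commit
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
)

func main() {
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGTERM)

	// when SIGTERM arrives, report it, then run a cleanup step;
	// the final signal closes once the cleanup has finished.
	done := runtime.OnOSSignal(sigCh, func(s os.Signal) {
		fmt.Println("received", s)
	}).Then(func() {
		fmt.Println("cleanup complete")
	})

	<-done
}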

View File

@@ -0,0 +1,64 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"testing"
"time"
)
func TestUntil(t *testing.T) {
ch := make(chan struct{})
close(ch)
Until(func() {
t.Fatal("should not have been invoked")
}, 0, ch)
//--
ch = make(chan struct{})
called := make(chan struct{})
After(func() {
Until(func() {
called <- struct{}{}
}, 0, ch)
}).Then(func() { close(called) })
<-called
close(ch)
<-called
//--
ch = make(chan struct{})
called = make(chan struct{})
running := make(chan struct{})
After(func() {
Until(func() {
close(running)
called <- struct{}{}
}, 2*time.Second, ch)
}).Then(func() { close(called) })
<-running
close(ch)
<-called // unblock the goroutine
now := time.Now()
<-called
if time.Since(now) > 1800*time.Millisecond {
t.Fatalf("Until should not have waited the full timeout period since we closed the stop chan")
}
}

View File

@@ -0,0 +1,109 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"io"
"time"
"code.google.com/p/gcfg"
)
const (
DefaultOfferTTL = 5 * time.Second // duration an offer is viable, prior to being expired
DefaultOfferLingerTTL = 120 * time.Second // duration an expired offer lingers in history
DefaultListenerDelay = 1 * time.Second // duration between offer listener notifications
DefaultUpdatesBacklog = 2048 // size of the pod updates channel
DefaultFrameworkIdRefreshInterval = 30 * time.Second // interval we update the frameworkId stored in etcd
DefaultInitialImplicitReconciliationDelay = 15 * time.Second // wait this amount of time after initial registration before attempting implicit reconciliation
DefaultExplicitReconciliationMaxBackoff = 2 * time.Minute // interval in between internal task status checks/updates
DefaultExplicitReconciliationAbortTimeout = 30 * time.Second // waiting period after attempting to cancel an ongoing reconciliation
DefaultInitialPodBackoff = 1 * time.Second
DefaultMaxPodBackoff = 60 * time.Second
DefaultHttpHandlerTimeout = 10 * time.Second
DefaultHttpBindInterval = 5 * time.Second
)
// Example scheduler configuration file:
//
// [scheduler]
// info-name = Kubernetes
// offer-ttl = 5s
// offer-linger-ttl = 2m
type ConfigWrapper struct {
Scheduler Config
}
type Config struct {
OfferTTL WrappedDuration `gcfg:"offer-ttl"`
OfferLingerTTL WrappedDuration `gcfg:"offer-linger-ttl"`
ListenerDelay WrappedDuration `gcfg:"listener-delay"`
UpdatesBacklog int `gcfg:"updates-backlog"`
FrameworkIdRefreshInterval WrappedDuration `gcfg:"framework-id-refresh-interval"`
InitialImplicitReconciliationDelay WrappedDuration `gcfg:"initial-implicit-reconciliation-delay"`
ExplicitReconciliationMaxBackoff WrappedDuration `gcfg:"explicit-reconciliantion-max-backoff"`
ExplicitReconciliationAbortTimeout WrappedDuration `gcfg:"explicit-reconciliantion-abort-timeout"`
InitialPodBackoff WrappedDuration `gcfg:"initial-pod-backoff"`
MaxPodBackoff WrappedDuration `gcfg:"max-pod-backoff"`
HttpHandlerTimeout WrappedDuration `gcfg:"http-handler-timeout"`
HttpBindInterval WrappedDuration `gcfg:"http-bind-interval"`
}
type WrappedDuration struct {
time.Duration
}
func (wd *WrappedDuration) UnmarshalText(data []byte) error {
d, err := time.ParseDuration(string(data))
if err == nil {
wd.Duration = d
}
return err
}
func (c *Config) SetDefaults() {
c.OfferTTL = WrappedDuration{DefaultOfferTTL}
c.OfferLingerTTL = WrappedDuration{DefaultOfferLingerTTL}
c.ListenerDelay = WrappedDuration{DefaultListenerDelay}
c.UpdatesBacklog = DefaultUpdatesBacklog
c.FrameworkIdRefreshInterval = WrappedDuration{DefaultFrameworkIdRefreshInterval}
c.InitialImplicitReconciliationDelay = WrappedDuration{DefaultInitialImplicitReconciliationDelay}
c.ExplicitReconciliationMaxBackoff = WrappedDuration{DefaultExplicitReconciliationMaxBackoff}
c.ExplicitReconciliationAbortTimeout = WrappedDuration{DefaultExplicitReconciliationAbortTimeout}
c.InitialPodBackoff = WrappedDuration{DefaultInitialPodBackoff}
c.MaxPodBackoff = WrappedDuration{DefaultMaxPodBackoff}
c.HttpHandlerTimeout = WrappedDuration{DefaultHttpHandlerTimeout}
c.HttpBindInterval = WrappedDuration{DefaultHttpBindInterval}
}
func CreateDefaultConfig() *Config {
c := &Config{}
c.SetDefaults()
return c
}
func (c *Config) Read(configReader io.Reader) error {
wrapper := &ConfigWrapper{Scheduler: *c}
if configReader != nil {
if err := gcfg.ReadInto(wrapper, configReader); err != nil {
return err
}
*c = wrapper.Scheduler
}
return nil
}
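
To tie the example configuration file in the comment above to the Read/SetDefaults API, here is a short sketch of loading scheduler tuning from disk; the config path and the package import path are assumptions.

package main

import (
	"fmt"
	"os"

	// assumed import path for the scheduler config package introduced in this commit
	schedcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config"
)

func main() {
	cfg := schedcfg.CreateDefaultConfig() // start from the documented defaults

	if f, err := os.Open("/etc/kubernetes-mesos/scheduler.conf"); err == nil {
		defer f.Close()
		if err := cfg.Read(f); err != nil {
			fmt.Println("invalid scheduler config:", err)
			os.Exit(1)
		}
	}

	fmt.Println("offer TTL:", cfg.OfferTTL.Duration)
}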

View File

@@ -0,0 +1,112 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func is_default(c *Config, t *testing.T) {
assert := assert.New(t)
assert.Equal(DefaultOfferTTL, c.OfferTTL.Duration)
assert.Equal(DefaultOfferLingerTTL, c.OfferLingerTTL.Duration)
assert.Equal(DefaultListenerDelay, c.ListenerDelay.Duration)
assert.Equal(DefaultUpdatesBacklog, c.UpdatesBacklog)
assert.Equal(DefaultFrameworkIdRefreshInterval, c.FrameworkIdRefreshInterval.Duration)
assert.Equal(DefaultInitialImplicitReconciliationDelay, c.InitialImplicitReconciliationDelay.Duration)
assert.Equal(DefaultExplicitReconciliationMaxBackoff, c.ExplicitReconciliationMaxBackoff.Duration)
assert.Equal(DefaultExplicitReconciliationAbortTimeout, c.ExplicitReconciliationAbortTimeout.Duration)
assert.Equal(DefaultInitialPodBackoff, c.InitialPodBackoff.Duration)
assert.Equal(DefaultMaxPodBackoff, c.MaxPodBackoff.Duration)
assert.Equal(DefaultHttpHandlerTimeout, c.HttpHandlerTimeout.Duration)
assert.Equal(DefaultHttpBindInterval, c.HttpBindInterval.Duration)
}
// Check that SetDefaults sets the default values
func TestConfig_SetDefaults(t *testing.T) {
c := &Config{}
c.SetDefaults()
is_default(c, t)
}
// Check that CreateDefaultConfig returns a default config
func TestConfig_CreateDefaultConfig(t *testing.T) {
c := CreateDefaultConfig()
is_default(c, t)
}
// Check that a config string can be parsed
func TestConfig_Read(t *testing.T) {
assert := assert.New(t)
c := CreateDefaultConfig()
reader := strings.NewReader(`
[scheduler]
offer-ttl=42s
offer-linger-ttl=42s
listener-delay=42s
updates-backlog=42
framework-id-refresh-interval=42s
initial-implicit-reconciliation-delay=42s
explicit-reconciliantion-max-backoff=42s
explicit-reconciliantion-abort-timeout=42s
initial-pod-backoff=42s
max-pod-backoff=42s
http-handler-timeout=42s
http-bind-interval=42s
`)
err := c.Read(reader)
if err != nil {
t.Fatal("Cannot parse scheduler config: " + err.Error())
}
assert.Equal(42*time.Second, c.OfferTTL.Duration)
assert.Equal(42*time.Second, c.OfferLingerTTL.Duration)
assert.Equal(42*time.Second, c.ListenerDelay.Duration)
assert.Equal(42, c.UpdatesBacklog)
assert.Equal(42*time.Second, c.FrameworkIdRefreshInterval.Duration)
assert.Equal(42*time.Second, c.InitialImplicitReconciliationDelay.Duration)
assert.Equal(42*time.Second, c.ExplicitReconciliationMaxBackoff.Duration)
assert.Equal(42*time.Second, c.ExplicitReconciliationAbortTimeout.Duration)
assert.Equal(42*time.Second, c.InitialPodBackoff.Duration)
assert.Equal(42*time.Second, c.MaxPodBackoff.Duration)
assert.Equal(42*time.Second, c.HttpHandlerTimeout.Duration)
assert.Equal(42*time.Second, c.HttpBindInterval.Duration)
}
// Check that an invalid config is rejected and none of the values are overwritten
func TestConfig_ReadError(t *testing.T) {
assert := assert.New(t)
c := CreateDefaultConfig()
reader := strings.NewReader(`
[scheduler]
offer-ttl = 42s
invalid-setting = 42s
`)
err := c.Read(reader)
if err == nil {
t.Fatal("Invalid scheduler config should lead to an error")
}
assert.NotEqual(42*time.Second, c.OfferTTL.Duration)
}

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package config provides mechanisms for low-level scheduler tuning.
package config

View File

@@ -0,0 +1,106 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package constraint
import (
"encoding/json"
"fmt"
)
type OperatorType int
const (
UniqueOperator OperatorType = iota
LikeOperator
ClusterOperator
GroupByOperator
UnlikeOperator
)
var (
labels = []string{
"UNIQUE",
"LIKE",
"CLUSTER",
"GROUP_BY",
"UNLIKE",
}
labelToType map[string]OperatorType
)
func init() {
labelToType = make(map[string]OperatorType)
for i, s := range labels {
labelToType[s] = OperatorType(i)
}
}
func (t OperatorType) String() string {
switch t {
case UniqueOperator, LikeOperator, ClusterOperator, GroupByOperator, UnlikeOperator:
return labels[int(t)]
default:
panic(fmt.Sprintf("unrecognized operator type: %d", int(t)))
}
}
func parseOperatorType(s string) (OperatorType, error) {
t, found := labelToType[s]
if !found {
return UniqueOperator, fmt.Errorf("unrecognized operator %q", s)
}
return t, nil
}
type Constraint struct {
Field string // required
Operator OperatorType // required
Value string // optional
}
func (c *Constraint) MarshalJSON() ([]byte, error) {
var a []string
if c != nil {
if c.Value != "" {
a = append(a, c.Field, c.Operator.String(), c.Value)
} else {
a = append(a, c.Field, c.Operator.String())
}
}
return json.Marshal(a)
}
func (c *Constraint) UnmarshalJSON(buf []byte) (err error) {
var a []string
if err = json.Unmarshal(buf, &a); err != nil {
return err
}
switch x := len(a); {
case x < 2:
err = fmt.Errorf("not enough arguments to form constraint")
case x > 3:
err = fmt.Errorf("too many arguments to form constraint")
case x == 3:
c.Value = a[2]
fallthrough
case x == 2:
c.Field = a[0]
c.Operator, err = parseOperatorType(a[1])
}
return err
}

View File

@@ -0,0 +1,79 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package constraint
import (
"encoding/json"
"testing"
)
func TestDeserialize(t *testing.T) {
shouldMatch := func(js string, field string, operator OperatorType, value string) (err error) {
constraint := Constraint{}
if err = json.Unmarshal(([]byte)(js), &constraint); err != nil {
return
}
if field != constraint.Field {
t.Fatalf("expected field %q instead of %q", field, constraint.Field)
}
if operator != constraint.Operator {
t.Fatalf("expected operator %v instead of %v", operator, constraint.Operator)
}
if value != constraint.Value {
t.Fatalf("expected value %q instead of %q", value, constraint.Value)
}
return
}
failOnError := func(err error) {
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}
failOnError(shouldMatch(`["hostname","UNIQUE"]`, "hostname", UniqueOperator, ""))
failOnError(shouldMatch(`["rackid","GROUP_BY","1"]`, "rackid", GroupByOperator, "1"))
failOnError(shouldMatch(`["jdk","LIKE","7"]`, "jdk", LikeOperator, "7"))
failOnError(shouldMatch(`["jdk","UNLIKE","7"]`, "jdk", UnlikeOperator, "7"))
failOnError(shouldMatch(`["bob","CLUSTER","foo"]`, "bob", ClusterOperator, "foo"))
err := shouldMatch(`["bill","NOT_REALLY_AN_OPERATOR","pete"]`, "bill", ClusterOperator, "pete")
if err == nil {
t.Fatalf("expected unmarshalling error for invalid operator")
}
}
func TestSerialize(t *testing.T) {
shouldMatch := func(expected string, constraint *Constraint) error {
data, err := json.Marshal(constraint)
if err != nil {
return err
}
js := string(data)
if js != expected {
t.Fatalf("expected json %q instead of %q", expected, js)
}
return nil
}
failOnError := func(err error) {
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}
failOnError(shouldMatch(`["hostname","UNIQUE"]`, &Constraint{"hostname", UniqueOperator, ""}))
failOnError(shouldMatch(`["rackid","GROUP_BY","1"]`, &Constraint{"rackid", GroupByOperator, "1"}))
failOnError(shouldMatch(`["jdk","LIKE","7"]`, &Constraint{"jdk", LikeOperator, "7"}))
failOnError(shouldMatch(`["jdk","UNLIKE","7"]`, &Constraint{"jdk", UnlikeOperator, "7"}))
failOnError(shouldMatch(`["bob","CLUSTER","foo"]`, &Constraint{"bob", ClusterOperator, "foo"}))
}

View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package constraint exposes Marathon-like constraints for scheduling pods.
// Incomplete.
package constraint

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package scheduler implements the Kubernetes Mesos scheduler.
package scheduler

View File

@@ -0,0 +1,57 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"fmt"
log "github.com/golang/glog"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
)
// A first-come-first-serve scheduler: acquires the first offer that can support the task
func FCFSScheduleFunc(r offers.Registry, unused SlaveIndex, task *podtask.T) (offers.Perishable, error) {
podName := fmt.Sprintf("%s/%s", task.Pod.Namespace, task.Pod.Name)
var acceptedOffer offers.Perishable
err := r.Walk(func(p offers.Perishable) (bool, error) {
offer := p.Details()
if offer == nil {
return false, fmt.Errorf("nil offer while scheduling task %v", task.ID)
}
if task.AcceptOffer(offer) {
if p.Acquire() {
acceptedOffer = p
log.V(3).Infof("Pod %s accepted offer %v", podName, offer.Id.GetValue())
return true, nil // stop, we found an offer
}
}
return false, nil // continue
})
if acceptedOffer != nil {
if err != nil {
log.Warningf("problems walking the offer registry: %v, attempting to continue", err)
}
return acceptedOffer, nil
}
if err != nil {
log.V(2).Infof("failed to find a fit for pod: %s, err = %v", podName, err)
return nil, err
}
log.V(2).Infof("failed to find a fit for pod: %s", podName)
return nil, noSuitableOffersErr
}

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package ha encapsulates high-availability scheduler concerns.
package ha

View File

@@ -0,0 +1,73 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ha
import (
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/election"
log "github.com/golang/glog"
)
type roleType int
const (
followerRole roleType = iota
masterRole
retiredRole
)
type candidateService struct {
sched *SchedulerProcess
newDriver DriverFactory
role roleType
valid ValidationFunc
}
type ValidationFunc func(desiredUid, currentUid string)
func NewCandidate(s *SchedulerProcess, f DriverFactory, v ValidationFunc) election.Service {
return &candidateService{
sched: s,
newDriver: f,
role: followerRole,
valid: v,
}
}
func (self *candidateService) Validate(desired, current election.Master) {
if self.valid != nil {
self.valid(string(desired), string(current))
}
}
func (self *candidateService) Start() {
if self.role == followerRole {
log.Info("elected as master")
self.role = masterRole
self.sched.Elect(self.newDriver)
}
}
func (self *candidateService) Stop() {
if self.role == masterRole {
log.Info("retiring from master")
self.role = retiredRole
// order is important here, watchers of a SchedulerProcess will
// check SchedulerProcess.Failover() once Done() is closed.
close(self.sched.failover)
self.sched.End()
}
}
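// Editor's note (illustrative, not part of the original commit): NewCandidate
// returns an election.Service; the surrounding leader-election machinery calls
// Start() when this scheduler wins the election and Stop() when it must retire,
// roughly like:
//
//	candidate := NewCandidate(schedProcess, driverFactory, nil)
//	// hand `candidate` to the election notifier, which drives Start()/Stop()
//
// schedProcess and driverFactory are assumed to be constructed by the caller.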

View File

@@ -0,0 +1,285 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ha
import (
"fmt"
"sync/atomic"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
bindings "github.com/mesos/mesos-go/scheduler"
)
type DriverFactory func() (bindings.SchedulerDriver, error)
type stageType int32
const (
initStage stageType = iota
standbyStage
masterStage
finStage
)
func (stage *stageType) transition(from, to stageType) bool {
return atomic.CompareAndSwapInt32((*int32)(stage), int32(from), int32(to))
}
func (s *stageType) transitionTo(to stageType, unless ...stageType) bool {
if len(unless) == 0 {
atomic.StoreInt32((*int32)(s), int32(to))
return true
}
for {
state := s.get()
for _, x := range unless {
if state == x {
return false
}
}
if s.transition(state, to) {
return true
}
}
}
func (stage *stageType) get() stageType {
return stageType(atomic.LoadInt32((*int32)(stage)))
}
// execute some action in the deferred context of the process, but only if we
// match the stage of the process at the time the action is executed.
func (stage stageType) Do(p *SchedulerProcess, a proc.Action) <-chan error {
errOnce := proc.NewErrorOnce(p.fin)
errOuter := p.Do(proc.Action(func() {
switch stage {
case standbyStage:
//await standby signal or death
select {
case <-p.standby:
case <-p.Done():
}
case masterStage:
//await elected signal or death
select {
case <-p.elected:
case <-p.Done():
}
case finStage:
errOnce.Reportf("scheduler process is dying, dropping action")
return
default:
}
errOnce.Report(stage.When(p, a))
}))
return errOnce.Send(errOuter).Err()
}
// execute some action only if we match the stage of the scheduler process
func (stage stageType) When(p *SchedulerProcess, a proc.Action) (err error) {
if stage != (&p.stage).get() {
err = fmt.Errorf("failed to execute deferred action, expected lifecycle stage %v instead of %v", stage, p.stage)
} else {
a()
}
return
}
type SchedulerProcess struct {
proc.Process
bindings.Scheduler
stage stageType
elected chan struct{} // upon close we've been elected
failover chan struct{} // closed indicates that we should failover upon End()
standby chan struct{}
fin chan struct{}
}
func New(sched bindings.Scheduler) *SchedulerProcess {
p := &SchedulerProcess{
Process: proc.New(),
Scheduler: sched,
stage: initStage,
elected: make(chan struct{}),
failover: make(chan struct{}),
standby: make(chan struct{}),
fin: make(chan struct{}),
}
runtime.On(p.Running(), p.begin)
return p
}
func (self *SchedulerProcess) begin() {
if (&self.stage).transition(initStage, standbyStage) {
close(self.standby)
log.Infoln("scheduler process entered standby stage")
} else {
log.Errorf("failed to transition from init to standby stage")
}
}
func (self *SchedulerProcess) End() <-chan struct{} {
if (&self.stage).transitionTo(finStage, finStage) {
defer close(self.fin)
log.Infoln("scheduler process entered fin stage")
}
return self.Process.End()
}
func (self *SchedulerProcess) Elect(newDriver DriverFactory) {
errOnce := proc.NewErrorOnce(self.fin)
proc.OnError(errOnce.Send(standbyStage.Do(self, proc.Action(func() {
if !(&self.stage).transition(standbyStage, masterStage) {
log.Errorf("failed to transition from standby to master stage, aborting")
self.End()
return
}
log.Infoln("scheduler process entered master stage")
drv, err := newDriver()
if err != nil {
log.Errorf("failed to fetch scheduler driver: %v", err)
self.End()
return
}
log.V(1).Infoln("starting driver...")
stat, err := drv.Start()
if stat == mesos.Status_DRIVER_RUNNING && err == nil {
log.Infoln("driver started successfully and is running")
close(self.elected)
go func() {
defer self.End()
_, err := drv.Join()
if err != nil {
log.Errorf("driver failed with error: %v", err)
}
errOnce.Report(err)
}()
return
}
defer self.End()
if err != nil {
log.Errorf("failed to start scheduler driver: %v", err)
} else {
log.Errorf("expected RUNNING status, not %v", stat)
}
}))).Err(), func(err error) {
defer self.End()
log.Errorf("failed to handle election event, aborting: %v", err)
}, self.fin)
}
func (self *SchedulerProcess) Terminal() <-chan struct{} {
return self.fin
}
func (self *SchedulerProcess) Elected() <-chan struct{} {
return self.elected
}
func (self *SchedulerProcess) Failover() <-chan struct{} {
return self.failover
}
type masterProcess struct {
*SchedulerProcess
doer proc.Doer
}
func (self *masterProcess) Done() <-chan struct{} {
return self.SchedulerProcess.Terminal()
}
func (self *masterProcess) Do(a proc.Action) <-chan error {
return self.doer.Do(a)
}
// returns a Process instance that will only execute a proc.Action if the scheduler is the elected master
func (self *SchedulerProcess) Master() proc.Process {
return &masterProcess{
SchedulerProcess: self,
doer: proc.DoWith(self, proc.DoerFunc(func(a proc.Action) <-chan error {
return proc.ErrorChan(masterStage.When(self, a))
})),
}
}
func (self *SchedulerProcess) logError(ch <-chan error) {
self.OnError(ch, func(err error) {
log.Errorf("failed to execute scheduler action: %v", err)
})
}
func (self *SchedulerProcess) Registered(drv bindings.SchedulerDriver, fid *mesos.FrameworkID, mi *mesos.MasterInfo) {
self.logError(self.Master().Do(proc.Action(func() {
self.Scheduler.Registered(drv, fid, mi)
})))
}
func (self *SchedulerProcess) Reregistered(drv bindings.SchedulerDriver, mi *mesos.MasterInfo) {
self.logError(self.Master().Do(proc.Action(func() {
self.Scheduler.Reregistered(drv, mi)
})))
}
func (self *SchedulerProcess) Disconnected(drv bindings.SchedulerDriver) {
self.logError(self.Master().Do(proc.Action(func() {
self.Scheduler.Disconnected(drv)
})))
}
func (self *SchedulerProcess) ResourceOffers(drv bindings.SchedulerDriver, off []*mesos.Offer) {
self.logError(self.Master().Do(proc.Action(func() {
self.Scheduler.ResourceOffers(drv, off)
})))
}
func (self *SchedulerProcess) OfferRescinded(drv bindings.SchedulerDriver, oid *mesos.OfferID) {
self.logError(self.Master().Do(proc.Action(func() {
self.Scheduler.OfferRescinded(drv, oid)
})))
}
func (self *SchedulerProcess) StatusUpdate(drv bindings.SchedulerDriver, ts *mesos.TaskStatus) {
self.logError(self.Master().Do(proc.Action(func() {
self.Scheduler.StatusUpdate(drv, ts)
})))
}
func (self *SchedulerProcess) FrameworkMessage(drv bindings.SchedulerDriver, eid *mesos.ExecutorID, sid *mesos.SlaveID, m string) {
self.logError(self.Master().Do(proc.Action(func() {
self.Scheduler.FrameworkMessage(drv, eid, sid, m)
})))
}
func (self *SchedulerProcess) SlaveLost(drv bindings.SchedulerDriver, sid *mesos.SlaveID) {
self.logError(self.Master().Do(proc.Action(func() {
self.Scheduler.SlaveLost(drv, sid)
})))
}
func (self *SchedulerProcess) ExecutorLost(drv bindings.SchedulerDriver, eid *mesos.ExecutorID, sid *mesos.SlaveID, x int) {
self.logError(self.Master().Do(proc.Action(func() {
self.Scheduler.ExecutorLost(drv, eid, sid, x)
})))
}
func (self *SchedulerProcess) Error(drv bindings.SchedulerDriver, msg string) {
self.Scheduler.Error(drv, msg)
}
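// Editor's sketch (not part of the original commit): a minimal, non-HA style
// lifecycle for the SchedulerProcess defined above. The sched and factory
// arguments are assumed to be provided by the caller (see the plugin test for
// a concrete construction via ha.New).
func exampleSchedulerProcessLifecycle(sched bindings.Scheduler, factory DriverFactory) {
	p := New(sched)
	// trigger election directly; in an HA setup this is done by candidateService.Start()
	p.Elect(factory)
	select {
	case <-p.Elected():
		log.Infoln("scheduler driver started; process is now master")
	case <-p.Terminal():
		log.Infoln("scheduler process ended before election completed")
	}
}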

View File

@@ -0,0 +1,30 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package meta
// kubernetes api object annotations
const (
BindingHostKey = "k8s.mesosphere.io/bindingHost"
TaskIdKey = "k8s.mesosphere.io/taskId"
SlaveIdKey = "k8s.mesosphere.io/slaveId"
OfferIdKey = "k8s.mesosphere.io/offerId"
ExecutorIdKey = "k8s.mesosphere.io/executorId"
PortMappingKeyPrefix = "k8s.mesosphere.io/port_"
PortMappingKeyFormat = PortMappingKeyPrefix + "%s_%d"
PortNameMappingKeyPrefix = "k8s.mesosphere.io/portName_"
PortNameMappingKeyFormat = PortNameMappingKeyPrefix + "%s_%s"
)
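// Editor's note (illustrative, not part of the original commit): the port
// mapping formats are filled in by the scheduler when preparing a task for
// launch, e.g.
//
//	fmt.Sprintf(PortMappingKeyFormat, "TCP", 8080)      // "k8s.mesosphere.io/port_TCP_8080"
//	fmt.Sprintf(PortNameMappingKeyFormat, "TCP", "web") // "k8s.mesosphere.io/portName_TCP_web"
//
// (see prepareTaskForLaunch in the scheduler plugin for the real call sites).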

View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package meta defines framework constants used as keys in k8s annotations
// that are attached to k8s pods
package meta

View File

@@ -0,0 +1,24 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package meta
// keys for things that we store
const (
//TODO(jdef) this should also be a format instead of a fixed path
FrameworkIDKey = "/mesos/k8sm/frameworkid"
DefaultElectionFormat = "/mesos/k8sm/framework/%s/leader"
)
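// Editor's note (illustrative, not part of the original commit): the election
// format is expanded with a framework name before use, e.g.
//
//	fmt.Sprintf(DefaultElectionFormat, "kubernetes") // "/mesos/k8sm/framework/kubernetes/leader"
//
// where "kubernetes" is only an assumed example name.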

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package metrics defines and exposes instrumentation metrics of the scheduler.
package metrics

View File

@@ -0,0 +1,102 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
)
const (
schedulerSubsystem = "k8sm_scheduler"
)
var (
QueueWaitTime = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: schedulerSubsystem,
Name: "queue_wait_time_microseconds",
Help: "Launch queue wait time in microseconds",
},
)
BindLatency = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: schedulerSubsystem,
Name: "bind_latency_microseconds",
Help: "Latency in microseconds between pod-task launch and pod binding.",
},
)
StatusUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: schedulerSubsystem,
Name: "status_updates",
Help: "Counter of TaskStatus updates, broken out by source, reason, state.",
},
[]string{"source", "reason", "state"},
)
ReconciliationLatency = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: schedulerSubsystem,
Name: "reconciliation_latency_microseconds",
Help: "Latency in microseconds to execute explicit task reconciliation.",
},
)
ReconciliationRequested = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: schedulerSubsystem,
Name: "reconciliation_requested",
Help: "Counter of requested task reconciliations, broken out by kind.",
},
[]string{"kind"},
)
ReconciliationExecuted = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: schedulerSubsystem,
Name: "reconciliation_executed",
Help: "Counter of executed task reconciliations requests, broken out by kind.",
},
[]string{"kind"},
)
ReconciliationCancelled = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: schedulerSubsystem,
Name: "reconciliation_cancelled",
Help: "Counter of cancelled task reconciliations requests, broken out by kind.",
},
[]string{"kind"},
)
)
var registerMetrics sync.Once
func Register() {
registerMetrics.Do(func() {
prometheus.MustRegister(QueueWaitTime)
prometheus.MustRegister(BindLatency)
prometheus.MustRegister(StatusUpdates)
prometheus.MustRegister(ReconciliationLatency)
prometheus.MustRegister(ReconciliationRequested)
prometheus.MustRegister(ReconciliationExecuted)
prometheus.MustRegister(ReconciliationCancelled)
})
}
func InMicroseconds(d time.Duration) float64 {
return float64(d.Nanoseconds() / time.Microsecond.Nanoseconds())
}
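// Editor's sketch (not part of the original commit): typical use of the metrics
// defined above. The start parameter is assumed to mark when a pod-task entered
// the launch queue.
func exampleObserveQueueWait(start time.Time) {
	Register() // idempotent; the actual registration runs exactly once
	QueueWaitTime.Observe(InMicroseconds(time.Since(start)))
}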

View File

@@ -0,0 +1,203 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"sync"
"testing"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
mesos "github.com/mesos/mesos-go/mesosproto"
"github.com/stretchr/testify/mock"
)
// MockScheduler implements the schedulerInterface defined by the scheduler plugin
type MockScheduler struct {
sync.RWMutex
mock.Mock
}
func (m *MockScheduler) slaveFor(id string) (slave *Slave, ok bool) {
args := m.Called(id)
x := args.Get(0)
if x != nil {
slave = x.(*Slave)
}
ok = args.Bool(1)
return
}
func (m *MockScheduler) algorithm() (f PodScheduleFunc) {
args := m.Called()
x := args.Get(0)
if x != nil {
f = x.(PodScheduleFunc)
}
return
}
func (m *MockScheduler) createPodTask(ctx api.Context, pod *api.Pod) (task *podtask.T, err error) {
args := m.Called(ctx, pod)
x := args.Get(0)
if x != nil {
task = x.(*podtask.T)
}
err = args.Error(1)
return
}
func (m *MockScheduler) offers() (f offers.Registry) {
args := m.Called()
x := args.Get(0)
if x != nil {
f = x.(offers.Registry)
}
return
}
func (m *MockScheduler) tasks() (f podtask.Registry) {
args := m.Called()
x := args.Get(0)
if x != nil {
f = x.(podtask.Registry)
}
return
}
func (m *MockScheduler) killTask(taskId string) error {
args := m.Called(taskId)
return args.Error(0)
}
func (m *MockScheduler) launchTask(task *podtask.T) error {
args := m.Called(task)
return args.Error(0)
}
// @deprecated this is a placeholder for me to test the mock package
func TestNoSlavesYet(t *testing.T) {
obj := &MockScheduler{}
obj.On("slaveFor", "foo").Return(nil, false)
obj.slaveFor("foo")
obj.AssertExpectations(t)
}
/*-----------------------------------------------------------------------------
|
| this really belongs in the mesos-go package, but that's being updated soon
| any way so just keep it here for now unless we *really* need it there.
|
\-----------------------------------------------------------------------------
// Scheduler defines the interfaces that needed to be implemented.
type Scheduler interface {
Registered(SchedulerDriver, *FrameworkID, *MasterInfo)
Reregistered(SchedulerDriver, *MasterInfo)
Disconnected(SchedulerDriver)
ResourceOffers(SchedulerDriver, []*Offer)
OfferRescinded(SchedulerDriver, *OfferID)
StatusUpdate(SchedulerDriver, *TaskStatus)
FrameworkMessage(SchedulerDriver, *ExecutorID, *SlaveID, string)
SlaveLost(SchedulerDriver, *SlaveID)
ExecutorLost(SchedulerDriver, *ExecutorID, *SlaveID, int)
Error(SchedulerDriver, string)
}
*/
func status(args mock.Arguments, at int) (val mesos.Status) {
if x := args.Get(at); x != nil {
val = x.(mesos.Status)
}
return
}
type extendedMock struct {
mock.Mock
}
// Upon returns a chan that closes upon the execution of the most recently registered call.
func (m *extendedMock) Upon() <-chan struct{} {
ch := make(chan struct{})
call := &m.ExpectedCalls[len(m.ExpectedCalls)-1]
f := call.Run
call.Run = func(args mock.Arguments) {
defer close(ch)
if f != nil {
f(args)
}
}
return ch
}
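// Editor's note (illustrative): Upon is combined with testify's Call API, as in
// the plugin lifecycle test further below:
//
//	mockDriver.On("Start").Return(mesos.Status_DRIVER_RUNNING, nil).Once()
//	started := mockDriver.Upon() // closes once Start() has actually been called
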
type MockSchedulerDriver struct {
extendedMock
}
func (m *MockSchedulerDriver) Init() error {
args := m.Called()
return args.Error(0)
}
func (m *MockSchedulerDriver) Start() (mesos.Status, error) {
args := m.Called()
return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) Stop(b bool) (mesos.Status, error) {
args := m.Called(b)
return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) Abort() (mesos.Status, error) {
args := m.Called()
return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) Join() (mesos.Status, error) {
args := m.Called()
return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) Run() (mesos.Status, error) {
args := m.Called()
return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) RequestResources(r []*mesos.Request) (mesos.Status, error) {
args := m.Called(r)
return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) ReconcileTasks(statuses []*mesos.TaskStatus) (mesos.Status, error) {
args := m.Called(statuses)
return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) LaunchTasks(offerIds []*mesos.OfferID, ti []*mesos.TaskInfo, f *mesos.Filters) (mesos.Status, error) {
args := m.Called(offerIds, ti, f)
return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) KillTask(tid *mesos.TaskID) (mesos.Status, error) {
args := m.Called(tid)
return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) DeclineOffer(oid *mesos.OfferID, f *mesos.Filters) (mesos.Status, error) {
args := m.Called(oid, f)
return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) ReviveOffers() (mesos.Status, error) {
args := m.Called()
return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) SendFrameworkMessage(eid *mesos.ExecutorID, sid *mesos.SlaveID, s string) (mesos.Status, error) {
args := m.Called(eid, sid, s)
return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) Destroy() {
m.Called()
}
func (m *MockSchedulerDriver) Wait() {
m.Called()
}

View File

@@ -0,0 +1,875 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"fmt"
"io"
"net/http"
"strconv"
"sync"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/backoff"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
annotation "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
plugin "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler"
"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm"
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
mutil "github.com/mesos/mesos-go/mesosutil"
)
const (
enqueuePopTimeout = 200 * time.Millisecond
enqueueWaitTimeout = 1 * time.Second
yieldPopTimeout = 200 * time.Millisecond
yieldWaitTimeout = 1 * time.Second
pluginRecoveryDelay = 100 * time.Millisecond // delay after scheduler plugin crashes, before we resume scheduling
)
// scheduler abstraction to allow for easier unit testing
type schedulerInterface interface {
sync.Locker // synchronize scheduler plugin operations
SlaveIndex
algorithm() PodScheduleFunc
offers() offers.Registry
tasks() podtask.Registry
// driver calls
killTask(taskId string) error
launchTask(*podtask.T) error
// convenience
createPodTask(api.Context, *api.Pod) (*podtask.T, error)
}
type k8smScheduler struct {
sync.Mutex
internal *KubernetesScheduler
}
func (k *k8smScheduler) algorithm() PodScheduleFunc {
return k.internal.scheduleFunc
}
func (k *k8smScheduler) offers() offers.Registry {
return k.internal.offers
}
func (k *k8smScheduler) tasks() podtask.Registry {
return k.internal.taskRegistry
}
func (k *k8smScheduler) createPodTask(ctx api.Context, pod *api.Pod) (*podtask.T, error) {
return podtask.New(ctx, "", *pod, k.internal.executor)
}
func (k *k8smScheduler) slaveFor(id string) (slave *Slave, ok bool) {
slave, ok = k.internal.slaves.getSlave(id)
return
}
func (k *k8smScheduler) killTask(taskId string) error {
killTaskId := mutil.NewTaskID(taskId)
_, err := k.internal.driver.KillTask(killTaskId)
return err
}
func (k *k8smScheduler) launchTask(task *podtask.T) error {
// assume caller is holding scheduler lock
taskList := []*mesos.TaskInfo{task.BuildTaskInfo()}
offerIds := []*mesos.OfferID{task.Offer.Details().Id}
filters := &mesos.Filters{}
_, err := k.internal.driver.LaunchTasks(offerIds, taskList, filters)
return err
}
type binder struct {
api schedulerInterface
}
// implements binding.Registry, launches the pod-associated-task in mesos
func (b *binder) Bind(binding *api.Binding) error {
ctx := api.WithNamespace(api.NewContext(), binding.Namespace)
// default upstream scheduler passes pod.Name as binding.Name
podKey, err := podtask.MakePodKey(ctx, binding.Name)
if err != nil {
return err
}
b.api.Lock()
defer b.api.Unlock()
switch task, state := b.api.tasks().ForPod(podKey); state {
case podtask.StatePending:
return b.bind(ctx, binding, task)
default:
// in this case it's likely that the pod has been deleted between Schedule
// and Bind calls
log.Infof("No pending task for pod %s", podKey)
return noSuchPodErr //TODO(jdef) this error is somewhat misleading since the task could be running?!
}
}
func (b *binder) rollback(task *podtask.T, err error) error {
task.Offer.Release()
task.Reset()
if err2 := b.api.tasks().Update(task); err2 != nil {
log.Errorf("failed to update pod task: %v", err2)
}
return err
}
// assumes that: caller has acquired scheduler lock and that the task is still pending
func (b *binder) bind(ctx api.Context, binding *api.Binding, task *podtask.T) (err error) {
// sanity check: ensure the task still HasAcceptedOffer(); it's possible that between
// Schedule() and now the offer for this task was rescinded or invalidated
// (we should never see that here)
if !task.HasAcceptedOffer() {
return fmt.Errorf("task has not accepted a valid offer %v", task.ID)
}
// By this time, there is a chance that the slave is disconnected.
offerId := task.GetOfferId()
if offer, ok := b.api.offers().Get(offerId); !ok || offer.HasExpired() {
// already rescinded or timed out or otherwise invalidated
return b.rollback(task, fmt.Errorf("failed prior to launchTask due to expired offer for task %v", task.ID))
}
if err = b.prepareTaskForLaunch(ctx, binding.Target.Name, task, offerId); err == nil {
log.V(2).Infof("launching task: %q on target %q slave %q for pod \"%v/%v\"",
task.ID, binding.Target.Name, task.Spec.SlaveID, task.Pod.Namespace, task.Pod.Name)
if err = b.api.launchTask(task); err == nil {
b.api.offers().Invalidate(offerId)
task.Set(podtask.Launched)
if err = b.api.tasks().Update(task); err != nil {
// this should only happen if the task has been removed or has changed status,
// which SHOULD NOT HAPPEN as long as we're synchronizing correctly
log.Errorf("failed to update task w/ Launched status: %v", err)
}
return
}
}
return b.rollback(task, fmt.Errorf("Failed to launch task %v: %v", task.ID, err))
}
//TODO(jdef) unit test this, ensure that task's copy of api.Pod is not modified
func (b *binder) prepareTaskForLaunch(ctx api.Context, machine string, task *podtask.T, offerId string) error {
pod := task.Pod
// we make an effort here to avoid making changes to the task's copy of the pod, since
// we want that to reflect the initial user spec, and not the modified spec that we
// build for the executor to consume.
oemCt := pod.Spec.Containers
pod.Spec.Containers = append([]api.Container{}, oemCt...) // (shallow) clone before mod
if pod.Annotations == nil {
pod.Annotations = make(map[string]string)
} else {
oemAnn := pod.Annotations
pod.Annotations = make(map[string]string)
for k, v := range oemAnn {
pod.Annotations[k] = v
}
}
pod.Annotations[annotation.BindingHostKey] = machine
task.SaveRecoveryInfo(pod.Annotations)
for _, entry := range task.Spec.PortMap {
oemPorts := pod.Spec.Containers[entry.ContainerIdx].Ports
ports := append([]api.ContainerPort{}, oemPorts...)
p := &ports[entry.PortIdx]
p.HostPort = int(entry.OfferPort)
op := strconv.FormatUint(entry.OfferPort, 10)
pod.Annotations[fmt.Sprintf(annotation.PortMappingKeyFormat, p.Protocol, p.ContainerPort)] = op
if p.Name != "" {
pod.Annotations[fmt.Sprintf(annotation.PortNameMappingKeyFormat, p.Protocol, p.Name)] = op
}
pod.Spec.Containers[entry.ContainerIdx].Ports = ports
}
// the kubelet-executor uses this to instantiate the pod
log.V(3).Infof("prepared pod spec: %+v", pod)
data, err := api.Codec.Encode(&pod)
if err != nil {
log.V(2).Infof("Failed to marshal the pod spec: %v", err)
return err
}
task.Spec.Data = data
return nil
}
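// Editor's note (illustrative, not part of the original commit): for a pod with
// a single TCP container port 8080 mapped to offer port 31000 and bound to host
// "slave-1" (all values assumed for the example), prepareTaskForLaunch leaves
// annotations roughly like
//
//	k8s.mesosphere.io/bindingHost:   slave-1
//	k8s.mesosphere.io/port_TCP_8080: "31000"
//
// plus whatever SaveRecoveryInfo records under the id keys defined in the meta package.
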
type kubeScheduler struct {
api schedulerInterface
podUpdates queue.FIFO
}
// Schedule implements the Scheduler interface of Kubernetes.
// It returns the name of the selected machine and an error, if any.
func (k *kubeScheduler) Schedule(pod *api.Pod, unused algorithm.MinionLister) (string, error) {
log.Infof("Try to schedule pod %v\n", pod.Name)
ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)
// default upstream scheduler passes pod.Name as binding.PodID
podKey, err := podtask.MakePodKey(ctx, pod.Name)
if err != nil {
return "", err
}
k.api.Lock()
defer k.api.Unlock()
switch task, state := k.api.tasks().ForPod(podKey); state {
case podtask.StateUnknown:
// There's a bit of a potential race here, a pod could have been yielded() and
// then before we get *here* it could be deleted.
// We use meta to index the pod in the store since that's what k8s reflector does.
podName, err := cache.MetaNamespaceKeyFunc(pod)
if err != nil {
log.Warningf("aborting Schedule, unable to understand pod object %+v", pod)
return "", noSuchPodErr
}
if deleted := k.podUpdates.Poll(podName, queue.DELETE_EVENT); deleted {
// avoid scheduling a pod that's been deleted between yieldPod() and Schedule()
log.Infof("aborting Schedule, pod has been deleted %+v", pod)
return "", noSuchPodErr
}
return k.doSchedule(k.api.tasks().Register(k.api.createPodTask(ctx, pod)))
//TODO(jdef) it's possible that the pod state has diverged from what
//we knew previously, we should probably update the task.Pod state here
//before proceeding with scheduling
case podtask.StatePending:
if pod.UID != task.Pod.UID {
// we're dealing with a brand new pod spec here, so the old one must have been
// deleted -- and so our task store is out of sync w/ respect to reality
//TODO(jdef) reconcile task
return "", fmt.Errorf("task %v spec is out of sync with pod %v spec, aborting schedule", task.ID, pod.Name)
} else if task.Has(podtask.Launched) {
// task has been marked as "launched" but the pod binding creation may have failed in k8s,
// but we're going to let someone else handle it, probably the mesos task error handler
return "", fmt.Errorf("task %s has already been launched, aborting schedule", task.ID)
} else {
return k.doSchedule(task, nil)
}
default:
return "", fmt.Errorf("task %s is not pending, nothing to schedule", task.ID)
}
}
// Call ScheduleFunc and subtract some resources, returning the name of the machine the task is scheduled on
func (k *kubeScheduler) doSchedule(task *podtask.T, err error) (string, error) {
var offer offers.Perishable
if task.HasAcceptedOffer() {
// verify that the offer is still on the table
offerId := task.GetOfferId()
if o, ok := k.api.offers().Get(offerId); ok && !o.HasExpired() {
// skip tasks that have already been assigned an offer; note that we must not
// shadow the outer offer variable here, otherwise the assignment below is lost
offer = task.Offer
} else {
task.Offer.Release()
task.Reset()
if err = k.api.tasks().Update(task); err != nil {
return "", err
}
}
}
if err == nil && offer == nil {
offer, err = k.api.algorithm()(k.api.offers(), k.api, task)
}
if err != nil {
return "", err
}
details := offer.Details()
if details == nil {
return "", fmt.Errorf("offer already invalid/expired for task %v", task.ID)
}
slaveId := details.GetSlaveId().GetValue()
if slave, ok := k.api.slaveFor(slaveId); !ok {
// not much sense in Release()ing the offer here since its owner died
offer.Release()
k.api.offers().Invalidate(details.Id.GetValue())
return "", fmt.Errorf("Slave disappeared (%v) while scheduling task %v", slaveId, task.ID)
} else {
if task.Offer != nil && task.Offer != offer {
return "", fmt.Errorf("task.offer assignment must be idempotent, task %+v: offer %+v", task, offer)
}
task.Offer = offer
task.FillFromDetails(details)
if err := k.api.tasks().Update(task); err != nil {
offer.Release()
return "", err
}
return slave.HostName, nil
}
}
type queuer struct {
lock sync.Mutex // shared by condition variables of this struct
podUpdates queue.FIFO // queue of pod updates to be processed
podQueue *queue.DelayFIFO // queue of pods to be scheduled
deltaCond sync.Cond // pod changes are available for processing
unscheduledCond sync.Cond // there are unscheduled pods for processing
}
func newQueuer(store queue.FIFO) *queuer {
q := &queuer{
podQueue: queue.NewDelayFIFO(),
podUpdates: store,
}
q.deltaCond.L = &q.lock
q.unscheduledCond.L = &q.lock
return q
}
func (q *queuer) installDebugHandlers(mux *http.ServeMux) {
mux.HandleFunc("/debug/scheduler/podqueue", func(w http.ResponseWriter, r *http.Request) {
for _, x := range q.podQueue.List() {
if _, err := io.WriteString(w, fmt.Sprintf("%+v\n", x)); err != nil {
break
}
}
})
mux.HandleFunc("/debug/scheduler/podstore", func(w http.ResponseWriter, r *http.Request) {
for _, x := range q.podUpdates.List() {
if _, err := io.WriteString(w, fmt.Sprintf("%+v\n", x)); err != nil {
break
}
}
})
}
// signal that there are probably pod updates waiting to be processed
func (q *queuer) updatesAvailable() {
q.deltaCond.Broadcast()
}
// delete a pod from the to-be-scheduled queue
func (q *queuer) dequeue(id string) {
q.podQueue.Delete(id)
}
// re-add a pod to the to-be-scheduled queue, will not overwrite existing pod data (that
// may have already changed).
func (q *queuer) requeue(pod *Pod) {
// use KeepExisting in case the pod has already been updated (can happen if binding fails
// due to constraint violations); we don't want to overwrite a newer entry with stale data.
q.podQueue.Add(pod, queue.KeepExisting)
q.unscheduledCond.Broadcast()
}
// same as requeue but calls podQueue.Offer instead of podQueue.Add
func (q *queuer) reoffer(pod *Pod) {
// use KeepExisting in case the pod has already been updated (can happen if binding fails
// due to constraint violations); we don't want to overwrite a newer entry with stale data.
if q.podQueue.Offer(pod, queue.KeepExisting) {
q.unscheduledCond.Broadcast()
}
}
// spawns a go-routine to watch for unscheduled pods and queue them up
// for scheduling. returns immediately.
func (q *queuer) Run(done <-chan struct{}) {
go runtime.Until(func() {
log.Info("Watching for newly created pods")
q.lock.Lock()
defer q.lock.Unlock()
for {
// limit blocking here for short intervals so that scheduling
// may proceed even if there have been no recent pod changes
p := q.podUpdates.Await(enqueuePopTimeout)
if p == nil {
signalled := runtime.After(q.deltaCond.Wait)
// we've yielded the lock
select {
case <-time.After(enqueueWaitTimeout):
q.deltaCond.Broadcast() // abort Wait()
<-signalled // wait for lock re-acquisition
log.V(4).Infoln("timed out waiting for a pod update")
case <-signalled:
// we've acquired the lock and there may be
// changes for us to process now
}
continue
}
pod := p.(*Pod)
if pod.Spec.NodeName != "" {
log.V(3).Infof("dequeuing pod for scheduling: %v", pod.Pod.Name)
q.dequeue(pod.GetUID())
} else {
// use ReplaceExisting because we are always pushing the latest state
now := time.Now()
pod.deadline = &now
if q.podQueue.Offer(pod, queue.ReplaceExisting) {
q.unscheduledCond.Broadcast()
log.V(3).Infof("queued pod for scheduling: %v", pod.Pod.Name)
} else {
log.Warningf("failed to queue pod for scheduling: %v", pod.Pod.Name)
}
}
}
}, 1*time.Second, done)
}
// implementation of scheduling plugin's NextPod func; see k8s plugin/pkg/scheduler
func (q *queuer) yield() *api.Pod {
log.V(2).Info("attempting to yield a pod")
q.lock.Lock()
defer q.lock.Unlock()
for {
// limit blocking here to short intervals so that we don't block the
// enqueuer Run() routine for very long
kpod := q.podQueue.Await(yieldPopTimeout)
if kpod == nil {
signalled := runtime.After(q.unscheduledCond.Wait)
// lock is yielded at this point and we're going to wait for either
// a timeout, or a signal that there's data
select {
case <-time.After(yieldWaitTimeout):
q.unscheduledCond.Broadcast() // abort Wait()
<-signalled // wait for the go-routine, and the lock
log.V(4).Infoln("timed out waiting for a pod to yield")
case <-signalled:
// we have acquired the lock, and there
// may be a pod for us to pop now
}
continue
}
pod := kpod.(*Pod).Pod
if podName, err := cache.MetaNamespaceKeyFunc(pod); err != nil {
log.Warningf("yield unable to understand pod object %+v, will skip: %v", pod, err)
} else if !q.podUpdates.Poll(podName, queue.POP_EVENT) {
log.V(1).Infof("yield popped a transitioning pod, skipping: %+v", pod)
} else if pod.Spec.NodeName != "" {
// should never happen if enqueuePods is filtering properly
log.Warningf("yield popped an already-scheduled pod, skipping: %+v", pod)
} else {
return pod
}
}
}
type errorHandler struct {
api schedulerInterface
backoff *backoff.Backoff
qr *queuer
}
// implementation of scheduling plugin's Error func; see plugin/pkg/scheduler
func (k *errorHandler) handleSchedulingError(pod *api.Pod, schedulingErr error) {
if schedulingErr == noSuchPodErr {
log.V(2).Infof("Not rescheduling non-existent pod %v", pod.Name)
return
}
log.Infof("Error scheduling %v: %v; retrying", pod.Name, schedulingErr)
defer util.HandleCrash()
// default upstream scheduler passes pod.Name as binding.PodID
ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)
podKey, err := podtask.MakePodKey(ctx, pod.Name)
if err != nil {
log.Errorf("Failed to construct pod key, aborting scheduling for pod %v: %v", pod.Name, err)
return
}
k.backoff.GC()
k.api.Lock()
defer k.api.Unlock()
switch task, state := k.api.tasks().ForPod(podKey); state {
case podtask.StateUnknown:
// if we don't have a mapping here any more then someone deleted the pod
log.V(2).Infof("Could not resolve pod to task, aborting pod reschdule: %s", podKey)
return
case podtask.StatePending:
if task.Has(podtask.Launched) {
log.V(2).Infof("Skipping re-scheduling for already-launched pod %v", podKey)
return
}
breakoutEarly := queue.BreakChan(nil)
if schedulingErr == noSuitableOffersErr {
log.V(3).Infof("adding backoff breakout handler for pod %v", podKey)
breakoutEarly = queue.BreakChan(k.api.offers().Listen(podKey, func(offer *mesos.Offer) bool {
k.api.Lock()
defer k.api.Unlock()
switch task, state := k.api.tasks().Get(task.ID); state {
case podtask.StatePending:
return !task.Has(podtask.Launched) && task.AcceptOffer(offer)
default:
// no point in continuing to check for matching offers
return true
}
}))
}
delay := k.backoff.Get(podKey)
log.V(3).Infof("requeuing pod %v with delay %v", podKey, delay)
k.qr.requeue(&Pod{Pod: pod, delay: &delay, notify: breakoutEarly})
default:
log.V(2).Infof("Task is no longer pending, aborting reschedule for pod %v", podKey)
}
}
type deleter struct {
api schedulerInterface
qr *queuer
}
// Run currently monitors for "pod deleted" events, upon which deleteOne()
// is invoked.
func (k *deleter) Run(updates <-chan queue.Entry, done <-chan struct{}) {
go runtime.Until(func() {
for {
entry := <-updates
pod := entry.Value().(*Pod)
if entry.Is(queue.DELETE_EVENT) {
if err := k.deleteOne(pod); err != nil {
log.Error(err)
}
} else if !entry.Is(queue.POP_EVENT) {
k.qr.updatesAvailable()
}
}
}, 1*time.Second, done)
}
func (k *deleter) deleteOne(pod *Pod) error {
ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)
podKey, err := podtask.MakePodKey(ctx, pod.Name)
if err != nil {
return err
}
log.V(2).Infof("pod deleted: %v", podKey)
// order is important here: we want to make sure we have the lock before
// removing the pod from the scheduling queue. this makes the concurrent
// execution of scheduler-error-handling and delete-handling easier to
// reason about.
k.api.Lock()
defer k.api.Unlock()
// prevent the scheduler from attempting to pop this; it's also possible that
// it's concurrently being scheduled (somewhere between pod scheduling and
// binding) - if so, then we'll end up removing it from taskRegistry which
// will abort Bind()ing
k.qr.dequeue(pod.GetUID())
switch task, state := k.api.tasks().ForPod(podKey); state {
case podtask.StateUnknown:
log.V(2).Infof("Could not resolve pod '%s' to task id", podKey)
return noSuchPodErr
// determine if the task has already been launched to mesos, if not then
// cleanup is easier (unregister) since there's no state to sync
case podtask.StatePending:
if !task.Has(podtask.Launched) {
// we've been invoked in between Schedule() and Bind()
if task.HasAcceptedOffer() {
task.Offer.Release()
task.Reset()
task.Set(podtask.Deleted)
//TODO(jdef) probably want better handling here
if err := k.api.tasks().Update(task); err != nil {
return err
}
}
k.api.tasks().Unregister(task)
return nil
}
fallthrough
case podtask.StateRunning:
// signal to watchers that the related pod is going down
task.Set(podtask.Deleted)
if err := k.api.tasks().Update(task); err != nil {
log.Errorf("failed to update task w/ Deleted status: %v", err)
}
return k.api.killTask(task.ID)
default:
log.Infof("cannot kill pod '%s': non-terminal task not found %v", podKey, task.ID)
return noSuchTaskErr
}
}
// NewDefaultPluginConfig creates a scheduler plugin config, along with all supporting background functions.
func (k *KubernetesScheduler) NewDefaultPluginConfig(terminate <-chan struct{}, mux *http.ServeMux) *PluginConfig {
// by default, watch pods using a ListWatch backed by the client
return k.NewPluginConfig(terminate, mux, createAllPodsLW(k.client))
}
func (k *KubernetesScheduler) NewPluginConfig(terminate <-chan struct{}, mux *http.ServeMux,
podsWatcher *cache.ListWatch) *PluginConfig {
// Watch and queue pods that need scheduling.
updates := make(chan queue.Entry, k.schedcfg.UpdatesBacklog)
podUpdates := &podStoreAdapter{queue.NewHistorical(updates)}
reflector := cache.NewReflector(podsWatcher, &api.Pod{}, podUpdates, 0)
// lock that guards critical sections that involve transferring pods from
// the store (cache) to the scheduling queue; its purpose is to maintain
// an ordering (vs interleaving) of operations that's easier to reason about.
kapi := &k8smScheduler{internal: k}
q := newQueuer(podUpdates)
podDeleter := &deleter{
api: kapi,
qr: q,
}
eh := &errorHandler{
api: kapi,
backoff: backoff.New(k.schedcfg.InitialPodBackoff.Duration, k.schedcfg.MaxPodBackoff.Duration),
qr: q,
}
startLatch := make(chan struct{})
eventBroadcaster := record.NewBroadcaster()
runtime.On(startLatch, func() {
eventBroadcaster.StartRecordingToSink(k.client.Events(""))
reflector.Run() // TODO(jdef) should listen for termination
podDeleter.Run(updates, terminate)
q.Run(terminate)
q.installDebugHandlers(mux)
podtask.InstallDebugHandlers(k.taskRegistry, mux)
})
return &PluginConfig{
Config: &plugin.Config{
MinionLister: nil,
Algorithm: &kubeScheduler{
api: kapi,
podUpdates: podUpdates,
},
Binder: &binder{api: kapi},
NextPod: q.yield,
Error: eh.handleSchedulingError,
Recorder: eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"}),
},
api: kapi,
client: k.client,
qr: q,
deleter: podDeleter,
starting: startLatch,
}
}
type PluginConfig struct {
*plugin.Config
api schedulerInterface
client *client.Client
qr *queuer
deleter *deleter
starting chan struct{} // startup latch
}
func NewPlugin(c *PluginConfig) PluginInterface {
return &schedulingPlugin{
config: c.Config,
api: c.api,
client: c.client,
qr: c.qr,
deleter: c.deleter,
starting: c.starting,
}
}
type schedulingPlugin struct {
config *plugin.Config
api schedulerInterface
client *client.Client
qr *queuer
deleter *deleter
starting chan struct{}
}
func (s *schedulingPlugin) Run(done <-chan struct{}) {
defer close(s.starting)
go runtime.Until(s.scheduleOne, pluginRecoveryDelay, done)
}
// hacked from GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/scheduler.go,
// with the Modeler stuff removed since we don't use it here; mesos covers that for us.
func (s *schedulingPlugin) scheduleOne() {
pod := s.config.NextPod()
log.V(3).Infof("Attempting to schedule: %v", pod)
dest, err := s.config.Algorithm.Schedule(pod, s.config.MinionLister) // call kubeScheduler.Schedule
if err != nil {
log.V(1).Infof("Failed to schedule: %v", pod)
s.config.Recorder.Eventf(pod, "failedScheduling", "Error scheduling: %v", err)
s.config.Error(pod, err)
return
}
b := &api.Binding{
ObjectMeta: api.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name},
Target: api.ObjectReference{
Kind: "Node",
Name: dest,
},
}
if err := s.config.Binder.Bind(b); err != nil {
log.V(1).Infof("Failed to bind pod: %v", err)
s.config.Recorder.Eventf(pod, "failedScheduling", "Binding rejected: %v", err)
s.config.Error(pod, err)
return
}
s.config.Recorder.Eventf(pod, "scheduled", "Successfully assigned %v to %v", pod.Name, dest)
}
// this pod may be out of sync with respect to the API server registry:
// this pod | apiserver registry
// -------------|----------------------
// host=.* | 404 ; pod was deleted
// host=.* | 5xx ; failed to sync, try again later?
// host="" | host="" ; perhaps no updates to process?
// host="" | host="..." ; pod has been scheduled and assigned, is there a task assigned? (check TaskIdKey in binding?)
// host="..." | host="" ; pod is no longer scheduled, does it need to be re-queued?
// host="..." | host="..." ; perhaps no updates to process?
//
// TODO(jdef) this needs an integration test
func (s *schedulingPlugin) reconcilePod(oldPod api.Pod) {
log.V(1).Infof("reconcile pod %v", oldPod.Name)
ctx := api.WithNamespace(api.NewDefaultContext(), oldPod.Namespace)
pod, err := s.client.Pods(api.NamespaceValue(ctx)).Get(oldPod.Name)
if err != nil {
if errors.IsNotFound(err) {
// attempt to delete
if err = s.deleter.deleteOne(&Pod{Pod: &oldPod}); err != nil && err != noSuchPodErr && err != noSuchTaskErr {
log.Errorf("failed to delete pod: %v: %v", oldPod.Name, err)
}
} else {
//TODO(jdef) other errors should probably trigger a retry (w/ backoff).
//For now, drop the pod on the floor
log.Warning("aborting reconciliation for pod %v: %v", oldPod.Name, err)
}
return
}
if oldPod.Spec.NodeName != pod.Spec.NodeName {
if pod.Spec.NodeName == "" {
// pod is unscheduled.
// it's possible that we dropped the pod in the scheduler error handler
// because of task misalignment with the pod (task.Has(podtask.Launched) == true)
podKey, err := podtask.MakePodKey(ctx, pod.Name)
if err != nil {
log.Error(err)
return
}
s.api.Lock()
defer s.api.Unlock()
if _, state := s.api.tasks().ForPod(podKey); state != podtask.StateUnknown {
//TODO(jdef) reconcile the task
log.Errorf("task already registered for pod %v", pod.Name)
return
}
now := time.Now()
log.V(3).Infof("reoffering pod %v", podKey)
s.qr.reoffer(&Pod{
Pod: pod,
deadline: &now,
})
} else {
// pod is scheduled.
// not sure how this happened behind our backs. attempt to reconstruct
// at least a partial podtask.T record.
//TODO(jdef) reconcile the task
log.Errorf("pod already scheduled: %v", pod.Name)
}
} else {
//TODO(jdef) for now, ignore the fact that the rest of the spec may be different
//and assume that our knowledge of the pod aligns with that of the apiserver
log.Error("pod reconciliation does not support updates; not yet implemented")
}
}
func parseSelectorOrDie(s string) fields.Selector {
selector, err := fields.ParseSelector(s)
if err != nil {
panic(err)
}
return selector
}
// createAllPodsLW returns a listWatch that finds all pods
func createAllPodsLW(cl *client.Client) *cache.ListWatch {
return cache.NewListWatchFromClient(cl, "pods", api.NamespaceAll, parseSelectorOrDie(""))
}
// Consumes *api.Pod, produces *Pod; the k8s reflector wants to push *api.Pod
// objects at us, but we want to store more flexible (Pod) type defined in
// this package. The adapter implementation facilitates this. It's a little
// hackish since the object type going in is different than the object type
// coming out -- you've been warned.
type podStoreAdapter struct {
queue.FIFO
}
func (psa *podStoreAdapter) Add(obj interface{}) error {
pod := obj.(*api.Pod)
return psa.FIFO.Add(&Pod{Pod: pod})
}
func (psa *podStoreAdapter) Update(obj interface{}) error {
pod := obj.(*api.Pod)
return psa.FIFO.Update(&Pod{Pod: pod})
}
func (psa *podStoreAdapter) Delete(obj interface{}) error {
pod := obj.(*api.Pod)
return psa.FIFO.Delete(&Pod{Pod: pod})
}
func (psa *podStoreAdapter) Get(obj interface{}) (interface{}, bool, error) {
pod := obj.(*api.Pod)
return psa.FIFO.Get(&Pod{Pod: pod})
}
// Replace will delete the contents of the store, using instead the
// given list. This store implementation does NOT take ownership of the list.
func (psa *podStoreAdapter) Replace(objs []interface{}) error {
newobjs := make([]interface{}, len(objs))
for i, v := range objs {
pod := v.(*api.Pod)
newobjs[i] = &Pod{Pod: pod}
}
return psa.FIFO.Replace(newobjs)
}
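// Editor's sketch (not part of the original commit): how the pieces in this file
// are typically assembled. The scheduler k, the terminate channel and the mux are
// assumed to be provided by the caller; compare TestPlugin_LifeCycle further below
// for a mock-based variant that uses NewPluginConfig instead.
func examplePluginWiring(k *KubernetesScheduler, terminate <-chan struct{}, mux *http.ServeMux) {
	config := k.NewDefaultPluginConfig(terminate, mux) // watch all pods via the client
	schedPlugin := NewPlugin(config)
	schedPlugin.Run(terminate) // spawns the scheduleOne loop until terminate closes
}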

View File

@@ -0,0 +1,700 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"fmt"
"net/http"
"net/http/httptest"
"sync"
"testing"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
kutil "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
assertext "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/assert"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue"
schedcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/ha"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
util "github.com/mesos/mesos-go/mesosutil"
bindings "github.com/mesos/mesos-go/scheduler"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
)
// An apiserver mock which partially mocks the pods API
type TestServer struct {
server *httptest.Server
stats map[string]uint
lock sync.Mutex
}
func NewTestServer(t *testing.T, namespace string, mockPodListWatch *MockPodsListWatch) *TestServer {
ts := TestServer{
stats: map[string]uint{},
}
mux := http.NewServeMux()
mux.HandleFunc(testapi.ResourcePath("pods", namespace, ""), func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
pods := mockPodListWatch.Pods()
w.Write([]byte(runtime.EncodeOrDie(testapi.Codec(), &pods)))
})
podsPrefix := testapi.ResourcePath("pods", namespace, "") + "/"
mux.HandleFunc(podsPrefix, func(w http.ResponseWriter, r *http.Request) {
name := r.URL.Path[len(podsPrefix):]
// update statistics for this pod
ts.lock.Lock()
defer ts.lock.Unlock()
ts.stats[name] = ts.stats[name] + 1
p := mockPodListWatch.GetPod(name)
if p != nil {
w.WriteHeader(http.StatusOK)
w.Write([]byte(runtime.EncodeOrDie(testapi.Codec(), p)))
return
}
w.WriteHeader(http.StatusNotFound)
})
mux.HandleFunc(testapi.ResourcePath("events", namespace, ""), func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
})
mux.HandleFunc("/", func(res http.ResponseWriter, req *http.Request) {
t.Errorf("unexpected request: %v", req.RequestURI)
res.WriteHeader(http.StatusNotFound)
})
ts.server = httptest.NewServer(mux)
return &ts
}
func (ts *TestServer) Stats(name string) uint {
ts.lock.Lock()
defer ts.lock.Unlock()
return ts.stats[name]
}
// MockPodsListWatch mocks the pods ListWatch that would normally listen on the apiserver's pods watch endpoint
type MockPodsListWatch struct {
ListWatch cache.ListWatch
fakeWatcher *watch.FakeWatcher
list api.PodList
lock sync.Mutex
}
func NewMockPodsListWatch(initialPodList api.PodList) *MockPodsListWatch {
lw := MockPodsListWatch{
fakeWatcher: watch.NewFake(),
list: initialPodList,
}
lw.ListWatch = cache.ListWatch{
WatchFunc: func(resourceVersion string) (watch.Interface, error) {
return lw.fakeWatcher, nil
},
ListFunc: func() (runtime.Object, error) {
return &lw.list, nil
},
}
return &lw
}
func (lw *MockPodsListWatch) Pods() api.PodList {
lw.lock.Lock()
defer lw.lock.Unlock()
return lw.list
}
func (lw *MockPodsListWatch) GetPod(name string) *api.Pod {
lw.lock.Lock()
defer lw.lock.Unlock()
for _, p := range lw.list.Items {
if p.Name == name {
return &p
}
}
return nil
}
func (lw *MockPodsListWatch) Add(pod *api.Pod, notify bool) {
lw.lock.Lock()
defer lw.lock.Unlock()
lw.list.Items = append(lw.list.Items, *pod)
if notify {
lw.fakeWatcher.Add(pod)
}
}
func (lw *MockPodsListWatch) Modify(pod *api.Pod, notify bool) {
lw.lock.Lock()
defer lw.lock.Unlock()
for i, otherPod := range lw.list.Items {
if otherPod.Name == pod.Name {
lw.list.Items[i] = *pod
if notify {
lw.fakeWatcher.Modify(pod)
}
return
}
}
log.Fatalf("Cannot find pod %v to modify in MockPodsListWatch", pod.Name)
}
func (lw *MockPodsListWatch) Delete(pod *api.Pod, notify bool) {
lw.lock.Lock()
defer lw.lock.Unlock()
for i, otherPod := range lw.list.Items {
if otherPod.Name == pod.Name {
lw.list.Items = append(lw.list.Items[:i], lw.list.Items[i+1:]...)
if notify {
lw.fakeWatcher.Delete(&otherPod)
}
return
}
}
log.Fatalf("Cannot find pod %v to delete in MockPodsListWatch", pod.Name)
}
// Create a pod with a given index, requiring one port
func NewTestPod(i int) *api.Pod {
name := fmt.Sprintf("pod%d", i)
return &api.Pod{
TypeMeta: api.TypeMeta{APIVersion: testapi.Version()},
ObjectMeta: api.ObjectMeta{
Name: name,
Namespace: "default",
SelfLink: fmt.Sprintf("http://1.2.3.4/api/v1beta1/pods/%s", name),
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Ports: []api.ContainerPort{
{
ContainerPort: 8000 + i,
Protocol: api.ProtocolTCP,
},
},
},
},
},
Status: api.PodStatus{
PodIP: fmt.Sprintf("1.2.3.%d", 4+i),
Conditions: []api.PodCondition{
{
Type: api.PodReady,
Status: api.ConditionTrue,
},
},
},
}
}
// Offering some cpus and memory and the 8000-9000 port range
func NewTestOffer(i int) *mesos.Offer {
hostname := fmt.Sprintf("h%d", i)
cpus := util.NewScalarResource("cpus", 3.75)
mem := util.NewScalarResource("mem", 940)
var port8000 uint64 = 8000
var port9000 uint64 = 9000
ports8000to9000 := mesos.Value_Range{Begin: &port8000, End: &port9000}
ports := util.NewRangesResource("ports", []*mesos.Value_Range{&ports8000to9000})
return &mesos.Offer{
Id: util.NewOfferID(fmt.Sprintf("offer%d", i)),
Hostname: &hostname,
SlaveId: util.NewSlaveID(hostname),
Resources: []*mesos.Resource{cpus, mem, ports},
}
}
// Add assertions to reason about event streams
type Event struct {
Object runtime.Object
Reason string
Message string
}
type EventPredicate func(e Event) bool
type EventAssertions struct {
assert.Assertions
}
// EventObserver implements record.EventRecorder for the purposes of validation via EventAssertions.
type EventObserver struct {
fifo chan Event
}
func NewEventObserver() *EventObserver {
return &EventObserver{
fifo: make(chan Event, 1000),
}
}
func (o *EventObserver) Event(object runtime.Object, reason, message string) {
o.fifo <- Event{Object: object, Reason: reason, Message: message}
}
func (o *EventObserver) Eventf(object runtime.Object, reason, messageFmt string, args ...interface{}) {
o.fifo <- Event{Object: object, Reason: reason, Message: fmt.Sprintf(messageFmt, args...)}
}
func (o *EventObserver) PastEventf(object runtime.Object, timestamp kutil.Time, reason, messageFmt string, args ...interface{}) {
o.fifo <- Event{Object: object, Reason: reason, Message: fmt.Sprintf(messageFmt, args...)}
}
func (a *EventAssertions) Event(observer *EventObserver, pred EventPredicate, msgAndArgs ...interface{}) bool {
// parse msgAndArgs: first possibly a duration, otherwise a format string with further args
timeout := time.Second * 2
msg := "event not received"
msgArgStart := 0
if len(msgAndArgs) > 0 {
switch msgAndArgs[0].(type) {
case time.Duration:
timeout = msgAndArgs[0].(time.Duration)
msgArgStart += 1
}
}
if len(msgAndArgs) > msgArgStart {
msg = fmt.Sprintf(msgAndArgs[msgArgStart].(string), msgAndArgs[msgArgStart+1:]...)
}
// watch events
result := make(chan bool)
stop := make(chan struct{})
go func() {
for {
select {
case e, ok := <-observer.fifo:
if !ok {
result <- false
return
} else if pred(e) {
log.V(3).Infof("found asserted event for reason '%v': %v", e.Reason, e.Message)
result <- true
return
} else {
log.V(5).Infof("ignoring not-asserted event for reason '%v': %v", e.Reason, e.Message)
}
case _, ok := <-stop:
if !ok {
return
}
}
}
}()
defer close(stop)
// wait for watch to match or timeout
select {
case matched := <-result:
return matched
case <-time.After(timeout):
return a.Fail(msg)
}
}
func (a *EventAssertions) EventWithReason(observer *EventObserver, reason string, msgAndArgs ...interface{}) bool {
return a.Event(observer, func(e Event) bool {
return e.Reason == reason
}, msgAndArgs...)
}
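// Illustrative usage of the assertion helpers above (a sketch mirroring the life-cycle test
// below; the reason string and timeout are examples, not fixed values):
//
//	assert := &EventAssertions{*assert.New(t)}
//	obs := NewEventObserver()
//	// an optional leading time.Duration overrides the default 2s timeout;
//	// any remaining arguments form the failure message
//	assert.EventWithReason(obs, "scheduled", 5*time.Second, "scheduled event not received")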
type joinableDriver struct {
MockSchedulerDriver
joinFunc func() (mesos.Status, error)
}
// Join invokes joinFunc if it has been set, otherwise blocks forever
func (m *joinableDriver) Join() (mesos.Status, error) {
if m.joinFunc != nil {
return m.joinFunc()
}
select {}
}
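// Note: with joinFunc unset, Join blocks forever on the empty select, emulating a driver
// that never terminates on its own.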
// Create mesos.TaskStatus for a given task
func newTaskStatusForTask(task *mesos.TaskInfo, state mesos.TaskState) *mesos.TaskStatus {
healthy := state == mesos.TaskState_TASK_RUNNING
ts := float64(time.Now().Nanosecond()) / 1000000000.0
source := mesos.TaskStatus_SOURCE_EXECUTOR
return &mesos.TaskStatus{
TaskId: task.TaskId,
State: &state,
SlaveId: task.SlaveId,
ExecutorId: task.Executor.ExecutorId,
Timestamp: &ts,
Healthy: &healthy,
Source: &source,
Data: task.Data,
}
}
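// Note: the Timestamp above carries only the sub-second fraction of the current time,
// not a full Unix timestamp.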
// Test to create the scheduler plugin with an empty plugin config
func TestPlugin_New(t *testing.T) {
assert := assert.New(t)
c := PluginConfig{}
p := NewPlugin(&c)
assert.NotNil(p)
}
// Test to create the scheduler plugin with the config returned by the scheduler,
// and play through the whole life cycle of the plugin while creating, deleting,
// and failing pods.
func TestPlugin_LifeCycle(t *testing.T) {
assert := &EventAssertions{*assert.New(t)}
// create a fake pod watch. We use that below to submit new pods to the scheduler
podListWatch := NewMockPodsListWatch(api.PodList{})
// create fake apiserver
testApiServer := NewTestServer(t, api.NamespaceDefault, podListWatch)
defer testApiServer.server.Close()
// create scheduler
testScheduler := New(Config{
Executor: util.NewExecutorInfo(
util.NewExecutorID("executor-id"),
util.NewCommandInfo("executor-cmd"),
),
Client: client.NewOrDie(&client.Config{Host: testApiServer.server.URL, Version: testapi.Version()}),
ScheduleFunc: FCFSScheduleFunc,
Schedcfg: *schedcfg.CreateDefaultConfig(),
})
assert.NotNil(testScheduler.client, "client is nil")
assert.NotNil(testScheduler.executor, "executor is nil")
assert.NotNil(testScheduler.offers, "offer registry is nil")
// create scheduler process
schedulerProcess := ha.New(testScheduler)
// get plugin config from it
c := testScheduler.NewPluginConfig(schedulerProcess.Terminal(), http.DefaultServeMux, &podListWatch.ListWatch)
assert.NotNil(c)
// make events observable
eventObserver := NewEventObserver()
c.Recorder = eventObserver
// create plugin
p := NewPlugin(c)
assert.NotNil(p)
// run plugin
p.Run(schedulerProcess.Terminal())
defer schedulerProcess.End()
// init scheduler
err := testScheduler.Init(schedulerProcess.Master(), p, http.DefaultServeMux)
assert.NoError(err)
// create mock mesos scheduler driver
mockDriver := &joinableDriver{}
mockDriver.On("Start").Return(mesos.Status_DRIVER_RUNNING, nil).Once()
started := mockDriver.Upon()
mAny := mock.AnythingOfType
mockDriver.On("ReconcileTasks", mAny("[]*mesosproto.TaskStatus")).Return(mesos.Status_DRIVER_RUNNING, nil)
mockDriver.On("SendFrameworkMessage", mAny("*mesosproto.ExecutorID"), mAny("*mesosproto.SlaveID"), mAny("string")).
Return(mesos.Status_DRIVER_RUNNING, nil)
launchedTasks := make(chan *mesos.TaskInfo, 1)
launchTasksCalledFunc := func(args mock.Arguments) {
taskInfos := args.Get(1).([]*mesos.TaskInfo)
assert.Equal(1, len(taskInfos))
launchedTasks <- taskInfos[0]
}
mockDriver.On("LaunchTasks", mAny("[]*mesosproto.OfferID"), mAny("[]*mesosproto.TaskInfo"), mAny("*mesosproto.Filters")).
Return(mesos.Status_DRIVER_RUNNING, nil).Run(launchTasksCalledFunc)
// elect master with mock driver
driverFactory := ha.DriverFactory(func() (bindings.SchedulerDriver, error) {
return mockDriver, nil
})
schedulerProcess.Elect(driverFactory)
elected := schedulerProcess.Elected()
// driver will be started
<-started
// tell scheduler to be registered
testScheduler.Registered(
mockDriver,
util.NewFrameworkID("kubernetes-id"),
util.NewMasterInfo("master-id", (192<<24)+(168<<16)+(0<<8)+1, 5050),
)
// wait for being elected
<-elected
//TODO(jdef) refactor things above here into a test suite setup of some sort
// fake new, unscheduled pod
pod1 := NewTestPod(1)
podListWatch.Add(pod1, true) // notify watchers
// wait for failedScheduling event because there is no offer
assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")
// add some matching offer
offers1 := []*mesos.Offer{NewTestOffer(1)}
testScheduler.ResourceOffers(nil, offers1)
// and wait for scheduled pod
assert.EventWithReason(eventObserver, "scheduled")
select {
case launchedTask := <-launchedTasks:
// report back that the task has been staged, and then started by mesos
testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_STAGING))
testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_RUNNING))
// report back that the task has been lost
mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 0)
testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_LOST))
// and check that a framework message was sent to the executor
mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 1)
case <-time.After(5 * time.Second):
t.Fatalf("timed out waiting for launchTasks call")
}
// start another pod
podNum := 1
startPod := func(offers []*mesos.Offer) (*api.Pod, *mesos.TaskInfo) {
podNum = podNum + 1
// create pod and matching offer
pod := NewTestPod(podNum)
podListWatch.Add(pod, true) // notify watchers
testScheduler.ResourceOffers(mockDriver, offers)
assert.EventWithReason(eventObserver, "scheduled")
// wait for driver.launchTasks call
select {
case launchedTask := <-launchedTasks:
testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_STAGING))
testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_RUNNING))
return pod, launchedTask
case <-time.After(5 * time.Second):
t.Fatal("timed out waiting for launchTasks")
return nil, nil
}
}
pod, launchedTask := startPod(offers1)
// mock driver.KillTask; should be invoked when a pod is deleted
mockDriver.On("KillTask", mAny("*mesosproto.TaskID")).Return(mesos.Status_DRIVER_RUNNING, nil).Run(func(args mock.Arguments) {
killedTaskId := *(args.Get(0).(*mesos.TaskID))
assert.Equal(*launchedTask.TaskId, killedTaskId, "expected same TaskID as during launch")
})
killTaskCalled := mockDriver.Upon()
// stop it again via the apiserver mock
podListWatch.Delete(pod, true) // notify watchers
// and wait for the driver killTask call with the correct TaskId
select {
case <-killTaskCalled:
// report back that the task is finished
testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_FINISHED))
case <-time.After(5 * time.Second):
t.Fatal("timed out waiting for KillTask")
}
// start pods:
// - which fail while binding,
// - leading to reconciliation,
// - with different pod states on the apiserver
failPodFromExecutor := func(task *mesos.TaskInfo) {
beforePodLookups := testApiServer.Stats(pod.Name)
status := newTaskStatusForTask(task, mesos.TaskState_TASK_FAILED)
message := messages.CreateBindingFailure
status.Message = &message
testScheduler.StatusUpdate(mockDriver, status)
// wait until pod is looked up at the apiserver
assertext.EventuallyTrue(t, time.Second, func() bool {
return testApiServer.Stats(pod.Name) == beforePodLookups+1
}, "expect that reconcilePod will access apiserver for pod %v", pod.Name)
}
// 1. with pod deleted from the apiserver
pod, launchedTask = startPod(offers1)
podListWatch.Delete(pod, false) // not notifying the watchers
failPodFromExecutor(launchedTask)
// 2. with pod still on the apiserver, not bound
pod, launchedTask = startPod(offers1)
failPodFromExecutor(launchedTask)
// 3. with pod still on the apiserver, bound i.e. host!=""
pod, launchedTask = startPod(offers1)
pod.Spec.NodeName = *offers1[0].Hostname
podListWatch.Modify(pod, false) // not notifying the watchers
failPodFromExecutor(launchedTask)
// 4. with pod still on the apiserver, bound i.e. host!="", notified via ListWatch
pod, launchedTask = startPod(offers1)
pod.Spec.NodeName = *offers1[0].Hostname
podListWatch.Modify(pod, true) // notifying the watchers
time.Sleep(time.Second / 2)
failPodFromExecutor(launchedTask)
}
func TestDeleteOne_NonexistentPod(t *testing.T) {
assert := assert.New(t)
obj := &MockScheduler{}
reg := podtask.NewInMemoryRegistry()
obj.On("tasks").Return(reg)
qr := newQueuer(nil)
assert.Equal(0, len(qr.podQueue.List()))
d := &deleter{
api: obj,
qr: qr,
}
pod := &Pod{Pod: &api.Pod{
ObjectMeta: api.ObjectMeta{
Name: "foo",
Namespace: api.NamespaceDefault,
}}}
err := d.deleteOne(pod)
assert.Equal(err, noSuchPodErr)
obj.AssertExpectations(t)
}
func TestDeleteOne_PendingPod(t *testing.T) {
assert := assert.New(t)
obj := &MockScheduler{}
reg := podtask.NewInMemoryRegistry()
obj.On("tasks").Return(reg)
pod := &Pod{Pod: &api.Pod{
ObjectMeta: api.ObjectMeta{
Name: "foo",
UID: "foo0",
Namespace: api.NamespaceDefault,
}}}
_, err := reg.Register(podtask.New(api.NewDefaultContext(), "bar", *pod.Pod, &mesos.ExecutorInfo{}))
if err != nil {
t.Fatalf("failed to create task: %v", err)
}
// preconditions
qr := newQueuer(nil)
qr.podQueue.Add(pod, queue.ReplaceExisting)
assert.Equal(1, len(qr.podQueue.List()))
_, found := qr.podQueue.Get("default/foo")
assert.True(found)
// exec & post conditions
d := &deleter{
api: obj,
qr: qr,
}
err = d.deleteOne(pod)
assert.Nil(err)
_, found = qr.podQueue.Get("foo0")
assert.False(found)
assert.Equal(0, len(qr.podQueue.List()))
obj.AssertExpectations(t)
}
func TestDeleteOne_Running(t *testing.T) {
assert := assert.New(t)
obj := &MockScheduler{}
reg := podtask.NewInMemoryRegistry()
obj.On("tasks").Return(reg)
pod := &Pod{Pod: &api.Pod{
ObjectMeta: api.ObjectMeta{
Name: "foo",
UID: "foo0",
Namespace: api.NamespaceDefault,
}}}
task, err := reg.Register(podtask.New(api.NewDefaultContext(), "bar", *pod.Pod, &mesos.ExecutorInfo{}))
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
task.Set(podtask.Launched)
err = reg.Update(task)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
// preconditions
qr := newQueuer(nil)
qr.podQueue.Add(pod, queue.ReplaceExisting)
assert.Equal(1, len(qr.podQueue.List()))
_, found := qr.podQueue.Get("default/foo")
assert.True(found)
obj.On("killTask", task.ID).Return(nil)
// exec & post conditions
d := &deleter{
api: obj,
qr: qr,
}
err = d.deleteOne(pod)
assert.Nil(err)
_, found = qr.podQueue.Get("foo0")
assert.False(found)
assert.Equal(0, len(qr.podQueue.List()))
obj.AssertExpectations(t)
}
func TestDeleteOne_badPodNaming(t *testing.T) {
assert := assert.New(t)
obj := &MockScheduler{}
pod := &Pod{Pod: &api.Pod{}}
d := &deleter{
api: obj,
qr: newQueuer(nil),
}
err := d.deleteOne(pod)
assert.NotNil(err)
pod.Pod.ObjectMeta.Name = "foo"
err = d.deleteOne(pod)
assert.NotNil(err)
pod.Pod.ObjectMeta.Name = ""
pod.Pod.ObjectMeta.Namespace = "bar"
err = d.deleteOne(pod)
assert.NotNil(err)
obj.AssertExpectations(t)
}

View File

@@ -0,0 +1,80 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"fmt"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
)
// wrapper for the k8s pod type so that we can define additional methods on a "pod"
type Pod struct {
*api.Pod
deadline *time.Time
delay *time.Duration
notify queue.BreakChan
}
// implements Copyable
func (p *Pod) Copy() queue.Copyable {
if p == nil {
return nil
}
//TODO(jdef) we may need a better "deep-copy" implementation
pod := *(p.Pod)
return &Pod{Pod: &pod}
}
// implements Unique
func (p *Pod) GetUID() string {
if id, err := cache.MetaNamespaceKeyFunc(p.Pod); err != nil {
panic(fmt.Sprintf("failed to determine pod id for '%+v'", p.Pod))
} else {
return id
}
}
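// Note: for namespaced objects, cache.MetaNamespaceKeyFunc produces keys of the form
// "<namespace>/<name>", e.g. "default/foo" for a pod named "foo" in the default namespace.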
// implements Deadlined
func (dp *Pod) Deadline() (time.Time, bool) {
if dp.deadline != nil {
return *(dp.deadline), true
}
return time.Time{}, false
}
func (dp *Pod) GetDelay() time.Duration {
if dp.delay != nil {
return *(dp.delay)
}
return 0
}
func (p *Pod) Breaker() queue.BreakChan {
return p.notify
}
func (p *Pod) String() string {
displayDeadline := "<none>"
if deadline, ok := p.Deadline(); ok {
displayDeadline = deadline.String()
}
return fmt.Sprintf("{pod:%v, deadline:%v, delay:%v}", p.Pod.Name, displayDeadline, p.GetDelay())
}

View File

@@ -0,0 +1,54 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podtask
import (
"fmt"
"io"
"net/http"
log "github.com/golang/glog"
)
//TODO(jdef) we use a Locker to guard against concurrent task state changes, but it would be
//really, really nice to avoid doing this. Maybe someday the registry won't return data ptrs
//but plain structs instead.
func InstallDebugHandlers(reg Registry, mux *http.ServeMux) {
mux.HandleFunc("/debug/registry/tasks", func(w http.ResponseWriter, r *http.Request) {
//TODO(jdef) support filtering tasks based on status
alltasks := reg.List(nil)
io.WriteString(w, fmt.Sprintf("task_count=%d\n", len(alltasks)))
for _, task := range alltasks {
if err := func() (err error) {
podName := task.Pod.Name
podNamespace := task.Pod.Namespace
offerId := ""
if task.Offer != nil {
offerId = task.Offer.Id()
}
_, err = io.WriteString(w, fmt.Sprintf("%v\t%v/%v\t%v\t%v\n", task.ID, podNamespace, podName, task.State, offerId))
return
}(); err != nil {
log.Warningf("aborting debug handler: %v", err)
break // stop listing on I/O errors
}
}
if flusher, ok := w.(http.Flusher); ok {
flusher.Flush()
}
})
}
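// An illustrative dump from the handler above (task and offer IDs here are made up):
//
//	task_count=1
//	pod.5ad6c1d2-...	default/nginx	1	offer-1
//
// where the columns are task ID, namespace/name, numeric state (StatePending=0,
// StateRunning=1, ...) and offer ID.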

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package podtask maps Kubernetes pods to Mesos tasks.
package podtask

View File

@@ -0,0 +1,29 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podtask
// Concepts that have leaked to where they should not have.
import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/registry/etcd"
)
// MakePodKey constructs etcd paths to pod items, enforcing namespace rules.
func MakePodKey(ctx api.Context, id string) (string, error) {
return etcd.MakeEtcdItemKey(ctx, PodPath, id)
}

View File

@@ -0,0 +1,373 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podtask
import (
"fmt"
"strings"
"time"
"code.google.com/p/go-uuid/uuid"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
annotation "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/metrics"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/gogo/protobuf/proto"
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
mutil "github.com/mesos/mesos-go/mesosutil"
)
const (
containerCpus = 0.25 // initial CPU allocated for executor
containerMem = 64 // initial MB of memory allocated for executor
)
type StateType int
const (
StatePending StateType = iota
StateRunning
StateFinished
StateUnknown
)
type FlagType string
const (
Launched = FlagType("launched")
Bound = FlagType("bound")
Deleted = FlagType("deleted")
)
// A struct that describes a pod task.
type T struct {
ID string
Pod api.Pod
Spec Spec
Offer offers.Perishable // thread-safe
State StateType
Flags map[FlagType]struct{}
CreateTime time.Time
UpdatedTime time.Time // time of the most recent StatusUpdate we've seen from the mesos master
podStatus api.PodStatus
executor *mesos.ExecutorInfo // readonly
podKey string
launchTime time.Time
bindTime time.Time
mapper HostPortMappingType
}
type Spec struct {
SlaveID string
CPU float64
Memory float64
PortMap []HostPortMapping
Ports []uint64
Data []byte
}
// mostly-clone this pod task. the clone will actually share some fields:
// - executor // OK because it's read-only
// - Offer    // OK because it guarantees safe concurrent access
func (t *T) Clone() *T {
if t == nil {
return nil
}
// shallow-copy
clone := *t
// deep copy
(&t.Spec).copyTo(&clone.Spec)
clone.Flags = map[FlagType]struct{}{}
for k := range t.Flags {
clone.Flags[k] = struct{}{}
}
return &clone
}
func (old *Spec) copyTo(new *Spec) {
if len(old.PortMap) > 0 {
new.PortMap = append(([]HostPortMapping)(nil), old.PortMap...)
}
if len(old.Ports) > 0 {
new.Ports = append(([]uint64)(nil), old.Ports...)
}
if len(old.Data) > 0 {
new.Data = append(([]byte)(nil), old.Data...)
}
}
func (t *T) HasAcceptedOffer() bool {
return t.Spec.SlaveID != ""
}
func (t *T) GetOfferId() string {
if t.Offer == nil {
return ""
}
return t.Offer.Details().Id.GetValue()
}
func generateTaskName(pod *api.Pod) string {
ns := pod.Namespace
if ns == "" {
ns = api.NamespaceDefault
}
return fmt.Sprintf("%s.%s.pods", pod.Name, ns)
}
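// For example, a pod "foo" in namespace "bar" yields the task name "foo.bar.pods";
// an empty namespace falls back to "default", giving "foo.default.pods".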
func (t *T) BuildTaskInfo() *mesos.TaskInfo {
info := &mesos.TaskInfo{
Name: proto.String(generateTaskName(&t.Pod)),
TaskId: mutil.NewTaskID(t.ID),
SlaveId: mutil.NewSlaveID(t.Spec.SlaveID),
Executor: t.executor,
Data: t.Spec.Data,
Resources: []*mesos.Resource{
mutil.NewScalarResource("cpus", t.Spec.CPU),
mutil.NewScalarResource("mem", t.Spec.Memory),
},
}
if portsResource := rangeResource("ports", t.Spec.Ports); portsResource != nil {
info.Resources = append(info.Resources, portsResource)
}
return info
}
// FillFromDetails fills in the Spec of the T from an offer; it should be called during
// k8s scheduling, before binding.
func (t *T) FillFromDetails(details *mesos.Offer) error {
if details == nil {
//programming error
panic("offer details are nil")
}
log.V(3).Infof("Recording offer(s) %v against pod %v", details.Id, t.Pod.Name)
t.Spec = Spec{
SlaveID: details.GetSlaveId().GetValue(),
CPU: containerCpus,
Memory: containerMem,
}
if mapping, err := t.mapper.Generate(t, details); err != nil {
t.Reset()
return err
} else {
ports := []uint64{}
for _, entry := range mapping {
ports = append(ports, entry.OfferPort)
}
t.Spec.PortMap = mapping
t.Spec.Ports = ports
}
// the hostname of the executor needs to match that of the offer, otherwise
// the kubelet node status checker/updater is very unhappy
const HOSTNAME_OVERRIDE_FLAG = "--hostname-override="
hostname := details.GetHostname() // required field, non-empty
hostnameOverride := HOSTNAME_OVERRIDE_FLAG + hostname
argv := t.executor.Command.Arguments
overwrite := false
for i, arg := range argv {
if strings.HasPrefix(arg, HOSTNAME_OVERRIDE_FLAG) {
overwrite = true
argv[i] = hostnameOverride
break
}
}
if !overwrite {
t.executor.Command.Arguments = append(argv, hostnameOverride)
}
return nil
}
// Clear offer-related details from the task; should be called if/when an offer
// has already been assigned to a task but for some reason is no longer valid.
func (t *T) Reset() {
log.V(3).Infof("Clearing offer(s) from pod %v", t.Pod.Name)
t.Offer = nil
t.Spec = Spec{}
}
func (t *T) AcceptOffer(offer *mesos.Offer) bool {
if offer == nil {
return false
}
var (
cpus float64 = 0
mem float64 = 0
)
for _, resource := range offer.Resources {
if resource.GetName() == "cpus" {
cpus = *resource.GetScalar().Value
}
if resource.GetName() == "mem" {
mem = *resource.GetScalar().Value
}
}
if _, err := t.mapper.Generate(t, offer); err != nil {
log.V(3).Info(err)
return false
}
// for now hard-coded, constant values are used for cpus and mem. This is necessary
// until parent-cgroup integration is finished for mesos and k8sm. Then the k8sm
// executor can become the parent of pods and subsume their resource usage and
// therefore be compliant with expectations of mesos executors w/ respect to
// resource allocation and management.
//
// TODO(jdef): remove hardcoded values and make use of actual pod resource settings
if (cpus < containerCpus) || (mem < containerMem) {
log.V(3).Infof("not enough resources: cpus: %f mem: %f", cpus, mem)
return false
}
return true
}
func (t *T) Set(f FlagType) {
t.Flags[f] = struct{}{}
if Launched == f {
t.launchTime = time.Now()
queueWaitTime := t.launchTime.Sub(t.CreateTime)
metrics.QueueWaitTime.Observe(metrics.InMicroseconds(queueWaitTime))
}
}
func (t *T) Has(f FlagType) (exists bool) {
_, exists = t.Flags[f]
return
}
func New(ctx api.Context, id string, pod api.Pod, executor *mesos.ExecutorInfo) (*T, error) {
if executor == nil {
return nil, fmt.Errorf("illegal argument: executor was nil")
}
key, err := MakePodKey(ctx, pod.Name)
if err != nil {
return nil, err
}
if id == "" {
id = "pod." + uuid.NewUUID().String()
}
task := &T{
ID: id,
Pod: pod,
State: StatePending,
podKey: key,
mapper: MappingTypeForPod(&pod),
Flags: make(map[FlagType]struct{}),
executor: proto.Clone(executor).(*mesos.ExecutorInfo),
}
task.CreateTime = time.Now()
return task, nil
}
func (t *T) SaveRecoveryInfo(dict map[string]string) {
dict[annotation.TaskIdKey] = t.ID
dict[annotation.SlaveIdKey] = t.Spec.SlaveID
dict[annotation.OfferIdKey] = t.Offer.Details().Id.GetValue()
dict[annotation.ExecutorIdKey] = t.executor.ExecutorId.GetValue()
}
// reconstruct a task from metadata stashed in a pod entry. there are limited pod states that
// support reconstruction. if we expect to be able to reconstruct state but encounter errors
// in the process then those errors are returned. if the pod is in a seemingly valid state but
// otherwise does not support task reconstruction, return false. if we're able to reconstruct
// state, then return the reconstructed task and true.
//
// at this time task reconstruction is only supported for pods that have been annotated with
// binding metadata, which implies that they've previously been associated with a task and
// that mesos knows about it.
//
// assumes that the pod data comes from the k8s registry and reflects the desired state.
//
func RecoverFrom(pod api.Pod) (*T, bool, error) {
// we only expect annotations if pod has been bound, which implies that it has already
// been scheduled and launched
if pod.Spec.NodeName == "" && len(pod.Annotations) == 0 {
log.V(1).Infof("skipping recovery for unbound pod %v/%v", pod.Namespace, pod.Name)
return nil, false, nil
}
// only process pods that are not in a terminal state
switch pod.Status.Phase {
case api.PodPending, api.PodRunning, api.PodUnknown: // continue
default:
log.V(1).Infof("skipping recovery for terminal pod %v/%v", pod.Namespace, pod.Name)
return nil, false, nil
}
ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)
key, err := MakePodKey(ctx, pod.Name)
if err != nil {
return nil, false, err
}
//TODO(jdef) recover ports (and other resource requirements?) from the pod spec as well
now := time.Now()
t := &T{
Pod: pod,
CreateTime: now,
podKey: key,
State: StatePending, // possibly running? mesos will tell us during reconciliation
Flags: make(map[FlagType]struct{}),
mapper: MappingTypeForPod(&pod),
launchTime: now,
bindTime: now,
}
var (
offerId string
hostname string
)
for _, k := range []string{
annotation.BindingHostKey,
annotation.TaskIdKey,
annotation.SlaveIdKey,
annotation.OfferIdKey,
annotation.ExecutorIdKey,
} {
v, found := pod.Annotations[k]
if !found {
return nil, false, fmt.Errorf("incomplete metadata: missing value for pod annotation: %v", k)
}
switch k {
case annotation.BindingHostKey:
hostname = v
case annotation.SlaveIdKey:
t.Spec.SlaveID = v
case annotation.OfferIdKey:
offerId = v
case annotation.TaskIdKey:
t.ID = v
case annotation.ExecutorIdKey:
// this is nowhere near sufficient to re-launch a task, but we really just
// want this for tracking
t.executor = &mesos.ExecutorInfo{ExecutorId: mutil.NewExecutorID(v)}
}
}
t.Offer = offers.Expired(offerId, hostname, 0)
t.Flags[Launched] = struct{}{}
t.Flags[Bound] = struct{}{}
return t, true, nil
}

View File

@@ -0,0 +1,153 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podtask
import (
"testing"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
mesos "github.com/mesos/mesos-go/mesosproto"
mutil "github.com/mesos/mesos-go/mesosutil"
)
const (
t_min_cpu = 128
t_min_mem = 128
)
func fakePodTask(id string) (*T, error) {
return New(api.NewDefaultContext(), "", api.Pod{
ObjectMeta: api.ObjectMeta{
Name: id,
Namespace: api.NamespaceDefault,
},
}, &mesos.ExecutorInfo{})
}
func TestEmptyOffer(t *testing.T) {
t.Parallel()
task, err := fakePodTask("foo")
if err != nil {
t.Fatal(err)
}
if ok := task.AcceptOffer(nil); ok {
t.Fatalf("accepted nil offer")
}
if ok := task.AcceptOffer(&mesos.Offer{}); ok {
t.Fatalf("accepted empty offer")
}
}
func TestNoPortsInPodOrOffer(t *testing.T) {
t.Parallel()
task, err := fakePodTask("foo")
if err != nil || task == nil {
t.Fatal(err)
}
offer := &mesos.Offer{
Resources: []*mesos.Resource{
mutil.NewScalarResource("cpus", 0.001),
mutil.NewScalarResource("mem", 0.001),
},
}
if ok := task.AcceptOffer(offer); ok {
t.Fatalf("accepted offer %v:", offer)
}
offer = &mesos.Offer{
Resources: []*mesos.Resource{
mutil.NewScalarResource("cpus", t_min_cpu),
mutil.NewScalarResource("mem", t_min_mem),
},
}
if ok := task.AcceptOffer(offer); !ok {
t.Fatalf("did not accepted offer %v:", offer)
}
}
func TestAcceptOfferPorts(t *testing.T) {
t.Parallel()
task, _ := fakePodTask("foo")
pod := &task.Pod
offer := &mesos.Offer{
Resources: []*mesos.Resource{
mutil.NewScalarResource("cpus", t_min_cpu),
mutil.NewScalarResource("mem", t_min_mem),
rangeResource("ports", []uint64{1, 1}),
},
}
if ok := task.AcceptOffer(offer); !ok {
t.Fatalf("did not accepted offer %v:", offer)
}
pod.Spec = api.PodSpec{
Containers: []api.Container{{
Ports: []api.ContainerPort{{
HostPort: 123,
}},
}},
}
if ok := task.AcceptOffer(offer); ok {
t.Fatalf("accepted offer %v:", offer)
}
pod.Spec.Containers[0].Ports[0].HostPort = 1
if ok := task.AcceptOffer(offer); !ok {
t.Fatalf("did not accepted offer %v:", offer)
}
pod.Spec.Containers[0].Ports[0].HostPort = 0
if ok := task.AcceptOffer(offer); !ok {
t.Fatalf("did not accepted offer %v:", offer)
}
offer.Resources = []*mesos.Resource{
mutil.NewScalarResource("cpus", t_min_cpu),
mutil.NewScalarResource("mem", t_min_mem),
}
if ok := task.AcceptOffer(offer); ok {
t.Fatalf("accepted offer %v:", offer)
}
pod.Spec.Containers[0].Ports[0].HostPort = 1
if ok := task.AcceptOffer(offer); ok {
t.Fatalf("accepted offer %v:", offer)
}
}
func TestGeneratePodName(t *testing.T) {
p := &api.Pod{
ObjectMeta: api.ObjectMeta{
Name: "foo",
Namespace: "bar",
},
}
name := generateTaskName(p)
expected := "foo.bar.pods"
if name != expected {
t.Fatalf("expected %q instead of %q", expected, name)
}
p.Namespace = ""
name = generateTaskName(p)
expected = "foo.default.pods"
if name != expected {
t.Fatalf("expected %q instead of %q", expected, name)
}
}

View File

@@ -0,0 +1,185 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podtask
import (
"fmt"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
)
type HostPortMappingType string
const (
// maps a Container.HostPort to the same exact offered host port, ignores .HostPort = 0
HostPortMappingFixed HostPortMappingType = "fixed"
// same as HostPortMappingFixed, except that a .HostPort of 0 is mapped to any offered port
HostPortMappingWildcard HostPortMappingType = "wildcard"
)
type HostPortMapper interface {
// abstracts the way that host ports are mapped to pod container ports
Generate(t *T, offer *mesos.Offer) ([]HostPortMapping, error)
}
type HostPortMapping struct {
ContainerIdx int // index of the container in the pod spec
PortIdx int // index of the port in a container's port spec
OfferPort uint64
}
func (self HostPortMappingType) Generate(t *T, offer *mesos.Offer) ([]HostPortMapping, error) {
switch self {
case HostPortMappingWildcard:
return wildcardHostPortMapping(t, offer)
case HostPortMappingFixed:
default:
log.Warningf("illegal host-port mapping spec %q, defaulting to %q", self, HostPortMappingFixed)
}
return defaultHostPortMapping(t, offer)
}
type PortAllocationError struct {
PodId string
Ports []uint64
}
func (err *PortAllocationError) Error() string {
return fmt.Sprintf("Could not schedule pod %s: %d port(s) could not be allocated", err.PodId, len(err.Ports))
}
type DuplicateHostPortError struct {
m1, m2 HostPortMapping
}
func (err *DuplicateHostPortError) Error() string {
return fmt.Sprintf(
"Host port %d is specified for container %d, pod %d and container %d, pod %d",
err.m1.OfferPort, err.m1.ContainerIdx, err.m1.PortIdx, err.m2.ContainerIdx, err.m2.PortIdx)
}
// wildcard k8s host port mapping implementation: hostPort == 0 gets mapped to any available offer port
func wildcardHostPortMapping(t *T, offer *mesos.Offer) ([]HostPortMapping, error) {
mapping, err := defaultHostPortMapping(t, offer)
if err != nil {
return nil, err
}
taken := make(map[uint64]struct{})
for _, entry := range mapping {
taken[entry.OfferPort] = struct{}{}
}
wildports := []HostPortMapping{}
for i, container := range t.Pod.Spec.Containers {
for pi, port := range container.Ports {
if port.HostPort == 0 {
wildports = append(wildports, HostPortMapping{
ContainerIdx: i,
PortIdx: pi,
})
}
}
}
remaining := len(wildports)
foreachRange(offer, "ports", func(bp, ep uint64) {
log.V(3).Infof("Searching for wildcard port in range {%d:%d}", bp, ep)
for i := range wildports {
// iterate by index so that assignments to OfferPort stick; ranging by value would
// leave wildports[i].OfferPort at 0 and could double-allocate across multiple port ranges
if wildports[i].OfferPort != 0 {
continue
}
for port := bp; port <= ep && remaining > 0; port++ {
if _, inuse := taken[port]; inuse {
continue
}
wildports[i].OfferPort = port
mapping = append(mapping, wildports[i])
remaining--
taken[port] = struct{}{}
break
}
}
})
if remaining > 0 {
err := &PortAllocationError{
PodId: t.Pod.Name,
}
// it doesn't make sense to include a port list here because they were all zero (wildcards)
return nil, err
}
return mapping, nil
}
// default k8s host port mapping implementation: hostPort == 0 means containerPort remains pod-private, and so
// no offer ports will be mapped to such Container ports.
func defaultHostPortMapping(t *T, offer *mesos.Offer) ([]HostPortMapping, error) {
requiredPorts := make(map[uint64]HostPortMapping)
mapping := []HostPortMapping{}
for i, container := range t.Pod.Spec.Containers {
// skip all ports with HostPort == 0; k8s already knows what to do with zero
// ports (it does not create 'port bindings' on the minion host), and such
// wildcard ports don't consume host resources
for pi, port := range container.Ports {
if port.HostPort == 0 {
continue // ignore
}
m := HostPortMapping{
ContainerIdx: i,
PortIdx: pi,
OfferPort: uint64(port.HostPort),
}
if entry, inuse := requiredPorts[uint64(port.HostPort)]; inuse {
return nil, &DuplicateHostPortError{entry, m}
}
requiredPorts[uint64(port.HostPort)] = m
}
}
foreachRange(offer, "ports", func(bp, ep uint64) {
for port := range requiredPorts {
log.V(3).Infof("evaluating port range {%d:%d} %d", bp, ep, port)
if (bp <= port) && (port <= ep) {
mapping = append(mapping, requiredPorts[port])
delete(requiredPorts, port)
}
}
})
unsatisfiedPorts := len(requiredPorts)
if unsatisfiedPorts > 0 {
err := &PortAllocationError{
PodId: t.Pod.Name,
}
for p := range requiredPorts {
err.Ports = append(err.Ports, p)
}
return nil, err
}
return mapping, nil
}
const PortMappingLabelKey = "k8s.mesosphere.io/portMapping"
func MappingTypeForPod(pod *api.Pod) HostPortMappingType {
filter := map[string]string{
PortMappingLabelKey: string(HostPortMappingFixed),
}
selector := labels.Set(filter).AsSelector()
if selector.Matches(labels.Set(pod.Labels)) {
return HostPortMappingFixed
}
return HostPortMappingWildcard
}
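// In other words, pods default to wildcard mapping; only pods labeled with
// k8s.mesosphere.io/portMapping=fixed get the fixed mapping.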

View File

@@ -0,0 +1,205 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podtask
import (
"testing"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
mesos "github.com/mesos/mesos-go/mesosproto"
)
func TestDefaultHostPortMatching(t *testing.T) {
t.Parallel()
task, _ := fakePodTask("foo")
pod := &task.Pod
offer := &mesos.Offer{
Resources: []*mesos.Resource{
rangeResource("ports", []uint64{1, 1}),
},
}
mapping, err := defaultHostPortMapping(task, offer)
if err != nil {
t.Fatal(err)
}
if len(mapping) > 0 {
t.Fatalf("Found mappings for a pod without ports: %v", pod)
}
//--
pod.Spec = api.PodSpec{
Containers: []api.Container{{
Ports: []api.ContainerPort{{
HostPort: 123,
}, {
HostPort: 123,
}},
}},
}
task, err = New(api.NewDefaultContext(), "", *pod, &mesos.ExecutorInfo{})
if err != nil {
t.Fatal(err)
}
_, err = defaultHostPortMapping(task, offer)
if err, _ := err.(*DuplicateHostPortError); err == nil {
t.Fatal("Expected duplicate port error")
} else if err.m1.OfferPort != 123 {
t.Fatal("Expected duplicate host port 123")
}
}
func TestWildcardHostPortMatching(t *testing.T) {
t.Parallel()
task, _ := fakePodTask("foo")
pod := &task.Pod
offer := &mesos.Offer{}
mapping, err := wildcardHostPortMapping(task, offer)
if err != nil {
t.Fatal(err)
}
if len(mapping) > 0 {
t.Fatalf("Found mappings for an empty offer and a pod without ports: %v", pod)
}
//--
offer = &mesos.Offer{
Resources: []*mesos.Resource{
rangeResource("ports", []uint64{1, 1}),
},
}
mapping, err = wildcardHostPortMapping(task, offer)
if err != nil {
t.Fatal(err)
}
if len(mapping) > 0 {
t.Fatalf("Found mappings for a pod without ports: %v", pod)
}
//--
pod.Spec = api.PodSpec{
Containers: []api.Container{{
Ports: []api.ContainerPort{{
HostPort: 123,
}},
}},
}
task, err = New(api.NewDefaultContext(), "", *pod, &mesos.ExecutorInfo{})
if err != nil {
t.Fatal(err)
}
mapping, err = wildcardHostPortMapping(task, offer)
if err == nil {
t.Fatalf("expected error instead of mappings: %#v", mapping)
} else if err, _ := err.(*PortAllocationError); err == nil {
t.Fatal("Expected port allocation error")
} else if !(len(err.Ports) == 1 && err.Ports[0] == 123) {
t.Fatal("Expected port allocation error for host port 123")
}
//--
pod.Spec = api.PodSpec{
Containers: []api.Container{{
Ports: []api.ContainerPort{{
HostPort: 0,
}, {
HostPort: 123,
}},
}},
}
task, err = New(api.NewDefaultContext(), "", *pod, &mesos.ExecutorInfo{})
if err != nil {
t.Fatal(err)
}
mapping, err = wildcardHostPortMapping(task, offer)
if err, _ := err.(*PortAllocationError); err == nil {
t.Fatal("Expected port allocation error")
} else if !(len(err.Ports) == 1 && err.Ports[0] == 123) {
t.Fatal("Expected port allocation error for host port 123")
}
//--
pod.Spec = api.PodSpec{
Containers: []api.Container{{
Ports: []api.ContainerPort{{
HostPort: 0,
}, {
HostPort: 1,
}},
}},
}
task, err = New(api.NewDefaultContext(), "", *pod, &mesos.ExecutorInfo{})
if err != nil {
t.Fatal(err)
}
mapping, err = wildcardHostPortMapping(task, offer)
if err, _ := err.(*PortAllocationError); err == nil {
t.Fatal("Expected port allocation error")
} else if len(err.Ports) != 0 {
t.Fatal("Expected port allocation error for wildcard port")
}
//--
offer = &mesos.Offer{
Resources: []*mesos.Resource{
rangeResource("ports", []uint64{1, 2}),
},
}
mapping, err = wildcardHostPortMapping(task, offer)
if err != nil {
t.Fatal(err)
} else if len(mapping) != 2 {
t.Fatal("Expected both ports allocated")
}
valid := 0
for _, entry := range mapping {
if entry.ContainerIdx == 0 && entry.PortIdx == 0 && entry.OfferPort == 2 {
valid++
}
if entry.ContainerIdx == 0 && entry.PortIdx == 1 && entry.OfferPort == 1 {
valid++
}
}
if valid < 2 {
t.Fatalf("Expected 2 valid port mappings, not %d", valid)
}
}
func TestMappingTypeForPod(t *testing.T) {
pod := &api.Pod{
ObjectMeta: api.ObjectMeta{
Labels: map[string]string{},
},
}
mt := MappingTypeForPod(pod)
if mt != HostPortMappingWildcard {
t.Fatalf("expected wildcard mapping")
}
pod.Labels[PortMappingLabelKey] = string(HostPortMappingFixed)
mt = MappingTypeForPod(pod)
if mt != HostPortMappingFixed {
t.Fatalf("expected fixed mapping")
}
pod.Labels[PortMappingLabelKey] = string(HostPortMappingWildcard)
mt = MappingTypeForPod(pod)
if mt != HostPortMappingWildcard {
t.Fatalf("expected wildcard mapping")
}
}

View File

@@ -0,0 +1,57 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podtask
import (
"github.com/gogo/protobuf/proto"
mesos "github.com/mesos/mesos-go/mesosproto"
)
// create a range resource for the listed ports
func rangeResource(name string, ports []uint64) *mesos.Resource {
if len(ports) == 0 {
// pod may consist of a container that doesn't expose any ports on the host
return nil
}
return &mesos.Resource{
Name: proto.String(name),
Type: mesos.Value_RANGES.Enum(),
Ranges: newRanges(ports),
}
}
// generate port ranges from a list of ports. this implementation is very naive
func newRanges(ports []uint64) *mesos.Value_Ranges {
r := make([]*mesos.Value_Range, 0)
for _, port := range ports {
x := proto.Uint64(port)
r = append(r, &mesos.Value_Range{Begin: x, End: x})
}
return &mesos.Value_Ranges{Range: r}
}
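// For example, newRanges([]uint64{8000, 8001}) produces two single-port ranges,
// [8000-8000] and [8001-8001], rather than one merged [8000-8001] range.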
func foreachRange(offer *mesos.Offer, resourceName string, f func(begin, end uint64)) {
for _, resource := range offer.Resources {
if resource.GetName() == resourceName {
for _, r := range (*resource).GetRanges().Range {
bp := r.GetBegin()
ep := r.GetEnd()
f(bp, ep)
}
}
}
}

View File

@@ -0,0 +1,335 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podtask
import (
"container/ring"
"encoding/json"
"fmt"
"sync"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/metrics"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
)
const (
//TODO(jdef) move this somewhere else
PodPath = "/pods"
// length of historical record of finished tasks
defaultFinishedTasksSize = 1024
)
// state store for pod tasks
type Registry interface {
// register the specified task with this registry, as long as the current error
// condition is nil. if no errors occur then return a copy of the registered task.
Register(*T, error) (*T, error)
// unregister the specified task from this registry
Unregister(*T)
// update state for the registered task identified by task.ID; returns an error if no
// such task exists or if the update is not allowed in the task's current state.
Update(task *T) error
// return the task registered for the specified task ID and its current state.
// if there is no such task then StateUnknown is returned.
Get(taskId string) (task *T, currentState StateType)
// return the non-terminal task corresponding to the specified pod ID
ForPod(podID string) (task *T, currentState StateType)
// update the task status given the specified mesos task status update, returning a
// copy of the updated task (if any) and its state.
UpdateStatus(status *mesos.TaskStatus) (*T, StateType)
// return a list of tasks that match the given filter, or all tasks if filter == nil.
List(filter func(*T) bool) []*T
}
type inMemoryRegistry struct {
rw sync.RWMutex
taskRegistry map[string]*T
tasksFinished *ring.Ring
podToTask map[string]string
}
func NewInMemoryRegistry() Registry {
return &inMemoryRegistry{
taskRegistry: make(map[string]*T),
tasksFinished: ring.New(defaultFinishedTasksSize),
podToTask: make(map[string]string),
}
}
func (k *inMemoryRegistry) List(accepts func(t *T) bool) (tasks []*T) {
k.rw.RLock()
defer k.rw.RUnlock()
for _, task := range k.taskRegistry {
if accepts == nil || accepts(task) {
tasks = append(tasks, task.Clone())
}
}
return
}
func (k *inMemoryRegistry) ForPod(podID string) (task *T, currentState StateType) {
k.rw.RLock()
defer k.rw.RUnlock()
tid, ok := k.podToTask[podID]
if !ok {
return nil, StateUnknown
}
t, state := k._get(tid)
return t.Clone(), state
}
// registers a pod task, unless the given error is non-nil, in which case the error is passed through
func (k *inMemoryRegistry) Register(task *T, err error) (*T, error) {
if err == nil {
k.rw.Lock()
defer k.rw.Unlock()
if _, found := k.podToTask[task.podKey]; found {
return nil, fmt.Errorf("task already registered for pod key %q", task.podKey)
}
if _, found := k.taskRegistry[task.ID]; found {
return nil, fmt.Errorf("task already registered for id %q", task.ID)
}
k.podToTask[task.podKey] = task.ID
k.taskRegistry[task.ID] = task
}
return task.Clone(), err
}
// updates internal task state. updates are limited to Spec, Flags, and Offer for
// StatePending tasks, and are limited to Flag updates (additive only) for StateRunning tasks.
func (k *inMemoryRegistry) Update(task *T) error {
if task == nil {
return nil
}
k.rw.Lock()
defer k.rw.Unlock()
switch internal, state := k._get(task.ID); state {
case StateUnknown:
return fmt.Errorf("no such task: %v", task.ID)
case StatePending:
internal.Offer = task.Offer
internal.Spec = task.Spec
(&task.Spec).copyTo(&internal.Spec)
internal.Flags = map[FlagType]struct{}{}
fallthrough
case StateRunning:
for k, v := range task.Flags {
internal.Flags[k] = v
}
return nil
default:
return fmt.Errorf("may not update task %v in state %v", task.ID, state)
}
}
func (k *inMemoryRegistry) Unregister(task *T) {
k.rw.Lock()
defer k.rw.Unlock()
delete(k.podToTask, task.podKey)
delete(k.taskRegistry, task.ID)
}
func (k *inMemoryRegistry) Get(taskId string) (*T, StateType) {
k.rw.RLock()
defer k.rw.RUnlock()
t, state := k._get(taskId)
return t.Clone(), state
}
// assume that the caller has already locked around access to task state.
// the caller is also responsible for cloning the task object before it leaves
// the context of this registry.
func (k *inMemoryRegistry) _get(taskId string) (*T, StateType) {
if task, found := k.taskRegistry[taskId]; found {
return task, task.State
}
return nil, StateUnknown
}
func (k *inMemoryRegistry) UpdateStatus(status *mesos.TaskStatus) (*T, StateType) {
taskId := status.GetTaskId().GetValue()
k.rw.Lock()
defer k.rw.Unlock()
task, state := k._get(taskId)
switch status.GetState() {
case mesos.TaskState_TASK_STAGING:
k.handleTaskStaging(task, state, status)
case mesos.TaskState_TASK_STARTING:
k.handleTaskStarting(task, state, status)
case mesos.TaskState_TASK_RUNNING:
k.handleTaskRunning(task, state, status)
case mesos.TaskState_TASK_FINISHED:
k.handleTaskFinished(task, state, status)
case mesos.TaskState_TASK_FAILED:
k.handleTaskFailed(task, state, status)
case mesos.TaskState_TASK_KILLED:
k.handleTaskKilled(task, state, status)
case mesos.TaskState_TASK_LOST:
k.handleTaskLost(task, state, status)
default:
log.Warningf("unhandled status update for task: %v", taskId)
}
return task.Clone(), state
}
func (k *inMemoryRegistry) handleTaskStaging(task *T, state StateType, status *mesos.TaskStatus) {
if status.GetSource() != mesos.TaskStatus_SOURCE_MASTER {
log.Errorf("received STAGING for task %v with unexpected source: %v",
status.GetTaskId().GetValue(), status.GetSource())
}
}
func (k *inMemoryRegistry) handleTaskStarting(task *T, state StateType, status *mesos.TaskStatus) {
// we expect to receive this when a launched task is finally "bound" via the API
// server; for pending tasks, record the bind time and report the bind latency metric.
switch state {
case StatePending:
task.UpdatedTime = time.Now()
if !task.Has(Bound) {
task.Set(Bound)
task.bindTime = task.UpdatedTime
timeToBind := task.bindTime.Sub(task.launchTime)
metrics.BindLatency.Observe(metrics.InMicroseconds(timeToBind))
}
default:
taskId := status.GetTaskId().GetValue()
log.Warningf("Ignore status TASK_STARTING because the task %v is not pending", taskId)
}
}
func (k *inMemoryRegistry) handleTaskRunning(task *T, state StateType, status *mesos.TaskStatus) {
taskId := status.GetTaskId().GetValue()
switch state {
case StatePending:
task.UpdatedTime = time.Now()
log.Infof("Received running status for pending task: %v", taskId)
fillRunningPodInfo(task, status)
task.State = StateRunning
case StateRunning:
task.UpdatedTime = time.Now()
log.V(2).Infof("Ignore status TASK_RUNNING because the task %v is already running", taskId)
case StateFinished:
log.Warningf("Ignore status TASK_RUNNING because the task %v is already finished", taskId)
default:
log.Warningf("Ignore status TASK_RUNNING because the task %v is discarded", taskId)
}
}
func ParsePodStatusResult(taskStatus *mesos.TaskStatus) (result api.PodStatusResult, err error) {
if taskStatus.Data != nil {
err = json.Unmarshal(taskStatus.Data, &result)
} else {
err = fmt.Errorf("missing TaskStatus.Data")
}
return
}
func fillRunningPodInfo(task *T, taskStatus *mesos.TaskStatus) {
if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
// there is no data..
return
}
//TODO(jdef) determine the usefulness of this information (if any)
if result, err := ParsePodStatusResult(taskStatus); err != nil {
log.Errorf("invalid TaskStatus.Data for task '%v': %v", task.ID, err)
} else {
task.podStatus = result.Status
log.Infof("received pod status for task %v: %+v", task.ID, result.Status)
}
}
func (k *inMemoryRegistry) handleTaskFinished(task *T, state StateType, status *mesos.TaskStatus) {
taskId := status.GetTaskId().GetValue()
switch state {
case StatePending:
panic(fmt.Sprintf("Pending task %v finished, this couldn't happen", taskId))
case StateRunning:
log.V(2).Infof("received finished status for running task: %v", taskId)
delete(k.podToTask, task.podKey)
task.State = StateFinished
task.UpdatedTime = time.Now()
k.tasksFinished = k.recordFinishedTask(task.ID)
case StateFinished:
log.Warningf("Ignore status TASK_FINISHED because the task %v is already finished", taskId)
default:
log.Warningf("Ignore status TASK_FINISHED because the task %v is not running", taskId)
}
}
// record that a task has finished.
// older records are expunged one at a time once the historical ring buffer is saturated.
// assumes caller is holding state lock.
func (k *inMemoryRegistry) recordFinishedTask(taskId string) *ring.Ring {
slot := k.tasksFinished.Next()
if slot.Value != nil {
// garbage collect older finished task from the registry
gctaskId := slot.Value.(string)
if gctask, found := k.taskRegistry[gctaskId]; found && gctask.State == StateFinished {
delete(k.taskRegistry, gctaskId)
}
}
slot.Value = taskId
return slot
}
func (k *inMemoryRegistry) handleTaskFailed(task *T, state StateType, status *mesos.TaskStatus) {
switch state {
case StatePending:
delete(k.taskRegistry, task.ID)
delete(k.podToTask, task.podKey)
case StateRunning:
delete(k.taskRegistry, task.ID)
delete(k.podToTask, task.podKey)
}
}
func (k *inMemoryRegistry) handleTaskKilled(task *T, state StateType, status *mesos.TaskStatus) {
defer func() {
msg := fmt.Sprintf("task killed: %+v, task %+v", status, task)
if task != nil && task.Has(Deleted) {
// we were expecting this, nothing out of the ordinary
log.V(2).Infoln(msg)
} else {
log.Errorln(msg)
}
}()
switch state {
case StatePending, StateRunning:
delete(k.taskRegistry, task.ID)
delete(k.podToTask, task.podKey)
}
}
func (k *inMemoryRegistry) handleTaskLost(task *T, state StateType, status *mesos.TaskStatus) {
switch state {
case StateRunning, StatePending:
delete(k.taskRegistry, task.ID)
delete(k.podToTask, task.podKey)
}
}
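// To summarize the terminal-state handling above: TASK_FAILED, TASK_KILLED and TASK_LOST
// drop the task from both taskRegistry and podToTask, while TASK_FINISHED keeps the task
// in taskRegistry (subject to the finished-task ring buffer GC) and only removes the
// pod-to-task mapping.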

View File

@@ -0,0 +1,320 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podtask
import (
"testing"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
mesos "github.com/mesos/mesos-go/mesosproto"
"github.com/mesos/mesos-go/mesosutil"
"github.com/stretchr/testify/assert"
)
func TestInMemoryRegistry_RegisterGetUnregister(t *testing.T) {
assert := assert.New(t)
registry := NewInMemoryRegistry()
// it's empty at the beginning
tasks := registry.List(func(t *T) bool { return true })
assert.Empty(tasks)
// add a task
a, _ := fakePodTask("a")
a_clone, err := registry.Register(a, nil)
assert.NoError(err)
assert.Equal(a_clone.ID, a.ID)
assert.Equal(a_clone.podKey, a.podKey)
// add another task
b, _ := fakePodTask("b")
b_clone, err := registry.Register(b, nil)
assert.NoError(err)
assert.Equal(b_clone.ID, b.ID)
assert.Equal(b_clone.podKey, b.podKey)
// find tasks in the registry
tasks = registry.List(func(t *T) bool { return true })
assert.Len(tasks, 2)
assert.Contains(tasks, a_clone)
assert.Contains(tasks, b_clone)
tasks = registry.List(func(t *T) bool { return t.ID == a.ID })
assert.Len(tasks, 1)
assert.Contains(tasks, a_clone)
task, _ := registry.ForPod(a.podKey)
assert.NotNil(task)
assert.Equal(task.ID, a.ID)
task, _ = registry.ForPod(b.podKey)
assert.NotNil(task)
assert.Equal(task.ID, b.ID)
task, _ = registry.ForPod("no-pod-key")
assert.Nil(task)
task, _ = registry.Get(a.ID)
assert.NotNil(task)
assert.Equal(task.ID, a.ID)
task, _ = registry.Get("unknown-task-id")
assert.Nil(task)
// re-add a task
a_clone, err = registry.Register(a, nil)
assert.Error(err)
assert.Nil(a_clone)
// re-add a task with another podKey, but same task id
another_a := a.Clone()
another_a.podKey = "another-pod"
another_a_clone, err := registry.Register(another_a, nil)
assert.Error(err)
assert.Nil(another_a_clone)
// re-add a task with another task ID, but same podKey
another_b := b.Clone()
another_b.ID = "another-task-id"
another_b_clone, err := registry.Register(another_b, nil)
assert.Error(err)
assert.Nil(another_b_clone)
// unregister a task
registry.Unregister(b)
tasks = registry.List(func(t *T) bool { return true })
assert.Len(tasks, 1)
assert.Contains(tasks, a)
// unregister a task not registered
unregistered_task, _ := fakePodTask("unregistered-task")
registry.Unregister(unregistered_task)
}
func fakeStatusUpdate(taskId string, state mesos.TaskState) *mesos.TaskStatus {
status := mesosutil.NewTaskStatus(mesosutil.NewTaskID(taskId), state)
status.Data = []byte("{}") // empty json
masterSource := mesos.TaskStatus_SOURCE_MASTER
status.Source = &masterSource
return status
}
func TestInMemoryRegistry_State(t *testing.T) {
assert := assert.New(t)
registry := NewInMemoryRegistry()
// add a task
a, _ := fakePodTask("a")
a_clone, err := registry.Register(a, nil)
assert.NoError(err)
assert.Equal(a.State, a_clone.State)
// update the status
assert.Equal(a_clone.State, StatePending)
a_clone, state := registry.UpdateStatus(fakeStatusUpdate(a.ID, mesos.TaskState_TASK_RUNNING))
assert.Equal(state, StatePending) // old state
assert.Equal(a_clone.State, StateRunning) // new state
// update unknown task
unknown_clone, state := registry.UpdateStatus(fakeStatusUpdate("unknown-task-id", mesos.TaskState_TASK_RUNNING))
assert.Nil(unknown_clone)
assert.Equal(state, StateUnknown)
}
func TestInMemoryRegistry_Update(t *testing.T) {
assert := assert.New(t)
// create offers registry
ttl := time.Second / 4
config := offers.RegistryConfig{
DeclineOffer: func(offerId string) <-chan error {
return proc.ErrorChan(nil)
},
Compat: func(o *mesos.Offer) bool {
return true
},
TTL: ttl,
LingerTTL: 2 * ttl,
}
storage := offers.CreateRegistry(config)
// Add offer
offerId := mesosutil.NewOfferID("foo")
mesosOffer := &mesos.Offer{Id: offerId}
storage.Add([]*mesos.Offer{mesosOffer})
offer, ok := storage.Get(offerId.GetValue())
assert.True(ok)
// create registry
registry := NewInMemoryRegistry()
a, _ := fakePodTask("a")
registry.Register(a.Clone(), nil) // here clone a because we change it below
// state changes are ignored
a.State = StateRunning
err := registry.Update(a)
assert.NoError(err)
a_clone, _ := registry.Get(a.ID)
assert.Equal(StatePending, a_clone.State)
// offer is updated while pending
a.Offer = offer
err = registry.Update(a)
assert.NoError(err)
a_clone, _ = registry.Get(a.ID)
assert.Equal(offer.Id(), a_clone.Offer.Id())
// spec is updated while pending
a.Spec = Spec{SlaveID: "slave-1"}
err = registry.Update(a)
assert.NoError(err)
a_clone, _ = registry.Get(a.ID)
assert.Equal("slave-1", a_clone.Spec.SlaveID)
// flags are updated while pending
a.Flags[Launched] = struct{}{}
err = registry.Update(a)
assert.NoError(err)
a_clone, _ = registry.Get(a.ID)
_, found_launched := a_clone.Flags[Launched]
assert.True(found_launched)
// flags are updated while running
registry.UpdateStatus(fakeStatusUpdate(a.ID, mesos.TaskState_TASK_RUNNING))
a.Flags[Bound] = struct{}{}
err = registry.Update(a)
assert.NoError(err)
a_clone, _ = registry.Get(a.ID)
_, found_launched = a_clone.Flags[Launched]
assert.True(found_launched)
_, found_bound := a_clone.Flags[Bound]
assert.True(found_bound)
// spec is ignored while running
a.Spec = Spec{SlaveID: "slave-2"}
err = registry.Update(a)
assert.NoError(err)
a_clone, _ = registry.Get(a.ID)
assert.Equal("slave-1", a_clone.Spec.SlaveID)
// error when finished
registry.UpdateStatus(fakeStatusUpdate(a.ID, mesos.TaskState_TASK_FINISHED))
err = registry.Update(a)
assert.Error(err)
// update unknown task
unknown_task, _ := fakePodTask("unknown-task")
err = registry.Update(unknown_task)
assert.Error(err)
// update nil task
err = registry.Update(nil)
assert.Nil(err)
}
type transition struct {
statusUpdate mesos.TaskState
expectedState *StateType
expectPanic bool
}
func NewTransition(statusUpdate mesos.TaskState, expectedState StateType) transition {
return transition{statusUpdate: statusUpdate, expectedState: &expectedState, expectPanic: false}
}
func NewTransitionToDeletedTask(statusUpdate mesos.TaskState) transition {
return transition{statusUpdate: statusUpdate, expectedState: nil, expectPanic: false}
}
func NewTransitionWhichPanics(statusUpdate mesos.TaskState) transition {
return transition{statusUpdate: statusUpdate, expectPanic: true}
}
func testStateTrace(t *testing.T, transitions []transition) *Registry {
assert := assert.New(t)
registry := NewInMemoryRegistry()
a, _ := fakePodTask("a")
a, _ = registry.Register(a, nil)
// initial pending state
assert.Equal(a.State, StatePending)
for _, transition := range transitions {
if transition.expectPanic {
assert.Panics(func() {
registry.UpdateStatus(fakeStatusUpdate(a.ID, transition.statusUpdate))
})
} else {
a, _ = registry.UpdateStatus(fakeStatusUpdate(a.ID, transition.statusUpdate))
if transition.expectedState == nil {
a, _ = registry.Get(a.ID)
assert.Nil(a, "expected task to be deleted from registry after status update to %v", transition.statusUpdate)
} else {
assert.Equal(a.State, *transition.expectedState)
}
}
}
return &registry
}
func TestInMemoryRegistry_TaskLifeCycle(t *testing.T) {
testStateTrace(t, []transition{
NewTransition(mesos.TaskState_TASK_STAGING, StatePending),
NewTransition(mesos.TaskState_TASK_STARTING, StatePending),
NewTransitionWhichPanics(mesos.TaskState_TASK_FINISHED),
NewTransition(mesos.TaskState_TASK_RUNNING, StateRunning),
NewTransition(mesos.TaskState_TASK_RUNNING, StateRunning),
NewTransition(mesos.TaskState_TASK_STARTING, StateRunning),
NewTransition(mesos.TaskState_TASK_FINISHED, StateFinished),
NewTransition(mesos.TaskState_TASK_FINISHED, StateFinished),
NewTransition(mesos.TaskState_TASK_RUNNING, StateFinished),
})
}
func TestInMemoryRegistry_NotFinished(t *testing.T) {
// all these behave the same
notFinishedStates := []mesos.TaskState{
mesos.TaskState_TASK_FAILED,
mesos.TaskState_TASK_KILLED,
mesos.TaskState_TASK_LOST,
}
for _, notFinishedState := range notFinishedStates {
testStateTrace(t, []transition{
NewTransitionToDeletedTask(notFinishedState),
})
testStateTrace(t, []transition{
NewTransition(mesos.TaskState_TASK_RUNNING, StateRunning),
NewTransitionToDeletedTask(notFinishedState),
})
testStateTrace(t, []transition{
NewTransition(mesos.TaskState_TASK_RUNNING, StateRunning),
NewTransition(mesos.TaskState_TASK_FINISHED, StateFinished),
NewTransition(notFinishedState, StateFinished),
})
}
}

View File

@@ -0,0 +1,924 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"fmt"
"io"
"math"
"net/http"
"reflect"
"sync"
"time"
execcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/config"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
offerMetrics "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers/metrics"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
schedcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/metrics"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/uid"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/tools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
mutil "github.com/mesos/mesos-go/mesosutil"
bindings "github.com/mesos/mesos-go/scheduler"
)
type Slave struct {
HostName string
}
func newSlave(hostName string) *Slave {
return &Slave{
HostName: hostName,
}
}
type slaveStorage struct {
sync.Mutex
slaves map[string]*Slave // SlaveID => slave.
}
func newSlaveStorage() *slaveStorage {
return &slaveStorage{
slaves: make(map[string]*Slave),
}
}
// Create a mapping between a slaveId and slave if one does not already exist.
func (self *slaveStorage) checkAndAdd(slaveId, slaveHostname string) {
self.Lock()
defer self.Unlock()
_, exists := self.slaves[slaveId]
if !exists {
self.slaves[slaveId] = newSlave(slaveHostname)
}
}
func (self *slaveStorage) getSlaveIds() []string {
self.Lock()
defer self.Unlock()
slaveIds := make([]string, 0, len(self.slaves))
for slaveID := range self.slaves {
slaveIds = append(slaveIds, slaveID)
}
return slaveIds
}
func (self *slaveStorage) getSlave(slaveId string) (*Slave, bool) {
self.Lock()
defer self.Unlock()
slave, exists := self.slaves[slaveId]
return slave, exists
}
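// Illustrative usage of slaveStorage (not part of the original change; the slave ID
// and hostname below are made up): all access goes through the embedded mutex, so
// callers never touch the slaves map directly.
//
//    store := newSlaveStorage()
//    store.checkAndAdd("20150610-0001-S0", "node-1.example.com")
//    if slave, ok := store.getSlave("20150610-0001-S0"); ok {
//        log.Infoln(slave.HostName) // "node-1.example.com"
//    }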
type PluginInterface interface {
// the apiserver may have a different state for the pod than we do
// so reconcile our records, but only for this one pod
reconcilePod(api.Pod)
// execute the scheduling plugin; should start a goroutine and return immediately
Run(<-chan struct{})
}
// KubernetesScheduler implements:
// 1: A mesos scheduler.
// 2: A kubernetes scheduler plugin.
// 3: A kubernetes pod.Registry.
type KubernetesScheduler struct {
// We use a lock here to avoid races
// between invoking the mesos callback
// and invoking the pod registry interfaces.
// In particular, changes to podtask.T objects are currently guarded by this lock.
*sync.RWMutex
// Config related, write-once
schedcfg *schedcfg.Config
executor *mesos.ExecutorInfo
executorGroup uint64
scheduleFunc PodScheduleFunc
client *client.Client
etcdClient tools.EtcdGetSet
failoverTimeout float64 // in seconds
reconcileInterval int64
// Mesos context.
driver bindings.SchedulerDriver // late initialization
frameworkId *mesos.FrameworkID
masterInfo *mesos.MasterInfo
registered bool
registration chan struct{} // signal chan that closes upon first successful registration
onRegistration sync.Once
offers offers.Registry
slaves *slaveStorage
// unsafe state, needs to be guarded
taskRegistry podtask.Registry
// via deferred init
plugin PluginInterface
reconciler *Reconciler
reconcileCooldown time.Duration
asRegisteredMaster proc.Doer
terminate <-chan struct{} // signal chan, closes when we should kill background tasks
}
type Config struct {
Schedcfg schedcfg.Config
Executor *mesos.ExecutorInfo
ScheduleFunc PodScheduleFunc
Client *client.Client
EtcdClient tools.EtcdGetSet
FailoverTimeout float64
ReconcileInterval int64
ReconcileCooldown time.Duration
}
// New creates a new KubernetesScheduler
func New(config Config) *KubernetesScheduler {
var k *KubernetesScheduler
k = &KubernetesScheduler{
schedcfg: &config.Schedcfg,
RWMutex: new(sync.RWMutex),
executor: config.Executor,
executorGroup: uid.Parse(config.Executor.ExecutorId.GetValue()).Group(),
scheduleFunc: config.ScheduleFunc,
client: config.Client,
etcdClient: config.EtcdClient,
failoverTimeout: config.FailoverTimeout,
reconcileInterval: config.ReconcileInterval,
offers: offers.CreateRegistry(offers.RegistryConfig{
Compat: func(o *mesos.Offer) bool {
// filter the offers: the executor IDs must not identify a kubelet-
// executor with a group that doesn't match ours
for _, eid := range o.GetExecutorIds() {
execuid := uid.Parse(eid.GetValue())
if execuid.Name() == execcfg.DefaultInfoID && execuid.Group() != k.executorGroup {
return false
}
}
return true
},
DeclineOffer: func(id string) <-chan error {
errOnce := proc.NewErrorOnce(k.terminate)
errOuter := k.asRegisteredMaster.Do(func() {
var err error
defer errOnce.Report(err)
offerId := mutil.NewOfferID(id)
filters := &mesos.Filters{}
_, err = k.driver.DeclineOffer(offerId, filters)
})
return errOnce.Send(errOuter).Err()
},
// remember expired offers so that we can tell if a previously scheduled pod still relies on one
LingerTTL: config.Schedcfg.OfferLingerTTL.Duration,
TTL: config.Schedcfg.OfferTTL.Duration,
ListenerDelay: config.Schedcfg.ListenerDelay.Duration,
}),
slaves: newSlaveStorage(),
taskRegistry: podtask.NewInMemoryRegistry(),
reconcileCooldown: config.ReconcileCooldown,
registration: make(chan struct{}),
asRegisteredMaster: proc.DoerFunc(func(proc.Action) <-chan error {
return proc.ErrorChanf("cannot execute action with unregistered scheduler")
}),
}
return k
}
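// Hedged sketch of constructing the scheduler via New (the real call site lives in
// the scheduler service code and may differ; every identifier on the right-hand
// side below is a placeholder): each Config field is copied into the scheduler's
// write-once state, and the offer registry's Compat filter is derived from the
// executor's UID group.
//
//    sched := New(Config{
//        Schedcfg:          schedConfig,    // schedcfg.Config loaded elsewhere
//        Executor:          executorInfo,   // *mesos.ExecutorInfo built elsewhere
//        ScheduleFunc:      scheduleFunc,   // a PodScheduleFunc implementation
//        Client:            kubeClient,
//        EtcdClient:        etcdClient,
//        FailoverTimeout:   300,
//        ReconcileInterval: 300,
//        ReconcileCooldown: 15 * time.Second,
//    })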
func (k *KubernetesScheduler) Init(electedMaster proc.Process, pl PluginInterface, mux *http.ServeMux) error {
log.V(1).Infoln("initializing kubernetes mesos scheduler")
k.asRegisteredMaster = proc.DoerFunc(func(a proc.Action) <-chan error {
if !k.registered {
return proc.ErrorChanf("failed to execute action, scheduler is disconnected")
}
return electedMaster.Do(a)
})
k.terminate = electedMaster.Done()
k.plugin = pl
k.offers.Init(k.terminate)
k.InstallDebugHandlers(mux)
return k.recoverTasks()
}
func (k *KubernetesScheduler) asMaster() proc.Doer {
k.RLock()
defer k.RUnlock()
return k.asRegisteredMaster
}
func (k *KubernetesScheduler) InstallDebugHandlers(mux *http.ServeMux) {
wrappedHandler := func(uri string, h http.Handler) {
mux.HandleFunc(uri, func(w http.ResponseWriter, r *http.Request) {
ch := make(chan struct{})
closer := runtime.Closer(ch)
proc.OnError(k.asMaster().Do(func() {
defer closer()
h.ServeHTTP(w, r)
}), func(err error) {
defer closer()
log.Warningf("failed HTTP request for %s: %v", uri, err)
w.WriteHeader(http.StatusServiceUnavailable)
}, k.terminate)
select {
case <-time.After(k.schedcfg.HttpHandlerTimeout.Duration):
log.Warningf("timed out waiting for request to be processed")
w.WriteHeader(http.StatusServiceUnavailable)
return
case <-ch: // noop
}
})
}
requestReconciliation := func(uri string, requestAction func()) {
wrappedHandler(uri, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
requestAction()
w.WriteHeader(http.StatusNoContent)
}))
}
requestReconciliation("/debug/actions/requestExplicit", k.reconciler.RequestExplicit)
requestReconciliation("/debug/actions/requestImplicit", k.reconciler.RequestImplicit)
wrappedHandler("/debug/actions/kamikaze", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
slaves := k.slaves.getSlaveIds()
for _, slaveId := range slaves {
_, err := k.driver.SendFrameworkMessage(
k.executor.ExecutorId,
mutil.NewSlaveID(slaveId),
messages.Kamikaze)
if err != nil {
log.Warningf("failed to send kamikaze message to slave %s: %v", slaveId, err)
} else {
io.WriteString(w, fmt.Sprintf("kamikaze slave %s\n", slaveId))
}
}
io.WriteString(w, "OK")
}))
}
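// The debug endpoints registered above respond only while this scheduler is the
// elected, registered master; a hedged example of poking them (host and port are
// assumptions, not taken from this change):
//
//    curl http://scheduler-host:10251/debug/actions/requestExplicit   # request explicit reconciliation
//    curl http://scheduler-host:10251/debug/actions/requestImplicit   # request implicit reconciliation
//    curl http://scheduler-host:10251/debug/actions/kamikaze          # ask every known executor to self-terminate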
func (k *KubernetesScheduler) Registration() <-chan struct{} {
return k.registration
}
// Registered is called when the scheduler has successfully registered with the master.
func (k *KubernetesScheduler) Registered(drv bindings.SchedulerDriver, fid *mesos.FrameworkID, mi *mesos.MasterInfo) {
log.Infof("Scheduler registered with the master: %v with frameworkId: %v\n", mi, fid)
k.driver = drv
k.frameworkId = fid
k.masterInfo = mi
k.registered = true
k.onRegistration.Do(func() { k.onInitialRegistration(drv) })
k.reconciler.RequestExplicit()
}
func (k *KubernetesScheduler) storeFrameworkId() {
// TODO(jdef): port FrameworkId store to generic Kubernetes config store as soon as available
_, err := k.etcdClient.Set(meta.FrameworkIDKey, k.frameworkId.GetValue(), uint64(k.failoverTimeout))
if err != nil {
log.Errorf("failed to renew frameworkId TTL: %v", err)
}
}
// Reregistered is called when the scheduler has successfully re-registered with the master.
// This happens when the master fails over.
func (k *KubernetesScheduler) Reregistered(drv bindings.SchedulerDriver, mi *mesos.MasterInfo) {
log.Infof("Scheduler reregistered with the master: %v\n", mi)
k.driver = drv
k.masterInfo = mi
k.registered = true
k.onRegistration.Do(func() { k.onInitialRegistration(drv) })
k.reconciler.RequestExplicit()
}
// perform one-time initialization actions upon the first registration event received from Mesos.
func (k *KubernetesScheduler) onInitialRegistration(driver bindings.SchedulerDriver) {
defer close(k.registration)
if k.failoverTimeout > 0 {
refreshInterval := k.schedcfg.FrameworkIdRefreshInterval.Duration
if k.failoverTimeout < k.schedcfg.FrameworkIdRefreshInterval.Duration.Seconds() {
refreshInterval = time.Duration(math.Max(1, k.failoverTimeout/2)) * time.Second
}
go runtime.Until(k.storeFrameworkId, refreshInterval, k.terminate)
}
r1 := k.makeTaskRegistryReconciler()
r2 := k.makePodRegistryReconciler()
k.reconciler = newReconciler(k.asRegisteredMaster, k.makeCompositeReconciler(r1, r2),
k.reconcileCooldown, k.schedcfg.ExplicitReconciliationAbortTimeout.Duration, k.terminate)
go k.reconciler.Run(driver)
if k.reconcileInterval > 0 {
ri := time.Duration(k.reconcileInterval) * time.Second
time.AfterFunc(k.schedcfg.InitialImplicitReconciliationDelay.Duration, func() { runtime.Until(k.reconciler.RequestImplicit, ri, k.terminate) })
log.Infof("will perform implicit task reconciliation at interval: %v after %v", ri, k.schedcfg.InitialImplicitReconciliationDelay.Duration)
}
}
// Disconnected is called when the scheduler loses connection to the master.
func (k *KubernetesScheduler) Disconnected(driver bindings.SchedulerDriver) {
log.Infof("Master disconnected!\n")
k.registered = false
// discard all cached offers to avoid unnecessary TASK_LOST updates
k.offers.Invalidate("")
}
// ResourceOffers is called when the scheduler receives some offers from the master.
func (k *KubernetesScheduler) ResourceOffers(driver bindings.SchedulerDriver, offers []*mesos.Offer) {
log.V(2).Infof("Received offers %+v", offers)
// Record the offers in the global offer map as well as each slave's offer map.
k.offers.Add(offers)
for _, offer := range offers {
slaveId := offer.GetSlaveId().GetValue()
k.slaves.checkAndAdd(slaveId, offer.GetHostname())
}
}
// OfferRescinded is called when the resources are rescinded from the scheduler.
func (k *KubernetesScheduler) OfferRescinded(driver bindings.SchedulerDriver, offerId *mesos.OfferID) {
log.Infof("Offer rescinded %v\n", offerId)
oid := offerId.GetValue()
k.offers.Delete(oid, offerMetrics.OfferRescinded)
}
// StatusUpdate is called when a status update message is sent to the scheduler.
func (k *KubernetesScheduler) StatusUpdate(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
source, reason := "none", "none"
if taskStatus.Source != nil {
source = (*taskStatus.Source).String()
}
if taskStatus.Reason != nil {
reason = (*taskStatus.Reason).String()
}
taskState := taskStatus.GetState()
metrics.StatusUpdates.WithLabelValues(source, reason, taskState.String()).Inc()
log.Infof(
"task status update %q from %q for task %q on slave %q executor %q for reason %q",
taskState.String(),
source,
taskStatus.TaskId.GetValue(),
taskStatus.SlaveId.GetValue(),
taskStatus.ExecutorId.GetValue(),
reason)
switch taskState {
case mesos.TaskState_TASK_RUNNING, mesos.TaskState_TASK_FINISHED, mesos.TaskState_TASK_STARTING, mesos.TaskState_TASK_STAGING:
if _, state := k.taskRegistry.UpdateStatus(taskStatus); state == podtask.StateUnknown {
if taskState != mesos.TaskState_TASK_FINISHED {
//TODO(jdef) what if I receive this after a TASK_LOST or TASK_KILLED?
//I don't want to reincarnate then.. TASK_LOST is a special case because
//the master is stateless and there are scenarios where I may get TASK_LOST
//followed by TASK_RUNNING.
//TODO(jdef) consider running this asynchronously since there are API server
//calls that may be made
k.reconcileNonTerminalTask(driver, taskStatus)
} // else, we don't really care about FINISHED tasks that aren't registered
return
}
if _, exists := k.slaves.getSlave(taskStatus.GetSlaveId().GetValue()); !exists {
// a registered task has an update reported by a slave that we don't recognize.
// this should never happen! So we don't reconcile it.
log.Errorf("Ignore status %+v because the slave does not exist", taskStatus)
return
}
case mesos.TaskState_TASK_FAILED:
if task, _ := k.taskRegistry.UpdateStatus(taskStatus); task != nil {
if task.Has(podtask.Launched) && !task.Has(podtask.Bound) {
go k.plugin.reconcilePod(task.Pod)
return
}
} else {
// unknown task failed, not much we can do about it
return
}
// last-ditch effort to reconcile our records
fallthrough
case mesos.TaskState_TASK_LOST, mesos.TaskState_TASK_KILLED:
k.reconcileTerminalTask(driver, taskStatus)
}
}
func (k *KubernetesScheduler) reconcileTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
task, state := k.taskRegistry.UpdateStatus(taskStatus)
if (state == podtask.StateRunning || state == podtask.StatePending) && taskStatus.SlaveId != nil &&
((taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER && taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION) ||
(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED) ||
(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED)) {
//--
// pod-task has metadata that refers to:
// (1) a task that Mesos no longer knows about, or else
// (2) a pod that the Kubelet will never report as "failed"
// For now, destroy the pod and hope that there's a replication controller backing it up.
// TODO(jdef) for case #2 don't delete the pod, just update its status to Failed
pod := &task.Pod
log.Warningf("deleting rogue pod %v/%v for lost task %v", pod.Namespace, pod.Name, task.ID)
if err := k.client.Pods(pod.Namespace).Delete(pod.Name, nil); err != nil && !errors.IsNotFound(err) {
log.Errorf("failed to delete pod %v/%v for terminal task %v: %v", pod.Namespace, pod.Name, task.ID, err)
}
} else if taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED || taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED {
// attempt to prevent dangling pods in the pod and task registries
log.V(1).Infof("request explicit reconciliation to clean up for task %v after executor reported (terminated/unregistered)", taskStatus.TaskId.GetValue())
k.reconciler.RequestExplicit()
} else if taskStatus.GetState() == mesos.TaskState_TASK_LOST && state == podtask.StateRunning && taskStatus.ExecutorId != nil && taskStatus.SlaveId != nil {
//TODO(jdef) this may not be meaningful once we have proper checkpointing and master detection
//If we're reconciling and receive this then the executor may be
//running a task that we need it to kill. It's possible that the framework
//is unrecognized by the master at this point, so KillTask is not guaranteed
//to do anything. The underlying driver transport may be able to send a
//FrameworkMessage directly to the slave to terminate the task.
log.V(2).Info("forwarding TASK_LOST message to executor %v on slave %v", taskStatus.ExecutorId, taskStatus.SlaveId)
data := fmt.Sprintf("task-lost:%s", task.ID) //TODO(jdef) use a real message type
if _, err := driver.SendFrameworkMessage(taskStatus.ExecutorId, taskStatus.SlaveId, data); err != nil {
log.Error(err.Error())
}
}
}
// reconcile an unknown (from the perspective of our registry) non-terminal task
func (k *KubernetesScheduler) reconcileNonTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
// attempt to recover task from pod info:
// - task data may contain an api.PodStatusResult; if status.reason == REASON_RECONCILIATION then status.data == nil
// - the Name can be parsed by container.ParseFullName() to yield a pod Name and Namespace
// - pull the pod metadata down from the api server
// - perform task recovery based on pod metadata
taskId := taskStatus.TaskId.GetValue()
if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
// there will be no data in the task status that we can use to determine the associated pod
switch taskStatus.GetState() {
case mesos.TaskState_TASK_STAGING:
// there is still hope for this task, don't kill it just yet
//TODO(jdef) there should probably be a limit for how long we tolerate tasks stuck in this state
return
default:
// for TASK_{STARTING,RUNNING} we should have already attempted recoverTasks().
// if the scheduler failed over before the executor fired TASK_STARTING, then we should *not*
// be processing this reconciliation update before we process the one from the executor.
// point: we don't know what this task is (perhaps there was unrecoverable metadata in the pod),
// so it gets killed.
log.Errorf("killing non-terminal, unrecoverable task %v", taskId)
}
} else if podStatus, err := podtask.ParsePodStatusResult(taskStatus); err != nil {
// possible rogue pod exists at this point because we can't identify it; should kill the task
log.Errorf("possible rogue pod; illegal task status data for task %v, expected an api.PodStatusResult: %v", taskId, err)
} else if name, namespace, err := container.ParsePodFullName(podStatus.Name); err != nil {
// possible rogue pod exists at this point because we can't identify it; should kill the task
log.Errorf("possible rogue pod; illegal api.PodStatusResult, unable to parse full pod name from: '%v' for task %v: %v",
podStatus.Name, taskId, err)
} else if pod, err := k.client.Pods(namespace).Get(name); err == nil {
if t, ok, err := podtask.RecoverFrom(*pod); ok {
log.Infof("recovered task %v from metadata in pod %v/%v", taskId, namespace, name)
_, err := k.taskRegistry.Register(t, nil)
if err != nil {
// someone beat us to it?!
log.Warningf("failed to register recovered task: %v", err)
return
} else {
k.taskRegistry.UpdateStatus(taskStatus)
}
return
} else if err != nil {
//should kill the pod and the task
log.Errorf("killing pod, failed to recover task from pod %v/%v: %v", namespace, name, err)
if err := k.client.Pods(namespace).Delete(name, nil); err != nil {
log.Errorf("failed to delete pod %v/%v: %v", namespace, name, err)
}
} else {
//this is pretty unexpected: we received a TASK_{STARTING,RUNNING} message, but the apiserver's pod
//metadata is not appropriate for task reconstruction -- which should almost certainly never
//be the case unless someone swapped out the pod on us (and kept the same namespace/name) while
//we were failed over.
//kill this task, allow the newly launched scheduler to schedule the new pod
log.Warningf("unexpected pod metadata for task %v in apiserver, assuming new unscheduled pod spec: %+v", taskId, pod)
}
} else if errors.IsNotFound(err) {
// pod lookup failed, should delete the task since the pod is no longer valid; may be redundant, that's ok
log.Infof("killing task %v since pod %v/%v no longer exists", taskId, namespace, name)
} else if errors.IsServerTimeout(err) {
log.V(2).Infof("failed to reconcile task due to API server timeout: %v", err)
return
} else {
log.Errorf("unexpected API server error, aborting reconcile for task %v: %v", taskId, err)
return
}
if _, err := driver.KillTask(taskStatus.TaskId); err != nil {
log.Errorf("failed to kill task %v: %v", taskId, err)
}
}
// FrameworkMessage is called when the scheduler receives a message from the executor.
func (k *KubernetesScheduler) FrameworkMessage(driver bindings.SchedulerDriver,
executorId *mesos.ExecutorID, slaveId *mesos.SlaveID, message string) {
log.Infof("Received messages from executor %v of slave %v, %v\n", executorId, slaveId, message)
}
// SlaveLost is called when some slave is lost.
func (k *KubernetesScheduler) SlaveLost(driver bindings.SchedulerDriver, slaveId *mesos.SlaveID) {
log.Infof("Slave %v is lost\n", slaveId)
sid := slaveId.GetValue()
k.offers.InvalidateForSlave(sid)
// TODO(jdef): delete slave from our internal list? probably not since we may need to reconcile
// tasks. it would be nice to somehow flag the slave as lost so that, perhaps, we can periodically
// flush lost slaves older than X, and for which no tasks or pods reference.
// unfinished tasks/pods will be dropped. use a replication controller if you want pods to
// be restarted when slaves die.
}
// ExecutorLost is called when some executor is lost.
func (k *KubernetesScheduler) ExecutorLost(driver bindings.SchedulerDriver, executorId *mesos.ExecutorID, slaveId *mesos.SlaveID, status int) {
log.Infof("Executor %v of slave %v is lost, status: %v\n", executorId, slaveId, status)
// TODO(yifan): Restart any unfinished tasks of the executor.
}
// Error is called when there is an unrecoverable error in the scheduler or scheduler driver.
// The driver should have been aborted before this is invoked.
func (k *KubernetesScheduler) Error(driver bindings.SchedulerDriver, message string) {
log.Fatalf("fatal scheduler error: %v\n", message)
}
// filter func used for explicit task reconciliation, selects only non-terminal tasks which
// have been communicated to mesos (read: launched).
func explicitTaskFilter(t *podtask.T) bool {
switch t.State {
case podtask.StateRunning:
return true
case podtask.StatePending:
return t.Has(podtask.Launched)
default:
return false
}
}
// invoke the given ReconcilerAction funcs in sequence, aborting the sequence if reconciliation
// is cancelled. if any other errors occur the composite reconciler will attempt to complete the
// sequence, reporting only the last generated error.
func (k *KubernetesScheduler) makeCompositeReconciler(actions ...ReconcilerAction) ReconcilerAction {
if x := len(actions); x == 0 {
// programming error
panic("no actions specified for composite reconciler")
} else if x == 1 {
return actions[0]
}
chained := func(d bindings.SchedulerDriver, c <-chan struct{}, a, b ReconcilerAction) <-chan error {
ech := a(d, c)
ch := make(chan error, 1)
go func() {
select {
case <-k.terminate:
case <-c:
case e := <-ech:
if e != nil {
ch <- e
return
}
ech = b(d, c)
select {
case <-k.terminate:
case <-c:
case e := <-ech:
if e != nil {
ch <- e
return
}
close(ch)
return
}
}
ch <- fmt.Errorf("aborting composite reconciler action")
}()
return ch
}
result := func(d bindings.SchedulerDriver, c <-chan struct{}) <-chan error {
return chained(d, c, actions[0], actions[1])
}
for i := 2; i < len(actions); i++ {
i := i
next := func(d bindings.SchedulerDriver, c <-chan struct{}) <-chan error {
return chained(d, c, ReconcilerAction(result), actions[i])
}
result = next
}
return ReconcilerAction(result)
}
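// Hedged usage sketch for the composite reconciler (r1, r2 and cancel are
// placeholders): the composed action shares one cancel channel, so cancelling
// aborts whichever step is currently running.
//
//    composite := k.makeCompositeReconciler(r1, r2)
//    if err := <-composite(driver, cancel); err != nil {
//        log.Errorf("composite reconciliation failed: %v", err)
//    }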
// reconciler action factory, performs explicit task reconciliation for non-terminal
// tasks listed in the scheduler's internal taskRegistry.
func (k *KubernetesScheduler) makeTaskRegistryReconciler() ReconcilerAction {
return ReconcilerAction(func(drv bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error {
taskToSlave := make(map[string]string)
for _, t := range k.taskRegistry.List(explicitTaskFilter) {
if t.Spec.SlaveID != "" {
taskToSlave[t.ID] = t.Spec.SlaveID
}
}
return proc.ErrorChan(k.explicitlyReconcileTasks(drv, taskToSlave, cancel))
})
}
// reconciler action factory, performs explicit task reconciliation for non-terminal
// tasks identified by annotations in the Kubernetes pod registry.
func (k *KubernetesScheduler) makePodRegistryReconciler() ReconcilerAction {
return ReconcilerAction(func(drv bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error {
ctx := api.NewDefaultContext()
podList, err := k.client.Pods(api.NamespaceValue(ctx)).List(labels.Everything(), fields.Everything())
if err != nil {
return proc.ErrorChanf("failed to reconcile pod registry: %v", err)
}
taskToSlave := make(map[string]string)
for _, pod := range podList.Items {
if len(pod.Annotations) == 0 {
continue
}
taskId, found := pod.Annotations[meta.TaskIdKey]
if !found {
continue
}
slaveId, found := pod.Annotations[meta.SlaveIdKey]
if !found {
continue
}
taskToSlave[taskId] = slaveId
}
return proc.ErrorChan(k.explicitlyReconcileTasks(drv, taskToSlave, cancel))
})
}
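// Note: the pod registry reconciler above only considers pods that carry both the
// meta.TaskIdKey and meta.SlaveIdKey annotations; pods missing either annotation are
// skipped, which is presumably the right behaviour for pods never launched through
// this framework.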
// execute an explicit task reconciliation, as per http://mesos.apache.org/documentation/latest/reconciliation/
func (k *KubernetesScheduler) explicitlyReconcileTasks(driver bindings.SchedulerDriver, taskToSlave map[string]string, cancel <-chan struct{}) error {
log.Info("explicit reconcile tasks")
// tell mesos to send us the latest status updates for all the non-terminal tasks that we know about
statusList := []*mesos.TaskStatus{}
remaining := util.KeySet(reflect.ValueOf(taskToSlave))
for taskId, slaveId := range taskToSlave {
if slaveId == "" {
delete(taskToSlave, taskId)
continue
}
statusList = append(statusList, &mesos.TaskStatus{
TaskId: mutil.NewTaskID(taskId),
SlaveId: mutil.NewSlaveID(slaveId),
State: mesos.TaskState_TASK_RUNNING.Enum(), // req'd field, doesn't have to reflect reality
})
}
select {
case <-cancel:
return reconciliationCancelledErr
default:
if _, err := driver.ReconcileTasks(statusList); err != nil {
return err
}
}
start := time.Now()
first := true
for backoff := 1 * time.Second; first || remaining.Len() > 0; backoff = backoff * 2 {
first = false
// nothing to do here other than wait for status updates..
if backoff > k.schedcfg.ExplicitReconciliationMaxBackoff.Duration {
backoff = k.schedcfg.ExplicitReconciliationMaxBackoff.Duration
}
select {
case <-cancel:
return reconciliationCancelledErr
case <-time.After(backoff):
for taskId := range remaining {
if task, _ := k.taskRegistry.Get(taskId); task != nil && explicitTaskFilter(task) && task.UpdatedTime.Before(start) {
// keep this task in remaining list
continue
}
remaining.Delete(taskId)
}
}
}
return nil
}
var (
reconciliationCancelledErr = fmt.Errorf("explicit task reconciliation cancelled")
)
type ReconcilerAction func(driver bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error
type Reconciler struct {
proc.Doer
Action ReconcilerAction
explicit chan struct{} // send an empty struct to trigger explicit reconciliation
implicit chan struct{} // send an empty struct to trigger implicit reconciliation
done <-chan struct{} // close this when you want the reconciler to exit
cooldown time.Duration
explicitReconciliationAbortTimeout time.Duration
}
func newReconciler(doer proc.Doer, action ReconcilerAction,
cooldown, explicitReconciliationAbortTimeout time.Duration, done <-chan struct{}) *Reconciler {
return &Reconciler{
Doer: doer,
explicit: make(chan struct{}, 1),
implicit: make(chan struct{}, 1),
cooldown: cooldown,
explicitReconciliationAbortTimeout: explicitReconciliationAbortTimeout,
done: done,
Action: func(driver bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error {
// trigger the reconciler action in the doer's execution context,
// but it could take a while and the scheduler needs to be able to
// process updates, the callbacks for which ALSO execute in the SAME
// deferred execution context -- so the action MUST be executed async.
errOnce := proc.NewErrorOnce(cancel)
return errOnce.Send(doer.Do(func() {
// only triggers the action if we're the currently elected,
// registered master and runs the action async.
go func() {
var err <-chan error
defer errOnce.Send(err)
err = action(driver, cancel)
}()
})).Err()
},
}
}
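// Hedged usage sketch, mirroring onInitialRegistration above: the reconciler runs in
// its own goroutine and is fed through the Request* methods, whose buffered channels
// make a second request a no-op while one is already pending.
//
//    r := newReconciler(k.asRegisteredMaster, action, cooldown, abortTimeout, k.terminate)
//    go r.Run(driver)
//    r.RequestExplicit()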
func (r *Reconciler) RequestExplicit() {
select {
case r.explicit <- struct{}{}: // noop
default: // request queue full; noop
}
}
func (r *Reconciler) RequestImplicit() {
select {
case r.implicit <- struct{}{}: // noop
default: // request queue full; noop
}
}
// execute task reconciliation, returns when r.done is closed. intended to run as a goroutine.
// if reconciliation is requested while another is in progress, the in-progress operation will be
// cancelled before the new reconciliation operation begins.
func (r *Reconciler) Run(driver bindings.SchedulerDriver) {
var cancel, finished chan struct{}
requestLoop:
for {
select {
case <-r.done:
return
default: // proceed
}
select {
case <-r.implicit:
metrics.ReconciliationRequested.WithLabelValues("implicit").Inc()
select {
case <-r.done:
return
case <-r.explicit:
break // give preference to a pending request for explicit
default: // continue
// don't run implicit reconciliation while explicit is ongoing
if finished != nil {
select {
case <-finished: // continue w/ implicit
default:
log.Infoln("skipping implicit reconcile because explicit reconcile is ongoing")
continue requestLoop
}
}
errOnce := proc.NewErrorOnce(r.done)
errCh := r.Do(func() {
var err error
defer errOnce.Report(err)
log.Infoln("implicit reconcile tasks")
metrics.ReconciliationExecuted.WithLabelValues("implicit").Inc()
if _, err = driver.ReconcileTasks([]*mesos.TaskStatus{}); err != nil {
log.V(1).Infof("failed to request implicit reconciliation from mesos: %v", err)
}
})
proc.OnError(errOnce.Send(errCh).Err(), func(err error) {
log.Errorf("failed to run implicit reconciliation: %v", err)
}, r.done)
goto slowdown
}
case <-r.done:
return
case <-r.explicit: // continue
metrics.ReconciliationRequested.WithLabelValues("explicit").Inc()
}
if cancel != nil {
close(cancel)
cancel = nil
// play nice and wait for the prior operation to finish, complain
// if it doesn't
select {
case <-r.done:
return
case <-finished: // noop, expected
case <-time.After(r.explicitReconciliationAbortTimeout): // very unexpected
log.Error("reconciler action failed to stop upon cancellation")
}
}
// copy 'finished' to 'fin' here in case we end up with simultaneous go-routines,
// if cancellation takes too long or fails - we don't want to close the same chan
// more than once
cancel = make(chan struct{})
finished = make(chan struct{})
go func(fin chan struct{}) {
startedAt := time.Now()
defer func() {
metrics.ReconciliationLatency.Observe(metrics.InMicroseconds(time.Since(startedAt)))
}()
metrics.ReconciliationExecuted.WithLabelValues("explicit").Inc()
defer close(fin)
err := <-r.Action(driver, cancel)
if err == reconciliationCancelledErr {
metrics.ReconciliationCancelled.WithLabelValues("explicit").Inc()
log.Infoln(err.Error())
} else if err != nil {
log.Errorf("reconciler action failed: %v", err)
}
}(finished)
slowdown:
// don't allow reconciliation to run very frequently, either explicit or implicit
select {
case <-r.done:
return
case <-time.After(r.cooldown): // noop
}
} // for
}
func (ks *KubernetesScheduler) recoverTasks() error {
ctx := api.NewDefaultContext()
podList, err := ks.client.Pods(api.NamespaceValue(ctx)).List(labels.Everything(), fields.Everything())
if err != nil {
log.V(1).Infof("failed to recover pod registry, madness may ensue: %v", err)
return err
}
recoverSlave := func(t *podtask.T) {
slaveId := t.Spec.SlaveID
ks.slaves.checkAndAdd(slaveId, t.Offer.Host())
}
for _, pod := range podList.Items {
if t, ok, err := podtask.RecoverFrom(pod); err != nil {
log.Errorf("failed to recover task from pod, will attempt to delete '%v/%v': %v", pod.Namespace, pod.Name, err)
err := ks.client.Pods(pod.Namespace).Delete(pod.Name, nil)
//TODO(jdef) check for temporary or not-found errors
if err != nil {
log.Errorf("failed to delete pod '%v/%v': %v", pod.Namespace, pod.Name, err)
}
} else if ok {
ks.taskRegistry.Register(t, nil)
recoverSlave(t)
log.Infof("recovered task %v from pod %v/%v", t.ID, pod.Namespace, pod.Name)
}
}
return nil
}

View File

@@ -0,0 +1,350 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"testing"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
schedcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
mesos "github.com/mesos/mesos-go/mesosproto"
util "github.com/mesos/mesos-go/mesosutil"
"github.com/stretchr/testify/assert"
)
// Check that same slave is only added once.
func TestSlaveStorage_checkAndAdd(t *testing.T) {
assert := assert.New(t)
slaveStorage := newSlaveStorage()
assert.Equal(0, len(slaveStorage.slaves))
slaveId := "slave1"
slaveHostname := "slave1Hostname"
slaveStorage.checkAndAdd(slaveId, slaveHostname)
assert.Equal(1, len(slaveStorage.getSlaveIds()))
slaveStorage.checkAndAdd(slaveId, slaveHostname)
assert.Equal(1, len(slaveStorage.getSlaveIds()))
}
// Check that getSlave returns false for a nonexistent slave.
func TestSlaveStorage_getSlave(t *testing.T) {
assert := assert.New(t)
slaveStorage := newSlaveStorage()
assert.Equal(0, len(slaveStorage.slaves))
slaveId := "slave1"
slaveHostname := "slave1Hostname"
_, exists := slaveStorage.getSlave(slaveId)
assert.Equal(false, exists)
slaveStorage.checkAndAdd(slaveId, slaveHostname)
assert.Equal(1, len(slaveStorage.getSlaveIds()))
_, exists = slaveStorage.getSlave(slaveId)
assert.Equal(true, exists)
}
// Check that getSlaveIds returns array with all slaveIds.
func TestSlaveStorage_getSlaveIds(t *testing.T) {
assert := assert.New(t)
slaveStorage := newSlaveStorage()
assert.Equal(0, len(slaveStorage.slaves))
slaveId := "1"
slaveHostname := "hn1"
slaveStorage.checkAndAdd(slaveId, slaveHostname)
assert.Equal(1, len(slaveStorage.getSlaveIds()))
slaveId = "2"
slaveHostname = "hn2"
slaveStorage.checkAndAdd(slaveId, slaveHostname)
assert.Equal(2, len(slaveStorage.getSlaveIds()))
slaveIds := slaveStorage.getSlaveIds()
slaveIdsMap := make(map[string]bool, len(slaveIds))
for _, s := range slaveIds {
slaveIdsMap[s] = true
}
_, ok := slaveIdsMap["1"]
assert.Equal(ok, true)
_, ok = slaveIdsMap["2"]
assert.Equal(ok, true)
}
//get number of non-expired offers from offer registry
func getNumberOffers(os offers.Registry) int {
//walk offers and count those still stored in the registry
walked := 0
walker1 := func(p offers.Perishable) (bool, error) {
walked++
return false, nil
}
os.Walk(walker1)
return walked
}
//test adding of a resource offer; it should be added to the offer registry and slaves
func TestResourceOffer_Add(t *testing.T) {
assert := assert.New(t)
testScheduler := &KubernetesScheduler{
offers: offers.CreateRegistry(offers.RegistryConfig{
Compat: func(o *mesos.Offer) bool {
return true
},
DeclineOffer: func(offerId string) <-chan error {
return proc.ErrorChan(nil)
},
// remember expired offers so that we can tell if a previously scheduled pod still relies on one
LingerTTL: schedcfg.DefaultOfferLingerTTL,
TTL: schedcfg.DefaultOfferTTL,
ListenerDelay: schedcfg.DefaultListenerDelay,
}),
slaves: newSlaveStorage(),
}
hostname := "h1"
offerID1 := util.NewOfferID("test1")
offer1 := &mesos.Offer{Id: offerID1, Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
offers1 := []*mesos.Offer{offer1}
testScheduler.ResourceOffers(nil, offers1)
assert.Equal(1, getNumberOffers(testScheduler.offers))
//check slave hostname
assert.Equal(1, len(testScheduler.slaves.getSlaveIds()))
//add another offer
hostname2 := "h2"
offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)}
offers2 := []*mesos.Offer{offer2}
testScheduler.ResourceOffers(nil, offers2)
//check it is stored in registry
assert.Equal(2, getNumberOffers(testScheduler.offers))
//check slave hostnames
assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))
}
//test adding and rescinding of resource offers; they should be added to the offer registry and slaves, then removed when rescinded
func TestResourceOffer_Add_Rescind(t *testing.T) {
assert := assert.New(t)
testScheduler := &KubernetesScheduler{
offers: offers.CreateRegistry(offers.RegistryConfig{
Compat: func(o *mesos.Offer) bool {
return true
},
DeclineOffer: func(offerId string) <-chan error {
return proc.ErrorChan(nil)
},
// remember expired offers so that we can tell if a previously scheduled pod still relies on one
LingerTTL: schedcfg.DefaultOfferLingerTTL,
TTL: schedcfg.DefaultOfferTTL,
ListenerDelay: schedcfg.DefaultListenerDelay,
}),
slaves: newSlaveStorage(),
}
hostname := "h1"
offerID1 := util.NewOfferID("test1")
offer1 := &mesos.Offer{Id: offerID1, Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
offers1 := []*mesos.Offer{offer1}
testScheduler.ResourceOffers(nil, offers1)
assert.Equal(1, getNumberOffers(testScheduler.offers))
//check slave hostname
assert.Equal(1, len(testScheduler.slaves.getSlaveIds()))
//add another offer
hostname2 := "h2"
offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)}
offers2 := []*mesos.Offer{offer2}
testScheduler.ResourceOffers(nil, offers2)
assert.Equal(2, getNumberOffers(testScheduler.offers))
//check slave hostnames
assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))
//next, check whether offers can be rescinded
testScheduler.OfferRescinded(nil, offerID1)
assert.Equal(1, getNumberOffers(testScheduler.offers))
//next, check whether the remaining offer can be rescinded
testScheduler.OfferRescinded(nil, util.NewOfferID("test2"))
//walk offers again and check they are removed from the registry
assert.Equal(0, getNumberOffers(testScheduler.offers))
//remove a non-existing ID
testScheduler.OfferRescinded(nil, util.NewOfferID("notExist"))
}
//test that when a slave is lost we remove all offers
func TestSlave_Lost(t *testing.T) {
assert := assert.New(t)
//
testScheduler := &KubernetesScheduler{
offers: offers.CreateRegistry(offers.RegistryConfig{
Compat: func(o *mesos.Offer) bool {
return true
},
// remember expired offers so that we can tell if a previously scheduled pod still relies on one
LingerTTL: schedcfg.DefaultOfferLingerTTL,
TTL: schedcfg.DefaultOfferTTL,
ListenerDelay: schedcfg.DefaultListenerDelay,
}),
slaves: newSlaveStorage(),
}
hostname := "h1"
offer1 := &mesos.Offer{Id: util.NewOfferID("test1"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
offers1 := []*mesos.Offer{offer1}
testScheduler.ResourceOffers(nil, offers1)
offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
offers2 := []*mesos.Offer{offer2}
testScheduler.ResourceOffers(nil, offers2)
//add another offer from different slaveID
hostname2 := "h2"
offer3 := &mesos.Offer{Id: util.NewOfferID("test3"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)}
offers3 := []*mesos.Offer{offer3}
testScheduler.ResourceOffers(nil, offers3)
//test precondition
assert.Equal(3, getNumberOffers(testScheduler.offers))
assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))
//remove first slave
testScheduler.SlaveLost(nil, util.NewSlaveID(hostname))
//offers should be removed
assert.Equal(1, getNumberOffers(testScheduler.offers))
//slave hostnames should still be all present
assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))
//remove second slave
testScheduler.SlaveLost(nil, util.NewSlaveID(hostname2))
//offers should be removed
assert.Equal(0, getNumberOffers(testScheduler.offers))
//slave hostnames should still be all present
assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))
//try to remove a non-existing slave
testScheduler.SlaveLost(nil, util.NewSlaveID("notExist"))
}
//test that when we lose the connection to the master we invalidate all cached offers
func TestDisconnect(t *testing.T) {
assert := assert.New(t)
//
testScheduler := &KubernetesScheduler{
offers: offers.CreateRegistry(offers.RegistryConfig{
Compat: func(o *mesos.Offer) bool {
return true
},
// remember expired offers so that we can tell if a previously scheduled pod still relies on one
LingerTTL: schedcfg.DefaultOfferLingerTTL,
TTL: schedcfg.DefaultOfferTTL,
ListenerDelay: schedcfg.DefaultListenerDelay,
}),
slaves: newSlaveStorage(),
}
hostname := "h1"
offer1 := &mesos.Offer{Id: util.NewOfferID("test1"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
offers1 := []*mesos.Offer{offer1}
testScheduler.ResourceOffers(nil, offers1)
offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
offers2 := []*mesos.Offer{offer2}
testScheduler.ResourceOffers(nil, offers2)
//add another offer from different slaveID
hostname2 := "h2"
offer3 := &mesos.Offer{Id: util.NewOfferID("test3"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)}
offers3 := []*mesos.Offer{offer3}
testScheduler.ResourceOffers(nil, offers3)
//disconnect
testScheduler.Disconnected(nil)
//all offers should be removed
assert.Equal(0, getNumberOffers(testScheduler.offers))
//slave hostnames should still be all present
assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))
}
//test that we can handle different status updates; TODO: check state transitions
func TestStatus_Update(t *testing.T) {
mockdriver := MockSchedulerDriver{}
// setup expectations
mockdriver.On("KillTask", util.NewTaskID("test-task-001")).Return(mesos.Status_DRIVER_RUNNING, nil)
testScheduler := &KubernetesScheduler{
offers: offers.CreateRegistry(offers.RegistryConfig{
Compat: func(o *mesos.Offer) bool {
return true
},
// remember expired offers so that we can tell if a previously scheduled pod still relies on one
LingerTTL: schedcfg.DefaultOfferLingerTTL,
TTL: schedcfg.DefaultOfferTTL,
ListenerDelay: schedcfg.DefaultListenerDelay,
}),
slaves: newSlaveStorage(),
driver: &mockdriver,
taskRegistry: podtask.NewInMemoryRegistry(),
}
taskStatus_task_starting := util.NewTaskStatus(
util.NewTaskID("test-task-001"),
mesos.TaskState_TASK_RUNNING,
)
testScheduler.StatusUpdate(testScheduler.driver, taskStatus_task_starting)
taskStatus_task_running := util.NewTaskStatus(
util.NewTaskID("test-task-001"),
mesos.TaskState_TASK_RUNNING,
)
testScheduler.StatusUpdate(testScheduler.driver, taskStatus_task_running)
taskStatus_task_failed := util.NewTaskStatus(
util.NewTaskID("test-task-001"),
mesos.TaskState_TASK_FAILED,
)
testScheduler.StatusUpdate(testScheduler.driver, taskStatus_task_failed)
//assert that mock was invoked
mockdriver.AssertExpectations(t)
}

View File

@@ -0,0 +1,32 @@
// +build unit_test
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"os"
"syscall"
)
func makeFailoverSigChan() <-chan os.Signal {
return nil
}
func makeDisownedProcAttr() *syscall.SysProcAttr {
return nil
}
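// The unit_test build tag above selects these no-op stubs in place of the
// platform-specific implementations, presumably so unit tests run without touching
// process signals; an assumed invocation would look like:
//
//    go test -tags unit_test ./contrib/mesos/pkg/scheduler/service/...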

View File

@@ -0,0 +1,38 @@
// +build darwin dragonfly freebsd linux netbsd openbsd
// +build !unit_test
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"os"
"os/signal"
"syscall"
)
func makeFailoverSigChan() <-chan os.Signal {
ch := make(chan os.Signal, 1)
signal.Notify(ch, syscall.SIGUSR1)
return ch
}
func makeDisownedProcAttr() *syscall.SysProcAttr {
return &syscall.SysProcAttr{
Setpgid: true, // disown the spawned scheduler
}
}

View File

@@ -0,0 +1,51 @@
// +build windows
// +build !unit_test
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"os"
"syscall"
)
func makeFailoverSigChan() <-chan os.Signal {
/* TODO(jdef)
from go's windows compatibility test, it looks like we need to provide a filtered
signal channel here
c := make(chan os.Signal, 10)
signal.Notify(c)
select {
case s := <-c:
if s != os.Interrupt {
log.Fatalf("Wrong signal received: got %q, want %q\n", s, os.Interrupt)
}
case <-time.After(3 * time.Second):
log.Fatalf("Timeout waiting for Ctrl+Break\n")
}
*/
return nil
}
func makeDisownedProcAttr() *syscall.SysProcAttr {
//TODO(jdef) test this somehow?!?!
return &syscall.SysProcAttr{
CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP | syscall.CREATE_UNICODE_ENVIRONMENT,
}
}

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package service contains the cmd/k8sm-scheduler glue code
package service

View File

@@ -0,0 +1,121 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"net"
"reflect"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
"github.com/GoogleCloudPlatform/kubernetes/pkg/master/ports"
"github.com/golang/glog"
)
const (
SCHEDULER_SERVICE_NAME = "k8sm-scheduler"
)
func (m *SchedulerServer) newServiceWriter(stop <-chan struct{}) func() {
return func() {
for {
// Update service & endpoint records.
// TODO(k8s): when it becomes possible to change this stuff,
// stop polling and start watching.
if err := m.createSchedulerServiceIfNeeded(SCHEDULER_SERVICE_NAME, ports.SchedulerPort); err != nil {
glog.Errorf("Can't create scheduler service: %v", err)
}
if err := m.setEndpoints(SCHEDULER_SERVICE_NAME, net.IP(m.Address), m.Port); err != nil {
glog.Errorf("Can't create scheduler endpoints: %v", err)
}
select {
case <-stop:
return
case <-time.After(10 * time.Second):
}
}
}
}
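// newServiceWriter returns a closure instead of spawning its own goroutine; a
// hedged sketch of the expected call site (the real wiring lives in the scheduler
// bootstrap code and may differ):
//
//    stop := make(chan struct{})
//    go m.newServiceWriter(stop)()
//    // ... on shutdown:
//    close(stop)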
// createSchedulerServiceIfNeeded will create the specified service if it
// doesn't already exist.
func (m *SchedulerServer) createSchedulerServiceIfNeeded(serviceName string, servicePort int) error {
ctx := api.NewDefaultContext()
if _, err := m.client.Services(api.NamespaceValue(ctx)).Get(serviceName); err == nil {
// The service already exists.
return nil
}
svc := &api.Service{
ObjectMeta: api.ObjectMeta{
Name: serviceName,
Namespace: api.NamespaceDefault,
Labels: map[string]string{"provider": "k8sm", "component": "scheduler"},
},
Spec: api.ServiceSpec{
Ports: []api.ServicePort{{Port: servicePort, Protocol: api.ProtocolTCP}},
// maintained by this code, not by the pod selector
Selector: nil,
SessionAffinity: api.ServiceAffinityNone,
},
}
if m.ServiceAddress != nil {
svc.Spec.ClusterIP = m.ServiceAddress.String()
}
_, err := m.client.Services(api.NamespaceValue(ctx)).Create(svc)
if err != nil && errors.IsAlreadyExists(err) {
err = nil
}
return err
}
// setEndpoints sets the endpoints for the given service.
// in a multi-master scenario only the elected master will be publishing an endpoint.
// see SchedulerServer.bootstrap.
func (m *SchedulerServer) setEndpoints(serviceName string, ip net.IP, port int) error {
// The setting we want to find.
want := []api.EndpointSubset{{
Addresses: []api.EndpointAddress{{IP: ip.String()}},
Ports: []api.EndpointPort{{Port: port, Protocol: api.ProtocolTCP}},
}}
ctx := api.NewDefaultContext()
e, err := m.client.Endpoints(api.NamespaceValue(ctx)).Get(serviceName)
createOrUpdate := m.client.Endpoints(api.NamespaceValue(ctx)).Update
if err != nil {
if errors.IsNotFound(err) {
createOrUpdate = m.client.Endpoints(api.NamespaceValue(ctx)).Create
}
e = &api.Endpoints{
ObjectMeta: api.ObjectMeta{
Name: serviceName,
Namespace: api.NamespaceDefault,
},
}
}
if !reflect.DeepEqual(e.Subsets, want) {
e.Subsets = want
glog.Infof("setting endpoints for master service %q to %#v", serviceName, e)
_, err = createOrUpdate(e)
return err
}
// We didn't make any changes, no need to actually call update.
return nil
}

View File

@@ -0,0 +1,751 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"bufio"
"errors"
"fmt"
"io/ioutil"
"net"
"net/http"
"os"
"os/exec"
"os/user"
"strconv"
"strings"
"sync"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/election"
execcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/config"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/profile"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler"
schedcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/ha"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/metrics"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/uid"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/clientauth"
"github.com/GoogleCloudPlatform/kubernetes/pkg/master/ports"
"github.com/GoogleCloudPlatform/kubernetes/pkg/tools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/coreos/go-etcd/etcd"
"github.com/gogo/protobuf/proto"
log "github.com/golang/glog"
"github.com/kardianos/osext"
"github.com/mesos/mesos-go/auth"
"github.com/mesos/mesos-go/auth/sasl"
"github.com/mesos/mesos-go/auth/sasl/mech"
mesos "github.com/mesos/mesos-go/mesosproto"
mutil "github.com/mesos/mesos-go/mesosutil"
bindings "github.com/mesos/mesos-go/scheduler"
"github.com/prometheus/client_golang/prometheus"
"github.com/spf13/pflag"
"golang.org/x/net/context"
)
const (
defaultMesosMaster = "localhost:5050"
defaultMesosUser = "root" // should have privs to execute docker and iptables commands
defaultReconcileInterval = 300 // 5m default task reconciliation interval
defaultReconcileCooldown = 15 * time.Second
defaultFrameworkName = "Kubernetes"
)
type SchedulerServer struct {
Port int
Address util.IP
EnableProfiling bool
AuthPath string
APIServerList util.StringList
EtcdServerList util.StringList
EtcdConfigFile string
AllowPrivileged bool
ExecutorPath string
ProxyPath string
MesosMaster string
MesosUser string
MesosRole string
MesosAuthPrincipal string
MesosAuthSecretFile string
Checkpoint bool
FailoverTimeout float64
ExecutorBindall bool
ExecutorRunProxy bool
ExecutorProxyBindall bool
ExecutorLogV int
ExecutorSuicideTimeout time.Duration
MesosAuthProvider string
DriverPort uint
HostnameOverride string
ReconcileInterval int64
ReconcileCooldown time.Duration
SchedulerConfigFileName string
Graceful bool
FrameworkName string
FrameworkWebURI string
HA bool
AdvertisedAddress string
ServiceAddress util.IP
HADomain string
KMPath string
ClusterDNS util.IP
ClusterDomain string
KubeletRootDirectory string
KubeletDockerEndpoint string
KubeletPodInfraContainerImage string
KubeletCadvisorPort uint
KubeletHostNetworkSources string
KubeletSyncFrequency time.Duration
KubeletNetworkPluginName string
executable string // path to the binary running this service
client *client.Client
driver bindings.SchedulerDriver
driverMutex sync.RWMutex
mux *http.ServeMux
}
// useful for unit testing specific funcs
type schedulerProcessInterface interface {
End() <-chan struct{}
Failover() <-chan struct{}
Terminal() <-chan struct{}
}
// NewSchedulerServer creates a new SchedulerServer with default parameters
func NewSchedulerServer() *SchedulerServer {
s := SchedulerServer{
Port: ports.SchedulerPort,
Address: util.IP(net.ParseIP("127.0.0.1")),
FailoverTimeout: time.Duration((1 << 62) - 1).Seconds(),
ExecutorRunProxy: true,
ExecutorSuicideTimeout: execcfg.DefaultSuicideTimeout,
MesosAuthProvider: sasl.ProviderName,
MesosMaster: defaultMesosMaster,
MesosUser: defaultMesosUser,
ReconcileInterval: defaultReconcileInterval,
ReconcileCooldown: defaultReconcileCooldown,
Checkpoint: true,
FrameworkName: defaultFrameworkName,
HA: false,
mux: http.NewServeMux(),
KubeletCadvisorPort: 4194, // copied from github.com/GoogleCloudPlatform/kubernetes/blob/release-0.14/cmd/kubelet/app/server.go
KubeletSyncFrequency: 10 * time.Second,
}
// cache this for later use. also useful in case the original binary gets deleted, e.g.
// during upgrades, development deployments, etc.
if filename, err := osext.Executable(); err != nil {
log.Fatalf("failed to determine path to currently running executable: %v", err)
} else {
s.executable = filename
s.KMPath = filename
}
return &s
}
func (s *SchedulerServer) addCoreFlags(fs *pflag.FlagSet) {
fs.IntVar(&s.Port, "port", s.Port, "The port that the scheduler's http service runs on")
fs.Var(&s.Address, "address", "The IP address to serve on (set to 0.0.0.0 for all interfaces)")
fs.BoolVar(&s.EnableProfiling, "profiling", s.EnableProfiling, "Enable profiling via web interface host:port/debug/pprof/")
fs.Var(&s.APIServerList, "api-servers", "List of Kubernetes API servers for publishing events, and reading pods and services. (ip:port), comma separated.")
fs.StringVar(&s.AuthPath, "auth-path", s.AuthPath, "Path to .kubernetes_auth file, specifying how to authenticate to API server.")
fs.Var(&s.EtcdServerList, "etcd-servers", "List of etcd servers to watch (http://ip:port), comma separated. Mutually exclusive with --etcd-config")
fs.StringVar(&s.EtcdConfigFile, "etcd-config", s.EtcdConfigFile, "The config file for the etcd client. Mutually exclusive with --etcd-servers.")
fs.BoolVar(&s.AllowPrivileged, "allow-privileged", s.AllowPrivileged, "If true, allow privileged containers.")
fs.StringVar(&s.ClusterDomain, "cluster-domain", s.ClusterDomain, "Domain for this cluster. If set, kubelet will configure all containers to search this domain in addition to the host's search domains")
fs.Var(&s.ClusterDNS, "cluster-dns", "IP address for a cluster DNS server. If set, kubelet will configure all containers to use this for DNS resolution in addition to the host's DNS servers")
fs.StringVar(&s.MesosMaster, "mesos-master", s.MesosMaster, "Location of the Mesos master. The format is a comma-delimited list of hosts like zk://host1:port,host2:port/mesos. If using ZooKeeper, pay particular attention to the leading zk:// and trailing /mesos! If not using ZooKeeper, standard URLs like http://localhost are also acceptable.")
fs.StringVar(&s.MesosUser, "mesos-user", s.MesosUser, "Mesos user for this framework, defaults to root.")
fs.StringVar(&s.MesosRole, "mesos-role", s.MesosRole, "Mesos role for this framework, defaults to none.")
fs.StringVar(&s.MesosAuthPrincipal, "mesos-authentication-principal", s.MesosAuthPrincipal, "Mesos authentication principal.")
fs.StringVar(&s.MesosAuthSecretFile, "mesos-authentication-secret-file", s.MesosAuthSecretFile, "Mesos authentication secret file.")
fs.StringVar(&s.MesosAuthProvider, "mesos-authentication-provider", s.MesosAuthProvider, fmt.Sprintf("Authentication provider to use, default is SASL that supports mechanisms: %+v", mech.ListSupported()))
fs.BoolVar(&s.Checkpoint, "checkpoint", s.Checkpoint, "Enable/disable checkpointing for the kubernetes-mesos framework.")
fs.Float64Var(&s.FailoverTimeout, "failover-timeout", s.FailoverTimeout, "Framework failover timeout, in sec.")
fs.UintVar(&s.DriverPort, "driver-port", s.DriverPort, "Port that the Mesos scheduler driver process should listen on.")
fs.StringVar(&s.HostnameOverride, "hostname-override", s.HostnameOverride, "If non-empty, will use this string as identification instead of the actual hostname.")
fs.Int64Var(&s.ReconcileInterval, "reconcile-interval", s.ReconcileInterval, "Interval at which to execute task reconciliation, in sec. Zero disables.")
fs.DurationVar(&s.ReconcileCooldown, "reconcile-cooldown", s.ReconcileCooldown, "Minimum rest period between task reconciliation operations.")
fs.StringVar(&s.SchedulerConfigFileName, "scheduler-config", s.SchedulerConfigFileName, "An ini-style configuration file with low-level scheduler settings.")
fs.BoolVar(&s.Graceful, "graceful", s.Graceful, "Indicator of a graceful failover, intended for internal use only.")
fs.BoolVar(&s.HA, "ha", s.HA, "Run the scheduler in high availability mode with leader election. All peers should be configured exactly the same.")
fs.StringVar(&s.FrameworkName, "framework-name", s.FrameworkName, "The framework name to register with Mesos.")
fs.StringVar(&s.FrameworkWebURI, "framework-weburi", s.FrameworkWebURI, "A URI that points to a web-based interface for interacting with the framework.")
fs.StringVar(&s.AdvertisedAddress, "advertised-address", s.AdvertisedAddress, "host:port address that is advertised to clients. May be used to construct artifact download URIs.")
fs.Var(&s.ServiceAddress, "service-address", "The service portal IP address that the scheduler should register with (if unset, chooses randomly)")
fs.BoolVar(&s.ExecutorBindall, "executor-bindall", s.ExecutorBindall, "When true, set -address of the executor to 0.0.0.0.")
fs.IntVar(&s.ExecutorLogV, "executor-logv", s.ExecutorLogV, "Logging verbosity of spawned executor processes.")
fs.BoolVar(&s.ExecutorProxyBindall, "executor-proxy-bindall", s.ExecutorProxyBindall, "When true, pass -proxy-bindall to the executor.")
fs.BoolVar(&s.ExecutorRunProxy, "executor-run-proxy", s.ExecutorRunProxy, "Run the kube-proxy as a child process of the executor.")
fs.DurationVar(&s.ExecutorSuicideTimeout, "executor-suicide-timeout", s.ExecutorSuicideTimeout, "Executor self-terminates after this period of inactivity. Zero disables suicide watch.")
fs.StringVar(&s.KubeletRootDirectory, "kubelet-root-dir", s.KubeletRootDirectory, "Directory path for managing kubelet files (volume mounts, etc.). Defaults to the executor sandbox.")
fs.StringVar(&s.KubeletDockerEndpoint, "kubelet-docker-endpoint", s.KubeletDockerEndpoint, "If non-empty, kubelet will use this for the docker endpoint to communicate with.")
fs.StringVar(&s.KubeletPodInfraContainerImage, "kubelet-pod-infra-container-image", s.KubeletPodInfraContainerImage, "The image whose network/ipc namespaces containers in each pod will use.")
fs.UintVar(&s.KubeletCadvisorPort, "kubelet-cadvisor-port", s.KubeletCadvisorPort, "The port of the kubelet's local cAdvisor endpoint")
fs.StringVar(&s.KubeletHostNetworkSources, "kubelet-host-network-sources", s.KubeletHostNetworkSources, "Comma-separated list of sources from which the Kubelet allows pods to use the host network. For all sources use \"*\" [default=\"file\"]")
fs.DurationVar(&s.KubeletSyncFrequency, "kubelet-sync-frequency", s.KubeletSyncFrequency, "Max period between synchronizing running containers and config")
fs.StringVar(&s.KubeletNetworkPluginName, "kubelet-network-plugin", s.KubeletNetworkPluginName, "<Warning: Alpha feature> The name of the network plugin to be invoked for various events in kubelet/pod lifecycle")
//TODO(jdef) support this flag once we have a better handle on mesos-dns and k8s DNS integration
//fs.StringVar(&s.HADomain, "ha-domain", s.HADomain, "Domain of the HA scheduler service, only used in HA mode. If specified may be used to construct artifact download URIs.")
}
func (s *SchedulerServer) AddStandaloneFlags(fs *pflag.FlagSet) {
s.addCoreFlags(fs)
fs.StringVar(&s.ExecutorPath, "executor-path", s.ExecutorPath, "Location of the kubernetes executor executable")
fs.StringVar(&s.ProxyPath, "proxy-path", s.ProxyPath, "Location of the kubernetes proxy executable")
}
func (s *SchedulerServer) AddHyperkubeFlags(fs *pflag.FlagSet) {
s.addCoreFlags(fs)
fs.StringVar(&s.KMPath, "km-path", s.KMPath, "Location of the km executable, may be a URI or an absolute file path.")
}
// returns (downloadURI, basename(path))
func (s *SchedulerServer) serveFrameworkArtifact(path string) (string, string) {
serveFile := func(pattern string, filename string) {
s.mux.HandleFunc(pattern, func(w http.ResponseWriter, r *http.Request) {
http.ServeFile(w, r, filename)
})
}
// Create base path (http://foobar:5000/<base>)
pathSplit := strings.Split(path, "/")
var base string
if len(pathSplit) > 0 {
base = pathSplit[len(pathSplit)-1]
} else {
base = path
}
serveFile("/"+base, path)
hostURI := ""
if s.AdvertisedAddress != "" {
hostURI = fmt.Sprintf("http://%s/%s", s.AdvertisedAddress, base)
} else if s.HA && s.HADomain != "" {
hostURI = fmt.Sprintf("http://%s.%s:%d/%s", SCHEDULER_SERVICE_NAME, s.HADomain, ports.SchedulerPort, base)
} else {
hostURI = fmt.Sprintf("http://%s:%d/%s", s.Address.String(), s.Port, base)
}
log.V(2).Infof("Hosting artifact '%s' at '%s'", path, hostURI)
return hostURI, base
}
func (s *SchedulerServer) prepareExecutorInfo(hks hyperkube.Interface) (*mesos.ExecutorInfo, *uid.UID, error) {
ci := &mesos.CommandInfo{
Shell: proto.Bool(false),
}
//TODO(jdef) these should be shared constants with km
const (
KM_EXECUTOR = "executor"
KM_PROXY = "proxy"
)
if s.ExecutorPath != "" {
uri, executorCmd := s.serveFrameworkArtifact(s.ExecutorPath)
ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)})
ci.Value = proto.String(fmt.Sprintf("./%s", executorCmd))
} else if !hks.FindServer(KM_EXECUTOR) {
return nil, nil, fmt.Errorf("either run this scheduler via km or else --executor-path is required")
} else {
if strings.Index(s.KMPath, "://") > 0 {
// URI could point directly to executable, e.g. hdfs:///km
// or else indirectly, e.g. http://acmestorage/tarball.tgz
// so we assume that in this case the command will always be "km"
ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(s.KMPath), Executable: proto.Bool(true)})
ci.Value = proto.String("./km") // TODO(jdef) extract constant
} else if s.KMPath != "" {
uri, kmCmd := s.serveFrameworkArtifact(s.KMPath)
ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)})
ci.Value = proto.String(fmt.Sprintf("./%s", kmCmd))
} else {
uri, kmCmd := s.serveFrameworkArtifact(s.executable)
ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)})
ci.Value = proto.String(fmt.Sprintf("./%s", kmCmd))
}
ci.Arguments = append(ci.Arguments, KM_EXECUTOR)
}
if s.ProxyPath != "" {
uri, proxyCmd := s.serveFrameworkArtifact(s.ProxyPath)
ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)})
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--proxy-exec=./%s", proxyCmd))
} else if !hks.FindServer(KM_PROXY) {
return nil, nil, fmt.Errorf("either run this scheduler via km or else --proxy-path is required")
} else if s.ExecutorPath != "" {
return nil, nil, fmt.Errorf("proxy can only use km binary if executor does the same")
} // else, executor is smart enough to know when proxy-path is required, or to use km
//TODO(jdef): provide some way (env var?) for users to customize executor config
//TODO(jdef): set -address to 127.0.0.1 if `address` is 127.0.0.1
//TODO(jdef): propagate dockercfg from RootDirectory?
apiServerArgs := strings.Join(s.APIServerList, ",")
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--api-servers=%s", apiServerArgs))
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--v=%d", s.ExecutorLogV))
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--allow-privileged=%t", s.AllowPrivileged))
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--suicide-timeout=%v", s.ExecutorSuicideTimeout))
if s.ExecutorBindall {
//TODO(jdef) determine whether hostname-override is really needed for bindall because
//it conflicts with kubelet node status checks/updates
//ci.Arguments = append(ci.Arguments, "--hostname-override=0.0.0.0")
ci.Arguments = append(ci.Arguments, "--address=0.0.0.0")
}
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--proxy-bindall=%v", s.ExecutorProxyBindall))
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--run-proxy=%v", s.ExecutorRunProxy))
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--cadvisor-port=%v", s.KubeletCadvisorPort))
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--sync-frequency=%v", s.KubeletSyncFrequency))
if s.AuthPath != "" {
//TODO(jdef) should probably support non-local files, e.g. hdfs:///some/config/file
uri, basename := s.serveFrameworkArtifact(s.AuthPath)
ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri)})
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--auth-path=%s", basename))
}
appendOptional := func(name string, value string) {
if value != "" {
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--%s=%s", name, value))
}
}
if s.ClusterDNS != nil {
appendOptional("cluster-dns", s.ClusterDNS.String())
}
appendOptional("cluster-domain", s.ClusterDomain)
appendOptional("root-dir", s.KubeletRootDirectory)
appendOptional("docker-endpoint", s.KubeletDockerEndpoint)
appendOptional("pod-infra-container-image", s.KubeletPodInfraContainerImage)
appendOptional("host-network-sources", s.KubeletHostNetworkSources)
appendOptional("network-plugin", s.KubeletNetworkPluginName)
log.V(1).Infof("prepared executor command %q with args '%+v'", ci.GetValue(), ci.Arguments)
// Create mesos scheduler driver.
info := &mesos.ExecutorInfo{
Command: ci,
Name: proto.String(execcfg.DefaultInfoName),
Source: proto.String(execcfg.DefaultInfoSource),
}
// calculate ExecutorInfo hash to be used for validating compatibility
// of ExecutorInfo's generated by other HA schedulers.
ehash := hashExecutorInfo(info)
eid := uid.New(ehash, execcfg.DefaultInfoID)
info.ExecutorId = &mesos.ExecutorID{Value: proto.String(eid.String())}
return info, eid, nil
}
// TODO(jdef): hacked from kubelet/server/server.go
// TODO(k8s): replace this with clientcmd
func (s *SchedulerServer) createAPIServerClient() (*client.Client, error) {
authInfo, err := clientauth.LoadFromFile(s.AuthPath)
if err != nil {
log.Warningf("Could not load kubernetes auth path: %v. Continuing with defaults.", err)
}
if authInfo == nil {
// authInfo didn't load correctly - continue with defaults.
authInfo = &clientauth.Info{}
}
clientConfig, err := authInfo.MergeWithConfig(client.Config{})
if err != nil {
return nil, err
}
if len(s.APIServerList) < 1 {
return nil, fmt.Errorf("no api servers specified")
}
// TODO: adapt Kube client to support LB over several servers
if len(s.APIServerList) > 1 {
log.Infof("Multiple api servers specified. Picking first one")
}
clientConfig.Host = s.APIServerList[0]
c, err := client.New(&clientConfig)
if err != nil {
return nil, err
}
return c, nil
}
func (s *SchedulerServer) setDriver(driver bindings.SchedulerDriver) {
s.driverMutex.Lock()
defer s.driverMutex.Unlock()
s.driver = driver
}
func (s *SchedulerServer) getDriver() (driver bindings.SchedulerDriver) {
s.driverMutex.RLock()
defer s.driverMutex.RUnlock()
return s.driver
}
func (s *SchedulerServer) Run(hks hyperkube.Interface, _ []string) error {
// get scheduler low-level config
sc := schedcfg.CreateDefaultConfig()
if s.SchedulerConfigFileName != "" {
f, err := os.Open(s.SchedulerConfigFileName)
if err != nil {
log.Fatalf("Cannot open scheduler config file: %v", err)
}
err = sc.Read(bufio.NewReader(f))
if err != nil {
log.Fatalf("Invalid scheduler config file: %v", err)
}
}
schedulerProcess, driverFactory, etcdClient, eid := s.bootstrap(hks, sc)
if s.EnableProfiling {
profile.InstallHandler(s.mux)
}
go runtime.Until(func() {
log.V(1).Info("Starting HTTP interface")
log.Error(http.ListenAndServe(net.JoinHostPort(s.Address.String(), strconv.Itoa(s.Port)), s.mux))
}, sc.HttpBindInterval.Duration, schedulerProcess.Terminal())
if s.HA {
validation := ha.ValidationFunc(validateLeadershipTransition)
srv := ha.NewCandidate(schedulerProcess, driverFactory, validation)
path := fmt.Sprintf(meta.DefaultElectionFormat, s.FrameworkName)
sid := uid.New(eid.Group(), "").String()
log.Infof("registering for election at %v with id %v", path, sid)
go election.Notify(election.NewEtcdMasterElector(etcdClient), path, sid, srv, nil)
} else {
log.Infoln("self-electing in non-HA mode")
schedulerProcess.Elect(driverFactory)
}
return s.awaitFailover(schedulerProcess, func() error { return s.failover(s.getDriver(), hks) })
}
// awaitFailover watches the scheduler process for failover signals and handles them appropriately. May never return.
func (s *SchedulerServer) awaitFailover(schedulerProcess schedulerProcessInterface, handler func() error) error {
// we only want to return the first error (if any), everyone else can block forever
errCh := make(chan error, 1)
doFailover := func() error {
// we really don't expect handler to return, if it does something went seriously wrong
err := handler()
if err != nil {
defer schedulerProcess.End()
err = fmt.Errorf("failover failed, scheduler will terminate: %v", err)
}
return err
}
// guard for failover signal processing, first signal processor wins
failoverLatch := &runtime.Latch{}
runtime.On(schedulerProcess.Terminal(), func() {
if !failoverLatch.Acquire() {
log.V(1).Infof("scheduler process ending, already failing over")
select {}
}
var err error
defer func() { errCh <- err }()
select {
case <-schedulerProcess.Failover():
err = doFailover()
default:
if s.HA {
err = fmt.Errorf("ha scheduler exiting instead of failing over")
} else {
log.Infof("exiting scheduler")
}
}
})
runtime.OnOSSignal(makeFailoverSigChan(), func(_ os.Signal) {
if !failoverLatch.Acquire() {
log.V(1).Infof("scheduler process signalled, already failing over")
select {}
}
errCh <- doFailover()
})
return <-errCh
}
func validateLeadershipTransition(desired, current string) {
log.Infof("validating leadership transition")
d := uid.Parse(desired).Group()
c := uid.Parse(current).Group()
if d == 0 {
// should *never* happen, but..
log.Fatalf("illegal scheduler UID: %q", desired)
}
if d != c && c != 0 {
log.Fatalf("desired scheduler group (%x) != current scheduler group (%x)", d, c)
}
}
// hacked from https://github.com/GoogleCloudPlatform/kubernetes/blob/release-0.14/cmd/kube-apiserver/app/server.go
func newEtcd(etcdConfigFile string, etcdServerList util.StringList) (client tools.EtcdGetSet, err error) {
if etcdConfigFile != "" {
client, err = etcd.NewClientFromFile(etcdConfigFile)
} else {
client = etcd.NewClient(etcdServerList)
}
return
}
func (s *SchedulerServer) bootstrap(hks hyperkube.Interface, sc *schedcfg.Config) (*ha.SchedulerProcess, ha.DriverFactory, tools.EtcdGetSet, *uid.UID) {
s.FrameworkName = strings.TrimSpace(s.FrameworkName)
if s.FrameworkName == "" {
log.Fatalf("framework-name must be a non-empty string")
}
s.FrameworkWebURI = strings.TrimSpace(s.FrameworkWebURI)
metrics.Register()
runtime.Register()
s.mux.Handle("/metrics", prometheus.Handler())
if (s.EtcdConfigFile != "" && len(s.EtcdServerList) != 0) || (s.EtcdConfigFile == "" && len(s.EtcdServerList) == 0) {
log.Fatalf("specify either --etcd-servers or --etcd-config")
}
if len(s.APIServerList) < 1 {
log.Fatal("No api servers specified.")
}
client, err := s.createAPIServerClient()
if err != nil {
log.Fatalf("Unable to make apiserver client: %v", err)
}
s.client = client
if s.ReconcileCooldown < defaultReconcileCooldown {
s.ReconcileCooldown = defaultReconcileCooldown
log.Warningf("user-specified reconcile cooldown too small, defaulting to %v", s.ReconcileCooldown)
}
executor, eid, err := s.prepareExecutorInfo(hks)
if err != nil {
log.Fatalf("misconfigured executor: %v", err)
}
// TODO(jdef): remove the dependency on etcd as soon as
// (1) the generic config store is available for the FrameworkId storage
// (2) the generic master election is provided by the apiserver
// Compare docs/proposals/high-availability.md
etcdClient, err := newEtcd(s.EtcdConfigFile, s.EtcdServerList)
if err != nil {
log.Fatalf("misconfigured etcd: %v", err)
}
mesosPodScheduler := scheduler.New(scheduler.Config{
Schedcfg: *sc,
Executor: executor,
ScheduleFunc: scheduler.FCFSScheduleFunc,
Client: client,
EtcdClient: etcdClient,
FailoverTimeout: s.FailoverTimeout,
ReconcileInterval: s.ReconcileInterval,
ReconcileCooldown: s.ReconcileCooldown,
})
masterUri := s.MesosMaster
info, cred, err := s.buildFrameworkInfo()
if err != nil {
log.Fatalf("Misconfigured mesos framework: %v", err)
}
schedulerProcess := ha.New(mesosPodScheduler)
dconfig := &bindings.DriverConfig{
Scheduler: schedulerProcess,
Framework: info,
Master: masterUri,
Credential: cred,
BindingAddress: net.IP(s.Address),
BindingPort: uint16(s.DriverPort),
HostnameOverride: s.HostnameOverride,
WithAuthContext: func(ctx context.Context) context.Context {
ctx = auth.WithLoginProvider(ctx, s.MesosAuthProvider)
ctx = sasl.WithBindingAddress(ctx, net.IP(s.Address))
return ctx
},
}
kpl := scheduler.NewPlugin(mesosPodScheduler.NewDefaultPluginConfig(schedulerProcess.Terminal(), s.mux))
runtime.On(mesosPodScheduler.Registration(), func() { kpl.Run(schedulerProcess.Terminal()) })
runtime.On(mesosPodScheduler.Registration(), s.newServiceWriter(schedulerProcess.Terminal()))
driverFactory := ha.DriverFactory(func() (drv bindings.SchedulerDriver, err error) {
log.V(1).Infoln("performing deferred initialization")
if err = mesosPodScheduler.Init(schedulerProcess.Master(), kpl, s.mux); err != nil {
return nil, fmt.Errorf("failed to initialize pod scheduler: %v", err)
}
log.V(1).Infoln("deferred init complete")
// defer obtaining framework ID to prevent multiple schedulers
// from overwriting each other's framework IDs
dconfig.Framework.Id, err = s.fetchFrameworkID(etcdClient)
if err != nil {
return nil, fmt.Errorf("failed to fetch framework ID from etcd: %v", err)
}
log.V(1).Infoln("constructing mesos scheduler driver")
drv, err = bindings.NewMesosSchedulerDriver(*dconfig)
if err != nil {
return nil, fmt.Errorf("failed to construct scheduler driver: %v", err)
}
log.V(1).Infoln("constructed mesos scheduler driver:", drv)
s.setDriver(drv)
return drv, nil
})
return schedulerProcess, driverFactory, etcdClient, eid
}
func (s *SchedulerServer) failover(driver bindings.SchedulerDriver, hks hyperkube.Interface) error {
if driver != nil {
stat, err := driver.Stop(true)
if stat != mesos.Status_DRIVER_STOPPED {
return fmt.Errorf("failed to stop driver for failover, received unexpected status code: %v", stat)
} else if err != nil {
return err
}
}
// there's no guarantee that all goroutines are actually programmed intelligently with 'done'
// signals, so we'll need to restart if we want to really stop everything
// run the same command that we were launched with
//TODO(jdef) assumption here is that the scheduler is the only service running in this process; we should probably validate that somehow
args := []string{}
flags := pflag.CommandLine
if hks != nil {
args = append(args, hks.Name())
flags = hks.Flags()
}
flags.Visit(func(flag *pflag.Flag) {
if flag.Name != "api-servers" && flag.Name != "etcd-servers" {
args = append(args, fmt.Sprintf("--%s=%s", flag.Name, flag.Value.String()))
}
})
if !s.Graceful {
args = append(args, "--graceful")
}
if len(s.APIServerList) > 0 {
args = append(args, "--api-servers="+strings.Join(s.APIServerList, ","))
}
if len(s.EtcdServerList) > 0 {
args = append(args, "--etcd-servers="+strings.Join(s.EtcdServerList, ","))
}
args = append(args, flags.Args()...)
log.V(1).Infof("spawning scheduler for graceful failover: %s %+v", s.executable, args)
cmd := exec.Command(s.executable, args...)
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.SysProcAttr = makeDisownedProcAttr()
// TODO(jdef) pass in a pipe FD so that we can block, waiting for the child proc to be ready
//cmd.ExtraFiles = []*os.File{}
exitcode := 0
log.Flush() // TODO(jdef) it would be really nice to ensure that no one else in our process was still logging
if err := cmd.Start(); err != nil {
//log to stdout here to avoid conflicts with normal stderr logging
fmt.Fprintf(os.Stdout, "failed to spawn failover process: %v\n", err)
os.Exit(1)
}
os.Exit(exitcode)
select {} // will never reach here
}
func (s *SchedulerServer) buildFrameworkInfo() (info *mesos.FrameworkInfo, cred *mesos.Credential, err error) {
username, err := s.getUsername()
if err != nil {
return nil, nil, err
}
log.V(2).Infof("Framework configured with mesos user %v", username)
info = &mesos.FrameworkInfo{
Name: proto.String(s.FrameworkName),
User: proto.String(username),
Checkpoint: proto.Bool(s.Checkpoint),
}
if s.FrameworkWebURI != "" {
info.WebuiUrl = proto.String(s.FrameworkWebURI)
}
if s.FailoverTimeout > 0 {
info.FailoverTimeout = proto.Float64(s.FailoverTimeout)
}
if s.MesosRole != "" {
info.Role = proto.String(s.MesosRole)
}
if s.MesosAuthPrincipal != "" {
info.Principal = proto.String(s.MesosAuthPrincipal)
if s.MesosAuthSecretFile == "" {
return nil, nil, errors.New("authentication principal specified without the required credentials file")
}
secret, err := ioutil.ReadFile(s.MesosAuthSecretFile)
if err != nil {
return nil, nil, err
}
cred = &mesos.Credential{
Principal: proto.String(s.MesosAuthPrincipal),
Secret: secret,
}
}
return
}
func (s *SchedulerServer) fetchFrameworkID(client tools.EtcdGetSet) (*mesos.FrameworkID, error) {
if s.FailoverTimeout > 0 {
if response, err := client.Get(meta.FrameworkIDKey, false, false); err != nil {
if !tools.IsEtcdNotFound(err) {
return nil, fmt.Errorf("unexpected failure attempting to load framework ID from etcd: %v", err)
}
log.V(1).Infof("did not find framework ID in etcd")
} else if response.Node.Value != "" {
log.Infof("configuring FrameworkInfo with Id found in etcd: '%s'", response.Node.Value)
return mutil.NewFrameworkID(response.Node.Value), nil
}
} else {
//TODO(jdef) this seems like a totally hackish way to clean up the framework ID
if _, err := client.Delete(meta.FrameworkIDKey, true); err != nil {
if !tools.IsEtcdNotFound(err) {
return nil, fmt.Errorf("failed to delete framework ID from etcd: %v", err)
}
log.V(1).Infof("nothing to delete: did not find framework ID in etcd")
}
}
return nil, nil
}
func (s *SchedulerServer) getUsername() (username string, err error) {
username = s.MesosUser
if username == "" {
if u, err := user.Current(); err == nil {
username = u.Username
if username == "" {
username = defaultMesosUser
}
}
}
return
}
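For orientation only (not part of this commit): a bare-bones standalone entry point would wire NewSchedulerServer, AddStandaloneFlags, and Run together roughly as below. The function name is hypothetical, and it assumes --executor-path and --proxy-path are supplied on the command line so that no hyperkube server lookup is needed and a nil hyperkube.Interface is acceptable.
// Hypothetical sketch of a standalone scheduler entry point.
func runStandaloneScheduler() {
    s := NewSchedulerServer()
    s.AddStandaloneFlags(pflag.CommandLine)
    pflag.Parse()
    if err := s.Run(nil, pflag.Args()); err != nil {
        log.Fatalf("scheduler terminated with error: %v", err)
    }
}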

View File

@@ -0,0 +1,108 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// +build unit_test
package service
import (
"testing"
"time"
)
type fakeSchedulerProcess struct {
doneFunc func() <-chan struct{}
failoverFunc func() <-chan struct{}
}
func (self *fakeSchedulerProcess) Terminal() <-chan struct{} {
if self == nil || self.doneFunc == nil {
return nil
}
return self.doneFunc()
}
func (self *fakeSchedulerProcess) Failover() <-chan struct{} {
if self == nil || self.failoverFunc == nil {
return nil
}
return self.failoverFunc()
}
func (self *fakeSchedulerProcess) End() <-chan struct{} {
ch := make(chan struct{})
close(ch)
return ch
}
func Test_awaitFailoverDone(t *testing.T) {
done := make(chan struct{})
p := &fakeSchedulerProcess{
doneFunc: func() <-chan struct{} { return done },
}
ss := &SchedulerServer{}
failoverHandlerCalled := false
failoverFailedHandler := func() error {
failoverHandlerCalled = true
return nil
}
errCh := make(chan error, 1)
go func() {
errCh <- ss.awaitFailover(p, failoverFailedHandler)
}()
close(done)
select {
case err := <-errCh:
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("timed out waiting for failover")
}
if failoverHandlerCalled {
t.Fatalf("unexpected call to failover handler")
}
}
func Test_awaitFailoverDoneFailover(t *testing.T) {
ch := make(chan struct{})
p := &fakeSchedulerProcess{
doneFunc: func() <-chan struct{} { return ch },
failoverFunc: func() <-chan struct{} { return ch },
}
ss := &SchedulerServer{}
failoverHandlerCalled := false
failoverFailedHandler := func() error {
failoverHandlerCalled = true
return nil
}
errCh := make(chan error, 1)
go func() {
errCh <- ss.awaitFailover(p, failoverFailedHandler)
}()
close(ch)
select {
case err := <-errCh:
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("timed out waiting for failover")
}
if !failoverHandlerCalled {
t.Fatalf("expected call to failover handler")
}
}

View File

@@ -0,0 +1,88 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"bytes"
"fmt"
"hash/crc64"
"sort"
"strconv"
mesos "github.com/mesos/mesos-go/mesosproto"
)
// hashExecutorInfo computes a hashcode for an ExecutorInfo that may be used as a reasonable
// litmus test with respect to compatibility across HA schedulers. The intent is that an HA
// scheduler should fail fast if it doesn't pass this test, rather than generating (potentially
// many) errors at run-time because a Mesos master decides that the ExecutorInfo generated by a
// secondary scheduler doesn't match that of the primary scheduler.
//
// See https://github.com/apache/mesos/blob/0.22.0/src/common/type_utils.cpp#L110
func hashExecutorInfo(info *mesos.ExecutorInfo) uint64 {
// !!! we specifically do NOT include:
// - Framework ID because it's a value that's initialized too late for us to use
// - Executor ID because it's a value that includes a copy of this hash
buf := &bytes.Buffer{}
buf.WriteString(info.GetName())
buf.WriteString(info.GetSource())
buf.Write(info.Data)
if info.Command != nil {
buf.WriteString(info.Command.GetValue())
buf.WriteString(info.Command.GetUser())
buf.WriteString(strconv.FormatBool(info.Command.GetShell()))
if sz := len(info.Command.Arguments); sz > 0 {
x := make([]string, sz)
copy(x, info.Command.Arguments)
sort.Strings(x)
for _, item := range x {
buf.WriteString(item)
}
}
if vars := info.Command.Environment.GetVariables(); vars != nil && len(vars) > 0 {
names := []string{}
e := make(map[string]string)
for _, v := range vars {
if name := v.GetName(); name != "" {
names = append(names, name)
e[name] = v.GetValue()
}
}
sort.Strings(names)
for _, n := range names {
buf.WriteString(n)
buf.WriteString("=")
buf.WriteString(e[n])
}
}
if uris := info.Command.GetUris(); len(uris) > 0 {
su := []string{}
for _, uri := range uris {
su = append(su, fmt.Sprintf("%s%t%t", uri.GetValue(), uri.GetExecutable(), uri.GetExtract()))
}
sort.Strings(su)
for _, uri := range su {
buf.WriteString(uri)
}
}
//TODO(jdef) add support for Resources and Container
}
table := crc64.MakeTable(crc64.ECMA)
return crc64.Checksum(buf.Bytes(), table)
}
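Illustrative only (not part of this commit): because command arguments, environment variables, and URIs are sorted before hashing, two HA schedulers that assemble the same executor command with flags in a different order still agree on the hash, and therefore on the derived executor UID. A minimal sketch with made-up values, assuming the gogo/protobuf proto helpers used elsewhere in this commit:
a := &mesos.ExecutorInfo{
    Name:    proto.String("k8sm-executor"),
    Source:  proto.String("kubernetes"),
    Command: &mesos.CommandInfo{Value: proto.String("./km"), Arguments: []string{"--v=2", "--run-proxy=true"}},
}
b := &mesos.ExecutorInfo{
    Name:    proto.String("k8sm-executor"),
    Source:  proto.String("kubernetes"),
    Command: &mesos.CommandInfo{Value: proto.String("./km"), Arguments: []string{"--run-proxy=true", "--v=2"}},
}
// hashExecutorInfo(a) == hashExecutorInfo(b): argument order does not affect the hash.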
