Merge pull request #8882 from mesosphere/upstream_k8sm

Upstream Kubernetes-Mesos framework
Abhi Shah
2015-06-12 06:36:20 -07:00
160 changed files with 23986 additions and 27 deletions

Godeps/Godeps.json generated
View File

@@ -358,6 +358,10 @@
"Comment": "v0.8.8",
"Rev": "afde71eb1740fd763ab9450e1f700ba0e53c36d0"
},
{
"ImportPath": "github.com/kardianos/osext",
"Rev": "8fef92e41e22a70e700a96b29f066cda30ea24ef"
},
{
"ImportPath": "github.com/kr/pty",
"Comment": "release.r56-25-g05017fc",
@@ -367,10 +371,18 @@
"ImportPath": "github.com/matttproud/golang_protobuf_extensions/pbutil",
"Rev": "fc2b8d3a73c4867e51861bbdd5ae3c1f0869dd6a"
},
{
"ImportPath": "github.com/mesos/mesos-go/auth",
"Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a"
},
{
"ImportPath": "github.com/mesos/mesos-go/detector",
"Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a"
},
{
"ImportPath": "github.com/mesos/mesos-go/executor",
"Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a"
},
{
"ImportPath": "github.com/mesos/mesos-go/mesosproto",
"Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a"
@@ -379,6 +391,14 @@
"ImportPath": "github.com/mesos/mesos-go/mesosutil",
"Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a"
},
{
"ImportPath": "github.com/mesos/mesos-go/messenger",
"Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a"
},
{
"ImportPath": "github.com/mesos/mesos-go/scheduler",
"Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a"
},
{
"ImportPath": "github.com/mesos/mesos-go/upid",
"Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a"

View File

@@ -0,0 +1,27 @@
Copyright (c) 2012 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,14 @@
# Extensions to the "os" package

## Find the current Executable and ExecutableFolder
It is sometimes useful to find the path of the currently running
executable, for example to upgrade the executable in place or to
locate resources relative to it.
Multi-platform; supports:
* Linux
* OS X
* Windows
* Plan 9
* BSDs.
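A minimal usage sketch (assuming the package is imported from github.com/kardianos/osext, per the Godeps entry above):

package main

import (
	"fmt"
	"log"

	"github.com/kardianos/osext"
)

func main() {
	exe, err := osext.Executable()
	if err != nil {
		log.Fatal(err)
	}
	folder, err := osext.ExecutableFolder()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("running from:", exe)
	fmt.Println("resources in:", folder)
}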

View File

@@ -0,0 +1,27 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Extensions to the standard "os" package.
package osext
import "path/filepath"
// Executable returns an absolute path that can be used to
// re-invoke the current program.
// It may not be valid after the current program exits.
func Executable() (string, error) {
p, err := executable()
return filepath.Clean(p), err
}
// ExecutableFolder returns the same path as Executable, but with the
// executable name stripped, leaving just the folder path.
func ExecutableFolder() (string, error) {
p, err := Executable()
if err != nil {
return "", err
}
folder, _ := filepath.Split(p)
return folder, nil
}

View File

@@ -0,0 +1,20 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package osext
import (
"os"
"strconv"
"syscall"
)
func executable() (string, error) {
f, err := os.Open("/proc/" + strconv.Itoa(os.Getpid()) + "/text")
if err != nil {
return "", err
}
defer f.Close()
return syscall.Fd2path(int(f.Fd()))
}

View File

@@ -0,0 +1,36 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build linux netbsd openbsd solaris dragonfly
package osext
import (
"errors"
"fmt"
"os"
"runtime"
"strings"
)
func executable() (string, error) {
switch runtime.GOOS {
case "linux":
const deletedTag = " (deleted)"
execpath, err := os.Readlink("/proc/self/exe")
if err != nil {
return execpath, err
}
execpath = strings.TrimSuffix(execpath, deletedTag)
execpath = strings.TrimPrefix(execpath, deletedTag)
return execpath, nil
case "netbsd":
return os.Readlink("/proc/curproc/exe")
case "openbsd", "dragonfly":
return os.Readlink("/proc/curproc/file")
case "solaris":
return os.Readlink(fmt.Sprintf("/proc/%d/path/a.out", os.Getpid()))
}
return "", errors.New("ExecPath not implemented for " + runtime.GOOS)
}

View File

@@ -0,0 +1,79 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build darwin freebsd
package osext
import (
"os"
"path/filepath"
"runtime"
"syscall"
"unsafe"
)
var initCwd, initCwdErr = os.Getwd()
func executable() (string, error) {
var mib [4]int32
switch runtime.GOOS {
case "freebsd":
mib = [4]int32{1 /* CTL_KERN */, 14 /* KERN_PROC */, 12 /* KERN_PROC_PATHNAME */, -1}
case "darwin":
mib = [4]int32{1 /* CTL_KERN */, 38 /* KERN_PROCARGS */, int32(os.Getpid()), -1}
}
n := uintptr(0)
// Get length.
_, _, errNum := syscall.Syscall6(syscall.SYS___SYSCTL, uintptr(unsafe.Pointer(&mib[0])), 4, 0, uintptr(unsafe.Pointer(&n)), 0, 0)
if errNum != 0 {
return "", errNum
}
if n == 0 { // This shouldn't happen.
return "", nil
}
buf := make([]byte, n)
_, _, errNum = syscall.Syscall6(syscall.SYS___SYSCTL, uintptr(unsafe.Pointer(&mib[0])), 4, uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&n)), 0, 0)
if errNum != 0 {
return "", errNum
}
if n == 0 { // This shouldn't happen.
return "", nil
}
for i, v := range buf {
if v == 0 {
buf = buf[:i]
break
}
}
var err error
execPath := string(buf)
// execPath will not be empty due to above checks.
// Try to get the absolute path if the execPath is not rooted.
if execPath[0] != '/' {
execPath, err = getAbs(execPath)
if err != nil {
return execPath, err
}
}
// For darwin KERN_PROCARGS may return the path to a symlink rather than the
// actual executable.
if runtime.GOOS == "darwin" {
if execPath, err = filepath.EvalSymlinks(execPath); err != nil {
return execPath, err
}
}
return execPath, nil
}
func getAbs(execPath string) (string, error) {
if initCwdErr != nil {
return execPath, initCwdErr
}
// The execPath may begin with a "../" or a "./" so clean it first.
// Join the two paths, trailing and starting slashes undetermined, so use
// the generic Join function.
return filepath.Join(initCwd, filepath.Clean(execPath)), nil
}

View File

@@ -0,0 +1,180 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build darwin linux freebsd netbsd windows
package osext
import (
"bytes"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"runtime"
"testing"
)
const (
executableEnvVar = "OSTEST_OUTPUT_EXECUTABLE"
executableEnvValueMatch = "match"
executableEnvValueDelete = "delete"
)
func TestExecutableMatch(t *testing.T) {
ep, err := Executable()
if err != nil {
t.Fatalf("Executable failed: %v", err)
}
// Make fullpath relative, of the form "dir/prog".
dir := filepath.Dir(filepath.Dir(ep))
fullpath, err := filepath.Rel(dir, ep)
if err != nil {
t.Fatalf("filepath.Rel: %v", err)
}
// Make child start with a relative program path.
// Alter argv[0] for child to verify getting real path without argv[0].
cmd := &exec.Cmd{
Dir: dir,
Path: fullpath,
Env: []string{fmt.Sprintf("%s=%s", executableEnvVar, executableEnvValueMatch)},
}
out, err := cmd.CombinedOutput()
if err != nil {
t.Fatalf("exec(self) failed: %v", err)
}
outs := string(out)
if !filepath.IsAbs(outs) {
t.Fatalf("Child returned %q, want an absolute path", out)
}
if !sameFile(outs, ep) {
t.Fatalf("Child returned %q, not the same file as %q", out, ep)
}
}
func TestExecutableDelete(t *testing.T) {
if runtime.GOOS != "linux" {
t.Skip()
}
fpath, err := Executable()
if err != nil {
t.Fatalf("Executable failed: %v", err)
}
r, w := io.Pipe()
stderrBuff := &bytes.Buffer{}
stdoutBuff := &bytes.Buffer{}
cmd := &exec.Cmd{
Path: fpath,
Env: []string{fmt.Sprintf("%s=%s", executableEnvVar, executableEnvValueDelete)},
Stdin: r,
Stderr: stderrBuff,
Stdout: stdoutBuff,
}
err = cmd.Start()
if err != nil {
t.Fatalf("exec(self) start failed: %v", err)
}
tempPath := fpath + "_copy"
_ = os.Remove(tempPath)
err = copyFile(tempPath, fpath)
if err != nil {
t.Fatalf("copy file failed: %v", err)
}
err = os.Remove(fpath)
if err != nil {
t.Fatalf("remove running test file failed: %v", err)
}
err = os.Rename(tempPath, fpath)
if err != nil {
t.Fatalf("rename copy to previous name failed: %v", err)
}
w.Write([]byte{0})
w.Close()
err = cmd.Wait()
if err != nil {
t.Fatalf("exec wait failed: %v", err)
}
childPath := stderrBuff.String()
if !filepath.IsAbs(childPath) {
t.Fatalf("Child returned %q, want an absolute path", childPath)
}
if !sameFile(childPath, fpath) {
t.Fatalf("Child returned %q, not the same file as %q", childPath, fpath)
}
}
func sameFile(fn1, fn2 string) bool {
fi1, err := os.Stat(fn1)
if err != nil {
return false
}
fi2, err := os.Stat(fn2)
if err != nil {
return false
}
return os.SameFile(fi1, fi2)
}
func copyFile(dest, src string) error {
df, err := os.Create(dest)
if err != nil {
return err
}
defer df.Close()
sf, err := os.Open(src)
if err != nil {
return err
}
defer sf.Close()
_, err = io.Copy(df, sf)
return err
}
func TestMain(m *testing.M) {
env := os.Getenv(executableEnvVar)
switch env {
case "":
os.Exit(m.Run())
case executableEnvValueMatch:
// First chdir to another path.
dir := "/"
if runtime.GOOS == "windows" {
dir = filepath.VolumeName(".")
}
os.Chdir(dir)
if ep, err := Executable(); err != nil {
fmt.Fprint(os.Stderr, "ERROR: ", err)
} else {
fmt.Fprint(os.Stderr, ep)
}
case executableEnvValueDelete:
bb := make([]byte, 1)
var err error
n, err := os.Stdin.Read(bb)
if err != nil {
fmt.Fprint(os.Stderr, "ERROR: ", err)
os.Exit(2)
}
if n != 1 {
fmt.Fprint(os.Stderr, "ERROR: n != 1, n == ", n)
os.Exit(2)
}
if ep, err := Executable(); err != nil {
fmt.Fprint(os.Stderr, "ERROR: ", err)
} else {
fmt.Fprint(os.Stderr, ep)
}
}
os.Exit(0)
}

View File

@@ -0,0 +1,34 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package osext
import (
"syscall"
"unicode/utf16"
"unsafe"
)
var (
kernel = syscall.MustLoadDLL("kernel32.dll")
getModuleFileNameProc = kernel.MustFindProc("GetModuleFileNameW")
)
// GetModuleFileName() with hModule = NULL
func executable() (exePath string, err error) {
return getModuleFileName()
}
func getModuleFileName() (string, error) {
var n uint32
b := make([]uint16, syscall.MAX_PATH)
size := uint32(len(b))
r0, _, e1 := getModuleFileNameProc.Call(0, uintptr(unsafe.Pointer(&b[0])), uintptr(size))
n = uint32(r0)
if n == 0 {
return "", e1
}
return string(utf16.Decode(b[0:n])), nil
}

View File

@@ -0,0 +1,28 @@
package callback
import (
"fmt"
)
type Unsupported struct {
Callback Interface
}
func (uc *Unsupported) Error() string {
return fmt.Sprintf("Unsupported callback <%T>: %v", uc.Callback, uc.Callback)
}
type Interface interface {
// marker interface
}
type Handler interface {
// may return an Unsupported error on failure
Handle(callbacks ...Interface) error
}
type HandlerFunc func(callbacks ...Interface) error
func (f HandlerFunc) Handle(callbacks ...Interface) error {
return f(callbacks...)
}

View File

@@ -0,0 +1,27 @@
package callback
import (
"github.com/mesos/mesos-go/upid"
)
type Interprocess struct {
client upid.UPID
server upid.UPID
}
func NewInterprocess() *Interprocess {
return &Interprocess{}
}
func (cb *Interprocess) Client() upid.UPID {
return cb.client
}
func (cb *Interprocess) Server() upid.UPID {
return cb.server
}
func (cb *Interprocess) Set(server, client upid.UPID) {
cb.server = server
cb.client = client
}

View File

@@ -0,0 +1,17 @@
package callback
type Name struct {
name string
}
func NewName() *Name {
return &Name{}
}
func (cb *Name) Get() string {
return cb.name
}
func (cb *Name) Set(name string) {
cb.name = name
}

View File

@@ -0,0 +1,20 @@
package callback
type Password struct {
password []byte
}
func NewPassword() *Password {
return &Password{}
}
func (cb *Password) Get() []byte {
clone := make([]byte, len(cb.password))
copy(clone, cb.password)
return clone
}
func (cb *Password) Set(password []byte) {
cb.password = make([]byte, len(password))
copy(cb.password, password)
}

View File

@@ -0,0 +1,63 @@
package auth
import (
"errors"
"fmt"
"sync"
log "github.com/golang/glog"
"github.com/mesos/mesos-go/auth/callback"
"golang.org/x/net/context"
)
// SPI interface: login provider implementations support this interface. Clients
// do not authenticate against this directly; instead they should use Login().
type Authenticatee interface {
// Returns no errors if successfully authenticated, otherwise a single
// error.
Authenticate(ctx context.Context, handler callback.Handler) error
}
// Func adapter for the interface: allows funcs to implement the Authenticatee
// interface as long as the func signature matches.
type AuthenticateeFunc func(ctx context.Context, handler callback.Handler) error
func (f AuthenticateeFunc) Authenticate(ctx context.Context, handler callback.Handler) error {
return f(ctx, handler)
}
var (
// Authentication was attempted and failed (likely due to incorrect credentials, too
// many retries within a time window, etc). Distinctly different from authentication
// errors (e.g. network errors, configuration errors, etc).
AuthenticationFailed = errors.New("authentication failed")
authenticateeProviders = make(map[string]Authenticatee) // authentication providers dict
providerLock sync.Mutex
)
// Register an authentication provider (aka "login provider"). Packages that
// provide Authenticatee implementations should invoke this func in their
// init() to register.
func RegisterAuthenticateeProvider(name string, auth Authenticatee) (err error) {
providerLock.Lock()
defer providerLock.Unlock()
if _, found := authenticateeProviders[name]; found {
err = fmt.Errorf("authentication provider already registered: %v", name)
} else {
authenticateeProviders[name] = auth
log.V(1).Infof("registered authentication provider: %v", name)
}
return
}
// Look up an authentication provider by name, returns non-nil and true if such
// a provider is found.
func getAuthenticateeProvider(name string) (provider Authenticatee, ok bool) {
providerLock.Lock()
defer providerLock.Unlock()
provider, ok = authenticateeProviders[name]
return
}
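As a hedged sketch of the registration pattern described above (the "noop" provider name and its always-succeed implementation are hypothetical, purely illustrative):

package noop

import (
	"github.com/mesos/mesos-go/auth"
	"github.com/mesos/mesos-go/auth/callback"
	"golang.org/x/net/context"
)

func init() {
	// a trivial login provider that always succeeds; a real provider
	// would negotiate credentials obtained through the handler.
	provider := auth.AuthenticateeFunc(func(ctx context.Context, handler callback.Handler) error {
		return nil
	})
	if err := auth.RegisterAuthenticateeProvider("noop", provider); err != nil {
		panic(err)
	}
}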

View File

@@ -0,0 +1,80 @@
package auth
import (
"errors"
"fmt"
"github.com/mesos/mesos-go/auth/callback"
"github.com/mesos/mesos-go/upid"
"golang.org/x/net/context"
)
var (
// No login provider name has been specified in a context.Context
NoLoginProviderName = errors.New("missing login provider name in context")
)
// Main client entrypoint into the authentication APIs: clients are expected to
// invoke this func with a context containing a login provider name value.
// This may be written as:
// providerName := ... // the user has probably configured this via some flag
// handler := ... // handlers provide data like usernames and passwords
// ctx := ... // obtain some initial or timed context
// err := auth.Login(auth.WithLoginProvider(ctx, providerName), handler)
func Login(ctx context.Context, handler callback.Handler) error {
name, ok := LoginProviderFrom(ctx)
if !ok {
return NoLoginProviderName
}
provider, ok := getAuthenticateeProvider(name)
if !ok {
return fmt.Errorf("unrecognized login provider name in context: %s", name)
}
return provider.Authenticate(ctx, handler)
}
// Unexported key type, avoids conflicts with other context-using packages. All
// context items registered from this package should use keys of this type.
type loginKeyType int
const (
loginProviderNameKey loginKeyType = iota // name of login provider to use
parentUpidKey // upid.UPID of some parent process
)
// Return a context that inherits all values from the parent ctx and specifies
// the login provider name given here. Intended to be invoked before calls to
// Login().
func WithLoginProvider(ctx context.Context, providerName string) context.Context {
return context.WithValue(ctx, loginProviderNameKey, providerName)
}
// Return the name of the login provider specified in this context.
func LoginProviderFrom(ctx context.Context) (name string, ok bool) {
name, ok = ctx.Value(loginProviderNameKey).(string)
return
}
// Return the name of the login provider specified in this context, or empty
// string if none.
func LoginProvider(ctx context.Context) string {
name, _ := LoginProviderFrom(ctx)
return name
}
func WithParentUPID(ctx context.Context, pid upid.UPID) context.Context {
return context.WithValue(ctx, parentUpidKey, pid)
}
func ParentUPIDFrom(ctx context.Context) (pid upid.UPID, ok bool) {
pid, ok = ctx.Value(parentUpidKey).(upid.UPID)
return
}
func ParentUPID(ctx context.Context) (upid *upid.UPID) {
if upid, ok := ParentUPIDFrom(ctx); ok {
return &upid
} else {
return nil
}
}
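Putting the login API together, a hedged client-side sketch (the credentials are placeholders; a real SASL login additionally expects an Interprocess callback and a parent UPID in the context, as the sasl package that follows shows):

package main

import (
	log "github.com/golang/glog"

	"github.com/mesos/mesos-go/auth"
	"github.com/mesos/mesos-go/auth/callback"
	"golang.org/x/net/context"
)

func main() {
	// the handler supplies data (username, password, ...) to whichever
	// provider is selected via the context.
	handler := callback.HandlerFunc(func(cbs ...callback.Interface) error {
		for _, cb := range cbs {
			switch cb := cb.(type) {
			case *callback.Name:
				cb.Set("frameworkUser")
			case *callback.Password:
				cb.Set([]byte("secret"))
			default:
				return &callback.Unsupported{Callback: cb}
			}
		}
		return nil
	})
	ctx := auth.WithLoginProvider(context.Background(), "SASL")
	if err := auth.Login(ctx, handler); err != nil {
		log.Fatalf("login failed: %v", err)
	}
}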

View File

@@ -0,0 +1,358 @@
package sasl
import (
"errors"
"fmt"
"sync/atomic"
"github.com/gogo/protobuf/proto"
log "github.com/golang/glog"
"github.com/mesos/mesos-go/auth"
"github.com/mesos/mesos-go/auth/callback"
"github.com/mesos/mesos-go/auth/sasl/mech"
mesos "github.com/mesos/mesos-go/mesosproto"
"github.com/mesos/mesos-go/mesosutil/process"
"github.com/mesos/mesos-go/messenger"
"github.com/mesos/mesos-go/upid"
"golang.org/x/net/context"
)
var (
UnexpectedAuthenticationMechanisms = errors.New("Unexpected authentication 'mechanisms' received")
UnexpectedAuthenticationStep = errors.New("Unexpected authentication 'step' received")
UnexpectedAuthenticationCompleted = errors.New("Unexpected authentication 'completed' received")
UnexpectedAuthenticatorPid = errors.New("Unexpected authenticator pid") // authenticator pid changed mid-process
UnsupportedMechanism = errors.New("failed to identify a compatible mechanism")
)
type statusType int32
const (
statusReady statusType = iota
statusStarting
statusStepping
_statusTerminal // meta status, should never be assigned: all status types following are "terminal"
statusCompleted
statusFailed
statusError
statusDiscarded
// this login provider name is automatically registered with the auth package; see init()
ProviderName = "SASL"
)
type authenticateeProcess struct {
transport messenger.Messenger
client upid.UPID
status statusType
done chan struct{}
err error
mech mech.Interface
stepFn mech.StepFunc
from *upid.UPID
handler callback.Handler
}
type authenticateeConfig struct {
client upid.UPID // pid of the client we're attempting to authenticate
handler callback.Handler
transport messenger.Messenger // mesos communications transport
}
type transportFactory interface {
makeTransport() messenger.Messenger
}
type transportFactoryFunc func() messenger.Messenger
func (f transportFactoryFunc) makeTransport() messenger.Messenger {
return f()
}
func init() {
factory := func(ctx context.Context) transportFactoryFunc {
return transportFactoryFunc(func() messenger.Messenger {
parent := auth.ParentUPID(ctx)
if parent == nil {
log.Fatal("expected to have a parent UPID in context")
}
process := process.New("sasl_authenticatee")
tpid := &upid.UPID{
ID: process.Label(),
Host: parent.Host,
}
return messenger.NewHttpWithBindingAddress(tpid, BindingAddressFrom(ctx))
})
}
delegate := auth.AuthenticateeFunc(func(ctx context.Context, handler callback.Handler) error {
if impl, err := makeAuthenticatee(handler, factory(ctx)); err != nil {
return err
} else {
return impl.Authenticate(ctx, handler)
}
})
if err := auth.RegisterAuthenticateeProvider(ProviderName, delegate); err != nil {
log.Error(err)
}
}
func (s *statusType) get() statusType {
return statusType(atomic.LoadInt32((*int32)(s)))
}
func (s *statusType) swap(old, new statusType) bool {
return old != new && atomic.CompareAndSwapInt32((*int32)(s), int32(old), int32(new))
}
// build a new authenticatee implementation using the given callbacks and a new transport instance
func makeAuthenticatee(handler callback.Handler, factory transportFactory) (auth.Authenticatee, error) {
ip := callback.NewInterprocess()
if err := handler.Handle(ip); err != nil {
return nil, err
}
config := &authenticateeConfig{
client: ip.Client(),
handler: handler,
transport: factory.makeTransport(),
}
return auth.AuthenticateeFunc(func(ctx context.Context, handler callback.Handler) error {
ctx, auth := newAuthenticatee(ctx, config)
auth.authenticate(ctx, ip.Server())
select {
case <-ctx.Done():
return auth.discard(ctx)
case <-auth.done:
return auth.err
}
}), nil
}
// Terminate the authentication process upon context cancellation;
// only to be called if/when ctx.Done() has been signalled.
func (self *authenticateeProcess) discard(ctx context.Context) error {
err := ctx.Err()
status := statusFrom(ctx)
for ; status < _statusTerminal; status = (&self.status).get() {
if self.terminate(status, statusDiscarded, err) {
break
}
}
return err
}
func newAuthenticatee(ctx context.Context, config *authenticateeConfig) (context.Context, *authenticateeProcess) {
initialStatus := statusReady
proc := &authenticateeProcess{
transport: config.transport,
client: config.client,
handler: config.handler,
status: initialStatus,
done: make(chan struct{}),
}
ctx = withStatus(ctx, initialStatus)
err := proc.installHandlers(ctx)
if err == nil {
err = proc.startTransport()
}
if err != nil {
proc.terminate(initialStatus, statusError, err)
}
return ctx, proc
}
func (self *authenticateeProcess) startTransport() error {
if err := self.transport.Start(); err != nil {
return err
} else {
go func() {
// stop the authentication transport upon termination of the
// authenticator process
select {
case <-self.done:
log.V(2).Infof("stopping authenticator transport: %v", self.transport.UPID())
self.transport.Stop()
}
}()
}
return nil
}
// installHandlers wires the protobuf message handlers into the transport and
// returns the first installation error encountered, if any (the caller is
// responsible for terminating the authentication process on error).
func (self *authenticateeProcess) installHandlers(ctx context.Context) error {
type handlerFn func(ctx context.Context, from *upid.UPID, pbMsg proto.Message)
withContext := func(f handlerFn) messenger.MessageHandler {
return func(from *upid.UPID, m proto.Message) {
status := (&self.status).get()
if self.from != nil && !self.from.Equal(from) {
self.terminate(status, statusError, UnexpectedAuthenticatorPid)
} else {
f(withStatus(ctx, status), from, m)
}
}
}
// Anticipate mechanisms and steps from the server
handlers := []struct {
f handlerFn
m proto.Message
}{
{self.mechanisms, &mesos.AuthenticationMechanismsMessage{}},
{self.step, &mesos.AuthenticationStepMessage{}},
{self.completed, &mesos.AuthenticationCompletedMessage{}},
{self.failed, &mesos.AuthenticationFailedMessage{}},
{self.errored, &mesos.AuthenticationErrorMessage{}},
}
for _, h := range handlers {
if err := self.transport.Install(withContext(h.f), h.m); err != nil {
return err
}
}
return nil
}
// return true if the authentication status was updated (if true, self.done will have been closed)
func (self *authenticateeProcess) terminate(old, new statusType, err error) bool {
if (&self.status).swap(old, new) {
self.err = err
if self.mech != nil {
self.mech.Discard()
}
close(self.done)
return true
}
return false
}
func (self *authenticateeProcess) authenticate(ctx context.Context, pid upid.UPID) {
status := statusFrom(ctx)
if status != statusReady {
return
}
message := &mesos.AuthenticateMessage{
Pid: proto.String(self.client.String()),
}
if err := self.transport.Send(ctx, &pid, message); err != nil {
self.terminate(status, statusError, err)
} else {
(&self.status).swap(status, statusStarting)
}
}
func (self *authenticateeProcess) mechanisms(ctx context.Context, from *upid.UPID, pbMsg proto.Message) {
status := statusFrom(ctx)
if status != statusStarting {
self.terminate(status, statusError, UnexpectedAuthenticationMechanisms)
return
}
msg, ok := pbMsg.(*mesos.AuthenticationMechanismsMessage)
if !ok {
self.terminate(status, statusError, fmt.Errorf("Expected AuthenticationMechanismsMessage, not %T", pbMsg))
return
}
mechanisms := msg.GetMechanisms()
log.Infof("Received SASL authentication mechanisms: %v", mechanisms)
selectedMech, factory := mech.SelectSupported(mechanisms)
if selectedMech == "" {
self.terminate(status, statusError, UnsupportedMechanism)
return
}
if m, f, err := factory(self.handler); err != nil {
self.terminate(status, statusError, err)
return
} else {
self.mech = m
self.stepFn = f
self.from = from
}
// execute initialization step...
nextf, data, err := self.stepFn(self.mech, nil)
if err != nil {
self.terminate(status, statusError, err)
return
} else {
self.stepFn = nextf
}
message := &mesos.AuthenticationStartMessage{
Mechanism: proto.String(selectedMech),
Data: proto.String(string(data)), // may be nil, depends on init step
}
if err := self.transport.Send(ctx, from, message); err != nil {
self.terminate(status, statusError, err)
} else {
(&self.status).swap(status, statusStepping)
}
}
func (self *authenticateeProcess) step(ctx context.Context, from *upid.UPID, pbMsg proto.Message) {
status := statusFrom(ctx)
if status != statusStepping {
self.terminate(status, statusError, UnexpectedAuthenticationStep)
return
}
log.Info("Received SASL authentication step")
msg, ok := pbMsg.(*mesos.AuthenticationStepMessage)
if !ok {
self.terminate(status, statusError, fmt.Errorf("Expected AuthenticationStepMessage, not %T", pbMsg))
return
}
input := msg.GetData()
fn, output, err := self.stepFn(self.mech, input)
if err != nil {
self.terminate(status, statusError, fmt.Errorf("failed to perform authentication step: %v", err))
return
}
self.stepFn = fn
// We don't start the client with SASL_SUCCESS_DATA so we may
// need to send one more "empty" message to the server.
message := &mesos.AuthenticationStepMessage{}
if len(output) > 0 {
message.Data = output
}
if err := self.transport.Send(ctx, from, message); err != nil {
self.terminate(status, statusError, err)
}
}
func (self *authenticateeProcess) completed(ctx context.Context, from *upid.UPID, pbMsg proto.Message) {
status := statusFrom(ctx)
if status != statusStepping {
self.terminate(status, statusError, UnexpectedAuthenticationCompleted)
return
}
log.Info("Authentication success")
self.terminate(status, statusCompleted, nil)
}
func (self *authenticateeProcess) failed(ctx context.Context, from *upid.UPID, pbMsg proto.Message) {
status := statusFrom(ctx)
self.terminate(status, statusFailed, auth.AuthenticationFailed)
}
func (self *authenticateeProcess) errored(ctx context.Context, from *upid.UPID, pbMsg proto.Message) {
var err error
if msg, ok := pbMsg.(*mesos.AuthenticationErrorMessage); !ok {
err = fmt.Errorf("Expected AuthenticationErrorMessage, not %T", pbMsg)
} else {
err = fmt.Errorf("Authentication error: %s", msg.GetError())
}
status := statusFrom(ctx)
self.terminate(status, statusError, err)
}

View File

@@ -0,0 +1,98 @@
package sasl
import (
"testing"
"time"
"github.com/gogo/protobuf/proto"
"github.com/mesos/mesos-go/auth/callback"
"github.com/mesos/mesos-go/auth/sasl/mech/crammd5"
mesos "github.com/mesos/mesos-go/mesosproto"
"github.com/mesos/mesos-go/messenger"
"github.com/mesos/mesos-go/upid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"golang.org/x/net/context"
)
type MockTransport struct {
*messenger.MockedMessenger
}
func (m *MockTransport) Send(ctx context.Context, upid *upid.UPID, msg proto.Message) error {
return m.Called(mock.Anything, upid, msg).Error(0)
}
func TestAuthticatee_validLogin(t *testing.T) {
assert := assert.New(t)
ctx := context.TODO()
client := upid.UPID{
ID: "someFramework",
Host: "b.net",
Port: "789",
}
server := upid.UPID{
ID: "serv",
Host: "a.com",
Port: "123",
}
tpid := upid.UPID{
ID: "sasl_transport",
Host: "g.org",
Port: "456",
}
handler := callback.HandlerFunc(func(cb ...callback.Interface) error {
for _, c := range cb {
switch c := c.(type) {
case *callback.Name:
c.Set("foo")
case *callback.Password:
c.Set([]byte("bar"))
case *callback.Interprocess:
c.Set(server, client)
default:
return &callback.Unsupported{Callback: c}
}
}
return nil
})
var transport *MockTransport
factory := transportFactoryFunc(func() messenger.Messenger {
transport = &MockTransport{messenger.NewMockedMessenger()}
transport.On("Install").Return(nil)
transport.On("UPID").Return(&tpid)
transport.On("Start").Return(nil)
transport.On("Stop").Return(nil)
transport.On("Send", mock.Anything, &server, &mesos.AuthenticateMessage{
Pid: proto.String(client.String()),
}).Return(nil).Once()
transport.On("Send", mock.Anything, &server, &mesos.AuthenticationStartMessage{
Mechanism: proto.String(crammd5.Name),
Data: proto.String(""), // may be nil, depends on init step
}).Return(nil).Once()
transport.On("Send", mock.Anything, &server, &mesos.AuthenticationStepMessage{
Data: []byte(`foo cc7fd96cd80123ea844a7dba29a594ed`),
}).Return(nil).Once()
go func() {
transport.Recv(&server, &mesos.AuthenticationMechanismsMessage{
Mechanisms: []string{crammd5.Name},
})
transport.Recv(&server, &mesos.AuthenticationStepMessage{
Data: []byte(`lsd;lfkgjs;dlfkgjs;dfklg`),
})
transport.Recv(&server, &mesos.AuthenticationCompletedMessage{})
}()
return transport
})
login, err := makeAuthenticatee(handler, factory)
assert.Nil(err)
err = login.Authenticate(ctx, handler)
assert.Nil(err)
assert.NotNil(transport)
time.Sleep(1 * time.Second) // wait for the authenticator to shut down
transport.AssertExpectations(t)
}

View File

@@ -0,0 +1,43 @@
package sasl
import (
"net"
"golang.org/x/net/context"
)
// unexported to prevent collisions with context keys defined in
// other packages.
type _key int
// If this package defined other context keys, they would have
// different integer values.
const (
statusKey _key = iota
bindingAddressKey // bind address for login-related network ops
)
func withStatus(ctx context.Context, s statusType) context.Context {
return context.WithValue(ctx, statusKey, s)
}
func statusFrom(ctx context.Context) statusType {
s, ok := ctx.Value(statusKey).(statusType)
if !ok {
panic("missing status in context")
}
return s
}
func WithBindingAddress(ctx context.Context, address net.IP) context.Context {
return context.WithValue(ctx, bindingAddressKey, address)
}
func BindingAddressFrom(ctx context.Context) net.IP {
obj := ctx.Value(bindingAddressKey)
if addr, ok := obj.(net.IP); ok {
return addr
} else {
return nil
}
}
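A brief round-trip sketch of these helpers (illustrative only; the function name is hypothetical):

package sasl

import (
	"net"

	"golang.org/x/net/context"
)

// exampleBindingAddress shows a bind address surviving the trip
// through the context; it returns nil if nothing was set.
func exampleBindingAddress() net.IP {
	ctx := WithBindingAddress(context.Background(), net.ParseIP("127.0.0.1"))
	return BindingAddressFrom(ctx)
}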

View File

@@ -0,0 +1,72 @@
package crammd5
import (
"crypto/hmac"
"crypto/md5"
"encoding/hex"
"errors"
"io"
log "github.com/golang/glog"
"github.com/mesos/mesos-go/auth/callback"
"github.com/mesos/mesos-go/auth/sasl/mech"
)
var (
Name = "CRAM-MD5" // name this mechanism is registered with
//TODO(jdef) is this a generic SASL error? if so, move it up to mech
challengeDataRequired = errors.New("challenge data may not be empty")
)
func init() {
mech.Register(Name, newInstance)
}
type mechanism struct {
handler callback.Handler
}
func (m *mechanism) Handler() callback.Handler {
return m.handler
}
func (m *mechanism) Discard() {
// noop
}
func newInstance(h callback.Handler) (mech.Interface, mech.StepFunc, error) {
m := &mechanism{
handler: h,
}
fn := func(m mech.Interface, data []byte) (mech.StepFunc, []byte, error) {
// noop: no initialization needed
return challengeResponse, nil, nil
}
return m, fn, nil
}
// algorithm lifted from wikipedia: http://en.wikipedia.org/wiki/CRAM-MD5
// except that the SASL mechanism used by Mesos doesn't leverage base64 encoding
func challengeResponse(m mech.Interface, data []byte) (mech.StepFunc, []byte, error) {
if len(data) == 0 {
return mech.IllegalState, nil, challengeDataRequired
}
decoded := string(data)
log.V(4).Infof("challenge(decoded): %s", decoded) // for deep debugging only
username := callback.NewName()
secret := callback.NewPassword()
if err := m.Handler().Handle(username, secret); err != nil {
return mech.IllegalState, nil, err
}
hash := hmac.New(md5.New, secret.Get())
if _, err := io.WriteString(hash, decoded); err != nil {
return mech.IllegalState, nil, err
}
codes := hex.EncodeToString(hash.Sum(nil))
msg := username.Get() + " " + codes
return nil, []byte(msg), nil
}
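The response format can be reproduced standalone. A minimal sketch of the digest computation performed by challengeResponse, using the same inputs as the authenticatee test above:

package main

import (
	"crypto/hmac"
	"crypto/md5"
	"encoding/hex"
	"fmt"
)

func main() {
	challenge := []byte("lsd;lfkgjs;dlfkgjs;dfklg") // server-supplied challenge
	secret := []byte("bar")                         // password from the handler
	mac := hmac.New(md5.New, secret)
	mac.Write(challenge)
	// prints "foo cc7fd96cd80123ea844a7dba29a594ed" for user "foo",
	// matching the step message expected by the test.
	fmt.Printf("foo %s\n", hex.EncodeToString(mac.Sum(nil)))
}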

View File

@@ -0,0 +1,33 @@
package mech
import (
"errors"
"github.com/mesos/mesos-go/auth/callback"
)
var (
IllegalStateErr = errors.New("illegal mechanism state")
)
type Interface interface {
Handler() callback.Handler
Discard() // clean up resources or sensitive information; idempotent
}
// Factory returns a mechanism and its initialization step (which may be a
// noop that returns a nil data blob and a handle to the first "real"
// challenge step).
type Factory func(h callback.Handler) (Interface, StepFunc, error)
// StepFunc implementations should never return a nil StepFunc result. This
// helps keep the logic in the SASL authenticatee simpler: step functions are
// never nil. Mechanisms that end up in an error state (for example, some
// decoding logic fails...) should return a StepFunc that represents an error
// state. Some mechanisms may be able to recover from such a state.
type StepFunc func(m Interface, data []byte) (StepFunc, []byte, error)
// reflects an unrecoverable, illegal mechanism state; always returns IllegalState
// as the next step along with an IllegalStateErr
func IllegalState(m Interface, data []byte) (StepFunc, []byte, error) {
return IllegalState, nil, IllegalStateErr
}
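As a hedged illustration of the Factory/StepFunc contract, a hypothetical no-op mechanism (not one registered by this library):

package mech

import "github.com/mesos/mesos-go/auth/callback"

type noopMech struct{ h callback.Handler }

func (m *noopMech) Handler() callback.Handler { return m.h }
func (m *noopMech) Discard()                  {}

// newNoop satisfies Factory: its single step emits no data and hands
// off to the IllegalState sentinel, so any further step is rejected.
func newNoop(h callback.Handler) (Interface, StepFunc, error) {
	first := func(m Interface, data []byte) (StepFunc, []byte, error) {
		return IllegalState, nil, nil
	}
	return &noopMech{h: h}, first, nil
}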

View File

@@ -0,0 +1,49 @@
package mech
import (
"fmt"
"sync"
log "github.com/golang/glog"
)
var (
mechLock sync.Mutex
supportedMechs = make(map[string]Factory)
)
func Register(name string, f Factory) error {
mechLock.Lock()
defer mechLock.Unlock()
if _, found := supportedMechs[name]; found {
return fmt.Errorf("Mechanism registered twice: %s", name)
}
supportedMechs[name] = f
log.V(1).Infof("Registered mechanism %s", name)
return nil
}
func ListSupported() (list []string) {
mechLock.Lock()
defer mechLock.Unlock()
for mechname := range supportedMechs {
list = append(list, mechname)
}
return list
}
func SelectSupported(mechanisms []string) (selectedMech string, factory Factory) {
mechLock.Lock()
defer mechLock.Unlock()
for _, m := range mechanisms {
if f, ok := supportedMechs[m]; ok {
selectedMech = m
factory = f
break
}
}
return
}

View File

@@ -0,0 +1,5 @@
/*
Package executor includes the interfaces of the mesos executor and
the mesos executor driver, as well as an implementation of the driver.
*/
package executor

View File

@@ -0,0 +1,142 @@
package executor
import (
"github.com/mesos/mesos-go/mesosproto"
)
/**
* Executor callback interface to be implemented by frameworks' executors. Note
* that only one callback will be invoked at a time, so it is not
* recommended that you block within a callback because it may cause a
* deadlock.
*
* Each callback includes a reference to the executor driver that was
* used to run this executor. The driver will not change for the
* duration of an executor (i.e., from the point you do
* ExecutorDriver.Start() to the point that ExecutorDriver.Join()
* returns). This is intended for convenience so that an executor
* doesn't need to store a pointer to the driver itself.
*/
type Executor interface {
/**
* Invoked once the executor driver has been able to successfully
* connect with Mesos. In particular, a scheduler can pass some
* data to its executors through the FrameworkInfo.ExecutorInfo's
* data field.
*/
Registered(ExecutorDriver, *mesosproto.ExecutorInfo, *mesosproto.FrameworkInfo, *mesosproto.SlaveInfo)
/**
* Invoked when the executor re-registers with a restarted slave.
*/
Reregistered(ExecutorDriver, *mesosproto.SlaveInfo)
/**
* Invoked when the executor becomes "disconnected" from the slave
* (e.g., the slave is being restarted due to an upgrade).
*/
Disconnected(ExecutorDriver)
/**
* Invoked when a task has been launched on this executor (initiated
* via SchedulerDriver.LaunchTasks). Note that this task can be realized
* with a goroutine, an external process, or some simple computation, however,
* no other callbacks will be invoked on this executor until this
* callback has returned.
*/
LaunchTask(ExecutorDriver, *mesosproto.TaskInfo)
/**
* Invoked when a task running within this executor has been killed
* (via SchedulerDriver.KillTask). Note that no status update will
* be sent on behalf of the executor, the executor is responsible
* for creating a new TaskStatus (i.e., with TASK_KILLED) and
* invoking ExecutorDriver.SendStatusUpdate.
*/
KillTask(ExecutorDriver, *mesosproto.TaskID)
/**
* Invoked when a framework message has arrived for this
* executor. These messages are best effort; do not expect a
* framework message to be retransmitted in any reliable fashion.
*/
FrameworkMessage(ExecutorDriver, string)
/**
* Invoked when the executor should terminate all of its currently
* running tasks. Note that after Mesos has determined that an
* executor has terminated, any tasks for which the executor did not
* send terminal status updates (e.g., TASK_KILLED, TASK_FINISHED,
* TASK_FAILED, etc.) will have a TASK_LOST status update created.
*/
Shutdown(ExecutorDriver)
/**
* Invoked when a fatal error has occurred with the executor and/or
* executor driver. The driver will be aborted BEFORE invoking this
* callback.
*/
Error(ExecutorDriver, string)
}
/**
* ExecutorDriver interface for connecting an executor to Mesos. This
* interface is used both to manage the executor's lifecycle (start
* it, stop it, or wait for it to finish) and to interact with Mesos
* (e.g., send status updates, send framework messages, etc.).
* A driver method is expected to fail-fast and return an error when possible.
* Other internal errors (or remote errors) that occur asynchronously are
* handled using the Executor.Error() callback.
*/
type ExecutorDriver interface {
/**
* Starts the executor driver. This needs to be called before any
* other driver calls are made.
*/
Start() (mesosproto.Status, error)
/**
* Stops the executor driver.
*/
Stop() (mesosproto.Status, error)
/**
* Aborts the driver so that no more callbacks can be made to the
* executor. The semantics of abort and stop have deliberately been
* separated so that code can detect an aborted driver (i.e., via
* the return status of ExecutorDriver.Join, see below), and
* instantiate and start another driver if desired (from within the
* same process ... although this functionality is currently not
* supported for executors).
*/
Abort() (mesosproto.Status, error)
/**
* Waits for the driver to be stopped or aborted, possibly
* blocking the calling goroutine indefinitely. The return status of
* this function can be used to determine if the driver was aborted
* (see package mesosproto for a description of Status).
*/
Join() (mesosproto.Status, error)
/**
* Starts and immediately joins (i.e., blocks on) the driver.
*/
Run() (mesosproto.Status, error)
/**
* Sends a status update to the framework scheduler, retrying as
* necessary until an acknowledgement has been received or the
* executor is terminated (in which case, a TASK_LOST status update
* will be sent). See Scheduler.StatusUpdate for more information
* about status update acknowledgements.
*/
SendStatusUpdate(*mesosproto.TaskStatus) (mesosproto.Status, error)
/**
* Sends a message to the framework scheduler. These messages are
* best effort; do not expect a framework message to be
* retransmitted in any reliable fashion.
*/
SendFrameworkMessage(string) (mesosproto.Status, error)
}
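For orientation, a hedged sketch of the smallest useful wiring: a no-op Executor handed to the driver implementation that follows (DriverConfig and NewMesosExecutorDriver appear in the next file):

package main

import (
	log "github.com/golang/glog"
	exec "github.com/mesos/mesos-go/executor"
	mesos "github.com/mesos/mesos-go/mesosproto"
)

type noopExecutor struct{}

func (e *noopExecutor) Registered(exec.ExecutorDriver, *mesos.ExecutorInfo, *mesos.FrameworkInfo, *mesos.SlaveInfo) {
}
func (e *noopExecutor) Reregistered(exec.ExecutorDriver, *mesos.SlaveInfo) {}
func (e *noopExecutor) Disconnected(exec.ExecutorDriver)                   {}
func (e *noopExecutor) LaunchTask(d exec.ExecutorDriver, t *mesos.TaskInfo) {
	log.Infof("launching task %v", t.GetTaskId())
}
func (e *noopExecutor) KillTask(exec.ExecutorDriver, *mesos.TaskID)  {}
func (e *noopExecutor) FrameworkMessage(exec.ExecutorDriver, string) {}
func (e *noopExecutor) Shutdown(exec.ExecutorDriver)                 {}
func (e *noopExecutor) Error(d exec.ExecutorDriver, msg string)      { log.Errorln(msg) }

func main() {
	driver, err := exec.NewMesosExecutorDriver(exec.DriverConfig{Executor: &noopExecutor{}})
	if err != nil {
		log.Fatalf("failed to create driver: %v", err)
	}
	if stat, err := driver.Run(); err != nil {
		log.Fatalf("driver exited with status %v: %v", stat, err)
	}
}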

View File

@@ -0,0 +1,583 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package executor
import (
"fmt"
"net"
"os"
"sync"
"time"
"code.google.com/p/go-uuid/uuid"
"github.com/gogo/protobuf/proto"
log "github.com/golang/glog"
"github.com/mesos/mesos-go/mesosproto"
"github.com/mesos/mesos-go/mesosutil"
"github.com/mesos/mesos-go/mesosutil/process"
"github.com/mesos/mesos-go/messenger"
"github.com/mesos/mesos-go/upid"
"golang.org/x/net/context"
)
type DriverConfig struct {
Executor Executor
HostnameOverride string // optional
BindingAddress net.IP // optional
BindingPort uint16 // optional
NewMessenger func() (messenger.Messenger, error) // optional
}
// MesosExecutorDriver is an implementation of the ExecutorDriver.
type MesosExecutorDriver struct {
lock sync.RWMutex
self *upid.UPID
exec Executor
stopCh chan struct{}
destroyCh chan struct{}
stopped bool
status mesosproto.Status
messenger messenger.Messenger
slaveUPID *upid.UPID
slaveID *mesosproto.SlaveID
frameworkID *mesosproto.FrameworkID
executorID *mesosproto.ExecutorID
workDir string
connected bool
connection uuid.UUID
local bool // TODO(yifan): Not used yet.
directory string // TODO(yifan): Not used yet.
checkpoint bool
recoveryTimeout time.Duration
updates map[string]*mesosproto.StatusUpdate // Key is a UUID string. TODO(yifan): Not used yet.
tasks map[string]*mesosproto.TaskInfo // Key is a UUID string. TODO(yifan): Not used yet.
}
// NewMesosExecutorDriver creates a new mesos executor driver.
func NewMesosExecutorDriver(config DriverConfig) (*MesosExecutorDriver, error) {
if config.Executor == nil {
msg := "Executor callback interface cannot be nil."
log.Errorln(msg)
return nil, fmt.Errorf(msg)
}
hostname := mesosutil.GetHostname(config.HostnameOverride)
newMessenger := config.NewMessenger
if newMessenger == nil {
newMessenger = func() (messenger.Messenger, error) {
process := process.New("executor")
return messenger.ForHostname(process, hostname, config.BindingAddress, config.BindingPort)
}
}
driver := &MesosExecutorDriver{
exec: config.Executor,
status: mesosproto.Status_DRIVER_NOT_STARTED,
stopCh: make(chan struct{}),
destroyCh: make(chan struct{}),
stopped: true,
updates: make(map[string]*mesosproto.StatusUpdate),
tasks: make(map[string]*mesosproto.TaskInfo),
workDir: ".",
}
var err error
if driver.messenger, err = newMessenger(); err != nil {
return nil, err
}
if err = driver.init(); err != nil {
log.Errorf("failed to initialize the driver: %v", err)
return nil, err
}
return driver, nil
}
// init initializes the driver.
func (driver *MesosExecutorDriver) init() error {
log.Infof("Init mesos executor driver\n")
log.Infof("Version: %v\n", mesosutil.MesosVersion)
// Parse environments.
if err := driver.parseEnvironments(); err != nil {
log.Errorf("Failed to parse environments: %v\n", err)
return err
}
// Install handlers.
driver.messenger.Install(driver.registered, &mesosproto.ExecutorRegisteredMessage{})
driver.messenger.Install(driver.reregistered, &mesosproto.ExecutorReregisteredMessage{})
driver.messenger.Install(driver.reconnect, &mesosproto.ReconnectExecutorMessage{})
driver.messenger.Install(driver.runTask, &mesosproto.RunTaskMessage{})
driver.messenger.Install(driver.killTask, &mesosproto.KillTaskMessage{})
driver.messenger.Install(driver.statusUpdateAcknowledgement, &mesosproto.StatusUpdateAcknowledgementMessage{})
driver.messenger.Install(driver.frameworkMessage, &mesosproto.FrameworkToExecutorMessage{})
driver.messenger.Install(driver.shutdown, &mesosproto.ShutdownExecutorMessage{})
driver.messenger.Install(driver.frameworkError, &mesosproto.FrameworkErrorMessage{})
return nil
}
func (driver *MesosExecutorDriver) parseEnvironments() error {
var value string
value = os.Getenv("MESOS_LOCAL")
if len(value) > 0 {
driver.local = true
}
value = os.Getenv("MESOS_SLAVE_PID")
if len(value) == 0 {
return fmt.Errorf("Cannot find MESOS_SLAVE_PID in the environment")
}
upid, err := upid.Parse(value)
if err != nil {
log.Errorf("Cannot parse UPID %v\n", err)
return err
}
driver.slaveUPID = upid
value = os.Getenv("MESOS_SLAVE_ID")
driver.slaveID = &mesosproto.SlaveID{Value: proto.String(value)}
value = os.Getenv("MESOS_FRAMEWORK_ID")
driver.frameworkID = &mesosproto.FrameworkID{Value: proto.String(value)}
value = os.Getenv("MESOS_EXECUTOR_ID")
driver.executorID = &mesosproto.ExecutorID{Value: proto.String(value)}
value = os.Getenv("MESOS_DIRECTORY")
if len(value) > 0 {
driver.workDir = value
}
value = os.Getenv("MESOS_CHECKPOINT")
if value == "1" {
driver.checkpoint = true
}
// TODO(yifan): Parse the duration. For now just use default.
return nil
}
// ------------------------- Accessors ----------------------- //
func (driver *MesosExecutorDriver) Status() mesosproto.Status {
driver.lock.RLock()
defer driver.lock.RUnlock()
return driver.status
}
func (driver *MesosExecutorDriver) setStatus(stat mesosproto.Status) {
driver.lock.Lock()
driver.status = stat
driver.lock.Unlock()
}
func (driver *MesosExecutorDriver) Stopped() bool {
return driver.stopped
}
func (driver *MesosExecutorDriver) setStopped(val bool) {
driver.lock.Lock()
driver.stopped = val
driver.lock.Unlock()
}
func (driver *MesosExecutorDriver) Connected() bool {
return driver.connected
}
func (driver *MesosExecutorDriver) setConnected(val bool) {
driver.lock.Lock()
driver.connected = val
driver.lock.Unlock()
}
// --------------------- Message Handlers --------------------- //
func (driver *MesosExecutorDriver) registered(from *upid.UPID, pbMsg proto.Message) {
log.Infoln("Executor driver registered")
msg := pbMsg.(*mesosproto.ExecutorRegisteredMessage)
slaveID := msg.GetSlaveId()
executorInfo := msg.GetExecutorInfo()
frameworkInfo := msg.GetFrameworkInfo()
slaveInfo := msg.GetSlaveInfo()
if driver.stopped {
log.Infof("Ignoring registered message from slave %v, because the driver is stopped!\n", slaveID)
return
}
log.Infof("Registered on slave %v\n", slaveID)
driver.setConnected(true)
driver.connection = uuid.NewUUID()
driver.exec.Registered(driver, executorInfo, frameworkInfo, slaveInfo)
}
func (driver *MesosExecutorDriver) reregistered(from *upid.UPID, pbMsg proto.Message) {
log.Infoln("Executor driver reregistered")
msg := pbMsg.(*mesosproto.ExecutorReregisteredMessage)
slaveID := msg.GetSlaveId()
slaveInfo := msg.GetSlaveInfo()
if driver.stopped {
log.Infof("Ignoring re-registered message from slave %v, because the driver is stopped!\n", slaveID)
return
}
log.Infof("Re-registered on slave %v\n", slaveID)
driver.setConnected(true)
driver.connection = uuid.NewUUID()
driver.exec.Reregistered(driver, slaveInfo)
}
func (driver *MesosExecutorDriver) send(upid *upid.UPID, msg proto.Message) error {
//TODO(jdef) should implement timeout here
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()
c := make(chan error, 1)
go func() { c <- driver.messenger.Send(ctx, upid, msg) }()
select {
case <-ctx.Done():
<-c // wait for Send(...)
return ctx.Err()
case err := <-c:
return err
}
}
func (driver *MesosExecutorDriver) reconnect(from *upid.UPID, pbMsg proto.Message) {
log.Infoln("Executor driver reconnect")
msg := pbMsg.(*mesosproto.ReconnectExecutorMessage)
slaveID := msg.GetSlaveId()
if driver.stopped {
log.Infof("Ignoring reconnect message from slave %v, because the driver is stopped!\n", slaveID)
return
}
log.Infof("Received reconnect request from slave %v\n", slaveID)
driver.slaveUPID = from
message := &mesosproto.ReregisterExecutorMessage{
ExecutorId: driver.executorID,
FrameworkId: driver.frameworkID,
}
// Send all unacknowledged updates.
for _, u := range driver.updates {
message.Updates = append(message.Updates, u)
}
// Send all unacknowledged tasks.
for _, t := range driver.tasks {
message.Tasks = append(message.Tasks, t)
}
// Send the message.
if err := driver.send(driver.slaveUPID, message); err != nil {
log.Errorf("Failed to send %v: %v\n", message, err)
}
}
func (driver *MesosExecutorDriver) runTask(from *upid.UPID, pbMsg proto.Message) {
log.Infoln("Executor driver runTask")
msg := pbMsg.(*mesosproto.RunTaskMessage)
task := msg.GetTask()
taskID := task.GetTaskId()
if driver.stopped {
log.Infof("Ignoring run task message for task %v because the driver is stopped!\n", taskID)
return
}
if _, ok := driver.tasks[taskID.String()]; ok {
log.Fatalf("Unexpected duplicate task %v\n", taskID)
}
log.Infof("Executor asked to run task '%v'\n", taskID)
driver.tasks[taskID.String()] = task
driver.exec.LaunchTask(driver, task)
}
func (driver *MesosExecutorDriver) killTask(from *upid.UPID, pbMsg proto.Message) {
log.Infoln("Executor driver killTask")
msg := pbMsg.(*mesosproto.KillTaskMessage)
taskID := msg.GetTaskId()
if driver.stopped {
log.Infof("Ignoring kill task message for task %v, because the driver is stopped!\n", taskID)
return
}
log.Infof("Executor driver is asked to kill task '%v'\n", taskID)
driver.exec.KillTask(driver, taskID)
}
func (driver *MesosExecutorDriver) statusUpdateAcknowledgement(from *upid.UPID, pbMsg proto.Message) {
log.Infoln("Executor statusUpdateAcknowledgement")
msg := pbMsg.(*mesosproto.StatusUpdateAcknowledgementMessage)
log.Infof("Receiving status update acknowledgement %v", msg)
frameworkID := msg.GetFrameworkId()
taskID := msg.GetTaskId()
uuid := uuid.UUID(msg.GetUuid())
if driver.stopped {
log.Infof("Ignoring status update acknowledgement %v for task %v of framework %v because the driver is stopped!\n",
uuid, taskID, frameworkID)
return
}
// Remove the corresponding update.
delete(driver.updates, uuid.String())
// Remove the corresponding task.
delete(driver.tasks, taskID.String())
}
func (driver *MesosExecutorDriver) frameworkMessage(from *upid.UPID, pbMsg proto.Message) {
log.Infoln("Executor driver received frameworkMessage")
msg := pbMsg.(*mesosproto.FrameworkToExecutorMessage)
data := msg.GetData()
if driver.stopped {
log.Infof("Ignoring framework message because the driver is stopped!\n")
return
}
log.Infof("Executor driver receives framework message\n")
driver.exec.FrameworkMessage(driver, string(data))
}
func (driver *MesosExecutorDriver) shutdown(from *upid.UPID, pbMsg proto.Message) {
log.Infoln("Executor driver received shutdown")
_, ok := pbMsg.(*mesosproto.ShutdownExecutorMessage)
if !ok {
panic("Not a ShutdownExecutorMessage! This should not happen")
}
if driver.stopped {
log.Infof("Ignoring shutdown message because the driver is stopped!\n")
return
}
log.Infof("Executor driver is asked to shutdown\n")
driver.exec.Shutdown(driver)
// driver.Stop() will cause process to eventually stop.
driver.Stop()
}
func (driver *MesosExecutorDriver) frameworkError(from *upid.UPID, pbMsg proto.Message) {
log.Infoln("Executor driver received error")
msg := pbMsg.(*mesosproto.FrameworkErrorMessage)
driver.exec.Error(driver, msg.GetMessage())
}
// ------------------------ Driver Implementation ----------------- //
// Start starts the executor driver
func (driver *MesosExecutorDriver) Start() (mesosproto.Status, error) {
log.Infoln("Starting the executor driver")
if stat := driver.Status(); stat != mesosproto.Status_DRIVER_NOT_STARTED {
return stat, fmt.Errorf("Unable to Start, expecting status %s, but got %s", mesosproto.Status_DRIVER_NOT_STARTED, stat)
}
driver.setStatus(mesosproto.Status_DRIVER_NOT_STARTED)
driver.setStopped(true)
// Start the messenger.
if err := driver.messenger.Start(); err != nil {
log.Errorf("Failed to start executor: %v\n", err)
return driver.Status(), err
}
driver.self = driver.messenger.UPID()
// Register with slave.
log.V(3).Infoln("Sending Executor registration")
message := &mesosproto.RegisterExecutorMessage{
FrameworkId: driver.frameworkID,
ExecutorId: driver.executorID,
}
if err := driver.send(driver.slaveUPID, message); err != nil {
stat := driver.Status()
log.Errorf("Stopping the executor, failed to send %v: %v\n", message, err)
err0 := driver.stop(stat)
if err0 != nil {
log.Errorf("Failed to stop executor: %v\n", err)
return stat, err0
}
return stat, err
}
driver.setStopped(false)
driver.setStatus(mesosproto.Status_DRIVER_RUNNING)
log.Infoln("Mesos executor is started with PID=", driver.self.String())
return driver.Status(), nil
}
// Stop stops the executor driver and transitions it to the
// DRIVER_STOPPED status.
func (driver *MesosExecutorDriver) Stop() (mesosproto.Status, error) {
log.Infoln("Stopping the executor driver")
if stat := driver.Status(); stat != mesosproto.Status_DRIVER_RUNNING {
return stat, fmt.Errorf("Unable to Stop, expecting status %s, but got %s", mesosproto.Status_DRIVER_RUNNING, stat)
}
stopStat := mesosproto.Status_DRIVER_STOPPED
return stopStat, driver.stop(stopStat)
}
// stop is the internal function for stopping the driver and recording the
// reason. Note that messages in flight or queued will not be processed.
func (driver *MesosExecutorDriver) stop(stopStatus mesosproto.Status) error {
err := driver.messenger.Stop()
defer close(driver.destroyCh)
defer close(driver.stopCh)
driver.setStatus(stopStatus)
driver.setStopped(true)
if err != nil {
return err
}
return nil
}
// Abort aborts the driver so that no more callbacks are invoked on the
// executor, and transitions the driver to the DRIVER_ABORTED status.
func (driver *MesosExecutorDriver) Abort() (mesosproto.Status, error) {
if stat := driver.Status(); stat != mesosproto.Status_DRIVER_RUNNING {
return stat, fmt.Errorf("Unable to Stop, expecting status %s, but got %s", mesosproto.Status_DRIVER_RUNNING, stat)
}
log.Infoln("Aborting the executor driver")
abortStat := mesosproto.Status_DRIVER_ABORTED
return abortStat, driver.stop(abortStat)
}
// Join blocks until the driver has been stopped or aborted, then returns
// the driver's final status.
func (driver *MesosExecutorDriver) Join() (mesosproto.Status, error) {
log.Infoln("Waiting for the executor driver to stop")
if stat := driver.Status(); stat != mesosproto.Status_DRIVER_RUNNING {
return stat, fmt.Errorf("Unable to Join, expecting status %s, but got %s", mesosproto.Status_DRIVER_RUNNING, stat)
}
<-driver.stopCh // wait for stop signal
return driver.Status(), nil
}
// Run starts the driver and calls Join() to wait for stop request.
func (driver *MesosExecutorDriver) Run() (mesosproto.Status, error) {
stat, err := driver.Start()
if err != nil {
return driver.Stop()
}
if stat != mesosproto.Status_DRIVER_RUNNING {
return stat, fmt.Errorf("Unable to continue to Run, expecting status %s, but got %s", mesosproto.Status_DRIVER_RUNNING, driver.status)
}
return driver.Join()
}
// SendStatusUpdate sends status updates to the slave.
func (driver *MesosExecutorDriver) SendStatusUpdate(taskStatus *mesosproto.TaskStatus) (mesosproto.Status, error) {
log.V(3).Infoln("Sending task status update: ", taskStatus.String())
if stat := driver.Status(); stat != mesosproto.Status_DRIVER_RUNNING {
return stat, fmt.Errorf("Unable to SendStatusUpdate, expecting driver.status %s, but got %s", mesosproto.Status_DRIVER_RUNNING, stat)
}
if taskStatus.GetState() == mesosproto.TaskState_TASK_STAGING {
err := fmt.Errorf("Executor is not allowed to send TASK_STAGING status update. Aborting!")
log.Errorln(err)
if err0 := driver.stop(mesosproto.Status_DRIVER_ABORTED); err0 != nil {
log.Errorln("Error while stopping the driver", err0)
}
return driver.Status(), err
}
// Set up status update.
update := driver.makeStatusUpdate(taskStatus)
log.Infof("Executor sending status update %v\n", update.String())
// Capture the status update.
driver.updates[uuid.UUID(update.GetUuid()).String()] = update
// Put the status update in the message.
message := &mesosproto.StatusUpdateMessage{
Update: update,
Pid: proto.String(driver.self.String()),
}
// Send the message.
if err := driver.send(driver.slaveUPID, message); err != nil {
log.Errorf("Failed to send %v: %v\n", message, err)
return driver.Status(), err
}
return driver.Status(), nil
}
func (driver *MesosExecutorDriver) makeStatusUpdate(taskStatus *mesosproto.TaskStatus) *mesosproto.StatusUpdate {
now := float64(time.Now().Unix())
// Fill in all the fields.
taskStatus.Timestamp = proto.Float64(now)
taskStatus.SlaveId = driver.slaveID
update := &mesosproto.StatusUpdate{
FrameworkId: driver.frameworkID,
ExecutorId: driver.executorID,
SlaveId: driver.slaveID,
Status: taskStatus,
Timestamp: proto.Float64(now),
Uuid: uuid.NewUUID(),
}
return update
}
// SendFrameworkMessage sends the framework message by sending a 'sendFrameworkMessageEvent'
// to the event loop, and receives the result from the response channel.
func (driver *MesosExecutorDriver) SendFrameworkMessage(data string) (mesosproto.Status, error) {
log.V(3).Infoln("Sending framework message", string(data))
if stat := driver.Status(); stat != mesosproto.Status_DRIVER_RUNNING {
return stat, fmt.Errorf("Unable to SendFrameworkMessage, expecting status %s, but got %s", mesosproto.Status_DRIVER_RUNNING, stat)
}
message := &mesosproto.ExecutorToFrameworkMessage{
SlaveId: driver.slaveID,
FrameworkId: driver.frameworkID,
ExecutorId: driver.executorID,
Data: []byte(data),
}
// Send the message.
if err := driver.send(driver.slaveUPID, message); err != nil {
log.Errorln("Failed to send message %v: %v", message, err)
return driver.status, err
}
return driver.status, nil
}
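
For orientation, here is a minimal sketch of how a framework author might wire an executor into this driver. The `Executor` callback interface, `DriverConfig`, `NewMesosExecutorDriver`, `Run`, and `SendStatusUpdate` are the APIs defined in this change; the `exampleExecutor` type and its no-op bodies are purely illustrative, and the process must be launched by a Mesos slave so that the `MESOS_*` environment variables the driver parses at construction are present.

```go
package main

import (
	"log"

	exec "github.com/mesos/mesos-go/executor"
	mesos "github.com/mesos/mesos-go/mesosproto"
	util "github.com/mesos/mesos-go/mesosutil"
)

// exampleExecutor is an illustrative, mostly no-op implementation of
// the Executor callback interface.
type exampleExecutor struct{}

func (e *exampleExecutor) Registered(exec.ExecutorDriver, *mesos.ExecutorInfo, *mesos.FrameworkInfo, *mesos.SlaveInfo) {
}
func (e *exampleExecutor) Reregistered(exec.ExecutorDriver, *mesos.SlaveInfo) {}
func (e *exampleExecutor) Disconnected(exec.ExecutorDriver)                   {}

func (e *exampleExecutor) LaunchTask(d exec.ExecutorDriver, task *mesos.TaskInfo) {
	// Report the task as running; SendStatusUpdate stamps the update
	// with a timestamp and UUID and forwards it to the slave.
	status := util.NewTaskStatus(task.TaskId, mesos.TaskState_TASK_RUNNING)
	if _, err := d.SendStatusUpdate(status); err != nil {
		log.Printf("status update failed: %v", err)
	}
}

func (e *exampleExecutor) KillTask(exec.ExecutorDriver, *mesos.TaskID)  {}
func (e *exampleExecutor) FrameworkMessage(exec.ExecutorDriver, string) {}
func (e *exampleExecutor) Shutdown(exec.ExecutorDriver)                 {}
func (e *exampleExecutor) Error(_ exec.ExecutorDriver, msg string)      { log.Println(msg) }

func main() {
	driver, err := exec.NewMesosExecutorDriver(exec.DriverConfig{Executor: &exampleExecutor{}})
	if err != nil {
		log.Fatal(err)
	}
	// Run starts the driver and blocks in Join until the slave shuts
	// us down or the driver aborts.
	stat, err := driver.Run()
	if err != nil {
		log.Printf("driver terminated with status %s: %v", stat, err)
	}
}
```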

View File

@@ -0,0 +1,531 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package executor
import (
"io/ioutil"
"net/http"
"net/url"
"os"
"strings"
"sync"
"testing"
"time"
"code.google.com/p/go-uuid/uuid"
"github.com/gogo/protobuf/proto"
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
util "github.com/mesos/mesos-go/mesosutil"
"github.com/mesos/mesos-go/testutil"
"github.com/stretchr/testify/assert"
)
// testExecutor is used for testing Executor callbacks.
type testExecutor struct {
ch chan bool
wg *sync.WaitGroup
t *testing.T
}
func newTestExecutor(t *testing.T) *testExecutor {
return &testExecutor{ch: make(chan bool), t: t}
}
func (exec *testExecutor) Registered(driver ExecutorDriver, execinfo *mesos.ExecutorInfo, fwinfo *mesos.FrameworkInfo, slaveinfo *mesos.SlaveInfo) {
log.Infoln("Exec.Registered() called.")
assert.NotNil(exec.t, execinfo)
assert.NotNil(exec.t, fwinfo)
assert.NotNil(exec.t, slaveinfo)
exec.ch <- true
}
func (exec *testExecutor) Reregistered(driver ExecutorDriver, slaveinfo *mesos.SlaveInfo) {
log.Infoln("Exec.Re-registered() called.")
assert.NotNil(exec.t, slaveinfo)
exec.ch <- true
}
func (e *testExecutor) Disconnected(ExecutorDriver) {}
func (exec *testExecutor) LaunchTask(driver ExecutorDriver, taskinfo *mesos.TaskInfo) {
log.Infoln("Exec.LaunchTask() called.")
assert.NotNil(exec.t, taskinfo)
assert.True(exec.t, util.NewTaskID("test-task-001").Equal(taskinfo.TaskId))
exec.ch <- true
}
func (exec *testExecutor) KillTask(driver ExecutorDriver, taskid *mesos.TaskID) {
log.Infoln("Exec.KillTask() called.")
assert.NotNil(exec.t, taskid)
assert.True(exec.t, util.NewTaskID("test-task-001").Equal(taskid))
exec.ch <- true
}
func (exec *testExecutor) FrameworkMessage(driver ExecutorDriver, message string) {
log.Infoln("Exec.FrameworkMessage() called.")
assert.NotNil(exec.t, message)
assert.Equal(exec.t, "Hello-Test", message)
exec.ch <- true
}
func (exec *testExecutor) Shutdown(ExecutorDriver) {
log.Infoln("Exec.Shutdown() called.")
exec.ch <- true
}
func (exec *testExecutor) Error(driver ExecutorDriver, err string) {
log.Infoln("Exec.Error() called.")
log.Infoln("Got error ", err)
driver.Stop()
exec.ch <- true
}
// ------------------------ Test Functions -------------------- //
func setTestEnv(t *testing.T) {
assert.NoError(t, os.Setenv("MESOS_FRAMEWORK_ID", frameworkID))
assert.NoError(t, os.Setenv("MESOS_EXECUTOR_ID", executorID))
}
func newIntegrationTestDriver(t *testing.T, exec Executor) *MesosExecutorDriver {
dconfig := DriverConfig{
Executor: exec,
}
driver, err := NewMesosExecutorDriver(dconfig)
if err != nil {
t.Fatal(err)
}
return driver
}
func TestExecutorDriverRegisterExecutorMessage(t *testing.T) {
setTestEnv(t)
ch := make(chan bool)
server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
reqPath, err := url.QueryUnescape(req.URL.String())
assert.NoError(t, err)
log.Infoln("RCVD request", reqPath)
data, err := ioutil.ReadAll(req.Body)
if err != nil {
t.Fatalf("Missing RegisteredExecutor data from scheduler.")
}
defer req.Body.Close()
message := new(mesos.RegisterExecutorMessage)
err = proto.Unmarshal(data, message)
assert.NoError(t, err)
assert.Equal(t, frameworkID, message.GetFrameworkId().GetValue())
assert.Equal(t, executorID, message.GetExecutorId().GetValue())
ch <- true
rsp.WriteHeader(http.StatusAccepted)
})
defer server.Close()
exec := newTestExecutor(t)
exec.ch = ch
driver := newIntegrationTestDriver(t, exec)
assert.True(t, driver.stopped)
stat, err := driver.Start()
assert.NoError(t, err)
assert.False(t, driver.stopped)
assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
select {
case <-ch:
case <-time.After(time.Millisecond * 2):
log.Errorf("Tired of waiting...")
}
}
func TestExecutorDriverExecutorRegisteredEvent(t *testing.T) {
setTestEnv(t)
ch := make(chan bool)
// Mock Slave process to respond to registration event.
server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
reqPath, err := url.QueryUnescape(req.URL.String())
assert.NoError(t, err)
log.Infoln("RCVD request", reqPath)
rsp.WriteHeader(http.StatusAccepted)
})
defer server.Close()
exec := newTestExecutor(t)
exec.ch = ch
exec.t = t
// start
driver := newIntegrationTestDriver(t, exec)
stat, err := driver.Start()
assert.NoError(t, err)
assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
// simulate sending ExecutorRegisteredMessage from server to exec pid.
pbMsg := &mesos.ExecutorRegisteredMessage{
ExecutorInfo: util.NewExecutorInfo(util.NewExecutorID(executorID), nil),
FrameworkId: util.NewFrameworkID(frameworkID),
FrameworkInfo: util.NewFrameworkInfo("test", "test-framework", util.NewFrameworkID(frameworkID)),
SlaveId: util.NewSlaveID(slaveID),
SlaveInfo: &mesos.SlaveInfo{Hostname: proto.String("localhost")},
}
c := testutil.NewMockMesosClient(t, server.PID)
c.SendMessage(driver.self, pbMsg)
assert.True(t, driver.connected)
select {
case <-ch:
case <-time.After(time.Millisecond * 2):
log.Errorf("Tired of waiting...")
}
}
func TestExecutorDriverExecutorReregisteredEvent(t *testing.T) {
setTestEnv(t)
ch := make(chan bool)
// Mock Slave process to respond to registration event.
server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
reqPath, err := url.QueryUnescape(req.URL.String())
assert.NoError(t, err)
log.Infoln("RCVD request", reqPath)
rsp.WriteHeader(http.StatusAccepted)
})
defer server.Close()
exec := newTestExecutor(t)
exec.ch = ch
exec.t = t
// start
driver := newIntegrationTestDriver(t, exec)
stat, err := driver.Start()
assert.NoError(t, err)
assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
// simulate sending ExecutorReregisteredMessage from server to exec pid.
pbMsg := &mesos.ExecutorReregisteredMessage{
SlaveId: util.NewSlaveID(slaveID),
SlaveInfo: &mesos.SlaveInfo{Hostname: proto.String("localhost")},
}
c := testutil.NewMockMesosClient(t, server.PID)
c.SendMessage(driver.self, pbMsg)
assert.True(t, driver.connected)
select {
case <-ch:
case <-time.After(time.Millisecond * 2):
log.Errorf("Tired of waiting...")
}
}
func TestExecutorDriverReconnectEvent(t *testing.T) {
setTestEnv(t)
ch := make(chan bool)
// Mock Slave process to respond to registration event.
server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
reqPath, err := url.QueryUnescape(req.URL.String())
assert.NoError(t, err)
log.Infoln("RCVD request", reqPath)
// exec registration request
if strings.Contains(reqPath, "RegisterExecutorMessage") {
log.Infoln("Got Executor registration request")
}
if strings.Contains(reqPath, "ReregisterExecutorMessage") {
log.Infoln("Got Executor Re-registration request")
ch <- true
}
rsp.WriteHeader(http.StatusAccepted)
})
defer server.Close()
exec := newTestExecutor(t)
exec.t = t
// start
driver := newIntegrationTestDriver(t, exec)
stat, err := driver.Start()
assert.NoError(t, err)
assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
driver.connected = true
// send "reconnect" event to driver
pbMsg := &mesos.ReconnectExecutorMessage{
SlaveId: util.NewSlaveID(slaveID),
}
c := testutil.NewMockMesosClient(t, server.PID)
c.SendMessage(driver.self, pbMsg)
select {
case <-ch:
case <-time.After(time.Millisecond * 2):
log.Errorf("Tired of waiting...")
}
}
func TestExecutorDriverRunTaskEvent(t *testing.T) {
setTestEnv(t)
ch := make(chan bool)
// Mock Slave process to respond to registration event.
server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
reqPath, err := url.QueryUnescape(req.URL.String())
assert.NoError(t, err)
log.Infoln("RCVD request", reqPath)
rsp.WriteHeader(http.StatusAccepted)
})
defer server.Close()
exec := newTestExecutor(t)
exec.ch = ch
exec.t = t
// start
driver := newIntegrationTestDriver(t, exec)
stat, err := driver.Start()
assert.NoError(t, err)
assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
driver.connected = true
// send runtask event to driver
pbMsg := &mesos.RunTaskMessage{
FrameworkId: util.NewFrameworkID(frameworkID),
Framework: util.NewFrameworkInfo(
"test", "test-framework-001", util.NewFrameworkID(frameworkID),
),
Pid: proto.String(server.PID.String()),
Task: util.NewTaskInfo(
"test-task",
util.NewTaskID("test-task-001"),
util.NewSlaveID(slaveID),
[]*mesos.Resource{
util.NewScalarResource("mem", 112),
util.NewScalarResource("cpus", 2),
},
),
}
c := testutil.NewMockMesosClient(t, server.PID)
c.SendMessage(driver.self, pbMsg)
select {
case <-ch:
case <-time.After(time.Millisecond * 2):
log.Errorf("Tired of waiting...")
}
}
func TestExecutorDriverKillTaskEvent(t *testing.T) {
setTestEnv(t)
ch := make(chan bool)
// Mock Slave process to respond to registration event.
server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
reqPath, err := url.QueryUnescape(req.URL.String())
assert.NoError(t, err)
log.Infoln("RCVD request", reqPath)
rsp.WriteHeader(http.StatusAccepted)
})
defer server.Close()
exec := newTestExecutor(t)
exec.ch = ch
exec.t = t
// start
driver := newIntegrationTestDriver(t, exec)
stat, err := driver.Start()
assert.NoError(t, err)
assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
driver.connected = true
// send kill-task event to driver
pbMsg := &mesos.KillTaskMessage{
FrameworkId: util.NewFrameworkID(frameworkID),
TaskId: util.NewTaskID("test-task-001"),
}
c := testutil.NewMockMesosClient(t, server.PID)
c.SendMessage(driver.self, pbMsg)
select {
case <-ch:
case <-time.After(time.Millisecond * 2):
log.Errorf("Tired of waiting...")
}
}
func TestExecutorDriverStatusUpdateAcknowledgement(t *testing.T) {
setTestEnv(t)
ch := make(chan bool)
// Mock Slave process to respond to registration event.
server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
reqPath, err := url.QueryUnescape(req.URL.String())
assert.NoError(t, err)
log.Infoln("RCVD request", reqPath)
rsp.WriteHeader(http.StatusAccepted)
})
defer server.Close()
exec := newTestExecutor(t)
exec.ch = ch
exec.t = t
// start
driver := newIntegrationTestDriver(t, exec)
stat, err := driver.Start()
assert.NoError(t, err)
assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
driver.connected = true
// send ACK from server
pbMsg := &mesos.StatusUpdateAcknowledgementMessage{
SlaveId: util.NewSlaveID(slaveID),
FrameworkId: util.NewFrameworkID(frameworkID),
TaskId: util.NewTaskID("test-task-001"),
Uuid: []byte(uuid.NewRandom().String()),
}
c := testutil.NewMockMesosClient(t, server.PID)
c.SendMessage(driver.self, pbMsg)
<-time.After(time.Millisecond * 2)
}
func TestExecutorDriverFrameworkToExecutorMessageEvent(t *testing.T) {
setTestEnv(t)
ch := make(chan bool)
// Mock Slave process to respond to registration event.
server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
reqPath, err := url.QueryUnescape(req.URL.String())
assert.NoError(t, err)
log.Infoln("RCVD request", reqPath)
rsp.WriteHeader(http.StatusAccepted)
})
defer server.Close()
exec := newTestExecutor(t)
exec.ch = ch
exec.t = t
// start
driver := newIntegrationTestDriver(t, exec)
stat, err := driver.Start()
assert.NoError(t, err)
assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
driver.connected = true
// send framework-to-executor message to driver
pbMsg := &mesos.FrameworkToExecutorMessage{
SlaveId: util.NewSlaveID(slaveID),
ExecutorId: util.NewExecutorID(executorID),
FrameworkId: util.NewFrameworkID(frameworkID),
Data: []byte("Hello-Test"),
}
c := testutil.NewMockMesosClient(t, server.PID)
c.SendMessage(driver.self, pbMsg)
select {
case <-ch:
case <-time.After(time.Millisecond * 2):
log.Errorf("Tired of waiting...")
}
}
func TestExecutorDriverShutdownEvent(t *testing.T) {
setTestEnv(t)
ch := make(chan bool)
// Mock Slave process to respond to registration event.
server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
reqPath, err := url.QueryUnescape(req.URL.String())
assert.NoError(t, err)
log.Infoln("RCVD request", reqPath)
rsp.WriteHeader(http.StatusAccepted)
})
defer server.Close()
exec := newTestExecutor(t)
exec.ch = ch
exec.t = t
// start
driver := newIntegrationTestDriver(t, exec)
stat, err := driver.Start()
assert.NoError(t, err)
assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
driver.connected = true
// send shutdown event to driver
pbMsg := &mesos.ShutdownExecutorMessage{}
c := testutil.NewMockMesosClient(t, server.PID)
c.SendMessage(driver.self, pbMsg)
select {
case <-ch:
case <-time.After(time.Millisecond * 5):
log.Errorf("Tired of waiting...")
}
<-time.After(time.Millisecond * 5) // wait for shutdown to finish.
assert.Equal(t, mesos.Status_DRIVER_STOPPED, driver.Status())
}
func TestExecutorDriverError(t *testing.T) {
setTestEnv(t)
// Mock Slave process to respond to registration event.
server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
reqPath, err := url.QueryUnescape(req.URL.String())
assert.NoError(t, err)
log.Infoln("RCVD request", reqPath)
rsp.WriteHeader(http.StatusAccepted)
})
ch := make(chan bool)
exec := newTestExecutor(t)
exec.ch = ch
exec.t = t
driver := newIntegrationTestDriver(t, exec)
server.Close() // will cause error
// Run() causes async message processing to start.
// Therefore, error handling will be done via the Executor.Error callback.
stat, err := driver.Run()
assert.NoError(t, err)
assert.Equal(t, mesos.Status_DRIVER_STOPPED, stat)
select {
case <-ch:
case <-time.After(time.Millisecond * 5):
log.Errorf("Tired of waiting...")
}
}

View File

@@ -0,0 +1,396 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package executor
import (
"fmt"
"os"
"testing"
"time"
"github.com/mesos/mesos-go/healthchecker"
"github.com/mesos/mesos-go/mesosproto"
util "github.com/mesos/mesos-go/mesosutil"
"github.com/mesos/mesos-go/messenger"
"github.com/mesos/mesos-go/upid"
"github.com/stretchr/testify/assert"
)
var (
slavePID = "slave(1)@127.0.0.1:8080"
slaveID = "some-slave-id-uuid"
frameworkID = "some-framework-id-uuid"
executorID = "some-executor-id-uuid"
)
func setEnvironments(t *testing.T, workDir string, checkpoint bool) {
assert.NoError(t, os.Setenv("MESOS_SLAVE_PID", slavePID))
assert.NoError(t, os.Setenv("MESOS_SLAVE_ID", slaveID))
assert.NoError(t, os.Setenv("MESOS_FRAMEWORK_ID", frameworkID))
assert.NoError(t, os.Setenv("MESOS_EXECUTOR_ID", executorID))
if len(workDir) > 0 {
assert.NoError(t, os.Setenv("MESOS_DIRECTORY", workDir))
}
if checkpoint {
assert.NoError(t, os.Setenv("MESOS_CHECKPOINT", "1"))
}
}
func clearEnvironments(t *testing.T) {
assert.NoError(t, os.Setenv("MESOS_SLAVE_PID", ""))
assert.NoError(t, os.Setenv("MESOS_SLAVE_ID", ""))
assert.NoError(t, os.Setenv("MESOS_FRAMEWORK_ID", ""))
assert.NoError(t, os.Setenv("MESOS_EXECUTOR_ID", ""))
}
func newTestExecutorDriver(t *testing.T, exec Executor) *MesosExecutorDriver {
dconfig := DriverConfig{
Executor: exec,
}
driver, err := NewMesosExecutorDriver(dconfig)
if err != nil {
t.Fatal(err)
}
return driver
}
func createTestExecutorDriver(t *testing.T) (
*MesosExecutorDriver,
*messenger.MockedMessenger,
*healthchecker.MockedHealthChecker) {
exec := NewMockedExecutor()
setEnvironments(t, "", false)
driver := newTestExecutorDriver(t, exec)
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
checker := healthchecker.NewMockedHealthChecker()
checker.On("Start").Return()
checker.On("Stop").Return()
driver.messenger = messenger
return driver, messenger, checker
}
func TestExecutorDriverStartFailedToParseEnvironment(t *testing.T) {
clearEnvironments(t)
exec := NewMockedExecutor()
exec.On("Error").Return(nil)
driver := newTestExecutorDriver(t, exec)
assert.Nil(t, driver)
}
func TestExecutorDriverStartFailedToStartMessenger(t *testing.T) {
exec := NewMockedExecutor()
setEnvironments(t, "", false)
driver := newTestExecutorDriver(t, exec)
assert.NotNil(t, driver)
messenger := messenger.NewMockedMessenger()
driver.messenger = messenger
// Set expectations and return values.
messenger.On("Start").Return(fmt.Errorf("messenger failed to start"))
messenger.On("Stop").Return(nil)
status, err := driver.Start()
assert.Error(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_NOT_STARTED, status)
messenger.Stop()
messenger.AssertNumberOfCalls(t, "Start", 1)
messenger.AssertNumberOfCalls(t, "Stop", 1)
}
func TestExecutorDriverStartFailedToSendRegisterMessage(t *testing.T) {
exec := NewMockedExecutor()
setEnvironments(t, "", false)
driver := newTestExecutorDriver(t, exec)
messenger := messenger.NewMockedMessenger()
driver.messenger = messenger
// Set expectations and return values.
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(fmt.Errorf("messenger failed to send"))
messenger.On("Stop").Return(nil)
status, err := driver.Start()
assert.Error(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_NOT_STARTED, status)
messenger.AssertNumberOfCalls(t, "Start", 1)
messenger.AssertNumberOfCalls(t, "UPID", 1)
messenger.AssertNumberOfCalls(t, "Send", 1)
messenger.AssertNumberOfCalls(t, "Stop", 1)
}
func TestExecutorDriverStartSucceed(t *testing.T) {
setEnvironments(t, "", false)
exec := NewMockedExecutor()
exec.On("Error").Return(nil)
driver := newTestExecutorDriver(t, exec)
messenger := messenger.NewMockedMessenger()
driver.messenger = messenger
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
checker := healthchecker.NewMockedHealthChecker()
checker.On("Start").Return()
checker.On("Stop").Return()
assert.True(t, driver.stopped)
status, err := driver.Start()
assert.False(t, driver.stopped)
assert.NoError(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, status)
messenger.AssertNumberOfCalls(t, "Start", 1)
messenger.AssertNumberOfCalls(t, "UPID", 1)
messenger.AssertNumberOfCalls(t, "Send", 1)
}
func TestExecutorDriverRun(t *testing.T) {
setEnvironments(t, "", false)
// Set expectations and return values.
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
exec := NewMockedExecutor()
exec.On("Error").Return(nil)
driver := newTestExecutorDriver(t, exec)
driver.messenger = messenger
assert.True(t, driver.stopped)
checker := healthchecker.NewMockedHealthChecker()
checker.On("Start").Return()
checker.On("Stop").Return()
go func() {
stat, err := driver.Run()
assert.NoError(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_STOPPED, stat)
}()
time.Sleep(time.Millisecond * 1) // allow for things to settle
assert.False(t, driver.stopped)
assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, driver.Status())
// manually close it all
driver.setStatus(mesosproto.Status_DRIVER_STOPPED)
close(driver.stopCh)
time.Sleep(time.Millisecond * 1)
}
func TestExecutorDriverJoin(t *testing.T) {
setEnvironments(t, "", false)
// Set expectations and return values.
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
exec := NewMockedExecutor()
exec.On("Error").Return(nil)
driver := newTestExecutorDriver(t, exec)
driver.messenger = messenger
assert.True(t, driver.stopped)
checker := healthchecker.NewMockedHealthChecker()
checker.On("Start").Return()
checker.On("Stop").Return()
stat, err := driver.Start()
assert.NoError(t, err)
assert.False(t, driver.stopped)
assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat)
testCh := make(chan mesosproto.Status)
go func() {
stat, _ := driver.Join()
testCh <- stat
}()
close(driver.stopCh) // manually stopping
stat = <-testCh // when Stop() is called, stat will be DRIVER_STOPPED.
}
func TestExecutorDriverAbort(t *testing.T) {
statusChan := make(chan mesosproto.Status)
driver, messenger, _ := createTestExecutorDriver(t)
assert.True(t, driver.stopped)
stat, err := driver.Start()
assert.False(t, driver.stopped)
assert.NoError(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat)
go func() {
st, _ := driver.Join()
statusChan <- st
}()
stat, err = driver.Abort()
assert.NoError(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, stat)
assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, <-statusChan)
assert.True(t, driver.stopped)
// Abort for the second time, should return directly.
stat, err = driver.Abort()
assert.Error(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, stat)
stat, err = driver.Stop()
assert.Error(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, stat)
assert.True(t, driver.stopped)
// Restart should not start.
stat, err = driver.Start()
assert.True(t, driver.stopped)
assert.Error(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, stat)
messenger.AssertNumberOfCalls(t, "Start", 1)
messenger.AssertNumberOfCalls(t, "UPID", 1)
messenger.AssertNumberOfCalls(t, "Send", 1)
messenger.AssertNumberOfCalls(t, "Stop", 1)
}
func TestExecutorDriverStop(t *testing.T) {
statusChan := make(chan mesosproto.Status)
driver, messenger, _ := createTestExecutorDriver(t)
assert.True(t, driver.stopped)
stat, err := driver.Start()
assert.False(t, driver.stopped)
assert.NoError(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat)
go func() {
stat, _ := driver.Join()
statusChan <- stat
}()
stat, err = driver.Stop()
assert.NoError(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_STOPPED, stat)
assert.Equal(t, mesosproto.Status_DRIVER_STOPPED, <-statusChan)
assert.True(t, driver.stopped)
// Stop for the second time, should return directly.
stat, err = driver.Stop()
assert.Error(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_STOPPED, stat)
stat, err = driver.Abort()
assert.Error(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_STOPPED, stat)
assert.True(t, driver.stopped)
// Restart should not start.
stat, err = driver.Start()
assert.True(t, driver.stopped)
assert.Error(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_STOPPED, stat)
messenger.AssertNumberOfCalls(t, "Start", 1)
messenger.AssertNumberOfCalls(t, "UPID", 1)
messenger.AssertNumberOfCalls(t, "Send", 1)
messenger.AssertNumberOfCalls(t, "Stop", 1)
}
func TestExecutorDriverSendStatusUpdate(t *testing.T) {
driver, _, _ := createTestExecutorDriver(t)
stat, err := driver.Start()
assert.NoError(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat)
driver.connected = true
driver.stopped = false
taskStatus := util.NewTaskStatus(
util.NewTaskID("test-task-001"),
mesosproto.TaskState_TASK_RUNNING,
)
stat, err = driver.SendStatusUpdate(taskStatus)
assert.NoError(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat)
}
func TestExecutorDriverSendStatusUpdateStaging(t *testing.T) {
driver, _, _ := createTestExecutorDriver(t)
exec := NewMockedExecutor()
exec.On("Error").Return(nil)
driver.exec = exec
stat, err := driver.Start()
assert.NoError(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat)
driver.connected = true
driver.stopped = false
taskStatus := util.NewTaskStatus(
util.NewTaskID("test-task-001"),
mesosproto.TaskState_TASK_STAGING,
)
stat, err = driver.SendStatusUpdate(taskStatus)
assert.Error(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, stat)
}
func TestExecutorDriverSendFrameworkMessage(t *testing.T) {
driver, _, _ := createTestExecutorDriver(t)
stat, err := driver.SendFrameworkMessage("failed")
assert.Error(t, err)
stat, err = driver.Start()
assert.NoError(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat)
driver.connected = true
driver.stopped = false
stat, err = driver.SendFrameworkMessage("Testing Mesos")
assert.NoError(t, err)
assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat)
}

View File

@@ -0,0 +1,74 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package executor
import (
"github.com/mesos/mesos-go/mesosproto"
"github.com/stretchr/testify/mock"
)
// MockedExecutor is used for testing the executor driver.
type MockedExecutor struct {
mock.Mock
}
// NewMockedExecutor returns a mocked executor.
func NewMockedExecutor() *MockedExecutor {
return &MockedExecutor{}
}
// Registered implements the Registered handler.
func (e *MockedExecutor) Registered(ExecutorDriver, *mesosproto.ExecutorInfo, *mesosproto.FrameworkInfo, *mesosproto.SlaveInfo) {
e.Called()
}
// Reregistered implements the Reregistered handler.
func (e *MockedExecutor) Reregistered(ExecutorDriver, *mesosproto.SlaveInfo) {
e.Called()
}
// Disconnected implements the Disconnected handler.
func (e *MockedExecutor) Disconnected(ExecutorDriver) {
e.Called()
}
// LaunchTask implements the LaunchTask handler.
func (e *MockedExecutor) LaunchTask(ExecutorDriver, *mesosproto.TaskInfo) {
e.Called()
}
// KillTask implements the KillTask handler.
func (e *MockedExecutor) KillTask(ExecutorDriver, *mesosproto.TaskID) {
e.Called()
}
// FrameworkMessage implements the FrameworkMessage handler.
func (e *MockedExecutor) FrameworkMessage(ExecutorDriver, string) {
e.Called()
}
// Shutdown implements the Shutdown handler.
func (e *MockedExecutor) Shutdown(ExecutorDriver) {
e.Called()
}
// Error implements the Error handler.
func (e *MockedExecutor) Error(ExecutorDriver, string) {
e.Called()
}
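
The mock is driven through testify's `mock.Mock`: each callback forwards to `Called()`, so a test registers expectations with `On(...)` before exercising the driver and can assert invocation counts afterwards, as the driver tests elsewhere in this change do. A small illustrative sketch in package executor (the test name is hypothetical):

```go
package executor

import "testing"

func TestMockedExecutorShutdown(t *testing.T) {
	e := NewMockedExecutor()
	// Expectations must be registered before the call; Called()
	// inside Shutdown matches on the method name.
	e.On("Shutdown").Return(nil)
	e.Shutdown(nil) // a nil driver is fine, the mock only records the call
	e.AssertNumberOfCalls(t, "Shutdown", 1)
}
```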

View File

@@ -0,0 +1,39 @@
#### Benchmark of the messenger
```shell
$ go test -v -run=Benchmark* -bench=.
PASS
BenchmarkMessengerSendSmallMessage 50000 70568 ns/op
BenchmarkMessengerSendMediumMessage 50000 70265 ns/op
BenchmarkMessengerSendBigMessage 50000 72693 ns/op
BenchmarkMessengerSendLargeMessage 50000 72896 ns/op
BenchmarkMessengerSendMixedMessage 50000 72631 ns/op
BenchmarkMessengerSendRecvSmallMessage 20000 78409 ns/op
BenchmarkMessengerSendRecvMediumMessage 20000 80471 ns/op
BenchmarkMessengerSendRecvBigMessage 20000 82629 ns/op
BenchmarkMessengerSendRecvLargeMessage 20000 85987 ns/op
BenchmarkMessengerSendRecvMixedMessage 20000 83678 ns/op
ok github.com/mesos/mesos-go/messenger 115.135s
$ go test -v -run=Benchmark* -bench=. -cpu=4 -send-routines=4 2>/dev/null
PASS
BenchmarkMessengerSendSmallMessage-4 50000 35529 ns/op
BenchmarkMessengerSendMediumMessage-4 50000 35997 ns/op
BenchmarkMessengerSendBigMessage-4 50000 36871 ns/op
BenchmarkMessengerSendLargeMessage-4 50000 37310 ns/op
BenchmarkMessengerSendMixedMessage-4 50000 37419 ns/op
BenchmarkMessengerSendRecvSmallMessage-4 50000 39320 ns/op
BenchmarkMessengerSendRecvMediumMessage-4 50000 41990 ns/op
BenchmarkMessengerSendRecvBigMessage-4 50000 42157 ns/op
BenchmarkMessengerSendRecvLargeMessage-4 50000 45472 ns/op
BenchmarkMessengerSendRecvMixedMessage-4 50000 47393 ns/op
ok github.com/mesos/mesos-go/messenger 105.173s
```
#### Environment
```
OS: Linux yifan-laptop 3.13.0-32-generic #57-Ubuntu SMP Tue Jul 15 03:51:08 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux
CPU: Intel(R) Core(TM) i5-3210M CPU @ 2.50GHz
MEM: 4G DDR3 1600MHz
```
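
Each benchmark follows the standard Go testing shape: per iteration, one protobuf message is pushed through `Messenger.Send`. A rough in-package sketch of what such a benchmark looks like (simplified from the actual harness in `messenger_test.go`; the PID strings are arbitrary, and `Start()` blocks briefly while the transport comes up):

```go
func BenchmarkMessengerSendSmallMessage(b *testing.B) {
	// Illustrative setup: a receiver messenger that discards messages,
	// and a sender that targets it. Port 0 picks a free port.
	rcvPid, _ := upid.Parse("receiver@127.0.0.1:0")
	receiver := NewHttp(rcvPid)
	receiver.Install(func(*upid.UPID, proto.Message) {}, &testmessage.SmallMessage{})
	if err := receiver.Start(); err != nil {
		b.Fatal(err)
	}
	defer receiver.Stop()

	sndPid, _ := upid.Parse("sender@127.0.0.1:0")
	sender := NewHttp(sndPid)
	if err := sender.Start(); err != nil {
		b.Fatal(err)
	}
	defer sender.Stop()

	msg := testmessage.GenerateSmallMessage()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := sender.Send(context.TODO(), receiver.UPID(), msg); err != nil {
			b.Fatal(err)
		}
	}
}
```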

View File

@@ -0,0 +1,7 @@
/*
Package messenger includes a messenger and a transporter.
The messenger provides interfaces to send a protobuf message
through the underlying transporter. It also dispatches messages
to installed handlers.
*/
package messenger
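
To make the package doc concrete, here is a hedged end-to-end sketch of the flow it describes, using the `Messenger` interface defined in this change (the PID strings and the choice of message type are illustrative):

```go
package main

import (
	"log"

	"github.com/gogo/protobuf/proto"
	mesos "github.com/mesos/mesos-go/mesosproto"
	"github.com/mesos/mesos-go/messenger"
	"github.com/mesos/mesos-go/upid"
	"golang.org/x/net/context"
)

func main() {
	pid, err := upid.Parse("example@127.0.0.1:0") // illustrative PID; port 0 picks a free port
	if err != nil {
		log.Fatal(err)
	}
	m := messenger.NewHttp(pid)
	// Install a typed handler; the message's generated name is used
	// to route incoming protobufs to it.
	err = m.Install(func(from *upid.UPID, msg proto.Message) {
		log.Printf("got %T from %v", msg, from)
	}, &mesos.FrameworkErrorMessage{})
	if err != nil {
		log.Fatal(err)
	}
	if err := m.Start(); err != nil {
		log.Fatal(err)
	}
	defer m.Stop()
	// Send a message to a peer messenger (address illustrative).
	peer, _ := upid.Parse("peer@127.0.0.1:5050")
	if err := m.Send(context.TODO(), peer, &mesos.FrameworkErrorMessage{
		Message: proto.String("example"),
	}); err != nil {
		log.Printf("send failed: %v", err)
	}
}
```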

View File

@@ -0,0 +1,371 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package messenger
import (
"bytes"
"fmt"
"github.com/mesos/mesos-go/upid"
"io/ioutil"
"net"
"net/http"
"net/url"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
log "github.com/golang/glog"
"golang.org/x/net/context"
)
var (
discardOnStopError = fmt.Errorf("discarding message because transport is shutting down")
)
// HTTPTransporter implements the interfaces of the Transporter.
type HTTPTransporter struct {
// If the host is empty("") then it will listen on localhost.
// If the port is empty("") then it will listen on random port.
upid *upid.UPID
listener net.Listener // TODO(yifan): Change to TCPListener.
mux *http.ServeMux
tr *http.Transport
client *http.Client // TODO(yifan): Set read/write deadline.
messageQueue chan *Message
address net.IP // optional binding address
started chan struct{}
stopped chan struct{}
stopping int32
lifeLock sync.Mutex // protect lifecycle (start/stop) funcs
}
// NewHTTPTransporter creates a new http transporter with an optional binding address.
func NewHTTPTransporter(upid *upid.UPID, address net.IP) *HTTPTransporter {
tr := &http.Transport{}
result := &HTTPTransporter{
upid: upid,
messageQueue: make(chan *Message, defaultQueueSize),
mux: http.NewServeMux(),
client: &http.Client{Transport: tr},
tr: tr,
address: address,
started: make(chan struct{}),
stopped: make(chan struct{}),
}
close(result.stopped)
return result
}
// some network errors are probably recoverable, attempt to determine that here.
func isRecoverableError(err error) bool {
if urlErr, ok := err.(*url.Error); ok {
log.V(2).Infof("checking url.Error for recoverability")
return urlErr.Op == "Post" && isRecoverableError(urlErr.Err)
} else if netErr, ok := err.(*net.OpError); ok && netErr.Err != nil {
log.V(2).Infof("checking net.OpError for recoverability: %#v", err)
if netErr.Temporary() {
return true
}
//TODO(jdef) this is pretty hackish, there's probably a better way
return (netErr.Op == "dial" && netErr.Net == "tcp" && netErr.Err == syscall.ECONNREFUSED)
}
log.V(2).Infof("unrecoverable error: %#v", err)
return false
}
type recoverableError struct {
Err error
}
func (e *recoverableError) Error() string {
if e == nil {
return ""
}
return e.Err.Error()
}
// Send sends the message to its specified upid.
func (t *HTTPTransporter) Send(ctx context.Context, msg *Message) (sendError error) {
log.V(2).Infof("Sending message to %v via http\n", msg.UPID)
req, err := t.makeLibprocessRequest(msg)
if err != nil {
log.Errorf("Failed to make libprocess request: %v\n", err)
return err
}
duration := 1 * time.Second
for attempt := 0; attempt < 5; attempt++ { //TODO(jdef) extract/parameterize constant
if sendError != nil {
duration *= 2
log.Warningf("attempting to recover from error '%v', waiting before retry: %v", sendError, duration)
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(duration):
// ..retry request, continue
case <-t.stopped:
return discardOnStopError
}
}
sendError = t.httpDo(ctx, req, func(resp *http.Response, err error) error {
if err != nil {
if isRecoverableError(err) {
return &recoverableError{Err: err}
}
log.Infof("Failed to POST: %v\n", err)
return err
}
defer resp.Body.Close()
// ensure master acknowledgement.
if (resp.StatusCode != http.StatusOK) &&
(resp.StatusCode != http.StatusAccepted) {
msg := fmt.Sprintf("Master %s rejected %s. Returned status %s.",
msg.UPID, msg.RequestURI(), resp.Status)
log.Warning(msg)
return fmt.Errorf(msg)
}
return nil
})
if sendError == nil {
// success
return
} else if _, ok := sendError.(*recoverableError); ok {
// recoverable, attempt backoff?
continue
}
// unrecoverable
break
}
if recoverable, ok := sendError.(*recoverableError); ok {
sendError = recoverable.Err
}
return
}
func (t *HTTPTransporter) httpDo(ctx context.Context, req *http.Request, f func(*http.Response, error) error) error {
select {
case <-ctx.Done():
return ctx.Err()
case <-t.stopped:
return discardOnStopError
default: // continue
}
c := make(chan error, 1)
go func() { c <- f(t.client.Do(req)) }()
select {
case <-ctx.Done():
t.tr.CancelRequest(req)
<-c // Wait for f to return.
return ctx.Err()
case err := <-c:
return err
case <-t.stopped:
t.tr.CancelRequest(req)
<-c // Wait for f to return.
return discardOnStopError
}
}
// Recv returns the message, one at a time.
func (t *HTTPTransporter) Recv() (*Message, error) {
select {
default:
select {
case msg := <-t.messageQueue:
return msg, nil
case <-t.stopped:
}
case <-t.stopped:
}
return nil, discardOnStopError
}
// Inject places a message into the incoming message queue.
func (t *HTTPTransporter) Inject(ctx context.Context, msg *Message) error {
select {
case <-ctx.Done():
return ctx.Err()
case <-t.stopped:
return discardOnStopError
default: // continue
}
select {
case t.messageQueue <- msg:
return nil
case <-ctx.Done():
return ctx.Err()
case <-t.stopped:
return discardOnStopError
}
}
// Install registers a handler at the request URI derived from the message's name.
func (t *HTTPTransporter) Install(msgName string) {
requestURI := fmt.Sprintf("/%s/%s", t.upid.ID, msgName)
t.mux.HandleFunc(requestURI, t.messageHandler)
}
// listen starts listening on the UPID's host:port. If the UPID is empty, the
// transporter will listen on a random port, and then fill the UPID with the
// host:port it is listening on.
func (t *HTTPTransporter) listen() error {
var host string
if t.address != nil {
host = t.address.String()
} else {
host = t.upid.Host
}
port := t.upid.Port
// NOTE: Explicitly specifies IPv4 because Libprocess
// only supports IPv4 for now.
ln, err := net.Listen("tcp4", net.JoinHostPort(host, port))
if err != nil {
log.Errorf("HTTPTransporter failed to listen: %v\n", err)
return err
}
// Save the host:port in case they are not specified in upid.
host, port, _ = net.SplitHostPort(ln.Addr().String())
t.upid.Host, t.upid.Port = host, port
t.listener = ln
return nil
}
// Start starts the http transporter
func (t *HTTPTransporter) Start() <-chan error {
t.lifeLock.Lock()
defer t.lifeLock.Unlock()
select {
case <-t.started:
// already started
return nil
case <-t.stopped:
defer close(t.started)
t.stopped = make(chan struct{})
atomic.StoreInt32(&t.stopping, 0)
default:
panic("not started, not stopped, what am i? how can i start?")
}
ch := make(chan error, 1)
if err := t.listen(); err != nil {
ch <- err
} else {
// TODO(yifan): Set read/write deadline.
log.Infof("http transport listening on %v", t.listener.Addr())
go func() {
err := http.Serve(t.listener, t.mux)
if atomic.CompareAndSwapInt32(&t.stopping, 1, 0) {
ch <- nil
} else {
ch <- err
}
}()
}
return ch
}
// Stop stops the http transporter by closing the listener.
func (t *HTTPTransporter) Stop(graceful bool) error {
t.lifeLock.Lock()
defer t.lifeLock.Unlock()
select {
case <-t.stopped:
// already stopped
return nil
case <-t.started:
defer close(t.stopped)
t.started = make(chan struct{})
default:
panic("not started, not stopped, what am i? how can i stop?")
}
//TODO(jdef) if graceful, wait for pending requests to terminate
atomic.StoreInt32(&t.stopping, 1)
err := t.listener.Close()
return err
}
// UPID returns the upid of the transporter.
func (t *HTTPTransporter) UPID() *upid.UPID {
return t.upid
}
func (t *HTTPTransporter) messageHandler(w http.ResponseWriter, r *http.Request) {
// Verify it's a libprocess request.
from, err := getLibprocessFrom(r)
if err != nil {
log.Errorf("Ignoring the request, because it's not a libprocess request: %v\n", err)
w.WriteHeader(http.StatusBadRequest)
return
}
data, err := ioutil.ReadAll(r.Body)
if err != nil {
log.Errorf("Failed to read HTTP body: %v\n", err)
w.WriteHeader(http.StatusBadRequest)
return
}
log.V(2).Infof("Receiving message from %v, length %v\n", from, len(data))
w.WriteHeader(http.StatusAccepted)
t.messageQueue <- &Message{
UPID: from,
Name: extractNameFromRequestURI(r.RequestURI),
Bytes: data,
}
}
func (t *HTTPTransporter) makeLibprocessRequest(msg *Message) (*http.Request, error) {
if msg.UPID == nil {
panic(fmt.Sprintf("message is missing UPID: %+v", msg))
}
hostport := net.JoinHostPort(msg.UPID.Host, msg.UPID.Port)
targetURL := fmt.Sprintf("http://%s%s", hostport, msg.RequestURI())
log.V(2).Infof("libproc target URL %s", targetURL)
req, err := http.NewRequest("POST", targetURL, bytes.NewReader(msg.Bytes))
if err != nil {
log.Errorf("Failed to create request: %v\n", err)
return nil, err
}
req.Header.Add("Libprocess-From", t.upid.String())
req.Header.Add("Content-Type", "application/x-protobuf")
req.Header.Add("Connection", "Keep-Alive")
return req, nil
}
func getLibprocessFrom(r *http.Request) (*upid.UPID, error) {
if r.Method != "POST" {
return nil, fmt.Errorf("Not a POST request")
}
ua, ok := r.Header["User-Agent"]
if ok && strings.HasPrefix(ua[0], "libprocess/") {
// TODO(yifan): Just take the first field for now.
return upid.Parse(ua[0][len("libprocess/"):])
}
lf, ok := r.Header["Libprocess-From"]
if ok {
// TODO(yifan): Just take the first field for now.
return upid.Parse(lf[0])
}
return nil, fmt.Errorf("Cannot find 'User-Agent' or 'Libprocess-From'")
}

View File

@@ -0,0 +1,273 @@
package messenger
import (
"fmt"
"net/http"
"net/http/httptest"
"regexp"
"strconv"
"testing"
"time"
"github.com/mesos/mesos-go/messenger/testmessage"
"github.com/mesos/mesos-go/upid"
"github.com/stretchr/testify/assert"
"golang.org/x/net/context"
)
func TestTransporterNew(t *testing.T) {
id, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(t, err)
trans := NewHTTPTransporter(id, nil)
assert.NotNil(t, trans)
assert.NotNil(t, trans.upid)
assert.NotNil(t, trans.messageQueue)
assert.NotNil(t, trans.client)
}
func TestTransporterSend(t *testing.T) {
idreg := regexp.MustCompile(`[A-Za-z0-9_\-]+@[A-Za-z0-9_\-\.]+:[0-9]+`)
serverId := "testserver"
// setup mesos client-side
fromUpid, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(t, err)
protoMsg := testmessage.GenerateSmallMessage()
msgName := getMessageName(protoMsg)
msg := &Message{
Name: msgName,
ProtoMessage: protoMsg,
}
requestURI := fmt.Sprintf("/%s/%s", serverId, msgName)
// setup server-side
msgReceived := make(chan struct{})
srv := makeMockServer(requestURI, func(rsp http.ResponseWriter, req *http.Request) {
defer close(msgReceived)
from := req.Header.Get("Libprocess-From")
assert.NotEmpty(t, from)
assert.True(t, idreg.MatchString(from), fmt.Sprintf("regexp failed for '%v'", from))
})
defer srv.Close()
toUpid, err := upid.Parse(fmt.Sprintf("%s@%s", serverId, srv.Listener.Addr().String()))
assert.NoError(t, err)
// make transport call.
transport := NewHTTPTransporter(fromUpid, nil)
errch := transport.Start()
defer transport.Stop(false)
msg.UPID = toUpid
err = transport.Send(context.TODO(), msg)
assert.NoError(t, err)
select {
case <-time.After(2 * time.Second):
t.Fatalf("timed out waiting for message receipt")
case <-msgReceived:
case err := <-errch:
if err != nil {
t.Fatalf(err.Error())
}
}
}
func TestTransporter_DiscardedSend(t *testing.T) {
serverId := "testserver"
// setup mesos client-side
fromUpid, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(t, err)
protoMsg := testmessage.GenerateSmallMessage()
msgName := getMessageName(protoMsg)
msg := &Message{
Name: msgName,
ProtoMessage: protoMsg,
}
requestURI := fmt.Sprintf("/%s/%s", serverId, msgName)
// setup server-side
msgReceived := make(chan struct{})
srv := makeMockServer(requestURI, func(rsp http.ResponseWriter, req *http.Request) {
close(msgReceived)
time.Sleep(2 * time.Second) // long enough that we should be able to stop it
})
defer srv.Close()
toUpid, err := upid.Parse(fmt.Sprintf("%s@%s", serverId, srv.Listener.Addr().String()))
assert.NoError(t, err)
// make transport call.
transport := NewHTTPTransporter(fromUpid, nil)
errch := transport.Start()
defer transport.Stop(false)
msg.UPID = toUpid
senderr := make(chan struct{})
go func() {
defer close(senderr)
err = transport.Send(context.TODO(), msg)
assert.NotNil(t, err)
assert.Equal(t, discardOnStopError, err)
}()
// wait for message to be received
select {
case <-time.After(2 * time.Second):
t.Fatalf("timed out waiting for message receipt")
return
case <-msgReceived:
transport.Stop(false)
case err := <-errch:
if err != nil {
t.Fatalf(err.Error())
return
}
}
// wait for send() to process discarded-error
select {
case <-time.After(5 * time.Second):
t.Fatalf("timed out waiting for aborted send")
return
case <-senderr: // continue
}
}
func TestTransporterStartAndRcvd(t *testing.T) {
serverId := "testserver"
serverPort := getNewPort()
serverAddr := "127.0.0.1:" + strconv.Itoa(serverPort)
protoMsg := testmessage.GenerateSmallMessage()
msgName := getMessageName(protoMsg)
ctrl := make(chan struct{})
// setup receiver (server) process
rcvPid, err := upid.Parse(fmt.Sprintf("%s@%s", serverId, serverAddr))
assert.NoError(t, err)
receiver := NewHTTPTransporter(rcvPid, nil)
receiver.Install(msgName)
go func() {
defer close(ctrl)
msg, err := receiver.Recv()
assert.Nil(t, err)
assert.NotNil(t, msg)
if msg != nil {
assert.Equal(t, msgName, msg.Name)
}
}()
errch := receiver.Start()
defer receiver.Stop(false)
assert.NotNil(t, errch)
time.Sleep(time.Millisecond * 7) // time to catch up
// setup sender (client) process
sndUpid, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(t, err)
sender := NewHTTPTransporter(sndUpid, nil)
msg := &Message{
UPID: rcvPid,
Name: msgName,
ProtoMessage: protoMsg,
}
errch2 := sender.Start()
defer sender.Stop(false)
sender.Send(context.TODO(), msg)
select {
case <-time.After(time.Second * 5):
t.Fatalf("Timeout")
case <-ctrl:
case err := <-errch:
if err != nil {
t.Fatalf(err.Error())
}
case err := <-errch2:
if err != nil {
t.Fatalf(err.Error())
}
}
}
func TestTransporterStartAndInject(t *testing.T) {
serverId := "testserver"
serverPort := getNewPort()
serverAddr := "127.0.0.1:" + strconv.Itoa(serverPort)
protoMsg := testmessage.GenerateSmallMessage()
msgName := getMessageName(protoMsg)
ctrl := make(chan struct{})
// setup receiver (server) process
rcvPid, err := upid.Parse(fmt.Sprintf("%s@%s", serverId, serverAddr))
assert.NoError(t, err)
receiver := NewHTTPTransporter(rcvPid, nil)
receiver.Install(msgName)
errch := receiver.Start()
defer receiver.Stop(false)
msg := &Message{
UPID: rcvPid,
Name: msgName,
ProtoMessage: protoMsg,
}
receiver.Inject(context.TODO(), msg)
go func() {
defer close(ctrl)
msg, err := receiver.Recv()
assert.Nil(t, err)
assert.NotNil(t, msg)
if msg != nil {
assert.Equal(t, msgName, msg.Name)
}
}()
select {
case <-time.After(time.Second * 1):
t.Fatalf("Timeout")
case <-ctrl:
case err := <-errch:
if err != nil {
t.Fatalf(err.Error())
}
}
}
func TestTransporterStartAndStop(t *testing.T) {
serverId := "testserver"
serverPort := getNewPort()
serverAddr := "127.0.0.1:" + strconv.Itoa(serverPort)
// setup receiver (server) process
rcvPid, err := upid.Parse(fmt.Sprintf("%s@%s", serverId, serverAddr))
assert.NoError(t, err)
receiver := NewHTTPTransporter(rcvPid, nil)
errch := receiver.Start()
assert.NotNil(t, errch)
time.Sleep(1 * time.Second)
receiver.Stop(false)
select {
case <-time.After(2 * time.Second):
t.Fatalf("timed out waiting for transport to stop")
case err := <-errch:
if err != nil {
t.Fatalf(err.Error())
}
}
}
func makeMockServer(path string, handler func(rsp http.ResponseWriter, req *http.Request)) *httptest.Server {
mux := http.NewServeMux()
mux.HandleFunc(path, handler)
return httptest.NewServer(mux)
}

View File

@@ -0,0 +1,45 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package messenger
import (
"fmt"
"strings"
"github.com/gogo/protobuf/proto"
"github.com/mesos/mesos-go/upid"
)
// Message defines the type that passes in the Messenger.
type Message struct {
UPID *upid.UPID
Name string
ProtoMessage proto.Message
Bytes []byte
}
// RequestURI returns the request URI of the message.
func (m *Message) RequestURI() string {
return fmt.Sprintf("/%s/%s", m.UPID.ID, m.Name)
}
// NOTE: This should not fail or panic.
func extractNameFromRequestURI(requestURI string) string {
return strings.Split(requestURI, "/")[2]
}
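
For concreteness, the URI scheme ties `RequestURI` and `extractNameFromRequestURI` together as `/<upid.ID>/<message name>`. A tiny illustrative round trip with assumed values (in package messenger, since `extractNameFromRequestURI` is unexported):

```go
m := &Message{
	UPID: &upid.UPID{ID: "slave(1)", Host: "127.0.0.1", Port: "5051"},
	Name: "mesos.internal.RegisterExecutorMessage",
}
fmt.Println(m.RequestURI())                            // /slave(1)/mesos.internal.RegisterExecutorMessage
fmt.Println(extractNameFromRequestURI(m.RequestURI())) // mesos.internal.RegisterExecutorMessage
```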

View File

@@ -0,0 +1,357 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package messenger
import (
"flag"
"fmt"
"net"
"reflect"
"strconv"
"time"
"github.com/gogo/protobuf/proto"
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
"github.com/mesos/mesos-go/mesosutil/process"
"github.com/mesos/mesos-go/upid"
"golang.org/x/net/context"
)
const (
defaultQueueSize = 1024
preparePeriod = time.Second * 1
)
var (
sendRoutines int
encodeRoutines int
decodeRoutines int
)
func init() {
flag.IntVar(&sendRoutines, "send-routines", 1, "Number of network sending routines")
flag.IntVar(&encodeRoutines, "encode-routines", 1, "Number of encoding routines")
flag.IntVar(&decodeRoutines, "decode-routines", 1, "Number of decoding routines")
}
// MessageHandler is the callback of the message. When the callback
// is invoked, the sender's upid and the message is passed to the callback.
type MessageHandler func(from *upid.UPID, pbMsg proto.Message)
// Messenger defines the interfaces that should be implemented.
type Messenger interface {
Install(handler MessageHandler, msg proto.Message) error
Send(ctx context.Context, upid *upid.UPID, msg proto.Message) error
Route(ctx context.Context, from *upid.UPID, msg proto.Message) error
Start() error
Stop() error
UPID() *upid.UPID
}
// MesosMessenger is an implementation of the Messenger interface.
type MesosMessenger struct {
upid *upid.UPID
encodingQueue chan *Message
sendingQueue chan *Message
installedMessages map[string]reflect.Type
installedHandlers map[string]MessageHandler
stop chan struct{}
tr Transporter
}
// ForHostname creates a new default messenger (HTTP). If a non-nil, non-wildcard bindingAddress is
// specified then it will be used for both the UPID and Transport binding address. Otherwise the
// hostname is resolved to an IP address, the UPID.Host is set to that address, and the
// bindingAddress is passed through to the Transport.
func ForHostname(proc *process.Process, hostname string, bindingAddress net.IP, port uint16) (Messenger, error) {
upid := &upid.UPID{
ID: proc.Label(),
Port: strconv.Itoa(int(port)),
}
if bindingAddress != nil && "0.0.0.0" != bindingAddress.String() {
upid.Host = bindingAddress.String()
} else {
ips, err := net.LookupIP(hostname)
if err != nil {
return nil, err
}
// try to find an ipv4 and use that
ip := net.IP(nil)
for _, addr := range ips {
if ip = addr.To4(); ip != nil {
break
}
}
if ip == nil {
// no ipv4? best guess, just take the first addr
if len(ips) > 0 {
ip = ips[0]
log.Warningf("failed to find an IPv4 address for '%v', best guess is '%v'", hostname, ip)
} else {
return nil, fmt.Errorf("failed to determine IP address for host '%v'", hostname)
}
}
upid.Host = ip.String()
}
return NewHttpWithBindingAddress(upid, bindingAddress), nil
}
// NewHttp creates a new HTTP messenger.
func NewHttp(upid *upid.UPID) *MesosMessenger {
return NewHttpWithBindingAddress(upid, nil)
}
func NewHttpWithBindingAddress(upid *upid.UPID, address net.IP) *MesosMessenger {
return New(upid, NewHTTPTransporter(upid, address))
}
func New(upid *upid.UPID, t Transporter) *MesosMessenger {
return &MesosMessenger{
upid: upid,
encodingQueue: make(chan *Message, defaultQueueSize),
sendingQueue: make(chan *Message, defaultQueueSize),
installedMessages: make(map[string]reflect.Type),
installedHandlers: make(map[string]MessageHandler),
tr: t,
}
}
// Install installs the handler for the given message.
func (m *MesosMessenger) Install(handler MessageHandler, msg proto.Message) error {
// Check if the message is a pointer.
mtype := reflect.TypeOf(msg)
if mtype.Kind() != reflect.Ptr {
return fmt.Errorf("Message %v is not a Ptr type", msg)
}
// Check if the message is already installed.
name := getMessageName(msg)
if _, ok := m.installedMessages[name]; ok {
return fmt.Errorf("Message %v is already installed", name)
}
m.installedMessages[name] = mtype.Elem()
m.installedHandlers[name] = handler
m.tr.Install(name)
return nil
}
// Send puts a message into the outgoing queue, waiting to be sent.
// With buffered channels, this will not block under moderate throughput.
// When an error is generated, the error can be communicated by placing
// a message on the incoming queue to be handled upstream.
func (m *MesosMessenger) Send(ctx context.Context, upid *upid.UPID, msg proto.Message) error {
if upid == nil {
panic("cannot sent a message to a nil pid")
} else if upid.Equal(m.upid) {
return fmt.Errorf("Send the message to self")
}
name := getMessageName(msg)
log.V(2).Infof("Sending message %v to %v\n", name, upid)
select {
case <-ctx.Done():
return ctx.Err()
case m.encodingQueue <- &Message{upid, name, msg, nil}:
return nil
}
}
// Route puts a message either in the incoming or outgoing queue.
// This method is useful for:
// 1) routing internal errors to callback handlers
// 2) testing components without starting remote servers.
func (m *MesosMessenger) Route(ctx context.Context, upid *upid.UPID, msg proto.Message) error {
// if destination is not self, send to outbound.
if !upid.Equal(m.upid) {
return m.Send(ctx, upid, msg)
}
data, err := proto.Marshal(msg)
if err != nil {
return err
}
name := getMessageName(msg)
return m.tr.Inject(ctx, &Message{upid, name, msg, data})
}
// Start starts the messenger.
func (m *MesosMessenger) Start() error {
m.stop = make(chan struct{})
errChan := m.tr.Start()
select {
case err := <-errChan:
log.Errorf("failed to start messenger: %v", err)
return err
case <-time.After(preparePeriod): // continue
}
m.upid = m.tr.UPID()
for i := 0; i < sendRoutines; i++ {
go m.sendLoop()
}
for i := 0; i < encodeRoutines; i++ {
go m.encodeLoop()
}
for i := 0; i < decodeRoutines; i++ {
go m.decodeLoop()
}
go func() {
select {
case err := <-errChan:
if err != nil {
//TODO(jdef) should the driver abort in this case? probably
//since this messenger will never attempt to re-establish the
//transport
log.Error(err)
}
case <-m.stop:
}
}()
return nil
}
// Stop stops the messenger and cleans up all the goroutines.
func (m *MesosMessenger) Stop() error {
//TODO(jdef) don't hardcode the graceful flag here
if err := m.tr.Stop(true); err != nil {
log.Errorf("Failed to stop the transporter: %v\n", err)
return err
}
close(m.stop)
return nil
}
// UPID returns the upid of the messenger.
func (m *MesosMessenger) UPID() *upid.UPID {
return m.upid
}
func (m *MesosMessenger) encodeLoop() {
for {
select {
case <-m.stop:
return
case msg := <-m.encodingQueue:
e := func() error {
//TODO(jdef) implement timeout for context
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()
b, err := proto.Marshal(msg.ProtoMessage)
if err != nil {
return err
}
msg.Bytes = b
select {
case <-ctx.Done():
return ctx.Err()
case m.sendingQueue <- msg:
return nil
}
}()
if e != nil {
m.reportError(fmt.Errorf("Failed to enqueue message %v: %v", msg, e))
}
}
}
}
func (m *MesosMessenger) reportError(err error) {
log.V(2).Info(err)
//TODO(jdef) implement timeout for context
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()
c := make(chan error, 1)
go func() { c <- m.Route(ctx, m.UPID(), &mesos.FrameworkErrorMessage{Message: proto.String(err.Error())}) }()
select {
case <-ctx.Done():
<-c // wait for Route to return
case e := <-c:
if e != nil {
log.Errorf("failed to report error %v due to: %v", err, e)
}
}
}
func (m *MesosMessenger) sendLoop() {
for {
select {
case <-m.stop:
return
case msg := <-m.sendingQueue:
e := func() error {
//TODO(jdef) implement timeout for context
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()
c := make(chan error, 1)
go func() { c <- m.tr.Send(ctx, msg) }()
select {
case <-ctx.Done():
// Transport layer must use the context to detect cancelled requests.
<-c // wait for Send to return
return ctx.Err()
case err := <-c:
return err
}
}()
if e != nil {
m.reportError(fmt.Errorf("Failed to send message %v: %v", msg.Name, e))
}
}
}
}
// HTTPTransporter.Recv() is already buffered, so we don't need a 'recvLoop' here.
func (m *MesosMessenger) decodeLoop() {
for {
select {
case <-m.stop:
return
default:
}
msg, err := m.tr.Recv()
if err != nil {
if err == discardOnStopError {
log.V(1).Info("exiting decodeLoop, transport shutting down")
return
} else {
panic(fmt.Sprintf("unexpected transport error: %v", err))
}
}
log.V(2).Infof("Receiving message %v from %v\n", msg.Name, msg.UPID)
msg.ProtoMessage = reflect.New(m.installedMessages[msg.Name]).Interface().(proto.Message)
if err := proto.Unmarshal(msg.Bytes, msg.ProtoMessage); err != nil {
log.Errorf("Failed to unmarshal message %v: %v\n", msg, err)
continue
}
// TODO(yifan): Catch panic.
m.installedHandlers[msg.Name](msg.UPID, msg.ProtoMessage)
}
}
// getMessageName returns the message name in the Mesos convention, i.e. "mesos.internal.<TypeName>".
func getMessageName(msg proto.Message) string {
return fmt.Sprintf("%v.%v", "mesos.internal", reflect.TypeOf(msg).Elem().Name())
}
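// For example (a sketch): getMessageName(&mesos.FrameworkErrorMessage{})
// yields "mesos.internal.FrameworkErrorMessage", which also forms the HTTP
// path suffix the transport routes on (compare the
// "/testserver/mesos.internal.SmallMessage" handlers in the benchmark server
// below).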

View File

@@ -0,0 +1,433 @@
package messenger
import (
"fmt"
"math/rand"
"net/http"
"net/http/httptest"
"strconv"
"sync"
"testing"
"time"
"github.com/gogo/protobuf/proto"
"github.com/mesos/mesos-go/messenger/testmessage"
"github.com/mesos/mesos-go/upid"
"github.com/stretchr/testify/assert"
"golang.org/x/net/context"
)
var (
startPort = 10000 + rand.Intn(30000)
globalWG = new(sync.WaitGroup)
)
func noopHandler(*upid.UPID, proto.Message) {
globalWG.Done()
}
func getNewPort() int {
startPort++
return startPort
}
func shuffleMessages(queue *[]proto.Message) {
for i := range *queue {
index := rand.Intn(i + 1)
(*queue)[i], (*queue)[index] = (*queue)[index], (*queue)[i]
}
}
func generateSmallMessages(n int) []proto.Message {
queue := make([]proto.Message, n)
for i := range queue {
queue[i] = testmessage.GenerateSmallMessage()
}
return queue
}
func generateMediumMessages(n int) []proto.Message {
queue := make([]proto.Message, n)
for i := range queue {
queue[i] = testmessage.GenerateMediumMessage()
}
return queue
}
func generateBigMessages(n int) []proto.Message {
queue := make([]proto.Message, n)
for i := range queue {
queue[i] = testmessage.GenerateBigMessage()
}
return queue
}
func generateLargeMessages(n int) []proto.Message {
queue := make([]proto.Message, n)
for i := range queue {
queue[i] = testmessage.GenerateLargeMessage()
}
return queue
}
func generateMixedMessages(n int) []proto.Message {
queue := make([]proto.Message, n*4)
for i := 0; i < n*4; i = i + 4 {
queue[i] = testmessage.GenerateSmallMessage()
queue[i+1] = testmessage.GenerateMediumMessage()
queue[i+2] = testmessage.GenerateBigMessage()
queue[i+3] = testmessage.GenerateLargeMessage()
}
shuffleMessages(&queue)
return queue
}
func installMessages(t *testing.T, m Messenger, queue *[]proto.Message, counts *[]int, done chan struct{}) {
testCounts := func(counts []int, done chan struct{}) {
for i := range counts {
if counts[i] != cap(*queue)/4 {
return
}
}
close(done)
}
handler1 := func(from *upid.UPID, pbMsg proto.Message) {
(*queue) = append(*queue, pbMsg)
(*counts)[0]++
testCounts(*counts, done)
}
handler2 := func(from *upid.UPID, pbMsg proto.Message) {
(*queue) = append(*queue, pbMsg)
(*counts)[1]++
testCounts(*counts, done)
}
handler3 := func(from *upid.UPID, pbMsg proto.Message) {
(*queue) = append(*queue, pbMsg)
(*counts)[2]++
testCounts(*counts, done)
}
handler4 := func(from *upid.UPID, pbMsg proto.Message) {
(*queue) = append(*queue, pbMsg)
(*counts)[3]++
testCounts(*counts, done)
}
assert.NoError(t, m.Install(handler1, &testmessage.SmallMessage{}))
assert.NoError(t, m.Install(handler2, &testmessage.MediumMessage{}))
assert.NoError(t, m.Install(handler3, &testmessage.BigMessage{}))
assert.NoError(t, m.Install(handler4, &testmessage.LargeMessage{}))
}
func runTestServer(b *testing.B, wg *sync.WaitGroup) *httptest.Server {
mux := http.NewServeMux()
mux.HandleFunc("/testserver/mesos.internal.SmallMessage", func(http.ResponseWriter, *http.Request) {
wg.Done()
})
mux.HandleFunc("/testserver/mesos.internal.MediumMessage", func(http.ResponseWriter, *http.Request) {
wg.Done()
})
mux.HandleFunc("/testserver/mesos.internal.BigMessage", func(http.ResponseWriter, *http.Request) {
wg.Done()
})
mux.HandleFunc("/testserver/mesos.internal.LargeMessage", func(http.ResponseWriter, *http.Request) {
wg.Done()
})
return httptest.NewServer(mux)
}
func TestMessengerFailToInstall(t *testing.T) {
m := NewHttp(&upid.UPID{ID: "mesos"})
handler := func(from *upid.UPID, pbMsg proto.Message) {}
assert.NotNil(t, m)
assert.NoError(t, m.Install(handler, &testmessage.SmallMessage{}))
assert.Error(t, m.Install(handler, &testmessage.SmallMessage{}))
}
func TestMessengerFailToStart(t *testing.T) {
port := strconv.Itoa(getNewPort())
m1 := NewHttp(&upid.UPID{ID: "mesos", Host: "localhost", Port: port})
m2 := NewHttp(&upid.UPID{ID: "mesos", Host: "localhost", Port: port})
assert.NoError(t, m1.Start())
assert.Error(t, m2.Start())
}
func TestMessengerFailToSend(t *testing.T) {
upid, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(t, err)
m := NewHttp(upid)
assert.NoError(t, m.Start())
assert.Error(t, m.Send(context.TODO(), upid, &testmessage.SmallMessage{}))
}
func TestMessenger(t *testing.T) {
messages := generateMixedMessages(1000)
upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(t, err)
upid2, err := upid.Parse(fmt.Sprintf("mesos2@localhost:%d", getNewPort()))
assert.NoError(t, err)
m1 := NewHttp(upid1)
m2 := NewHttp(upid2)
done := make(chan struct{})
counts := make([]int, 4)
msgQueue := make([]proto.Message, 0, len(messages))
installMessages(t, m2, &msgQueue, &counts, done)
assert.NoError(t, m1.Start())
assert.NoError(t, m2.Start())
go func() {
for _, msg := range messages {
assert.NoError(t, m1.Send(context.TODO(), upid2, msg))
}
}()
select {
case <-time.After(time.Second * 10):
t.Fatalf("Timeout")
case <-done:
}
for i := range counts {
assert.Equal(t, 1000, counts[i])
}
assert.Equal(t, messages, msgQueue)
}
func BenchmarkMessengerSendSmallMessage(b *testing.B) {
messages := generateSmallMessages(1000)
wg := new(sync.WaitGroup)
wg.Add(b.N)
srv := runTestServer(b, wg)
defer srv.Close()
upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(b, err)
upid2, err := upid.Parse(fmt.Sprintf("testserver@%s", srv.Listener.Addr().String()))
assert.NoError(b, err)
m1 := NewHttp(upid1)
assert.NoError(b, m1.Start())
b.ResetTimer()
for i := 0; i < b.N; i++ {
m1.Send(context.TODO(), upid2, messages[i%1000])
}
wg.Wait()
}
func BenchmarkMessengerSendMediumMessage(b *testing.B) {
messages := generateMediumMessages(1000)
wg := new(sync.WaitGroup)
wg.Add(b.N)
srv := runTestServer(b, wg)
defer srv.Close()
upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(b, err)
upid2, err := upid.Parse(fmt.Sprintf("testserver@%s", srv.Listener.Addr().String()))
assert.NoError(b, err)
m1 := NewHttp(upid1)
assert.NoError(b, m1.Start())
b.ResetTimer()
for i := 0; i < b.N; i++ {
m1.Send(context.TODO(), upid2, messages[i%1000])
}
wg.Wait()
}
func BenchmarkMessengerSendBigMessage(b *testing.B) {
messages := generateBigMessages(1000)
wg := new(sync.WaitGroup)
wg.Add(b.N)
srv := runTestServer(b, wg)
defer srv.Close()
upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(b, err)
upid2, err := upid.Parse(fmt.Sprintf("testserver@%s", srv.Listener.Addr().String()))
assert.NoError(b, err)
m1 := NewHttp(upid1)
assert.NoError(b, m1.Start())
b.ResetTimer()
for i := 0; i < b.N; i++ {
m1.Send(context.TODO(), upid2, messages[i%1000])
}
wg.Wait()
}
func BenchmarkMessengerSendLargeMessage(b *testing.B) {
messages := generateLargeMessages(1000)
wg := new(sync.WaitGroup)
wg.Add(b.N)
srv := runTestServer(b, wg)
defer srv.Close()
upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(b, err)
upid2, err := upid.Parse(fmt.Sprintf("testserver@%s", srv.Listener.Addr().String()))
assert.NoError(b, err)
m1 := NewHttp(upid1)
assert.NoError(b, m1.Start())
b.ResetTimer()
for i := 0; i < b.N; i++ {
m1.Send(context.TODO(), upid2, messages[i%1000])
}
wg.Wait()
}
func BenchmarkMessengerSendMixedMessage(b *testing.B) {
messages := generateMixedMessages(1000)
wg := new(sync.WaitGroup)
wg.Add(b.N)
srv := runTestServer(b, wg)
defer srv.Close()
upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(b, err)
upid2, err := upid.Parse(fmt.Sprintf("testserver@%s", srv.Listener.Addr().String()))
assert.NoError(b, err)
m1 := NewHttp(upid1)
assert.NoError(b, m1.Start())
b.ResetTimer()
for i := 0; i < b.N; i++ {
m1.Send(context.TODO(), upid2, messages[i%1000])
}
wg.Wait()
}
func BenchmarkMessengerSendRecvSmallMessage(b *testing.B) {
globalWG.Add(b.N)
messages := generateSmallMessages(1000)
upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(b, err)
upid2, err := upid.Parse(fmt.Sprintf("mesos2@localhost:%d", getNewPort()))
assert.NoError(b, err)
m1 := NewHttp(upid1)
m2 := NewHttp(upid2)
assert.NoError(b, m1.Start())
assert.NoError(b, m2.Start())
assert.NoError(b, m2.Install(noopHandler, &testmessage.SmallMessage{}))
time.Sleep(time.Second) // Avoid race on upid.
b.ResetTimer()
for i := 0; i < b.N; i++ {
m1.Send(context.TODO(), upid2, messages[i%1000])
}
globalWG.Wait()
}
func BenchmarkMessengerSendRecvMediumMessage(b *testing.B) {
globalWG.Add(b.N)
messages := generateMediumMessages(1000)
upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(b, err)
upid2, err := upid.Parse(fmt.Sprintf("mesos2@localhost:%d", getNewPort()))
assert.NoError(b, err)
m1 := NewHttp(upid1)
m2 := NewHttp(upid2)
assert.NoError(b, m1.Start())
assert.NoError(b, m2.Start())
assert.NoError(b, m2.Install(noopHandler, &testmessage.MediumMessage{}))
time.Sleep(time.Second) // Avoid race on upid.
b.ResetTimer()
for i := 0; i < b.N; i++ {
m1.Send(context.TODO(), upid2, messages[i%1000])
}
globalWG.Wait()
}
func BenchmarkMessengerSendRecvBigMessage(b *testing.B) {
globalWG.Add(b.N)
messages := generateBigMessages(1000)
upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(b, err)
upid2, err := upid.Parse(fmt.Sprintf("mesos2@localhost:%d", getNewPort()))
assert.NoError(b, err)
m1 := NewHttp(upid1)
m2 := NewHttp(upid2)
assert.NoError(b, m1.Start())
assert.NoError(b, m2.Start())
assert.NoError(b, m2.Install(noopHandler, &testmessage.BigMessage{}))
time.Sleep(time.Second) // Avoid race on upid.
b.ResetTimer()
for i := 0; i < b.N; i++ {
m1.Send(context.TODO(), upid2, messages[i%1000])
}
globalWG.Wait()
}
func BenchmarkMessengerSendRecvLargeMessage(b *testing.B) {
globalWG.Add(b.N)
messages := generateLargeMessages(1000)
upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(b, err)
upid2, err := upid.Parse(fmt.Sprintf("mesos2@localhost:%d", getNewPort()))
assert.NoError(b, err)
m1 := NewHttp(upid1)
m2 := NewHttp(upid2)
assert.NoError(b, m1.Start())
assert.NoError(b, m2.Start())
assert.NoError(b, m2.Install(noopHandler, &testmessage.LargeMessage{}))
time.Sleep(time.Second) // Avoid race on upid.
b.ResetTimer()
for i := 0; i < b.N; i++ {
m1.Send(context.TODO(), upid2, messages[i%1000])
}
globalWG.Wait()
}
func BenchmarkMessengerSendRecvMixedMessage(b *testing.B) {
globalWG.Add(b.N)
messages := generateMixedMessages(1000)
upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort()))
assert.NoError(b, err)
upid2, err := upid.Parse(fmt.Sprintf("mesos2@localhost:%d", getNewPort()))
assert.NoError(b, err)
m1 := NewHttp(upid1)
m2 := NewHttp(upid2)
assert.NoError(b, m1.Start())
assert.NoError(b, m2.Start())
assert.NoError(b, m2.Install(noopHandler, &testmessage.SmallMessage{}))
assert.NoError(b, m2.Install(noopHandler, &testmessage.MediumMessage{}))
assert.NoError(b, m2.Install(noopHandler, &testmessage.BigMessage{}))
assert.NoError(b, m2.Install(noopHandler, &testmessage.LargeMessage{}))
time.Sleep(time.Second) // Avoid race on upid.
b.ResetTimer()
for i := 0; i < b.N; i++ {
m1.Send(context.TODO(), upid2, messages[i%1000])
}
globalWG.Wait()
}

View File

@@ -0,0 +1,106 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package messenger
import (
"reflect"
"github.com/gogo/protobuf/proto"
"github.com/mesos/mesos-go/upid"
"github.com/stretchr/testify/mock"
"golang.org/x/net/context"
)
type message struct {
from *upid.UPID
msg proto.Message
}
// MockedMessenger is a mock implementation of the Messenger interface; its
// methods return whatever the test configures via testify's mock package.
type MockedMessenger struct {
mock.Mock
messageQueue chan *message
handlers map[string]MessageHandler
stop chan struct{}
}
// NewMockedMessenger returns a mocked messenger used for testing.
func NewMockedMessenger() *MockedMessenger {
return &MockedMessenger{
messageQueue: make(chan *message, 1),
handlers: make(map[string]MessageHandler),
stop: make(chan struct{}),
}
}
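// A minimal sketch of wiring this mock up with testify expectations (assumed
// usage, mirroring the scheduler driver tests later in this change):
//
//	m := NewMockedMessenger()
//	m.On("Start").Return(nil)
//	m.On("UPID").Return(&upid.UPID{})
//	m.On("Send").Return(nil)
//	m.On("Stop").Return(nil)
//	_ = m.Start() // also spins up recvLoop so Recv() can drive handlers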
// Install is a mocked implementation.
func (m *MockedMessenger) Install(handler MessageHandler, msg proto.Message) error {
m.handlers[reflect.TypeOf(msg).Elem().Name()] = handler
return m.Called().Error(0)
}
// Send is a mocked implementation.
func (m *MockedMessenger) Send(ctx context.Context, upid *upid.UPID, msg proto.Message) error {
return m.Called().Error(0)
}
// Route is a mocked implementation.
func (m *MockedMessenger) Route(ctx context.Context, upid *upid.UPID, msg proto.Message) error {
return m.Called().Error(0)
}
// Start is a mocked implementation.
func (m *MockedMessenger) Start() error {
go m.recvLoop()
return m.Called().Error(0)
}
// Stop is a mocked implementation.
func (m *MockedMessenger) Stop() error {
// don't close an already-closed channel
select {
case <-m.stop:
// noop
default:
close(m.stop)
}
return m.Called().Error(0)
}
// UPID is a mocked implementation.
func (m *MockedMessenger) UPID() *upid.UPID {
return m.Called().Get(0).(*upid.UPID)
}
func (m *MockedMessenger) recvLoop() {
for {
select {
case <-m.stop:
return
case msg := <-m.messageQueue:
name := reflect.TypeOf(msg.msg).Elem().Name()
m.handlers[name](msg.from, msg.msg)
}
}
}
// Recv receives a upid and a message, and dispatches the message to the
// handler installed for that message type. This is for testing.
func (m *MockedMessenger) Recv(from *upid.UPID, msg proto.Message) {
m.messageQueue <- &message{from, msg}
}

View File

@@ -0,0 +1,2 @@
all: testmessage.proto
protoc --proto_path=${GOPATH}/src:${GOPATH}/src/github.com/gogo/protobuf/protobuf:. --gogo_out=. testmessage.proto

View File

@@ -0,0 +1,49 @@
package testmessage
import (
"math/rand"
)
func generateRandomString(length int) string {
b := make([]byte, length)
for i := range b {
b[i] = byte(rand.Int())
}
return string(b)
}
// GenerateSmallMessage generates a small size message.
func GenerateSmallMessage() *SmallMessage {
v := make([]string, 3)
for i := range v {
v[i] = generateRandomString(5)
}
return &SmallMessage{Values: v}
}
// GenerateMediumMessage generates a medium size message.
func GenerateMediumMessage() *MediumMessage {
v := make([]string, 10)
for i := range v {
v[i] = generateRandomString(10)
}
return &MediumMessage{Values: v}
}
// GenerateBigMessage generates a big size message.
func GenerateBigMessage() *BigMessage {
v := make([]string, 20)
for i := range v {
v[i] = generateRandomString(20)
}
return &BigMessage{Values: v}
}
// GenerateLargeMessage generates a large size message.
func GenerateLargeMessage() *LargeMessage {
v := make([]string, 30)
for i := range v {
v[i] = generateRandomString(30)
}
return &LargeMessage{Values: v}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
package testmessage;
import "github.com/gogo/protobuf/gogoproto/gogo.proto";
option (gogoproto.gostring_all) = true;
option (gogoproto.equal_all) = true;
option (gogoproto.verbose_equal_all) = true;
option (gogoproto.goproto_stringer_all) = false;
option (gogoproto.stringer_all) = true;
option (gogoproto.populate_all) = true;
option (gogoproto.testgen_all) = false;
option (gogoproto.benchgen_all) = false;
option (gogoproto.marshaler_all) = true;
option (gogoproto.sizer_all) = true;
option (gogoproto.unmarshaler_all) = true;
message SmallMessage {
repeated string Values = 1;
}
message MediumMessage {
repeated string Values = 1;
}
message BigMessage {
repeated string Values = 1;
}
message LargeMessage {
repeated string Values = 1;
}

View File

@@ -0,0 +1,53 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package messenger
import (
"github.com/mesos/mesos-go/upid"
"golang.org/x/net/context"
)
// Transporter defines methods for communicating with remote processes.
type Transporter interface {
//Send sends message to remote process. Must use context to determine
//cancelled requests. Will stop sending when transport is stopped.
Send(ctx context.Context, msg *Message) error
//Recv receives messages and delegates their handling to the installed
//handlers. Will stop receiving when transport is stopped.
Recv() (*Message, error)
//Inject injects a message to the incoming queue. Must use context to
//determine cancelled requests. Injection is aborted if the transport
//is stopped.
Inject(ctx context.Context, msg *Message) error
//Install mounts a handler based on the incoming message name.
Install(messageName string)
//Start starts the transporter and returns immediately. The error chan
//is never nil.
Start() <-chan error
//Stop kills the transporter.
Stop(graceful bool) error
//UPID returns the PID for the transporter.
UPID() *upid.UPID
}
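// A hedged sketch of the smallest conceivable Transporter: a loopback stub
// (an assumption for illustration, not shipped here) that feeds injected
// messages straight back to Recv:
//
//	type loopback struct{ q chan *Message }
//
//	func (l *loopback) Send(ctx context.Context, m *Message) error { return l.Inject(ctx, m) }
//	func (l *loopback) Recv() (*Message, error)                    { return <-l.q, nil }
//	func (l *loopback) Inject(ctx context.Context, m *Message) error {
//	        select {
//	        case l.q <- m:
//	                return nil
//	        case <-ctx.Done():
//	                return ctx.Err()
//	        }
//	}
//	func (l *loopback) Install(string)           {}
//	func (l *loopback) Start() <-chan error      { return make(chan error) }
//	func (l *loopback) Stop(graceful bool) error { return nil }
//	func (l *loopback) UPID() *upid.UPID         { return &upid.UPID{} }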

View File

@@ -0,0 +1,6 @@
/*
Package scheduler includes the interfaces for the mesos scheduler and
the mesos scheduler driver, as well as an implementation of the driver
that you can use in your code.
*/
package scheduler

View File

@@ -0,0 +1,29 @@
package scheduler
import (
"github.com/mesos/mesos-go/auth/callback"
mesos "github.com/mesos/mesos-go/mesosproto"
"github.com/mesos/mesos-go/upid"
)
// CredentialHandler supplies the framework's principal and secret (and the
// master/client PIDs) to authentication callbacks.
type CredentialHandler struct {
pid *upid.UPID // the process to authenticate against (master)
client *upid.UPID // the process to be authenticated (slave / framework)
credential *mesos.Credential
}
// Handle fulfills each supported callback using the handler's credential and PIDs.
func (h *CredentialHandler) Handle(callbacks ...callback.Interface) error {
for _, cb := range callbacks {
switch cb := cb.(type) {
case *callback.Name:
cb.Set(h.credential.GetPrincipal())
case *callback.Password:
cb.Set(h.credential.GetSecret())
case *callback.Interprocess:
cb.Set(*(h.pid), *(h.client))
default:
return &callback.Unsupported{Callback: cb}
}
}
return nil
}
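// Sketch of assumed usage: an authenticatee hands its callbacks to the
// handler, which fills in the principal, the secret, and the two PIDs:
//
//	name, pass := &callback.Name{}, &callback.Password{}
//	if err := handler.Handle(name, pass); err != nil {
//	        // an unsupported callback type yields *callback.Unsupported
//	}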

View File

@@ -0,0 +1,56 @@
package scheduler
import (
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
"github.com/stretchr/testify/mock"
)
type MockScheduler struct {
mock.Mock
}
func NewMockScheduler() *MockScheduler {
return &MockScheduler{}
}
func (sched *MockScheduler) Registered(SchedulerDriver, *mesos.FrameworkID, *mesos.MasterInfo) {
sched.Called()
}
func (sched *MockScheduler) Reregistered(SchedulerDriver, *mesos.MasterInfo) {
sched.Called()
}
func (sched *MockScheduler) Disconnected(SchedulerDriver) {
sched.Called()
}
func (sched *MockScheduler) ResourceOffers(SchedulerDriver, []*mesos.Offer) {
sched.Called()
}
func (sched *MockScheduler) OfferRescinded(SchedulerDriver, *mesos.OfferID) {
sched.Called()
}
func (sched *MockScheduler) StatusUpdate(SchedulerDriver, *mesos.TaskStatus) {
sched.Called()
}
func (sched *MockScheduler) FrameworkMessage(SchedulerDriver, *mesos.ExecutorID, *mesos.SlaveID, string) {
sched.Called()
}
func (sched *MockScheduler) SlaveLost(SchedulerDriver, *mesos.SlaveID) {
sched.Called()
}
func (sched *MockScheduler) ExecutorLost(SchedulerDriver, *mesos.ExecutorID, *mesos.SlaveID, int) {
sched.Called()
}
func (sched *MockScheduler) Error(d SchedulerDriver, msg string) {
log.Error(msg)
sched.Called()
}

View File

@@ -0,0 +1,7 @@
package scheduler
import (
_ "github.com/mesos/mesos-go/auth/sasl"
_ "github.com/mesos/mesos-go/auth/sasl/mech/crammd5"
_ "github.com/mesos/mesos-go/detector/zoo"
)

View File

@@ -0,0 +1,96 @@
package scheduler
import (
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
"github.com/mesos/mesos-go/upid"
"sync"
)
type cachedOffer struct {
offer *mesos.Offer
slavePid *upid.UPID
}
func newCachedOffer(offer *mesos.Offer, slavePid *upid.UPID) *cachedOffer {
return &cachedOffer{offer: offer, slavePid: slavePid}
}
// schedCache is a managed cache with backing maps that store offers
// and the PIDs of slaves that have been assigned tasks.
type schedCache struct {
lock sync.RWMutex
savedOffers map[string]*cachedOffer // current offers key:OfferID
savedSlavePids map[string]*upid.UPID // Current saved slaves, key:slaveId
}
func newSchedCache() *schedCache {
return &schedCache{
savedOffers: make(map[string]*cachedOffer),
savedSlavePids: make(map[string]*upid.UPID),
}
}
// putOffer stores an offer and the slavePID associated with offer.
func (cache *schedCache) putOffer(offer *mesos.Offer, pid *upid.UPID) {
if offer == nil || pid == nil {
log.V(3).Infoln("WARN: Offer not cached. The offer or pid cannot be nil")
return
}
log.V(3).Infoln("Caching offer ", offer.Id.GetValue(), " with slavePID ", pid.String())
cache.lock.Lock()
cache.savedOffers[offer.Id.GetValue()] = &cachedOffer{offer: offer, slavePid: pid}
cache.lock.Unlock()
}
// getOffer returns the cached offer for the given offerId.
func (cache *schedCache) getOffer(offerId *mesos.OfferID) *cachedOffer {
if offerId == nil {
log.V(3).Infoln("WARN: OfferId == nil, returning nil")
return nil
}
cache.lock.RLock()
defer cache.lock.RUnlock()
return cache.savedOffers[offerId.GetValue()]
}
// containsOffer reports whether the cache contains an offer with the given offerId.
func (cache *schedCache) containsOffer(offerId *mesos.OfferID) bool {
cache.lock.RLock()
defer cache.lock.RUnlock()
_, ok := cache.savedOffers[offerId.GetValue()]
return ok
}
func (cache *schedCache) removeOffer(offerId *mesos.OfferID) {
cache.lock.Lock()
delete(cache.savedOffers, offerId.GetValue())
cache.lock.Unlock()
}
func (cache *schedCache) putSlavePid(slaveId *mesos.SlaveID, pid *upid.UPID) {
cache.lock.Lock()
cache.savedSlavePids[slaveId.GetValue()] = pid
cache.lock.Unlock()
}
// getSlavePid returns the cached PID for the given slave, guarded by the read lock.
func (cache *schedCache) getSlavePid(slaveId *mesos.SlaveID) *upid.UPID {
if slaveId == nil {
log.V(3).Infoln("SlaveId == nil, returning empty UPID")
return nil
}
cache.lock.RLock()
defer cache.lock.RUnlock()
return cache.savedSlavePids[slaveId.GetValue()]
}
func (cache *schedCache) containsSlavePid(slaveId *mesos.SlaveID) bool {
cache.lock.RLock()
defer cache.lock.RUnlock()
_, ok := cache.savedSlavePids[slaveId.GetValue()]
return ok
}
func (cache *schedCache) removeSlavePid(slaveId *mesos.SlaveID) {
cache.lock.Lock()
delete(cache.savedSlavePids, slaveId.GetValue())
cache.lock.Unlock()
}
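// A sketched flow (assumed usage): cache an incoming offer together with the
// slave PID it arrived from, then look both up when launching a task:
//
//	cache := newSchedCache()
//	cache.putOffer(offer, slavePid)
//	if cache.containsOffer(offer.Id) {
//	        pid := cache.getOffer(offer.Id).slavePid
//	        cache.putSlavePid(offer.SlaveId, pid)
//	}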

View File

@@ -0,0 +1,215 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package scheduler
import (
mesos "github.com/mesos/mesos-go/mesosproto"
util "github.com/mesos/mesos-go/mesosutil"
"github.com/stretchr/testify/assert"
"testing"
"github.com/mesos/mesos-go/upid"
)
func TestSchedCacheNew(t *testing.T) {
cache := newSchedCache()
assert.NotNil(t, cache)
assert.NotNil(t, cache.savedOffers)
assert.NotNil(t, cache.savedSlavePids)
}
func TestSchedCachePutOffer(t *testing.T) {
cache := newSchedCache()
offer01 := createTestOffer("01")
pid01, err := upid.Parse("slave01@127.0.0.1:5050")
assert.NoError(t, err)
cache.putOffer(offer01, pid01)
offer02 := createTestOffer("02")
pid02, err := upid.Parse("slave02@127.0.0.1:5050")
assert.NoError(t, err)
cache.putOffer(offer02, pid02)
assert.Equal(t, len(cache.savedOffers), 2)
cachedOffer1, ok := cache.savedOffers["test-offer-01"]
assert.True(t, ok)
cachedOffer2, ok := cache.savedOffers["test-offer-02"]
assert.True(t, ok)
assert.NotNil(t, cachedOffer1.offer)
assert.Equal(t, "test-offer-01", cachedOffer1.offer.Id.GetValue())
assert.NotNil(t, cachedOffer2.offer)
assert.Equal(t, "test-offer-02", cachedOffer2.offer.Id.GetValue())
assert.NotNil(t, cachedOffer1.slavePid)
assert.Equal(t, "slave01@127.0.0.1:5050", cachedOffer1.slavePid.String())
assert.NotNil(t, cachedOffer2.slavePid)
assert.Equal(t, "slave02@127.0.0.1:5050", cachedOffer2.slavePid.String())
}
func TestSchedCacheGetOffer(t *testing.T) {
cache := newSchedCache()
offer01 := createTestOffer("01")
pid01, err := upid.Parse("slave01@127.0.0.1:5050")
assert.NoError(t, err)
offer02 := createTestOffer("02")
pid02, err := upid.Parse("slave02@127.0.0.1:5050")
assert.NoError(t, err)
cache.putOffer(offer01, pid01)
cache.putOffer(offer02, pid02)
cachedOffer01 := cache.getOffer(util.NewOfferID("test-offer-01")).offer
cachedOffer02 := cache.getOffer(util.NewOfferID("test-offer-02")).offer
assert.NotEqual(t, offer01, cachedOffer02)
assert.Equal(t, offer01, cachedOffer01)
assert.Equal(t, offer02, cachedOffer02)
}
func TestSchedCacheContainsOffer(t *testing.T) {
cache := newSchedCache()
offer01 := createTestOffer("01")
pid01, err := upid.Parse("slave01@127.0.0.1:5050")
assert.NoError(t, err)
offer02 := createTestOffer("02")
pid02, err := upid.Parse("slave02@127.0.0.1:5050")
assert.NoError(t, err)
cache.putOffer(offer01, pid01)
cache.putOffer(offer02, pid02)
assert.True(t, cache.containsOffer(util.NewOfferID("test-offer-01")))
assert.True(t, cache.containsOffer(util.NewOfferID("test-offer-02")))
assert.False(t, cache.containsOffer(util.NewOfferID("test-offer-05")))
}
func TestSchedCacheRemoveOffer(t *testing.T) {
cache := newSchedCache()
offer01 := createTestOffer("01")
pid01, err := upid.Parse("slave01@127.0.0.1:5050")
assert.NoError(t, err)
offer02 := createTestOffer("02")
pid02, err := upid.Parse("slave02@127.0.0.1:5050")
assert.NoError(t, err)
cache.putOffer(offer01, pid01)
cache.putOffer(offer02, pid02)
cache.removeOffer(util.NewOfferID("test-offer-01"))
assert.Equal(t, 1, len(cache.savedOffers))
assert.True(t, cache.containsOffer(util.NewOfferID("test-offer-02")))
assert.False(t, cache.containsOffer(util.NewOfferID("test-offer-01")))
}
func TestSchedCachePutSlavePid(t *testing.T) {
cache := newSchedCache()
pid01, err := upid.Parse("slave01@127.0.0.1:5050")
assert.NoError(t, err)
pid02, err := upid.Parse("slave02@127.0.0.1:5050")
assert.NoError(t, err)
pid03, err := upid.Parse("slave03@127.0.0.1:5050")
assert.NoError(t, err)
cache.putSlavePid(util.NewSlaveID("slave01"), pid01)
cache.putSlavePid(util.NewSlaveID("slave02"), pid02)
cache.putSlavePid(util.NewSlaveID("slave03"), pid03)
assert.Equal(t, len(cache.savedSlavePids), 3)
cachedSlavePid1, ok := cache.savedSlavePids["slave01"]
assert.True(t, ok)
cachedSlavePid2, ok := cache.savedSlavePids["slave02"]
assert.True(t, ok)
cachedSlavePid3, ok := cache.savedSlavePids["slave03"]
assert.True(t, ok)
assert.True(t, cachedSlavePid1.Equal(pid01))
assert.True(t, cachedSlavePid2.Equal(pid02))
assert.True(t, cachedSlavePid3.Equal(pid03))
}
func TestSchedCacheGetSlavePid(t *testing.T) {
cache := newSchedCache()
pid01, err := upid.Parse("slave01@127.0.0.1:5050")
assert.NoError(t, err)
pid02, err := upid.Parse("slave02@127.0.0.1:5050")
assert.NoError(t, err)
cache.putSlavePid(util.NewSlaveID("slave01"), pid01)
cache.putSlavePid(util.NewSlaveID("slave02"), pid02)
cachedSlavePid1 := cache.getSlavePid(util.NewSlaveID("slave01"))
cachedSlavePid2 := cache.getSlavePid(util.NewSlaveID("slave02"))
assert.NotNil(t, cachedSlavePid1)
assert.NotNil(t, cachedSlavePid2)
assert.True(t, pid01.Equal(cachedSlavePid1))
assert.True(t, pid02.Equal(cachedSlavePid2))
assert.False(t, pid01.Equal(cachedSlavePid2))
}
func TestSchedCacheContainsSlavePid(t *testing.T) {
cache := newSchedCache()
pid01, err := upid.Parse("slave01@127.0.0.1:5050")
assert.NoError(t, err)
pid02, err := upid.Parse("slave02@127.0.0.1:5050")
assert.NoError(t, err)
cache.putSlavePid(util.NewSlaveID("slave01"), pid01)
cache.putSlavePid(util.NewSlaveID("slave02"), pid02)
assert.True(t, cache.containsSlavePid(util.NewSlaveID("slave01")))
assert.True(t, cache.containsSlavePid(util.NewSlaveID("slave02")))
assert.False(t, cache.containsSlavePid(util.NewSlaveID("slave05")))
}
func TestSchedCacheRemoveSlavePid(t *testing.T) {
cache := newSchedCache()
pid01, err := upid.Parse("slave01@127.0.0.1:5050")
assert.NoError(t, err)
pid02, err := upid.Parse("slave02@127.0.0.1:5050")
assert.NoError(t, err)
cache.putSlavePid(util.NewSlaveID("slave01"), pid01)
cache.putSlavePid(util.NewSlaveID("slave02"), pid02)
assert.True(t, cache.containsSlavePid(util.NewSlaveID("slave01")))
assert.True(t, cache.containsSlavePid(util.NewSlaveID("slave02")))
assert.False(t, cache.containsSlavePid(util.NewSlaveID("slave05")))
cache.removeSlavePid(util.NewSlaveID("slave01"))
assert.Equal(t, 1, len(cache.savedSlavePids))
assert.False(t, cache.containsSlavePid(util.NewSlaveID("slave01")))
}
func createTestOffer(idSuffix string) *mesos.Offer {
return util.NewOffer(
util.NewOfferID("test-offer-"+idSuffix),
util.NewFrameworkID("test-framework-"+idSuffix),
util.NewSlaveID("test-slave-"+idSuffix),
"localhost."+idSuffix,
)
}

View File

@@ -0,0 +1,191 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package scheduler
import (
mesos "github.com/mesos/mesos-go/mesosproto"
)
// Interface for connecting a scheduler to Mesos. This
// interface is used both to manage the scheduler's lifecycle (start
// it, stop it, or wait for it to finish) and to interact with Mesos
// (e.g., launch tasks, kill tasks, etc.).
// See the MesosSchedulerDriver type for a concrete
// impl of a SchedulerDriver.
type SchedulerDriver interface {
// Starts the scheduler driver. This needs to be called before any
// other driver calls are made.
Start() (mesos.Status, error)
// Stops the scheduler driver. If the 'failover' flag is set to
// false then it is expected that this framework will never
// reconnect to Mesos and all of its executors and tasks can be
// terminated. Otherwise, all executors and tasks will remain
// running (for some framework specific failover timeout) allowing the
// scheduler to reconnect (possibly in the same process, or from a
// different process, for example, on a different machine).
Stop(failover bool) (mesos.Status, error)
// Aborts the driver so that no more callbacks can be made to the
// scheduler. The semantics of abort and stop have deliberately been
// separated so that code can detect an aborted driver (i.e., via
// the return status of SchedulerDriver::join, see below), and
// instantiate and start another driver if desired (from within the
// same process). Note that 'Stop()' is not automatically called
// inside 'Abort()'.
Abort() (mesos.Status, error)
// Waits for the driver to be stopped or aborted, possibly
// _blocking_ the current thread indefinitely. The return status of
// this function can be used to determine if the driver was aborted
// (see mesos.proto for a description of Status).
Join() (mesos.Status, error)
// Starts and immediately joins (i.e., blocks on) the driver.
Run() (mesos.Status, error)
// Requests resources from Mesos (see mesos.proto for a description
// of Request and how, for example, to request resources
// from specific slaves). Any resources available are offered to the
// framework via Scheduler.ResourceOffers callback, asynchronously.
RequestResources(requests []*mesos.Request) (mesos.Status, error)
// Launches the given set of tasks. Any resources remaining (i.e.,
// not used by the tasks or their executors) will be considered
// declined. The specified filters are applied on all unused
// resources (see mesos.proto for a description of Filters).
// Available resources are aggregated when multiple offers are
// provided. Note that all offers must belong to the same slave.
// Invoking this function with an empty collection of tasks declines
// offers in their entirety (see Scheduler::declineOffer).
LaunchTasks(offerIDs []*mesos.OfferID, tasks []*mesos.TaskInfo, filters *mesos.Filters) (mesos.Status, error)
// Kills the specified task. Note that attempting to kill a task is
// currently not reliable. If, for example, a scheduler fails over
// while it was attempting to kill a task it will need to retry in
// the future. Likewise, if unregistered / disconnected, the request
// will be dropped (these semantics may be changed in the future).
KillTask(taskID *mesos.TaskID) (mesos.Status, error)
// Declines an offer in its entirety and applies the specified
// filters on the resources (see mesos.proto for a description of
// Filters). Note that this can be done at any time, it is not
// necessary to do this within the Scheduler::resourceOffers
// callback.
DeclineOffer(offerID *mesos.OfferID, filters *mesos.Filters) (mesos.Status, error)
// Removes all filters previously set by the framework (via
// LaunchTasks()). This enables the framework to receive offers from
// those filtered slaves.
ReviveOffers() (mesos.Status, error)
// Sends a message from the framework to one of its executors. These
// messages are best effort; do not expect a framework message to be
// retransmitted in any reliable fashion.
SendFrameworkMessage(executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, data string) (mesos.Status, error)
// Allows the framework to query the status for non-terminal tasks.
// This causes the master to send back the latest task status for
// each task in 'statuses', if possible. Tasks that are no longer
// known will result in a TASK_LOST update. If statuses is empty,
// then the master will send the latest status for each task
// currently known.
ReconcileTasks(statuses []*mesos.TaskStatus) (mesos.Status, error)
}
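// A hedged lifecycle sketch (names taken from the driver implementation and
// tests in this change; mySched and frameworkInfo are placeholders):
//
//	driver, err := NewMesosSchedulerDriver(DriverConfig{
//	        Scheduler: mySched,       // your Scheduler implementation
//	        Framework: frameworkInfo, // *mesos.FrameworkInfo
//	        Master:    "127.0.0.1:5050",
//	})
//	if err != nil {
//	        log.Fatal(err)
//	}
//	stat, err := driver.Run() // Start() and then Join()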
// Scheduler is a type with callback attributes to be provided by framework
// schedulers.
//
// Each callback includes a reference to the scheduler driver that was
// used to run this scheduler. The pointer will not change for the
// duration of a scheduler (i.e., from the point you do
// SchedulerDriver.Start() to the point that SchedulerDriver.Stop()
// returns). This is intended for convenience so that a scheduler
// doesn't need to store a reference to the driver itself.
type Scheduler interface {
// Invoked when the scheduler successfully registers with a Mesos
// master. A unique ID (generated by the master) used for
// distinguishing this framework from others and MasterInfo
// with the ip and port of the current master are provided as arguments.
Registered(SchedulerDriver, *mesos.FrameworkID, *mesos.MasterInfo)
// Invoked when the scheduler re-registers with a newly elected Mesos master.
// This is only called when the scheduler has previously been registered.
// MasterInfo containing the updated information about the elected master
// is provided as an argument.
Reregistered(SchedulerDriver, *mesos.MasterInfo)
// Invoked when the scheduler becomes "disconnected" from the master
// (e.g., the master fails and another is taking over).
Disconnected(SchedulerDriver)
// Invoked when resources have been offered to this framework. A
// single offer will only contain resources from a single slave.
// Resources associated with an offer will not be re-offered to
// _this_ framework until either (a) this framework has rejected
// those resources (see SchedulerDriver::launchTasks) or (b) those
// resources have been rescinded (see Scheduler::offerRescinded).
// Note that resources may be concurrently offered to more than one
// framework at a time (depending on the allocator being used). In
// that case, the first framework to launch tasks using those
// resources will be able to use them while the other frameworks
// will have those resources rescinded (or if a framework has
// already launched tasks with those resources then those tasks will
// fail with a TASK_LOST status and a message saying as much).
ResourceOffers(SchedulerDriver, []*mesos.Offer)
// Invoked when an offer is no longer valid (e.g., the slave was
// lost or another framework used resources in the offer). If for
// whatever reason an offer is never rescinded (e.g., dropped
// message, failing over framework, etc.), a framework that attempts
// to launch tasks using an invalid offer will receive TASK_LOST
// status updates for those tasks (see Scheduler::resourceOffers).
OfferRescinded(SchedulerDriver, *mesos.OfferID)
// Invoked when the status of a task has changed (e.g., a slave is
// lost and so the task is lost, a task finishes and an executor
// sends a status update saying so, etc). Note that returning from
// this callback _acknowledges_ receipt of this status update! If
// for whatever reason the scheduler aborts during this callback (or
// the process exits) another status update will be delivered (note,
// however, that this is currently not true if the slave sending the
// status update is lost/fails during that time).
StatusUpdate(SchedulerDriver, *mesos.TaskStatus)
// Invoked when an executor sends a message. These messages are best
// effort; do not expect a framework message to be retransmitted in
// any reliable fashion.
FrameworkMessage(SchedulerDriver, *mesos.ExecutorID, *mesos.SlaveID, string)
// Invoked when a slave has been determined unreachable (e.g.,
// machine failure, network partition). Most frameworks will need to
// reschedule any tasks launched on this slave on a new slave.
SlaveLost(SchedulerDriver, *mesos.SlaveID)
// Invoked when an executor has exited/terminated. Note that any
// tasks running will have TASK_LOST status updates automagically
// generated.
ExecutorLost(SchedulerDriver, *mesos.ExecutorID, *mesos.SlaveID, int)
// Invoked when there is an unrecoverable error in the scheduler or
// scheduler driver. The driver will be aborted BEFORE invoking this
// callback.
Error(SchedulerDriver, string)
}
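// Sketch of a no-op Scheduler for illustration (compare MockScheduler earlier
// in this change); a real scheduler fills in only the callbacks it needs:
//
//	type noopScheduler struct{}
//
//	func (noopScheduler) Registered(SchedulerDriver, *mesos.FrameworkID, *mesos.MasterInfo) {}
//	func (noopScheduler) Reregistered(SchedulerDriver, *mesos.MasterInfo)                   {}
//	func (noopScheduler) Disconnected(SchedulerDriver)                                      {}
//	func (noopScheduler) ResourceOffers(SchedulerDriver, []*mesos.Offer)                    {}
//	func (noopScheduler) OfferRescinded(SchedulerDriver, *mesos.OfferID)                    {}
//	func (noopScheduler) StatusUpdate(SchedulerDriver, *mesos.TaskStatus)                   {}
//	func (noopScheduler) FrameworkMessage(SchedulerDriver, *mesos.ExecutorID, *mesos.SlaveID, string) {}
//	func (noopScheduler) SlaveLost(SchedulerDriver, *mesos.SlaveID)                         {}
//	func (noopScheduler) ExecutorLost(SchedulerDriver, *mesos.ExecutorID, *mesos.SlaveID, int) {}
//	func (noopScheduler) Error(SchedulerDriver, string)                                     {}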

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,442 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package scheduler
import (
"io/ioutil"
"net/http"
"reflect"
"sync"
"testing"
"time"
"github.com/gogo/protobuf/proto"
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
util "github.com/mesos/mesos-go/mesosutil"
"github.com/mesos/mesos-go/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
)
// testScheduler is used for testing Scheduler callbacks.
type testScheduler struct {
ch chan bool
wg *sync.WaitGroup
s *SchedulerIntegrationTestSuite
}
// convenience
func (sched *testScheduler) T() *testing.T {
return sched.s.T()
}
func (sched *testScheduler) Registered(dr SchedulerDriver, fw *mesos.FrameworkID, mi *mesos.MasterInfo) {
log.Infoln("Sched.Registered() called.")
sched.s.Equal(fw.GetValue(), sched.s.registeredFrameworkId.GetValue(), "driver did not register the expected framework ID")
sched.s.Equal(mi.GetIp(), uint32(123456))
sched.ch <- true
}
func (sched *testScheduler) Reregistered(dr SchedulerDriver, mi *mesos.MasterInfo) {
log.Infoln("Sched.Reregistered() called")
sched.s.Equal(mi.GetIp(), uint32(123456))
sched.ch <- true
}
func (sched *testScheduler) Disconnected(dr SchedulerDriver) {
log.Infoln("Shed.Disconnected() called")
}
func (sched *testScheduler) ResourceOffers(dr SchedulerDriver, offers []*mesos.Offer) {
log.Infoln("Sched.ResourceOffers called.")
sched.s.NotNil(offers)
sched.s.Equal(len(offers), 1)
sched.ch <- true
}
func (sched *testScheduler) OfferRescinded(dr SchedulerDriver, oid *mesos.OfferID) {
log.Infoln("Sched.OfferRescinded() called.")
sched.s.NotNil(oid)
sched.s.Equal("test-offer-001", oid.GetValue())
sched.ch <- true
}
func (sched *testScheduler) StatusUpdate(dr SchedulerDriver, stat *mesos.TaskStatus) {
log.Infoln("Sched.StatusUpdate() called.")
sched.s.NotNil(stat)
sched.s.Equal("test-task-001", stat.GetTaskId().GetValue())
sched.wg.Done()
log.Infof("Status update done with waitGroup %v \n", sched.wg)
}
func (sched *testScheduler) SlaveLost(dr SchedulerDriver, slaveId *mesos.SlaveID) {
log.Infoln("Sched.SlaveLost() called.")
sched.s.NotNil(slaveId)
sched.s.Equal(slaveId.GetValue(), "test-slave-001")
sched.ch <- true
}
func (sched *testScheduler) FrameworkMessage(dr SchedulerDriver, execId *mesos.ExecutorID, slaveId *mesos.SlaveID, data string) {
log.Infoln("Sched.FrameworkMessage() called.")
sched.s.NotNil(slaveId)
sched.s.Equal(slaveId.GetValue(), "test-slave-001")
sched.s.NotNil(execId)
sched.s.NotNil(data)
sched.s.Equal("test-data-999", string(data))
sched.ch <- true
}
func (sched *testScheduler) ExecutorLost(SchedulerDriver, *mesos.ExecutorID, *mesos.SlaveID, int) {
log.Infoln("Sched.ExecutorLost called")
}
func (sched *testScheduler) Error(dr SchedulerDriver, err string) {
log.Infoln("Sched.Error() called.")
sched.s.Equal("test-error-999", err)
sched.ch <- true
}
func (sched *testScheduler) waitForCallback(timeout time.Duration) bool {
if timeout == 0 {
timeout = 2 * time.Second
}
select {
case <-sched.ch:
//callback complete
return true
case <-time.After(timeout):
sched.T().Fatalf("timed out after waiting %v for callback", timeout)
}
return false
}
func newTestScheduler(s *SchedulerIntegrationTestSuite) *testScheduler {
return &testScheduler{ch: make(chan bool), s: s}
}
type mockServerConfigurator func(frameworkId *mesos.FrameworkID, suite *SchedulerIntegrationTestSuite)
type SchedulerIntegrationTestSuiteCore struct {
SchedulerTestSuiteCore
server *testutil.MockMesosHttpServer
driver *MesosSchedulerDriver
sched *testScheduler
config mockServerConfigurator
validator http.HandlerFunc
registeredFrameworkId *mesos.FrameworkID
}
type SchedulerIntegrationTestSuite struct {
suite.Suite
SchedulerIntegrationTestSuiteCore
}
// sets up a mock Mesos HTTP master listener, scheduler, and scheduler driver for testing.
// attempts to wait for a registered or re-registered callback on the suite.sched.
func (suite *SchedulerIntegrationTestSuite) configure(frameworkId *mesos.FrameworkID) bool {
t := suite.T()
// start mock master server to handle connection
suite.server = testutil.NewMockMasterHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
log.Infoln("MockMaster - rcvd ", req.RequestURI)
if suite.validator != nil {
suite.validator(rsp, req)
} else {
ioutil.ReadAll(req.Body)
defer req.Body.Close()
rsp.WriteHeader(http.StatusAccepted)
}
})
t.Logf("test HTTP server listening on %v", suite.server.Addr)
suite.sched = newTestScheduler(suite)
suite.sched.ch = make(chan bool, 10) // big enough that it doesn't block callback processing
suite.driver = newTestSchedulerDriver(suite.T(), suite.sched, suite.framework, suite.server.Addr, nil)
suite.config(frameworkId, suite)
stat, err := suite.driver.Start()
suite.NoError(err)
suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
ok := waitForConnected(t, suite.driver, 2*time.Second)
if ok {
ok = suite.sched.waitForCallback(0) // registered or re-registered callback
}
return ok
}
func (suite *SchedulerIntegrationTestSuite) configureServerWithRegisteredFramework() bool {
// suite.framework is used to initialize the FrameworkInfo of
// the driver, so if we clear the Id then we'll expect a registration message
id := suite.framework.Id
suite.framework.Id = nil
suite.registeredFrameworkId = id
return suite.configure(id)
}
var defaultMockServerConfigurator = mockServerConfigurator(func(frameworkId *mesos.FrameworkID, suite *SchedulerIntegrationTestSuite) {
t := suite.T()
masterInfo := util.NewMasterInfo("master", 123456, 1234)
suite.server.On("/master/mesos.internal.RegisterFrameworkMessage").Do(func(rsp http.ResponseWriter, req *http.Request) {
if suite.validator != nil {
t.Logf("validating registration request")
suite.validator(rsp, req)
} else {
ioutil.ReadAll(req.Body)
defer req.Body.Close()
rsp.WriteHeader(http.StatusAccepted)
}
// this is what the mocked scheduler is expecting to receive
suite.driver.frameworkRegistered(suite.driver.MasterPid, &mesos.FrameworkRegisteredMessage{
FrameworkId: frameworkId,
MasterInfo: masterInfo,
})
})
suite.server.On("/master/mesos.internal.ReregisterFrameworkMessage").Do(func(rsp http.ResponseWriter, req *http.Request) {
if suite.validator != nil {
suite.validator(rsp, req)
} else {
ioutil.ReadAll(req.Body)
defer req.Body.Close()
rsp.WriteHeader(http.StatusAccepted)
}
// this is what the mocked scheduler is expecting to receive
suite.driver.frameworkReregistered(suite.driver.MasterPid, &mesos.FrameworkReregisteredMessage{
FrameworkId: frameworkId,
MasterInfo: masterInfo,
})
})
})
func (s *SchedulerIntegrationTestSuite) newMockClient() *testutil.MockMesosClient {
return testutil.NewMockMesosClient(s.T(), s.server.PID)
}
func (s *SchedulerIntegrationTestSuite) SetupTest() {
s.SchedulerTestSuiteCore.SetupTest()
s.config = defaultMockServerConfigurator
}
func (s *SchedulerIntegrationTestSuite) TearDownTest() {
if s.server != nil {
s.server.Close()
}
if s.driver != nil && s.driver.Status() == mesos.Status_DRIVER_RUNNING {
s.driver.Abort()
}
}
// ---------------------------------- Tests ---------------------------------- //
func TestSchedulerIntegrationSuite(t *testing.T) {
suite.Run(t, new(SchedulerIntegrationTestSuite))
}
func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverRegisterFrameworkMessage() {
t := suite.T()
id := suite.framework.Id
suite.framework.Id = nil
validated := make(chan struct{})
var closeOnce sync.Once
suite.validator = http.HandlerFunc(func(rsp http.ResponseWriter, req *http.Request) {
t.Logf("RCVD request %s", req.URL)
data, err := ioutil.ReadAll(req.Body)
if err != nil {
t.Fatalf("Missing message data from request")
}
defer req.Body.Close()
if "/master/mesos.internal.RegisterFrameworkMessage" != req.RequestURI {
rsp.WriteHeader(http.StatusAccepted)
return
}
defer closeOnce.Do(func() { close(validated) })
message := new(mesos.RegisterFrameworkMessage)
err = proto.Unmarshal(data, message)
if err != nil {
t.Fatal("Problem unmarshaling expected RegisterFrameworkMessage")
}
suite.NotNil(message)
info := message.GetFramework()
suite.NotNil(info)
suite.Equal(suite.framework.GetName(), info.GetName())
suite.True(reflect.DeepEqual(suite.framework.GetId(), info.GetId()))
rsp.WriteHeader(http.StatusOK)
})
ok := suite.configure(id)
suite.True(ok, "failed to establish running test server and driver")
select {
case <-time.After(1 * time.Second):
t.Fatalf("failed to complete validation of framework registration message")
case <-validated:
// noop
}
}
func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverFrameworkRegisteredEvent() {
ok := suite.configureServerWithRegisteredFramework()
suite.True(ok, "failed to establish running test server and driver")
}
func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverFrameworkReregisteredEvent() {
ok := suite.configure(suite.framework.Id)
suite.True(ok, "failed to establish running test server and driver")
}
func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverResourceOffersEvent() {
ok := suite.configureServerWithRegisteredFramework()
suite.True(ok, "failed to establish running test server and driver")
// Send an event to this SchedulerDriver (via http) to test handlers.
offer := util.NewOffer(
util.NewOfferID("test-offer-001"),
suite.registeredFrameworkId,
util.NewSlaveID("test-slave-001"),
"test-localhost",
)
pbMsg := &mesos.ResourceOffersMessage{
Offers: []*mesos.Offer{offer},
Pids: []string{"test-offer-001@test-slave-001:5051"},
}
c := suite.newMockClient()
c.SendMessage(suite.driver.self, pbMsg)
suite.sched.waitForCallback(0)
}
func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverRescindOfferEvent() {
ok := suite.configureServerWithRegisteredFramework()
suite.True(ok, "failed to establish running test server and driver")
// Send an event to this SchedulerDriver (via http) to test handlers.
pbMsg := &mesos.RescindResourceOfferMessage{
OfferId: util.NewOfferID("test-offer-001"),
}
c := suite.newMockClient()
c.SendMessage(suite.driver.self, pbMsg)
suite.sched.waitForCallback(0)
}
func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverStatusUpdatedEvent() {
t := suite.T()
var wg sync.WaitGroup
wg.Add(2)
suite.config = mockServerConfigurator(func(frameworkId *mesos.FrameworkID, suite *SchedulerIntegrationTestSuite) {
defaultMockServerConfigurator(frameworkId, suite)
suite.server.On("/master/mesos.internal.StatusUpdateAcknowledgementMessage").Do(func(rsp http.ResponseWriter, req *http.Request) {
log.Infoln("Master cvd ACK")
data, _ := ioutil.ReadAll(req.Body)
defer req.Body.Close()
assert.NotNil(t, data)
wg.Done()
log.Infof("MockMaster - Done with wait group %v \n", wg)
})
suite.sched.wg = &wg
})
ok := suite.configureServerWithRegisteredFramework()
suite.True(ok, "failed to establish running test server and driver")
// Send an event to this SchedulerDriver (via http) to test handlers.
pbMsg := &mesos.StatusUpdateMessage{
Update: util.NewStatusUpdate(
suite.registeredFrameworkId,
util.NewTaskStatus(util.NewTaskID("test-task-001"), mesos.TaskState_TASK_STARTING),
float64(time.Now().Unix()),
[]byte("test-abcd-ef-3455-454-001"),
),
Pid: proto.String(suite.driver.self.String()),
}
pbMsg.Update.SlaveId = &mesos.SlaveID{Value: proto.String("test-slave-001")}
c := suite.newMockClient()
c.SendMessage(suite.driver.self, pbMsg)
wg.Wait()
}
func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverLostSlaveEvent() {
ok := suite.configureServerWithRegisteredFramework()
suite.True(ok, "failed to establish running test server and driver")
// Send an event to this SchedulerDriver (via http) to test handlers.
pbMsg := &mesos.LostSlaveMessage{
SlaveId: util.NewSlaveID("test-slave-001"),
}
c := suite.newMockClient()
c.SendMessage(suite.driver.self, pbMsg)
suite.sched.waitForCallback(0)
}
func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverFrameworkMessageEvent() {
ok := suite.configureServerWithRegisteredFramework()
suite.True(ok, "failed to establish running test server and driver")
// Send an event to this SchedulerDriver (via http) to test handlers.
pbMsg := &mesos.ExecutorToFrameworkMessage{
SlaveId: util.NewSlaveID("test-slave-001"),
FrameworkId: suite.registeredFrameworkId,
ExecutorId: util.NewExecutorID("test-executor-001"),
Data: []byte("test-data-999"),
}
c := suite.newMockClient()
c.SendMessage(suite.driver.self, pbMsg)
suite.sched.waitForCallback(0)
}
func waitForConnected(t *testing.T, driver *MesosSchedulerDriver, timeout time.Duration) bool {
connected := make(chan struct{})
go func() {
defer close(connected)
for !driver.Connected() {
time.Sleep(200 * time.Millisecond)
}
}()
select {
case <-time.After(timeout):
t.Fatalf("driver failed to establish connection within %v", timeout)
return false
case <-connected:
return true
}
}
func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverFrameworkErrorEvent() {
ok := suite.configureServerWithRegisteredFramework()
suite.True(ok, "failed to establish running test server and driver")
// Send an error event to this SchedulerDriver (via http) to test handlers.
pbMsg := &mesos.FrameworkErrorMessage{
Message: proto.String("test-error-999"),
}
c := suite.newMockClient()
c.SendMessage(suite.driver.self, pbMsg)
suite.sched.waitForCallback(0)
suite.Equal(mesos.Status_DRIVER_ABORTED, suite.driver.Status())
}

View File

@@ -0,0 +1,653 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package scheduler
import (
"fmt"
"os"
"os/user"
"sync"
"testing"
"time"
"github.com/gogo/protobuf/proto"
log "github.com/golang/glog"
"github.com/mesos/mesos-go/detector"
"github.com/mesos/mesos-go/detector/zoo"
mesos "github.com/mesos/mesos-go/mesosproto"
util "github.com/mesos/mesos-go/mesosutil"
"github.com/mesos/mesos-go/messenger"
"github.com/mesos/mesos-go/upid"
"github.com/samuel/go-zookeeper/zk"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
)
var (
registerMockDetectorOnce sync.Once
)
func ensureMockDetectorRegistered() {
registerMockDetectorOnce.Do(func() {
var s *SchedulerTestSuite
err := s.registerMockDetector("testing://")
if err != nil {
log.Error(err)
}
})
}
type MockDetector struct {
mock.Mock
address string
}
func (m *MockDetector) Detect(listener detector.MasterChanged) error {
if listener != nil {
if pid, err := upid.Parse("master(2)@" + m.address); err != nil {
return err
} else {
go listener.OnMasterChanged(detector.CreateMasterInfo(pid))
}
}
return nil
}
func (m *MockDetector) Done() <-chan struct{} {
return nil
}
func (m *MockDetector) Cancel() {}
type SchedulerTestSuiteCore struct {
master string
masterUpid string
masterId string
frameworkID string
framework *mesos.FrameworkInfo
}
type SchedulerTestSuite struct {
suite.Suite
SchedulerTestSuiteCore
}
func (s *SchedulerTestSuite) registerMockDetector(prefix string) error {
address := ""
if s != nil {
address = s.master
} else {
address = "127.0.0.1:8080"
}
return detector.Register(prefix, detector.PluginFactory(func(spec string) (detector.Master, error) {
return &MockDetector{address: address}, nil
}))
}
func (s *SchedulerTestSuiteCore) SetupTest() {
s.master = "127.0.0.1:8080"
s.masterUpid = "master(2)@" + s.master
s.masterId = "some-master-id-uuid"
s.frameworkID = "some-framework-id-uuid"
s.framework = util.NewFrameworkInfo(
"test-user",
"test-name",
util.NewFrameworkID(s.frameworkID),
)
}
func TestSchedulerSuite(t *testing.T) {
t.Logf("running scheduler test suite..")
suite.Run(t, new(SchedulerTestSuite))
}
func newTestSchedulerDriver(t *testing.T, sched Scheduler, framework *mesos.FrameworkInfo, master string, cred *mesos.Credential) *MesosSchedulerDriver {
dconfig := DriverConfig{
Scheduler: sched,
Framework: framework,
Master: master,
Credential: cred,
}
driver, err := NewMesosSchedulerDriver(dconfig)
if err != nil {
t.Fatal(err)
}
return driver
}
func TestSchedulerDriverNew(t *testing.T) {
masterAddr := "localhost:5050"
driver := newTestSchedulerDriver(t, NewMockScheduler(), &mesos.FrameworkInfo{}, masterAddr, nil)
user, _ := user.Current()
assert.Equal(t, user.Username, driver.FrameworkInfo.GetUser())
host, _ := os.Hostname()
assert.Equal(t, host, driver.FrameworkInfo.GetHostname())
}
func TestSchedulerDriverNew_WithPid(t *testing.T) {
masterAddr := "master@127.0.0.1:5050"
mUpid, err := upid.Parse(masterAddr)
assert.NoError(t, err)
driver := newTestSchedulerDriver(t, NewMockScheduler(), &mesos.FrameworkInfo{}, masterAddr, nil)
driver.handleMasterChanged(driver.self, &mesos.InternalMasterChangeDetected{Master: &mesos.MasterInfo{Pid: proto.String(mUpid.String())}})
assert.True(t, driver.MasterPid.Equal(mUpid), fmt.Sprintf("expected upid %+v instead of %+v", mUpid, driver.MasterPid))
assert.NoError(t, err)
}
func (suite *SchedulerTestSuite) TestSchedulerDriverNew_WithZkUrl() {
masterAddr := "zk://127.0.0.1:5050/mesos"
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, masterAddr, nil)
md, err := zoo.NewMockMasterDetector(masterAddr)
suite.NoError(err)
suite.NotNil(md)
driver.masterDetector = md // override internal master detector
md.ScheduleConnEvent(zk.StateConnected)
done := make(chan struct{})
driver.masterDetector.Detect(detector.OnMasterChanged(func(m *mesos.MasterInfo) {
suite.NotNil(m)
suite.NotEqual(m.GetPid(), suite.masterUpid)
close(done)
}))
//TODO(vlad) revisit, detector not responding.
//NOTE(jdef) this works for me, I wonder if the timeouts are too short, or if
//GOMAXPROCS settings are affecting the result?
// md.ScheduleSessEvent(zk.EventNodeChildrenChanged)
// select {
// case <-done:
// case <-time.After(time.Millisecond * 1000):
// suite.T().Errorf("Timed out waiting for children event.")
// }
}
func (suite *SchedulerTestSuite) TestSchedulerDriverNew_WithFrameworkInfo_Override() {
suite.framework.Hostname = proto.String("local-host")
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, "127.0.0.1:5050", nil)
suite.Equal(driver.FrameworkInfo.GetUser(), "test-user")
suite.Equal("local-host", driver.FrameworkInfo.GetHostname())
}
func (suite *SchedulerTestSuite) TestSchedulerDriverStartOK() {
sched := NewMockScheduler()
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
driver := newTestSchedulerDriver(suite.T(), sched, suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
stat, err := driver.Start()
suite.NoError(err)
suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
suite.False(driver.Stopped())
}
func (suite *SchedulerTestSuite) TestSchedulerDriverStartWithMessengerFailure() {
sched := NewMockScheduler()
sched.On("Error").Return()
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(fmt.Errorf("Failed to start messenger"))
messenger.On("Stop").Return()
driver := newTestSchedulerDriver(suite.T(), sched, suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
stat, err := driver.Start()
suite.Error(err)
suite.True(driver.Stopped())
suite.False(driver.Connected())
suite.Equal(mesos.Status_DRIVER_NOT_STARTED, driver.Status())
suite.Equal(mesos.Status_DRIVER_NOT_STARTED, stat)
}
func (suite *SchedulerTestSuite) TestSchedulerDriverStartWithRegistrationFailure() {
sched := NewMockScheduler()
sched.On("Error").Return()
// Set expectations and return values.
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Stop").Return(nil)
driver := newTestSchedulerDriver(suite.T(), sched, suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
// reliable registration loops until the driver is stopped, connected, etc..
stat, err := driver.Start()
suite.NoError(err)
suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
time.Sleep(5 * time.Second) // wait a bit, registration should be looping...
suite.False(driver.Stopped())
suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
// stop the driver, should not panic!
driver.Stop(false) // not failing over
suite.True(driver.Stopped())
suite.Equal(mesos.Status_DRIVER_STOPPED, driver.Status())
messenger.AssertExpectations(suite.T())
}
func (suite *SchedulerTestSuite) TestSchedulerDriverJoinUnstarted() {
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
suite.True(driver.Stopped())
stat, err := driver.Join()
suite.Error(err)
suite.Equal(mesos.Status_DRIVER_NOT_STARTED, stat)
}
func (suite *SchedulerTestSuite) TestSchedulerDriverJoinOK() {
// Set expectations and return values.
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
stat, err := driver.Start()
suite.NoError(err)
suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
suite.False(driver.Stopped())
testCh := make(chan mesos.Status)
go func() {
stat, _ := driver.Join()
testCh <- stat
}()
close(driver.stopCh) // manually stopping
stat = <-testCh // when Stop() is called, stat will be DRIVER_STOPPED.
}
func (suite *SchedulerTestSuite) TestSchedulerDriverRun() {
// Set expectations and return values.
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
go func() {
stat, err := driver.Run()
suite.NoError(err)
suite.Equal(mesos.Status_DRIVER_STOPPED, stat)
}()
time.Sleep(time.Millisecond * 1)
suite.False(driver.Stopped())
suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
// close it all.
driver.setStatus(mesos.Status_DRIVER_STOPPED)
close(driver.stopCh)
time.Sleep(time.Millisecond * 1)
}
func (suite *SchedulerTestSuite) TestSchedulerDriverStopUnstarted() {
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
suite.True(driver.Stopped())
stat, err := driver.Stop(true)
suite.NotNil(err)
suite.True(driver.Stopped())
suite.Equal(mesos.Status_DRIVER_NOT_STARTED, stat)
}
func (suite *SchedulerTestSuite) TestSchedulerDriverStopOK() {
// Set expectations and return values.
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
messenger.On("Route").Return(nil)
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
go func() {
stat, err := driver.Run()
suite.NoError(err)
suite.Equal(mesos.Status_DRIVER_STOPPED, stat)
}()
time.Sleep(time.Millisecond * 1)
suite.False(driver.Stopped())
suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
driver.Stop(false)
time.Sleep(time.Millisecond * 1)
suite.True(driver.Stopped())
suite.Equal(mesos.Status_DRIVER_STOPPED, driver.Status())
}
func (suite *SchedulerTestSuite) TestSchedulerDriverAbort() {
// Set expectations and return values.
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
messenger.On("Route").Return(nil)
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
go func() {
stat, err := driver.Run()
suite.NoError(err)
suite.Equal(mesos.Status_DRIVER_ABORTED, stat)
}()
time.Sleep(time.Millisecond * 1)
driver.setConnected(true) // simulated
suite.False(driver.Stopped())
suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
stat, err := driver.Abort()
time.Sleep(time.Millisecond * 1)
suite.NoError(err)
suite.True(driver.Stopped())
suite.Equal(mesos.Status_DRIVER_ABORTED, stat)
suite.Equal(mesos.Status_DRIVER_ABORTED, driver.Status())
}
func (suite *SchedulerTestSuite) TestSchedulerDriverLaunchTasksUnstarted() {
sched := NewMockScheduler()
sched.On("Error").Return()
// Set expectations and return values.
messenger := messenger.NewMockedMessenger()
messenger.On("Route").Return(nil)
driver := newTestSchedulerDriver(suite.T(), sched, suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
stat, err := driver.LaunchTasks(
[]*mesos.OfferID{&mesos.OfferID{}},
[]*mesos.TaskInfo{},
&mesos.Filters{},
)
suite.Error(err)
suite.Equal(mesos.Status_DRIVER_NOT_STARTED, stat)
}
func (suite *SchedulerTestSuite) TestSchedulerDriverLaunchTasksWithError() {
sched := NewMockScheduler()
sched.On("StatusUpdate").Return(nil)
sched.On("Error").Return()
msgr := messenger.NewMockedMessenger()
msgr.On("Start").Return(nil)
msgr.On("Send").Return(nil)
msgr.On("UPID").Return(&upid.UPID{})
msgr.On("Stop").Return(nil)
msgr.On("Route").Return(nil)
driver := newTestSchedulerDriver(suite.T(), sched, suite.framework, suite.master, nil)
driver.messenger = msgr
suite.True(driver.Stopped())
go func() {
driver.Run()
}()
time.Sleep(time.Millisecond * 1)
driver.setConnected(true) // simulated
suite.False(driver.Stopped())
suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
// to trigger error
msgr2 := messenger.NewMockedMessenger()
msgr2.On("Start").Return(nil)
msgr2.On("UPID").Return(&upid.UPID{})
msgr2.On("Send").Return(fmt.Errorf("Unable to send message"))
msgr2.On("Stop").Return(nil)
msgr.On("Route").Return(nil)
driver.messenger = msgr2
// setup an offer
offer := util.NewOffer(
util.NewOfferID("test-offer-001"),
suite.framework.Id,
util.NewSlaveID("test-slave-001"),
"test-slave(1)@localhost:5050",
)
pid, err := upid.Parse("test-slave(1)@localhost:5050")
suite.NoError(err)
driver.cache.putOffer(offer, pid)
// launch task
task := util.NewTaskInfo(
"simple-task",
util.NewTaskID("simpe-task-1"),
util.NewSlaveID("test-slave-001"),
[]*mesos.Resource{util.NewScalarResource("mem", 400)},
)
task.Command = util.NewCommandInfo("pwd")
task.Executor = util.NewExecutorInfo(util.NewExecutorID("test-exec"), task.Command)
tasks := []*mesos.TaskInfo{task}
stat, err := driver.LaunchTasks(
[]*mesos.OfferID{offer.Id},
tasks,
&mesos.Filters{},
)
suite.Error(err)
suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
}
func (suite *SchedulerTestSuite) TestSchedulerDriverLaunchTasks() {
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
messenger.On("Route").Return(nil)
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
go func() {
driver.Run()
}()
time.Sleep(time.Millisecond * 1)
driver.setConnected(true) // simulated
suite.False(driver.Stopped())
suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
task := util.NewTaskInfo(
"simple-task",
util.NewTaskID("simpe-task-1"),
util.NewSlaveID("slave-1"),
[]*mesos.Resource{util.NewScalarResource("mem", 400)},
)
task.Command = util.NewCommandInfo("pwd")
tasks := []*mesos.TaskInfo{task}
stat, err := driver.LaunchTasks(
[]*mesos.OfferID{&mesos.OfferID{}},
tasks,
&mesos.Filters{},
)
suite.NoError(err)
suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
}
func (suite *SchedulerTestSuite) TestSchedulerDriverKillTask() {
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
messenger.On("Route").Return(nil)
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
go func() {
driver.Run()
}()
time.Sleep(time.Millisecond * 1)
driver.setConnected(true) // simulated
suite.False(driver.Stopped())
suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
stat, err := driver.KillTask(util.NewTaskID("test-task-1"))
suite.NoError(err)
suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
}
func (suite *SchedulerTestSuite) TestSchedulerDriverRequestResources() {
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
messenger.On("Route").Return(nil)
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
driver.Start()
driver.setConnected(true) // simulated
suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
stat, err := driver.RequestResources(
[]*mesos.Request{
&mesos.Request{
SlaveId: util.NewSlaveID("test-slave-001"),
Resources: []*mesos.Resource{
util.NewScalarResource("test-res-001", 33.00),
},
},
},
)
suite.NoError(err)
suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
}
func (suite *SchedulerTestSuite) TestSchedulerDriverDeclineOffers() {
// see LaunchTasks test
}
func (suite *SchedulerTestSuite) TestSchedulerDriverReviveOffers() {
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
messenger.On("Route").Return(nil)
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
driver.Start()
driver.setConnected(true) // simulated
suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
stat, err := driver.ReviveOffers()
suite.NoError(err)
suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
}
func (suite *SchedulerTestSuite) TestSchedulerDriverSendFrameworkMessage() {
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
messenger.On("Route").Return(nil)
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
driver.Start()
driver.setConnected(true) // simulated
suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
stat, err := driver.SendFrameworkMessage(
util.NewExecutorID("test-exec-001"),
util.NewSlaveID("test-slave-001"),
"Hello!",
)
suite.NoError(err)
suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
}
func (suite *SchedulerTestSuite) TestSchedulerDriverReconcileTasks() {
messenger := messenger.NewMockedMessenger()
messenger.On("Start").Return(nil)
messenger.On("UPID").Return(&upid.UPID{})
messenger.On("Send").Return(nil)
messenger.On("Stop").Return(nil)
messenger.On("Route").Return(nil)
driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
driver.messenger = messenger
suite.True(driver.Stopped())
driver.Start()
driver.setConnected(true) // simulated
suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
stat, err := driver.ReconcileTasks(
[]*mesos.TaskStatus{
util.NewTaskStatus(util.NewTaskID("test-task-001"), mesos.TaskState_TASK_FINISHED),
},
)
suite.NoError(err)
suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
}

View File

@@ -304,10 +304,9 @@ function kube::build::ensure_golang() {
}
}
# Set up the context directory for the kube-build image and build it.
function kube::build::build_image() {
local -r build_context_dir="${LOCAL_OUTPUT_IMAGE_STAGING}/${KUBE_BUILD_IMAGE}"
local -r source=(
# The set of source targets to include in the kube-build image
function kube::build::source_targets() {
local targets=(
api
build
cmd
@@ -323,11 +322,22 @@ function kube::build::build_image() {
test
third_party
)
if [ -n "${KUBERNETES_CONTRIB:-}" ]; then
for contrib in "${KUBERNETES_CONTRIB}"; do
targets+=($(eval "kube::contrib::${contrib}::source_targets"))
done
fi
echo "${targets[@]}"
}
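# Illustrative example: with KUBERNETES_CONTRIB=mesos set, the loop above
# evals a function named kube::contrib::mesos::source_targets and appends
# whatever targets that function echoes to the image's source list.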
# Set up the context directory for the kube-build image and build it.
function kube::build::build_image() {
local -r build_context_dir="${LOCAL_OUTPUT_IMAGE_STAGING}/${KUBE_BUILD_IMAGE}"
kube::build::build_image_cross
mkdir -p "${build_context_dir}"
tar czf "${build_context_dir}/kube-source.tar.gz" "${source[@]}"
tar czf "${build_context_dir}/kube-source.tar.gz" $(kube::build::source_targets)
kube::version::get_version_vars
kube::version::save_version_vars "${build_context_dir}/kube-version-defs"
@@ -412,8 +422,12 @@ function kube::build::run_build_command() {
local -a docker_run_opts=(
"--name=${KUBE_BUILD_CONTAINER_NAME}"
"${DOCKER_MOUNT_ARGS[@]}"
)
"${DOCKER_MOUNT_ARGS[@]}"
)
if [ -n "${KUBERNETES_CONTRIB:-}" ]; then
docker_run_opts+=(-e "KUBERNETES_CONTRIB=${KUBERNETES_CONTRIB}")
fi
# If we have stdin we can run interactive. This allows things like 'shell.sh'
# to work. However, if we run this way and don't have stdin, then it ends up

View File

@@ -17,6 +17,9 @@ limitations under the License.
// Package app makes it easy to create a kubelet server for various contexts.
package app
// Note: if you change code in this file, you might need to change code in
// contrib/mesos/pkg/executor/service/.
import (
"crypto/tls"
"fmt"

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package main implements the executable Kubernetes-Mesos executor.
package main

View File

@@ -0,0 +1,47 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"fmt"
"os"
"runtime"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/service"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/version/verflag"
"github.com/spf13/pflag"
)
func main() {
runtime.GOMAXPROCS(runtime.NumCPU())
s := service.NewKubeletExecutorServer()
s.AddStandaloneFlags(pflag.CommandLine)
util.InitFlags()
util.InitLogs()
defer util.FlushLogs()
verflag.PrintAndExitIfRequested()
if err := s.Run(hyperkube.Nil(), pflag.CommandLine.Args()); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}

View File

@@ -0,0 +1,21 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package main is used for testing the redirfd package.
// Inspired by http://skarnet.org/software/execline/redirfd.html.
// Usage:
// k8sm-redirfd [-n] [-b] {mode} {fd} {file} {prog...}
package main

View File

@@ -0,0 +1,105 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"flag"
"fmt"
"os"
"os/exec"
"syscall"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/redirfd"
)
func main() {
nonblock := flag.Bool("n", false, "open file in non-blocking mode")
changemode := flag.Bool("b", false, "change mode of file after opening it: to non-blocking mode if the -n option was not given, to blocking mode if it was")
flag.Parse()
args := flag.Args()
if len(args) < 4 {
fmt.Fprintf(os.Stderr, "expected {mode} {fd} {file} instead of: %v\n", args)
os.Exit(1)
}
var mode redirfd.RedirectMode
switch m := args[0]; m {
case "r":
mode = redirfd.Read
case "w":
mode = redirfd.Write
case "u":
mode = redirfd.Update
case "a":
mode = redirfd.Append
case "c":
mode = redirfd.AppendExisting
case "x":
mode = redirfd.WriteNew
default:
fmt.Fprintf(os.Stderr, "unrecognized mode %q\n", mode)
os.Exit(1)
}
fd, err := redirfd.ParseFileDescriptor(args[1])
if err != nil {
fmt.Fprintf(os.Stderr, "failed to parse file descriptor: %v\n", err)
os.Exit(1)
}
file := args[2]
f, err := mode.Redirect(*nonblock, *changemode, fd, file)
if err != nil {
fmt.Fprintf(os.Stderr, "redirect failed: %q, %v\n", args[1], err)
os.Exit(1)
}
var pargs []string
if len(args) > 4 {
pargs = args[4:]
}
cmd := exec.Command(args[3], pargs...)
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
switch fd {
case redirfd.Stdin:
cmd.Stdin = f
case redirfd.Stdout:
cmd.Stdout = f
case redirfd.Stderr:
cmd.Stderr = f
default:
cmd.ExtraFiles = []*os.File{f}
}
defer f.Close()
if err = cmd.Run(); err != nil {
// Guard the assertion: cmd.Run can fail with errors that are not *exec.ExitError.
if exiterr, ok := err.(*exec.ExitError); ok && exiterr.ProcessState != nil {
sys := exiterr.ProcessState.Sys()
if waitStatus, ok := sys.(syscall.WaitStatus); ok {
if waitStatus.Signaled() {
os.Exit(256 + int(waitStatus.Signal()))
} else {
os.Exit(waitStatus.ExitStatus())
}
}
}
os.Exit(3)
}
}
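A hypothetical invocation (binary name per the doc comment; the file path and command are illustrative): k8sm-redirfd w 1 /tmp/stdout.log echo hello opens /tmp/stdout.log in write mode ("w"), attaches it to file descriptor 1, and runs echo hello with its stdout redirected there. A child killed by a signal exits as 256 plus the signal number, and other failures exit 3, per the scheme above.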

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package main implements the executable Kubernetes-Mesos scheduler.
package main

View File

@@ -0,0 +1,46 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"fmt"
"os"
"runtime"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/service"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/version/verflag"
"github.com/spf13/pflag"
)
func main() {
runtime.GOMAXPROCS(runtime.NumCPU())
s := service.NewSchedulerServer()
s.AddStandaloneFlags(pflag.CommandLine)
util.InitFlags()
util.InitLogs()
defer util.FlushLogs()
verflag.PrintAndExitIfRequested()
if err := s.Run(hyperkube.Nil(), pflag.CommandLine.Args()); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}

View File

@@ -0,0 +1,43 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package assert
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// EventuallyTrue asserts that the given predicate becomes true within the given timeout. It
// checks the predicate every 100ms.
func EventuallyTrue(t *testing.T, timeout time.Duration, fn func() bool, msgAndArgs ...interface{}) bool {
start := time.Now()
for {
if fn() {
return true
}
if time.Since(start) > timeout {
if len(msgAndArgs) > 0 {
return assert.Fail(t, msgAndArgs[0].(string), msgAndArgs[1:]...)
} else {
return assert.Fail(t, "predicate fn has not been true after %v", timeout.String())
}
}
time.Sleep(100 * time.Millisecond)
}
}
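A minimal usage sketch, assuming the package lives under contrib/mesos/pkg/assert (the flag, the goroutine, and the import path are illustrative, not part of this change):
package assert_test

import (
	"sync/atomic"
	"testing"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/assert" // assumed import path
)

func TestFlagEventuallySet(t *testing.T) {
	var ready int32
	go func() {
		time.Sleep(300 * time.Millisecond) // flip the flag after a short delay
		atomic.StoreInt32(&ready, 1)
	}()
	// Polls every 100ms; fails the test if the flag is not set within 2s.
	assert.EventuallyTrue(t, 2*time.Second, func() bool {
		return atomic.LoadInt32(&ready) == 1
	}, "flag was never set")
}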

View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package assert is a utility package containing reusable testing functionality
// extending github.com/stretchr/testify/assert
package assert

View File

@@ -0,0 +1,96 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package backoff
import (
"math/rand"
"sync"
"time"
log "github.com/golang/glog"
)
type clock interface {
Now() time.Time
}
type realClock struct{}
func (realClock) Now() time.Time {
return time.Now()
}
type backoffEntry struct {
backoff time.Duration
lastUpdate time.Time
}
type Backoff struct {
perItemBackoff map[string]*backoffEntry
lock sync.Mutex
clock clock
defaultDuration time.Duration
maxDuration time.Duration
}
func New(initial, max time.Duration) *Backoff {
return &Backoff{
perItemBackoff: map[string]*backoffEntry{},
clock: realClock{},
defaultDuration: initial,
maxDuration: max,
}
}
func (p *Backoff) getEntry(id string) *backoffEntry {
p.lock.Lock()
defer p.lock.Unlock()
entry, ok := p.perItemBackoff[id]
if !ok {
entry = &backoffEntry{backoff: p.defaultDuration}
p.perItemBackoff[id] = entry
}
entry.lastUpdate = p.clock.Now()
return entry
}
func (p *Backoff) Get(id string) time.Duration {
entry := p.getEntry(id)
duration := entry.backoff
entry.backoff *= 2
if entry.backoff > p.maxDuration {
entry.backoff = p.maxDuration
}
//TODO(jdef) parameterize use of jitter?
// add jitter, get better backoff distribution
duration = time.Duration(rand.Int63n(int64(duration)))
log.V(3).Infof("Backing off %v for pod %s", duration, id)
return duration
}
// GC garbage collects records that have aged past maxDuration. Backoff users are expected
// to invoke this periodically.
func (p *Backoff) GC() {
p.lock.Lock()
defer p.lock.Unlock()
now := p.clock.Now()
for id, entry := range p.perItemBackoff {
if now.Sub(entry.lastUpdate) > p.maxDuration {
delete(p.perItemBackoff, id)
}
}
}
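A short usage sketch; the retried operation, the id, and the import path are assumptions for illustration:
package main

import (
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/backoff" // assumed import path
)

func tryLaunch() bool { return true } // hypothetical operation being retried

func main() {
	b := backoff.New(1*time.Second, 32*time.Second)
	for !tryLaunch() {
		// Get returns a jittered duration for this id and doubles the stored
		// backoff (capped at the max) for the next call.
		time.Sleep(b.Get("pod-example"))
	}
	b.GC() // long-running callers should instead invoke GC periodically
}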

View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package backoff provides backoff functionality with a simple API.
// Originally copied from Kubernetes: plugin/pkg/scheduler/factory/factory.go
package backoff

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package election provides interfaces used for master election.
package election

View File

@@ -0,0 +1,185 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"fmt"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/tools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
"github.com/coreos/go-etcd/etcd"
"github.com/golang/glog"
)
// Master is used to announce the current elected master.
type Master string
// IsAnAPIObject is used solely so we can work with the watch package.
// TODO(k8s): Either fix watch so this isn't necessary, or make this a real API Object.
// TODO(k8s): when it becomes clear how this package will be used, move these declarations to
// the proper place.
func (Master) IsAnAPIObject() {}
// NewEtcdMasterElector returns an implementation of election.MasterElector backed by etcd.
func NewEtcdMasterElector(h tools.EtcdGetSet) MasterElector {
return &etcdMasterElector{etcd: h}
}
type empty struct{}
// internal implementation struct
type etcdMasterElector struct {
etcd tools.EtcdGetSet
done chan empty
events chan watch.Event
}
// Elect implements the election.MasterElector interface.
func (e *etcdMasterElector) Elect(path, id string) watch.Interface {
e.done = make(chan empty)
e.events = make(chan watch.Event)
go util.Forever(func() { e.run(path, id) }, time.Second*5)
return e
}
func (e *etcdMasterElector) run(path, id string) {
masters := make(chan string)
errors := make(chan error)
go e.master(path, id, 30, masters, errors, e.done) // TODO(jdef) extract constant
for {
select {
case m := <-masters:
e.events <- watch.Event{
Type: watch.Modified,
Object: Master(m),
}
case err := <-errors:
glog.Errorf("error in election: %v", err)
}
}
}
// ResultChan implements the watch.Interface interface.
func (e *etcdMasterElector) ResultChan() <-chan watch.Event {
return e.events
}
// extendMaster attempts to extend ownership of a master lock for TTL seconds.
// returns "", nil if extension failed
// returns id, nil if extension succeeded
// returns "", err if an error occurred
func (e *etcdMasterElector) extendMaster(path, id string, ttl uint64, res *etcd.Response) (string, error) {
// If it matches the passed in id, extend the lease by writing a new entry.
// Uses compare and swap, so that if we TTL out in the meantime, the write will fail.
// We don't handle the TTL delete w/o a write case here, it's handled in the next loop
// iteration.
_, err := e.etcd.CompareAndSwap(path, id, ttl, "", res.Node.ModifiedIndex)
if err != nil && !tools.IsEtcdTestFailed(err) {
return "", err
}
if err != nil && tools.IsEtcdTestFailed(err) {
return "", nil
}
return id, nil
}
// becomeMaster attempts to become the master for this lock.
// returns "", nil if the attempt failed
// returns id, nil if the attempt succeeded
// returns "", err if an error occurred
func (e *etcdMasterElector) becomeMaster(path, id string, ttl uint64) (string, error) {
_, err := e.etcd.Create(path, id, ttl)
if err != nil && !tools.IsEtcdNodeExist(err) {
// unexpected error
return "", err
}
if err != nil && tools.IsEtcdNodeExist(err) {
return "", nil
}
return id, nil
}
// handleMaster performs one loop of master locking.
// on success it returns <master>, nil
// on error it returns "", err
// in situations where you should try again due to concurrent state changes (e.g. another actor simultaneously acquiring the lock)
// it returns "", nil
func (e *etcdMasterElector) handleMaster(path, id string, ttl uint64) (string, error) {
res, err := e.etcd.Get(path, false, false)
// Unexpected error, bail out
if err != nil && !tools.IsEtcdNotFound(err) {
return "", err
}
// There is no master, try to become the master.
if err != nil && tools.IsEtcdNotFound(err) {
return e.becomeMaster(path, id, ttl)
}
// This should never happen.
if res.Node == nil {
return "", fmt.Errorf("unexpected response: %#v", res)
}
// We're not the master, just return the current value
if res.Node.Value != id {
return res.Node.Value, nil
}
// We are the master, try to extend our lease.
return e.extendMaster(path, id, ttl, res)
}
// master provides a distributed master election lock; it maintains the lock until failure, or until someone sends something on the done channel.
// The basic algorithm is:
// while !done
// Get the current master
// If there is no current master
// Try to become the master
// Otherwise
// If we are the master, extend the lease
// If the master is different than the last time through the loop, report the master
// Sleep 80% of TTL
func (e *etcdMasterElector) master(path, id string, ttl uint64, masters chan<- string, errors chan<- error, done <-chan empty) {
lastMaster := ""
for {
master, err := e.handleMaster(path, id, ttl)
if err != nil {
errors <- err
} else if len(master) == 0 {
continue
} else if master != lastMaster {
lastMaster = master
masters <- master
}
// TODO(k8s): Add Watch here, skip the polling for faster reactions
// If done is closed, break out.
select {
case <-done:
return
case <-time.After(time.Duration((ttl*8)/10) * time.Second):
}
}
}
// Stop implements the watch.Interface interface.
func (e *etcdMasterElector) Stop() {
close(e.done)
}
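A hedged consumption sketch in the same package (the path, id, and helper name are illustrative; etcd client construction is elided):
func watchMaster(etcdClient tools.EtcdGetSet, abort <-chan struct{}) {
	elector := NewEtcdMasterElector(etcdClient)
	w := elector.Elect("/k8sm/master-lock", "scheduler-1")
	defer w.Stop() // relinquishes the lease if currently held
	for {
		select {
		case ev := <-w.ResultChan():
			if m, ok := ev.Object.(Master); ok {
				glog.Infof("current master: %v", m)
			}
		case <-abort:
			return
		}
	}
}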

View File

@@ -0,0 +1,98 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"testing"
"github.com/GoogleCloudPlatform/kubernetes/pkg/tools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
"github.com/coreos/go-etcd/etcd"
)
func TestEtcdMasterOther(t *testing.T) {
path := "foo"
etcd := tools.NewFakeEtcdClient(t)
etcd.Set(path, "baz", 0)
master := NewEtcdMasterElector(etcd)
w := master.Elect(path, "bar")
result := <-w.ResultChan()
if result.Type != watch.Modified || result.Object.(Master) != "baz" {
t.Errorf("unexpected event: %#v", result)
}
w.Stop()
}
func TestEtcdMasterNoOther(t *testing.T) {
path := "foo"
e := tools.NewFakeEtcdClient(t)
e.TestIndex = true
e.Data["foo"] = tools.EtcdResponseWithError{
R: &etcd.Response{
Node: nil,
},
E: &etcd.EtcdError{
ErrorCode: tools.EtcdErrorCodeNotFound,
},
}
master := NewEtcdMasterElector(e)
w := master.Elect(path, "bar")
result := <-w.ResultChan()
if result.Type != watch.Modified || result.Object.(Master) != "bar" {
t.Errorf("unexpected event: %#v", result)
}
w.Stop()
}
func TestEtcdMasterNoOtherThenConflict(t *testing.T) {
path := "foo"
e := tools.NewFakeEtcdClient(t)
e.TestIndex = true
// Ok, so we set up a chain of responses from etcd:
// 1) Nothing there
// 2) conflict (someone else wrote)
// 3) new value (the data they wrote)
empty := tools.EtcdResponseWithError{
R: &etcd.Response{
Node: nil,
},
E: &etcd.EtcdError{
ErrorCode: tools.EtcdErrorCodeNotFound,
},
}
empty.N = &tools.EtcdResponseWithError{
R: &etcd.Response{},
E: &etcd.EtcdError{
ErrorCode: tools.EtcdErrorCodeNodeExist,
},
}
empty.N.N = &tools.EtcdResponseWithError{
R: &etcd.Response{
Node: &etcd.Node{
Value: "baz",
},
},
}
e.Data["foo"] = empty
master := NewEtcdMasterElector(e)
w := master.Elect(path, "bar")
result := <-w.ResultChan()
if result.Type != watch.Modified || result.Object.(Master) != "bar" {
t.Errorf("unexpected event: %#v", result)
}
w.Stop()
}

View File

@@ -0,0 +1,53 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"sync"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
)
// Fake allows for testing of anything consuming a MasterElector.
type Fake struct {
mux *watch.Broadcaster
currentMaster Master
lock sync.Mutex // Protect access of currentMaster
}
// NewFake makes a new fake MasterElector.
func NewFake() *Fake {
// 0 means block for clients.
return &Fake{mux: watch.NewBroadcaster(0, watch.WaitIfChannelFull)}
}
func (f *Fake) ChangeMaster(newMaster Master) {
f.lock.Lock()
defer f.lock.Unlock()
f.mux.Action(watch.Modified, newMaster)
f.currentMaster = newMaster
}
func (f *Fake) Elect(path, id string) watch.Interface {
f.lock.Lock()
defer f.lock.Unlock()
w := f.mux.Watch()
if f.currentMaster != "" {
f.mux.Action(watch.Modified, f.currentMaster)
}
return w
}

View File

@@ -0,0 +1,134 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"sync"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
"github.com/golang/glog"
)
// MasterElector is an interface for services that can elect masters.
// Important Note: MasterElectors are not inter-operable, all participants in the election need to be
// using the same underlying implementation of this interface for correct behavior.
type MasterElector interface {
// Elect makes the caller represented by 'id' enter into a master election for the
// distributed lock defined by 'path'
// The returned watch.Interface provides a stream of Master objects which
// contain the current master.
// Calling Stop on the returned interface relinquishes ownership (if currently possessed)
// and removes the caller from the election
Elect(path, id string) watch.Interface
}
// Service represents anything that can start and stop on demand.
type Service interface {
Validate(desired, current Master)
Start()
Stop()
}
type notifier struct {
lock sync.Mutex
cond *sync.Cond
// desired is updated with every change, current is updated after
// Start()/Stop() finishes. 'cond' is used to signal that a change
// might be needed. This handles the case where mastership flops
// around without calling Start()/Stop() excessively.
desired, current Master
// for comparison, to see if we are master.
id Master
service Service
}
// Notify runs Elect() on m, and calls Start()/Stop() on s when the
// elected master starts/stops matching 'id'. Never returns.
func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
n := &notifier{id: Master(id), service: s}
n.cond = sync.NewCond(&n.lock)
finished := runtime.After(func() {
runtime.Until(func() {
for {
w := m.Elect(path, id)
eventLoop:
for {
select {
case <-abort:
return
case event, open := <-w.ResultChan():
if !open {
break eventLoop // channel closed: leave the inner loop and re-Elect
}
if event.Type != watch.Modified {
continue
}
electedMaster, ok := event.Object.(Master)
if !ok {
glog.Errorf("Unexpected object from election channel: %v", event.Object)
break
}
func() {
n.lock.Lock()
defer n.lock.Unlock()
n.desired = electedMaster
if n.desired != n.current {
n.cond.Signal()
}
}()
}
}
}
}, 0, abort)
})
runtime.Until(func() { n.serviceLoop(finished) }, 0, abort)
}
// serviceLoop waits for changes, and calls Start()/Stop() as needed.
func (n *notifier) serviceLoop(abort <-chan struct{}) {
n.lock.Lock()
defer n.lock.Unlock()
for {
select {
case <-abort:
return
default:
for n.desired == n.current {
ch := runtime.After(n.cond.Wait)
select {
case <-abort:
n.cond.Signal() // ensure that Wait() returns
<-ch
return
case <-ch:
// we were notified and have the lock, proceed..
}
}
if n.current != n.id && n.desired == n.id {
n.service.Validate(n.desired, n.current)
n.service.Start()
} else if n.current == n.id && n.desired != n.id {
n.service.Stop()
}
n.current = n.desired
}
}
}
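A minimal sketch of plugging a component into Notify; the service type and the path/id values are illustrative assumptions:
type loggingService struct{}

func (loggingService) Validate(desired, current Master) {}
func (loggingService) Start()                           { glog.Info("elected; starting") }
func (loggingService) Stop()                            { glog.Info("deposed; stopping") }

func runWhenElected(m MasterElector, abort <-chan struct{}) {
	Notify(m, "/k8sm/leader", "scheduler-1", loggingService{}, abort) // runs until abort closes
}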

View File

@@ -0,0 +1,98 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"testing"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
)
type slowService struct {
t *testing.T
on bool
// We explicitly have no lock to prove that
// Start and Stop are not called concurrently.
changes chan<- bool
done <-chan struct{}
}
func (s *slowService) Validate(d, c Master) {
// noop
}
func (s *slowService) Start() {
select {
case <-s.done:
return // avoid writing to closed changes chan
default:
}
if s.on {
s.t.Errorf("started already on service")
}
time.Sleep(2 * time.Millisecond)
s.on = true
s.changes <- true
}
func (s *slowService) Stop() {
select {
case <-s.done:
return // avoid writing to closed changes chan
default:
}
if !s.on {
s.t.Errorf("stopped already off service")
}
time.Sleep(2 * time.Millisecond)
s.on = false
s.changes <- false
}
func Test(t *testing.T) {
m := NewFake()
changes := make(chan bool, 1500)
done := make(chan struct{})
s := &slowService{t: t, changes: changes, done: done}
notifyDone := runtime.After(func() { Notify(m, "", "me", s, done) })
go func() {
defer close(done)
for i := 0; i < 500; i++ {
for _, key := range []string{"me", "notme", "alsonotme"} {
m.ChangeMaster(Master(key))
}
}
}()
<-notifyDone
close(changes)
changeList := []bool{}
for {
change, ok := <-changes
if !ok {
break
}
changeList = append(changeList, change)
}
if len(changeList) > 1000 {
t.Errorf("unexpected number of changes: %v", len(changeList))
}
}

View File

@@ -0,0 +1,29 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"time"
)
// Default values to use when constructing Mesos ExecutorInfo messages.
const (
DefaultInfoID = "k8sm-executor"
DefaultInfoSource = "kubernetes"
DefaultInfoName = "Kubelet-Executor"
DefaultSuicideTimeout = 20 * time.Minute
)

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package config contains executor configuration constants.
package config

View File

@@ -0,0 +1,21 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Package executor includes a Mesos executor, which embeds
a kubelet to manage containers.
*/
package executor

View File

@@ -0,0 +1,847 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"encoding/json"
"fmt"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
"github.com/fsouza/go-dockerclient"
"github.com/gogo/protobuf/proto"
log "github.com/golang/glog"
bindings "github.com/mesos/mesos-go/executor"
mesos "github.com/mesos/mesos-go/mesosproto"
mutil "github.com/mesos/mesos-go/mesosutil"
)
const (
containerPollTime = 300 * time.Millisecond
launchGracePeriod = 5 * time.Minute
)
type stateType int32
const (
disconnectedState stateType = iota
connectedState
suicidalState
terminalState
)
func (s *stateType) get() stateType {
return stateType(atomic.LoadInt32((*int32)(s)))
}
func (s *stateType) transition(from, to stateType) bool {
return atomic.CompareAndSwapInt32((*int32)(s), int32(from), int32(to))
}
func (s *stateType) transitionTo(to stateType, unless ...stateType) bool {
if len(unless) == 0 {
atomic.StoreInt32((*int32)(s), int32(to))
return true
}
for {
state := s.get()
for _, x := range unless {
if state == x {
return false
}
}
if s.transition(state, to) {
return true
}
}
}
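// Illustrative sketch of the semantics above:
//
//	var s stateType = disconnectedState
//	(&s).transition(disconnectedState, connectedState) // true: CAS swapped while disconnected
//	(&s).transition(disconnectedState, connectedState) // false: no longer in the from-state
//	(&s).transitionTo(suicidalState, terminalState)    // unconditional move unless already terminal
//
// transitionTo retries its CAS in a loop so that a concurrent writer cannot
// make the store silently miss.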
type kuberTask struct {
mesosTaskInfo *mesos.TaskInfo
podName string
}
// func that attempts suicide
type jumper func(bindings.ExecutorDriver, <-chan struct{})
type suicideWatcher interface {
Next(time.Duration, bindings.ExecutorDriver, jumper) suicideWatcher
Reset(time.Duration) bool
Stop() bool
}
type podStatusFunc func() (*api.PodStatus, error)
// KubernetesExecutor is a Mesos executor that runs pods
// on a minion machine.
type KubernetesExecutor struct {
kl *kubelet.Kubelet // the kubelet instance.
updateChan chan<- interface{} // to send pod config updates to the kubelet
state stateType
tasks map[string]*kuberTask
pods map[string]*api.Pod
lock sync.RWMutex
sourcename string
client *client.Client
events <-chan watch.Event
done chan struct{} // signals shutdown
outgoing chan func() (mesos.Status, error) // outgoing queue to the mesos driver
dockerClient dockertools.DockerInterface
suicideWatch suicideWatcher
suicideTimeout time.Duration
shutdownAlert func() // invoked just prior to executor shutdown
kubeletFinished <-chan struct{} // signals that kubelet Run() died
initialRegistration sync.Once
exitFunc func(int)
podStatusFunc func(*kubelet.Kubelet, *api.Pod) (*api.PodStatus, error)
}
type Config struct {
Kubelet *kubelet.Kubelet
Updates chan<- interface{} // to send pod config updates to the kubelet
SourceName string
APIClient *client.Client
Watch watch.Interface
Docker dockertools.DockerInterface
ShutdownAlert func()
SuicideTimeout time.Duration
KubeletFinished <-chan struct{} // signals that kubelet Run() died
ExitFunc func(int)
PodStatusFunc func(*kubelet.Kubelet, *api.Pod) (*api.PodStatus, error)
}
func (k *KubernetesExecutor) isConnected() bool {
return connectedState == (&k.state).get()
}
// New creates a new kubernetes executor.
func New(config Config) *KubernetesExecutor {
k := &KubernetesExecutor{
kl: config.Kubelet,
updateChan: config.Updates,
state: disconnectedState,
tasks: make(map[string]*kuberTask),
pods: make(map[string]*api.Pod),
sourcename: config.SourceName,
client: config.APIClient,
done: make(chan struct{}),
outgoing: make(chan func() (mesos.Status, error), 1024),
dockerClient: config.Docker,
suicideTimeout: config.SuicideTimeout,
kubeletFinished: config.KubeletFinished,
suicideWatch: &suicideTimer{},
shutdownAlert: config.ShutdownAlert,
exitFunc: config.ExitFunc,
podStatusFunc: config.PodStatusFunc,
}
//TODO(jdef) do something real with these events..
if config.Watch != nil {
events := config.Watch.ResultChan()
if events != nil {
go func() {
for e := range events {
// e ~= watch.Event { ADDED, *api.Event }
log.V(1).Info(e)
}
}()
k.events = events
}
}
return k
}
func (k *KubernetesExecutor) Init(driver bindings.ExecutorDriver) {
k.killKubeletContainers()
k.resetSuicideWatch(driver)
go k.sendLoop()
//TODO(jdef) monitor kubeletFinished and shutdown if it happens
}
func (k *KubernetesExecutor) Done() <-chan struct{} {
return k.done
}
func (k *KubernetesExecutor) isDone() bool {
select {
case <-k.done:
return true
default:
return false
}
}
// Registered is called when the executor is successfully registered with the slave.
func (k *KubernetesExecutor) Registered(driver bindings.ExecutorDriver,
executorInfo *mesos.ExecutorInfo, frameworkInfo *mesos.FrameworkInfo, slaveInfo *mesos.SlaveInfo) {
if k.isDone() {
return
}
log.Infof("Executor %v of framework %v registered with slave %v\n",
executorInfo, frameworkInfo, slaveInfo)
if !(&k.state).transition(disconnectedState, connectedState) {
log.Errorf("failed to register/transition to a connected state")
}
k.initialRegistration.Do(k.onInitialRegistration)
}
// Reregistered is called when the executor is successfully re-registered with the slave.
// This can happen when the slave fails over.
func (k *KubernetesExecutor) Reregistered(driver bindings.ExecutorDriver, slaveInfo *mesos.SlaveInfo) {
if k.isDone() {
return
}
log.Infof("Reregistered with slave %v\n", slaveInfo)
if !(&k.state).transition(disconnectedState, connectedState) {
log.Errorf("failed to reregister/transition to a connected state")
}
k.initialRegistration.Do(k.onInitialRegistration)
}
func (k *KubernetesExecutor) onInitialRegistration() {
// emit an empty update to allow the mesos "source" to be marked as seen
k.updateChan <- kubelet.PodUpdate{
Pods: []*api.Pod{},
Op: kubelet.SET,
Source: k.sourcename,
}
}
// Disconnected is called when the executor is disconnected from the slave.
func (k *KubernetesExecutor) Disconnected(driver bindings.ExecutorDriver) {
if k.isDone() {
return
}
log.Infof("Slave is disconnected\n")
if !(&k.state).transition(connectedState, disconnectedState) {
log.Errorf("failed to disconnect/transition to a disconnected state")
}
}
// LaunchTask is called when the executor receives a request to launch a task.
// This happens when the k8sm scheduler has decided to schedule the pod
// (which corresponds to a Mesos Task) onto the node where this executor
// is running, but the binding is not recorded in the Kubernetes store yet.
// This function is invoked to tell the executor to record the binding in the
// Kubernetes store and start the pod via the Kubelet.
func (k *KubernetesExecutor) LaunchTask(driver bindings.ExecutorDriver, taskInfo *mesos.TaskInfo) {
if k.isDone() {
return
}
log.Infof("Launch task %v\n", taskInfo)
if !k.isConnected() {
log.Errorf("Ignore launch task because the executor is disconnected\n")
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
messages.ExecutorUnregistered))
return
}
obj, err := api.Codec.Decode(taskInfo.GetData())
if err != nil {
log.Errorf("failed to extract yaml data from the taskInfo.data %v", err)
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
messages.UnmarshalTaskDataFailure))
return
}
pod, ok := obj.(*api.Pod)
if !ok {
log.Errorf("expected *api.Pod instead of %T: %+v", pod, pod)
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
messages.UnmarshalTaskDataFailure))
return
}
k.lock.Lock()
defer k.lock.Unlock()
taskId := taskInfo.GetTaskId().GetValue()
if _, found := k.tasks[taskId]; found {
log.Errorf("task already launched\n")
// Don't send TASK_RUNNING back here: this may be a duplicate message
// or a duplicate task id.
return
}
// remember this task so that:
// (a) we ignore future launches for it
// (b) we have a record of it so that we can kill it if needed
// (c) we're leaving podName == "" for now, indicates we don't need to delete containers
k.tasks[taskId] = &kuberTask{
mesosTaskInfo: taskInfo,
}
k.resetSuicideWatch(driver)
go k.launchTask(driver, taskId, pod)
}
// TODO(jdef) add metrics for this?
type suicideTimer struct {
timer *time.Timer
}
func (w *suicideTimer) Next(d time.Duration, driver bindings.ExecutorDriver, f jumper) suicideWatcher {
return &suicideTimer{
timer: time.AfterFunc(d, func() {
log.Warningf("Suicide timeout (%v) expired", d)
f(driver, nil)
}),
}
}
func (w *suicideTimer) Stop() (result bool) {
if w != nil && w.timer != nil {
log.Infoln("stopping suicide watch") //TODO(jdef) debug
result = w.timer.Stop()
}
return
}
// return true if the timer was successfully reset
func (w *suicideTimer) Reset(d time.Duration) bool {
if w != nil && w.timer != nil {
log.Infoln("resetting suicide watch") //TODO(jdef) debug
w.timer.Reset(d)
return true
}
return false
}
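// Note: time.Timer.Reset reports whether the timer was still active, not
// whether the reset "took"; the value is deliberately ignored above, which is
// safe here because resetSuicideWatch only calls Reset while holding k.lock.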
// determine whether we need to start a suicide countdown. if so, then start
// a timer that, upon expiration, causes this executor to commit suicide.
// this implementation runs asynchronously. callers that wish to wait for the
// reset to complete may wait for the returned signal chan to close.
func (k *KubernetesExecutor) resetSuicideWatch(driver bindings.ExecutorDriver) <-chan struct{} {
ch := make(chan struct{})
go func() {
defer close(ch)
k.lock.Lock()
defer k.lock.Unlock()
if k.suicideTimeout < 1 {
return
}
if k.suicideWatch != nil {
if len(k.tasks) > 0 {
k.suicideWatch.Stop()
return
}
if k.suicideWatch.Reset(k.suicideTimeout) {
// valid timer, reset was successful
return
}
}
//TODO(jdef) reduce verbosity here once we're convinced that suicide watch is working properly
log.Infof("resetting suicide watch timer for %v", k.suicideTimeout)
k.suicideWatch = k.suicideWatch.Next(k.suicideTimeout, driver, jumper(k.attemptSuicide))
}()
return ch
}
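// Usage sketch: callers that need the asynchronous reset to have completed
// (e.g. the suicide-watch tests below) simply block on the returned channel:
//
//	<-k.resetSuicideWatch(driver) // returns once the goroutine above finishes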
func (k *KubernetesExecutor) attemptSuicide(driver bindings.ExecutorDriver, abort <-chan struct{}) {
k.lock.Lock()
defer k.lock.Unlock()
// this attempt may have been queued and since been aborted
select {
case <-abort:
//TODO(jdef) reduce verbosity once suicide watch is working properly
log.Infof("aborting suicide attempt since watch was cancelled")
return
default: // continue
}
// fail-safe, will abort kamikaze attempts if there are tasks
if len(k.tasks) > 0 {
ids := []string{}
for taskid := range k.tasks {
ids = append(ids, taskid)
}
log.Errorf("suicide attempt failed, there are still running tasks: %v", ids)
return
}
log.Infoln("Attempting suicide")
if (&k.state).transitionTo(suicidalState, suicidalState, terminalState) {
//TODO(jdef) let the scheduler know?
//TODO(jdef) is suicide more graceful than slave-demanded shutdown?
k.doShutdown(driver)
}
}
// async continuation of LaunchTask
func (k *KubernetesExecutor) launchTask(driver bindings.ExecutorDriver, taskId string, pod *api.Pod) {
//HACK(jdef): cloned binding construction from k8s plugin/pkg/scheduler/scheduler.go
binding := &api.Binding{
ObjectMeta: api.ObjectMeta{
Namespace: pod.Namespace,
Name: pod.Name,
Annotations: make(map[string]string),
},
Target: api.ObjectReference{
Kind: "Node",
Name: pod.Annotations[meta.BindingHostKey],
},
}
// forward the annotations that the scheduler wants to apply
for k, v := range pod.Annotations {
binding.Annotations[k] = v
}
deleteTask := func() {
k.lock.Lock()
defer k.lock.Unlock()
delete(k.tasks, taskId)
k.resetSuicideWatch(driver)
}
log.Infof("Binding '%v/%v' to '%v' with annotations %+v...", pod.Namespace, pod.Name, binding.Target.Name, binding.Annotations)
ctx := api.WithNamespace(api.NewContext(), binding.Namespace)
// TODO(k8s): use Pods interface for binding once clusters are upgraded
// return b.Pods(binding.Namespace).Bind(binding)
err := k.client.Post().Namespace(api.NamespaceValue(ctx)).Resource("bindings").Body(binding).Do().Error()
if err != nil {
deleteTask()
k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED,
messages.CreateBindingFailure))
return
}
podFullName := container.GetPodFullName(pod)
// allow a recently failed-over scheduler the chance to recover the task/pod binding:
// it may have failed and recovered before the apiserver is able to report the updated
// binding information. replays of this status event will signal to the scheduler that
// the apiserver should be up-to-date.
data, err := json.Marshal(api.PodStatusResult{
ObjectMeta: api.ObjectMeta{
Name: podFullName,
SelfLink: "/podstatusresult",
},
})
if err != nil {
deleteTask()
log.Errorf("failed to marshal pod status result: %v", err)
k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED,
err.Error()))
return
}
k.lock.Lock()
defer k.lock.Unlock()
// Add the task.
task, found := k.tasks[taskId]
if !found {
log.V(1).Infof("task %v not found, probably killed: aborting launch, reporting lost", taskId)
k.reportLostTask(driver, taskId, messages.LaunchTaskFailed)
return
}
//TODO(jdef) check for duplicate pod name, if found send TASK_ERROR
// from here on, we need to delete containers associated with the task
// upon it going into a terminal state
task.podName = podFullName
k.pods[podFullName] = pod
// send the latest snapshot of the set of pods to the kubelet via the pod update channel.
// this results in the kubelet spinning up the new pod.
update := kubelet.PodUpdate{Op: kubelet.SET}
for _, p := range k.pods {
update.Pods = append(update.Pods, p)
}
k.updateChan <- update
statusUpdate := &mesos.TaskStatus{
TaskId: mutil.NewTaskID(taskId),
State: mesos.TaskState_TASK_STARTING.Enum(),
Message: proto.String(messages.CreateBindingSuccess),
Data: data,
}
k.sendStatus(driver, statusUpdate)
// Delay reporting 'task running' until container is up.
psf := podStatusFunc(func() (*api.PodStatus, error) {
return k.podStatusFunc(k.kl, pod)
})
go k._launchTask(driver, taskId, podFullName, psf)
}
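// Status progression for a successful launch, as implemented above and in
// _launchTask below: TASK_STARTING once the binding is created, then
// TASK_RUNNING once the status poll observes api.PodRunning; binding or
// marshalling failures yield TASK_FAILED, while a vanished task record or an
// expired grace period is reported as TASK_LOST via reportLostTask.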
func (k *KubernetesExecutor) _launchTask(driver bindings.ExecutorDriver, taskId, podFullName string, psf podStatusFunc) {
expired := make(chan struct{})
time.AfterFunc(launchGracePeriod, func() { close(expired) })
getMarshalledInfo := func() (data []byte, cancel bool) {
// potentially long call..
if podStatus, err := psf(); err == nil && podStatus != nil {
select {
case <-expired:
cancel = true
default:
k.lock.Lock()
defer k.lock.Unlock()
if _, found := k.tasks[taskId]; !found {
// don't bother with the pod status if the task is already gone
cancel = true
break
} else if podStatus.Phase != api.PodRunning {
// avoid sending back a running status before it's really running
break
}
log.V(2).Infof("Found pod status: '%v'", podStatus)
result := api.PodStatusResult{
ObjectMeta: api.ObjectMeta{
Name: podFullName,
SelfLink: "/podstatusresult",
},
Status: *podStatus,
}
if data, err = json.Marshal(result); err != nil {
log.Errorf("failed to marshal pod status result: %v", err)
}
}
}
return
}
waitForRunningPod:
for {
select {
case <-expired:
log.Warningf("Launch expired grace period of '%v'", launchGracePeriod)
break waitForRunningPod
case <-time.After(containerPollTime):
if data, cancel := getMarshalledInfo(); cancel {
break waitForRunningPod
} else if data == nil {
continue waitForRunningPod
} else {
k.lock.Lock()
defer k.lock.Unlock()
if _, found := k.tasks[taskId]; !found {
goto reportLost
}
statusUpdate := &mesos.TaskStatus{
TaskId: mutil.NewTaskID(taskId),
State: mesos.TaskState_TASK_RUNNING.Enum(),
Message: proto.String(fmt.Sprintf("pod-running:%s", podFullName)),
Data: data,
}
k.sendStatus(driver, statusUpdate)
// continue to monitor the health of the pod
go k.__launchTask(driver, taskId, podFullName, psf)
return
}
}
}
k.lock.Lock()
defer k.lock.Unlock()
reportLost:
k.reportLostTask(driver, taskId, messages.LaunchTaskFailed)
}
func (k *KubernetesExecutor) __launchTask(driver bindings.ExecutorDriver, taskId, podFullName string, psf podStatusFunc) {
// TODO(nnielsen): Monitor health of pod and report if lost.
// Should we also allow this to fail a couple of times before reporting lost?
// What if the docker daemon is restarting and we can't connect, but it's
// going to bring the pods back online as soon as it restarts?
knownPod := func() bool {
_, err := psf()
return err == nil
}
// Wait for the pod to go away and stop monitoring once it does
// TODO (jdefelice) replace with an /events watch?
for {
time.Sleep(containerPollTime)
if k.checkForLostPodTask(driver, taskId, knownPod) {
return
}
}
}
// Intended to be executed as part of the pod monitoring loop, this fn (ultimately) checks with Docker
// whether the pod is running. It will only return false if the task is still registered and the pod is
// registered in Docker. Otherwise it returns true. If there's still a task record on file, but no pod
// in Docker, then we'll also send a TASK_LOST event.
func (k *KubernetesExecutor) checkForLostPodTask(driver bindings.ExecutorDriver, taskId string, isKnownPod func() bool) bool {
// TODO (jdefelice) don't send false alarms for deleted pods (KILLED tasks)
k.lock.Lock()
defer k.lock.Unlock()
// TODO(jdef) we should really consider k.pods here, along with what docker is reporting, since the
// kubelet may constantly attempt to instantiate a pod as long as it's in the pod state that we're
// handing to it. otherwise, we're probably reporting a TASK_LOST prematurely. Should probably
// consult RestartPolicy to determine appropriate behavior. Should probably also gracefully handle
// docker daemon restarts.
if _, ok := k.tasks[taskId]; ok {
if isKnownPod() {
return false
} else {
log.Warningf("Detected lost pod, reporting lost task %v", taskId)
k.reportLostTask(driver, taskId, messages.ContainersDisappeared)
}
} else {
log.V(2).Infof("Task %v no longer registered, stop monitoring for lost pods", taskId)
}
return true
}
// KillTask is called when the executor receives a request to kill a task.
func (k *KubernetesExecutor) KillTask(driver bindings.ExecutorDriver, taskId *mesos.TaskID) {
if k.isDone() {
return
}
log.Infof("Kill task %v\n", taskId)
if !k.isConnected() {
//TODO(jdefelice) send TASK_LOST here?
log.Warningf("Ignore kill task because the executor is disconnected\n")
return
}
k.lock.Lock()
defer k.lock.Unlock()
k.removePodTask(driver, taskId.GetValue(), messages.TaskKilled, mesos.TaskState_TASK_KILLED)
}
// Reports a lost task to the slave and updates internal task and pod tracking state.
// Assumes that the caller is locking around pod and task state.
func (k *KubernetesExecutor) reportLostTask(driver bindings.ExecutorDriver, tid, reason string) {
k.removePodTask(driver, tid, reason, mesos.TaskState_TASK_LOST)
}
// deletes the pod and task associated with the task identified by tid and sends a task
// status update to mesos. also attempts to reset the suicide watch.
// Assumes that the caller is locking around pod and task state.
func (k *KubernetesExecutor) removePodTask(driver bindings.ExecutorDriver, tid, reason string, state mesos.TaskState) {
task, ok := k.tasks[tid]
if !ok {
log.V(1).Infof("Failed to remove task, unknown task %v\n", tid)
return
}
delete(k.tasks, tid)
k.resetSuicideWatch(driver)
pid := task.podName
if _, found := k.pods[pid]; !found {
log.Warningf("Cannot remove unknown pod %v for task %v", pid, tid)
} else {
log.V(2).Infof("deleting pod %v for task %v", pid, tid)
delete(k.pods, pid)
// Send the pod updates to the channel.
update := kubelet.PodUpdate{Op: kubelet.SET}
for _, p := range k.pods {
update.Pods = append(update.Pods, p)
}
k.updateChan <- update
}
// TODO(jdef): ensure that the update propagates, perhaps return a signal chan?
k.sendStatus(driver, newStatus(mutil.NewTaskID(tid), state, reason))
}
// FrameworkMessage is called when the framework sends some message to the executor
func (k *KubernetesExecutor) FrameworkMessage(driver bindings.ExecutorDriver, message string) {
if k.isDone() {
return
}
if !k.isConnected() {
log.Warningf("Ignore framework message because the executor is disconnected\n")
return
}
log.Infof("Receives message from framework %v\n", message)
//TODO(jdef) master reported a lost task, reconcile this! @see scheduler.go:handleTaskLost
if strings.HasPrefix(message, "task-lost:") && len(message) > 10 {
taskId := message[10:]
if taskId != "" {
// clean up pod state
k.lock.Lock()
defer k.lock.Unlock()
k.reportLostTask(driver, taskId, messages.TaskLostAck)
}
}
switch message {
case messages.Kamikaze:
k.attemptSuicide(driver, nil)
}
}
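// Wire format assumed by the parsing above: the scheduler sends plain strings,
// either "task-lost:<taskId>" (e.g. "task-lost:foo", as exercised by
// TestExecutorFrameworkMessage below) or the bare messages.Kamikaze constant.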
// Shutdown is called when the executor receives a shutdown request.
func (k *KubernetesExecutor) Shutdown(driver bindings.ExecutorDriver) {
k.lock.Lock()
defer k.lock.Unlock()
k.doShutdown(driver)
}
// assumes that caller has obtained state lock
func (k *KubernetesExecutor) doShutdown(driver bindings.ExecutorDriver) {
defer func() {
log.Errorf("exiting with unclean shutdown: %v", recover())
if k.exitFunc != nil {
k.exitFunc(1)
}
}()
(&k.state).transitionTo(terminalState)
// signal to all listeners that this KubeletExecutor is done!
close(k.done)
if k.shutdownAlert != nil {
func() {
defer util.HandleCrash() // deferred so that a panic in shutdownAlert is recovered
k.shutdownAlert()
}()
}
log.Infoln("Stopping executor driver")
_, err := driver.Stop()
if err != nil {
log.Warningf("failed to stop executor driver: %v", err)
}
log.Infoln("Shutdown the executor")
// according to docs, mesos will generate TASK_LOST updates for us
// if needed, so don't take extra time to do that here.
k.tasks = map[string]*kuberTask{}
select {
// the main Run() func may still be running... wait for it to finish: it will
// clear the pod configuration cleanly, telling k8s "there are no pods" and
// clean up resources (pods, volumes, etc).
case <-k.kubeletFinished:
//TODO(jdef) attempt to wait for events to propagate to API server?
// TODO(jdef) extract constant, should be smaller than whatever the
// slave graceful shutdown timeout period is.
case <-time.After(15 * time.Second):
log.Errorf("timed out waiting for kubelet Run() to die")
}
log.Infoln("exiting")
if k.exitFunc != nil {
k.exitFunc(0)
}
}
// Destroy existing k8s containers
func (k *KubernetesExecutor) killKubeletContainers() {
if containers, err := dockertools.GetKubeletDockerContainers(k.dockerClient, true); err == nil {
opts := docker.RemoveContainerOptions{
RemoveVolumes: true,
Force: true,
}
for _, container := range containers {
opts.ID = container.ID
log.V(2).Infof("Removing container: %v", opts.ID)
if err := k.dockerClient.RemoveContainer(opts); err != nil {
log.Warning(err)
}
}
} else {
log.Warningf("Failed to list kubelet docker containers: %v", err)
}
}
// Error is called when some error happens.
func (k *KubernetesExecutor) Error(driver bindings.ExecutorDriver, message string) {
log.Errorln(message)
}
func newStatus(taskId *mesos.TaskID, state mesos.TaskState, message string) *mesos.TaskStatus {
return &mesos.TaskStatus{
TaskId: taskId,
State: &state,
Message: proto.String(message),
}
}
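// For example, LaunchTask above reports a decode failure as:
//
//	k.sendStatus(driver, newStatus(taskInfo.GetTaskId(),
//		mesos.TaskState_TASK_FAILED, messages.UnmarshalTaskDataFailure))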
func (k *KubernetesExecutor) sendStatus(driver bindings.ExecutorDriver, status *mesos.TaskStatus) {
select {
case <-k.done:
default:
k.outgoing <- func() (mesos.Status, error) { return driver.SendStatusUpdate(status) }
}
}
func (k *KubernetesExecutor) sendFrameworkMessage(driver bindings.ExecutorDriver, msg string) {
select {
case <-k.done:
default:
k.outgoing <- func() (mesos.Status, error) { return driver.SendFrameworkMessage(msg) }
}
}
func (k *KubernetesExecutor) sendLoop() {
defer log.V(1).Info("sender loop exiting")
for {
select {
case <-k.done:
return
default:
if !k.isConnected() {
select {
case <-k.done:
case <-time.After(1 * time.Second):
}
continue
}
sender, ok := <-k.outgoing
if !ok {
// programming error
panic("someone closed the outgoing channel")
}
if status, err := sender(); err == nil {
continue
} else {
log.Error(err)
if status == mesos.Status_DRIVER_ABORTED {
return
}
}
// attempt to re-queue the sender
select {
case <-k.done:
case k.outgoing <- sender:
}
}
}
}
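// Note: sendStatus and sendFrameworkMessage enqueue closures instead of
// calling the driver directly, so that sendLoop can hold sends while
// disconnected and re-queue a sender whose driver call failed; this gives
// roughly at-least-once delivery toward the slave unless the driver aborts.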

View File

@@ -0,0 +1,618 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"fmt"
"net/http"
"net/http/httptest"
"reflect"
"sync"
"sync/atomic"
"testing"
"time"
assertext "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/assert"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
kmruntime "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
"github.com/golang/glog"
bindings "github.com/mesos/mesos-go/executor"
"github.com/mesos/mesos-go/mesosproto"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
)
type suicideTracker struct {
suicideWatcher
stops uint32
resets uint32
timers uint32
jumps *uint32
}
func (t *suicideTracker) Reset(d time.Duration) bool {
defer func() { t.resets++ }()
return t.suicideWatcher.Reset(d)
}
func (t *suicideTracker) Stop() bool {
defer func() { t.stops++ }()
return t.suicideWatcher.Stop()
}
func (t *suicideTracker) Next(d time.Duration, driver bindings.ExecutorDriver, f jumper) suicideWatcher {
tracker := &suicideTracker{
stops: t.stops,
resets: t.resets,
jumps: t.jumps,
timers: t.timers + 1,
}
jumper := tracker.makeJumper(f)
tracker.suicideWatcher = t.suicideWatcher.Next(d, driver, jumper)
return tracker
}
func (t *suicideTracker) makeJumper(_ jumper) jumper {
return jumper(func(driver bindings.ExecutorDriver, cancel <-chan struct{}) {
glog.Warningln("jumping?!")
if t.jumps != nil {
atomic.AddUint32(t.jumps, 1)
}
})
}
func TestSuicide_zeroTimeout(t *testing.T) {
defer glog.Flush()
k := New(Config{})
tracker := &suicideTracker{suicideWatcher: k.suicideWatch}
k.suicideWatch = tracker
ch := k.resetSuicideWatch(nil)
select {
case <-ch:
case <-time.After(2 * time.Second):
t.Fatalf("timeout waiting for reset of suicide watch")
}
if tracker.stops != 0 {
t.Fatalf("expected no stops since suicideWatchTimeout was never set")
}
if tracker.resets != 0 {
t.Fatalf("expected no resets since suicideWatchTimeout was never set")
}
if tracker.timers != 0 {
t.Fatalf("expected no timers since suicideWatchTimeout was never set")
}
}
func TestSuicide_WithTasks(t *testing.T) {
defer glog.Flush()
k := New(Config{
SuicideTimeout: 50 * time.Millisecond,
})
jumps := uint32(0)
tracker := &suicideTracker{suicideWatcher: k.suicideWatch, jumps: &jumps}
k.suicideWatch = tracker
k.tasks["foo"] = &kuberTask{} // prevent suicide attempts from succeeding
// call reset with a nil timer
glog.Infoln("resetting suicide watch with 1 task")
select {
case <-k.resetSuicideWatch(nil):
tracker = k.suicideWatch.(*suicideTracker)
if tracker.stops != 1 {
t.Fatalf("expected suicide attempt to Stop() since there are registered tasks")
}
if tracker.resets != 0 {
t.Fatalf("expected no resets since")
}
if tracker.timers != 0 {
t.Fatalf("expected no timers since")
}
case <-time.After(1 * time.Second):
t.Fatalf("initial suicide watch setup failed")
}
delete(k.tasks, "foo") // zero remaining tasks
k.suicideTimeout = 1500 * time.Millisecond
suicideStart := time.Now()
// reset the suicide watch, which should actually start a timer now
glog.Infoln("resetting suicide watch with 0 tasks")
select {
case <-k.resetSuicideWatch(nil):
tracker = k.suicideWatch.(*suicideTracker)
if tracker.stops != 1 {
t.Fatalf("did not expect suicide attempt to Stop() since there are no registered tasks")
}
if tracker.resets != 1 {
t.Fatalf("expected 1 resets instead of %d", tracker.resets)
}
if tracker.timers != 1 {
t.Fatalf("expected 1 timers instead of %d", tracker.timers)
}
case <-time.After(1 * time.Second):
t.Fatalf("2nd suicide watch setup failed")
}
k.lock.Lock()
k.tasks["foo"] = &kuberTask{} // prevent suicide attempts from succeeding
k.lock.Unlock()
// reset the suicide watch, which should stop the existing timer
glog.Infoln("resetting suicide watch with 1 task")
select {
case <-k.resetSuicideWatch(nil):
tracker = k.suicideWatch.(*suicideTracker)
if tracker.stops != 2 {
t.Fatalf("expected 2 stops instead of %d since there are registered tasks", tracker.stops)
}
if tracker.resets != 1 {
t.Fatalf("expected 1 resets instead of %d", tracker.resets)
}
if tracker.timers != 1 {
t.Fatalf("expected 1 timers instead of %d", tracker.timers)
}
case <-time.After(1 * time.Second):
t.Fatalf("3rd suicide watch setup failed")
}
k.lock.Lock()
delete(k.tasks, "foo") // allow suicide attempts to schedule
k.lock.Unlock()
// reset the suicide watch, which should reset a stopped timer
glog.Infoln("resetting suicide watch with 0 tasks")
select {
case <-k.resetSuicideWatch(nil):
tracker = k.suicideWatch.(*suicideTracker)
if tracker.stops != 2 {
t.Fatalf("expected 2 stops instead of %d since there are no registered tasks", tracker.stops)
}
if tracker.resets != 2 {
t.Fatalf("expected 2 resets instead of %d", tracker.resets)
}
if tracker.timers != 1 {
t.Fatalf("expected 1 timers instead of %d", tracker.timers)
}
case <-time.After(1 * time.Second):
t.Fatalf("4th suicide watch setup failed")
}
sinceWatch := time.Since(suicideStart)
time.Sleep(3*time.Second - sinceWatch) // give the first timer a chance to misfire (it shouldn't, since Stop() was called)
if j := atomic.LoadUint32(&jumps); j != 1 {
t.Fatalf("expected 1 jumps instead of %d since stop was called", j)
} else {
glog.Infoln("jumps verified") // glog so we get a timestamp
}
}
// TestExecutorRegister ensures that the executor thinks it is connected
// after Register is called.
func TestExecutorRegister(t *testing.T) {
mockDriver := &MockExecutorDriver{}
updates := make(chan interface{}, 1024)
executor := New(Config{
Docker: dockertools.ConnectToDockerOrDie("fake://"),
Updates: updates,
SourceName: "executor_test",
})
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
initialPodUpdate := kubelet.PodUpdate{
Pods: []*api.Pod{},
Op: kubelet.SET,
Source: executor.sourcename,
}
receivedInitialPodUpdate := false
select {
case m := <-updates:
update, ok := m.(kubelet.PodUpdate)
if ok {
if reflect.DeepEqual(initialPodUpdate, update) {
receivedInitialPodUpdate = true
}
}
case <-time.After(time.Second):
}
assert.Equal(t, true, receivedInitialPodUpdate,
"executor should have sent an initial PodUpdate "+
"to the updates chan upon registration")
assert.Equal(t, true, executor.isConnected(), "executor should be connected")
mockDriver.AssertExpectations(t)
}
// TestExecutorDisconnect ensures that the executor thinks that it is not
// connected after a call to Disconnected has occurred.
func TestExecutorDisconnect(t *testing.T) {
mockDriver := &MockExecutorDriver{}
executor := NewTestKubernetesExecutor()
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
executor.Disconnected(mockDriver)
assert.Equal(t, false, executor.isConnected(),
"executor should not be connected after Disconnected")
mockDriver.AssertExpectations(t)
}
// TestExecutorReregister ensures that the executor thinks it is connected
// after a connection problem happens, followed by a call to Reregistered.
func TestExecutorReregister(t *testing.T) {
mockDriver := &MockExecutorDriver{}
executor := NewTestKubernetesExecutor()
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
executor.Disconnected(mockDriver)
executor.Reregistered(mockDriver, nil)
assert.Equal(t, true, executor.isConnected(), "executor should be connected")
mockDriver.AssertExpectations(t)
}
// TestExecutorLaunchAndKillTask ensures that the executor is able to launch
// and kill tasks while properly bookkeeping its tasks.
func TestExecutorLaunchAndKillTask(t *testing.T) {
// create a fake pod watch. We use it below to back the fake apiserver's pod list
podListWatch := NewMockPodsListWatch(api.PodList{})
// create fake apiserver
testApiServer := NewTestServer(t, api.NamespaceDefault, &podListWatch.list)
defer testApiServer.server.Close()
mockDriver := &MockExecutorDriver{}
updates := make(chan interface{}, 1024)
config := Config{
Docker: dockertools.ConnectToDockerOrDie("fake://"),
Updates: updates,
APIClient: client.NewOrDie(&client.Config{
Host: testApiServer.server.URL,
Version: testapi.Version(),
}),
Kubelet: &kubelet.Kubelet{},
PodStatusFunc: func(kl *kubelet.Kubelet, pod *api.Pod) (*api.PodStatus, error) {
return &api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
Name: "foo",
State: api.ContainerState{
Running: &api.ContainerStateRunning{},
},
},
},
Phase: api.PodRunning,
}, nil
},
}
executor := New(config)
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
select {
case <-updates:
case <-time.After(time.Second):
t.Fatalf("Executor should send an intial update on Registration")
}
pod := NewTestPod(1)
podTask, err := podtask.New(api.NewDefaultContext(), "",
*pod, &mesosproto.ExecutorInfo{})
assert.Equal(t, nil, err, "must be able to create a task from a pod")
taskInfo := podTask.BuildTaskInfo()
data, err := testapi.Codec().Encode(pod)
assert.Equal(t, nil, err, "must be able to encode a pod's spec data")
taskInfo.Data = data
var statusUpdateCalls sync.WaitGroup
statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }
statusUpdateCalls.Add(1)
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_STARTING,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()
statusUpdateCalls.Add(1)
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_RUNNING,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()
executor.LaunchTask(mockDriver, taskInfo)
assertext.EventuallyTrue(t, 5*time.Second, func() bool {
executor.lock.Lock()
defer executor.lock.Unlock()
return len(executor.tasks) == 1 && len(executor.pods) == 1
}, "executor must be able to create a task and a pod")
gotPodUpdate := false
select {
case m := <-updates:
update, ok := m.(kubelet.PodUpdate)
if ok && len(update.Pods) == 1 {
gotPodUpdate = true
}
case <-time.After(time.Second):
}
assert.Equal(t, true, gotPodUpdate,
"the executor should send an update about a new pod to "+
"the updates chan when creating a new one.")
// Allow some time for asynchronous requests to the driver.
finished := kmruntime.After(statusUpdateCalls.Wait)
select {
case <-finished:
case <-time.After(5 * time.Second):
t.Fatalf("timed out waiting for status update calls to finish")
}
statusUpdateCalls.Add(1)
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_KILLED,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()
executor.KillTask(mockDriver, taskInfo.TaskId)
assertext.EventuallyTrue(t, 5*time.Second, func() bool {
executor.lock.Lock()
defer executor.lock.Unlock()
return len(executor.tasks) == 0 && len(executor.pods) == 0
}, "executor must be able to kill a created task and pod")
// Allow some time for asynchronous requests to the driver.
finished = kmruntime.After(statusUpdateCalls.Wait)
select {
case <-finished:
case <-time.After(5 * time.Second):
t.Fatalf("timed out waiting for status update calls to finish")
}
mockDriver.AssertExpectations(t)
}
// TestExecutorFrameworkMessage ensures that the executor is able to
// handle messages from the framework, specifically about lost tasks
// and Kamikaze. When a task is lost, the executor needs to clean up
// its state. When a Kamikaze message is received, the executor should
// attempt suicide.
func TestExecutorFrameworkMessage(t *testing.T) {
mockDriver := &MockExecutorDriver{}
kubeletFinished := make(chan struct{})
config := Config{
Docker: dockertools.ConnectToDockerOrDie("fake://"),
Updates: make(chan interface{}, 1024),
APIClient: client.NewOrDie(&client.Config{
Host: "fakehost",
Version: testapi.Version(),
}),
ShutdownAlert: func() {
close(kubeletFinished)
},
KubeletFinished: kubeletFinished,
}
executor := New(config)
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
executor.FrameworkMessage(mockDriver, "test framework message")
// set up a pod to then lose
pod := NewTestPod(1)
podTask, _ := podtask.New(api.NewDefaultContext(), "foo",
*pod, &mesosproto.ExecutorInfo{})
taskInfo := podTask.BuildTaskInfo()
data, _ := testapi.Codec().Encode(pod)
taskInfo.Data = data
executor.LaunchTask(mockDriver, taskInfo)
// send task-lost message for it
called := make(chan struct{})
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_LOST,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()
executor.FrameworkMessage(mockDriver, "task-lost:foo")
assertext.EventuallyTrue(t, 5*time.Second, func() bool {
executor.lock.Lock()
defer executor.lock.Unlock()
return len(executor.tasks) == 0 && len(executor.pods) == 0
}, "executor must be able to kill a created task and pod")
select {
case <-called:
case <-time.After(5 * time.Second):
t.Fatalf("timed out waiting for SendStatusUpdate")
}
mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()
executor.FrameworkMessage(mockDriver, messages.Kamikaze)
assert.Equal(t, true, executor.isDone(),
"executor should have shut down after receiving a Kamikaze message")
mockDriver.AssertExpectations(t)
}
// Create a pod with a given index, requiring one port
func NewTestPod(i int) *api.Pod {
name := fmt.Sprintf("pod%d", i)
return &api.Pod{
TypeMeta: api.TypeMeta{APIVersion: testapi.Version()},
ObjectMeta: api.ObjectMeta{
Name: name,
Namespace: api.NamespaceDefault,
SelfLink: testapi.SelfLink("pods", string(i)),
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Ports: []api.ContainerPort{
{
ContainerPort: 8000 + i,
Protocol: api.ProtocolTCP,
},
},
},
},
},
Status: api.PodStatus{
Conditions: []api.PodCondition{
{
Type: api.PodReady,
Status: api.ConditionTrue,
},
},
},
}
}
// Create mock of pods ListWatch, usually listening on the apiserver pods watch endpoint
type MockPodsListWatch struct {
ListWatch cache.ListWatch
fakeWatcher *watch.FakeWatcher
list api.PodList
}
// An apiserver mock that partially mocks the pods API
type TestServer struct {
server *httptest.Server
Stats map[string]uint
lock sync.Mutex
}
func NewTestServer(t *testing.T, namespace string, pods *api.PodList) *TestServer {
ts := TestServer{
Stats: map[string]uint{},
}
mux := http.NewServeMux()
mux.HandleFunc(testapi.ResourcePath("bindings", namespace, ""), func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
})
ts.server = httptest.NewServer(mux)
return &ts
}
func NewMockPodsListWatch(initialPodList api.PodList) *MockPodsListWatch {
lw := MockPodsListWatch{
fakeWatcher: watch.NewFake(),
list: initialPodList,
}
lw.ListWatch = cache.ListWatch{
WatchFunc: func(resourceVersion string) (watch.Interface, error) {
return lw.fakeWatcher, nil
},
ListFunc: func() (runtime.Object, error) {
return &lw.list, nil
},
}
return &lw
}
// TestExecutorShutdown ensures that the executor properly shuts down
// when Shutdown is called.
func TestExecutorShutdown(t *testing.T) {
mockDriver := &MockExecutorDriver{}
kubeletFinished := make(chan struct{})
var exitCalled int32 = 0
config := Config{
Docker: dockertools.ConnectToDockerOrDie("fake://"),
Updates: make(chan interface{}, 1024),
ShutdownAlert: func() {
close(kubeletFinished)
},
KubeletFinished: kubeletFinished,
ExitFunc: func(_ int) {
atomic.AddInt32(&exitCalled, 1)
},
}
executor := New(config)
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()
executor.Shutdown(mockDriver)
assert.Equal(t, false, executor.isConnected(),
"executor should not be connected after Shutdown")
assert.Equal(t, true, executor.isDone(),
"executor should be in Done state after Shutdown")
select {
case <-executor.Done():
default:
t.Fatal("done channel should be closed after shutdown")
}
assert.Equal(t, true, atomic.LoadInt32(&exitCalled) > 0,
"the executor should call its ExitFunc when it is ready to close down")
mockDriver.AssertExpectations(t)
}
func TestExecutorsendFrameworkMessage(t *testing.T) {
mockDriver := &MockExecutorDriver{}
executor := NewTestKubernetesExecutor()
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
called := make(chan struct{})
mockDriver.On(
"SendFrameworkMessage",
"foo bar baz",
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()
executor.sendFrameworkMessage(mockDriver, "foo bar baz")
// guard against data race in mock driver between AssertExpectations and Called
select {
case <-called: // expected
case <-time.After(5 * time.Second):
t.Fatalf("expected call to SendFrameworkMessage")
}
mockDriver.AssertExpectations(t)
}

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package messages exposes executor event/message names as constants.
package messages

View File

@@ -0,0 +1,32 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package messages
// messages that ship with TaskStatus objects
const (
ContainersDisappeared = "containers-disappeared"
CreateBindingFailure = "create-binding-failure"
CreateBindingSuccess = "create-binding-success"
ExecutorUnregistered = "executor-unregistered"
ExecutorShutdown = "executor-shutdown"
LaunchTaskFailed = "launch-task-failed"
TaskKilled = "task-killed"
UnmarshalTaskDataFailure = "unmarshal-task-data-failure"
TaskLostAck = "task-lost-ack" // executor acknowledgement of forwarded TASK_LOST framework message
Kamikaze = "kamikaze"
)
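// These strings travel in mesos.TaskStatus.Message; for example the executor
// package in this change reports a failed binding as:
//
//	newStatus(taskId, mesos.TaskState_TASK_FAILED, messages.CreateBindingFailure)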

View File

@@ -0,0 +1,81 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"testing"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
"github.com/mesos/mesos-go/mesosproto"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
)
type MockExecutorDriver struct {
mock.Mock
}
func (m *MockExecutorDriver) Start() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) Stop() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) Abort() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) Join() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) Run() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) SendStatusUpdate(taskStatus *mesosproto.TaskStatus) (mesosproto.Status, error) {
args := m.Called(*taskStatus.State)
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) SendFrameworkMessage(msg string) (mesosproto.Status, error) {
args := m.Called(msg)
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func NewTestKubernetesExecutor() *KubernetesExecutor {
return New(Config{
Docker: dockertools.ConnectToDockerOrDie("fake://"),
Updates: make(chan interface{}, 1024),
})
}
func TestExecutorNew(t *testing.T) {
mockDriver := &MockExecutorDriver{}
executor := NewTestKubernetesExecutor()
executor.Init(mockDriver)
assert.Equal(t, executor.isDone(), false, "executor should not be in Done state on initialization")
assert.Equal(t, executor.isConnected(), false, "executor should not be connected on initialization")
}

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package service contains the cmd/k8sm-executor glue code.
package service

View File

@@ -0,0 +1,600 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"bufio"
"fmt"
"io"
"math/rand"
"net"
"net/http"
"os"
"os/exec"
"strconv"
"strings"
"sync"
"time"
"github.com/GoogleCloudPlatform/kubernetes/cmd/kubelet/app"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/config"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/redirfd"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/credentialprovider"
"github.com/GoogleCloudPlatform/kubernetes/pkg/healthz"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor"
kconfig "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/config"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util/mount"
log "github.com/golang/glog"
"github.com/kardianos/osext"
bindings "github.com/mesos/mesos-go/executor"
"github.com/spf13/pflag"
)
const (
// if we don't use this source then the kubelet will treat the pods as static pods and create mirror pods for them.
// @see ConfigSourceAnnotationKey
MESOS_CFG_SOURCE = kubelet.ApiserverSource
)
type KubeletExecutorServer struct {
*app.KubeletServer
RunProxy bool
ProxyLogV int
ProxyExec string
ProxyLogfile string
ProxyBindall bool
SuicideTimeout time.Duration
ShutdownFD int
ShutdownFIFO string
}
func NewKubeletExecutorServer() *KubeletExecutorServer {
k := &KubeletExecutorServer{
KubeletServer: app.NewKubeletServer(),
RunProxy: true,
ProxyExec: "./kube-proxy",
ProxyLogfile: "./proxy-log",
SuicideTimeout: config.DefaultSuicideTimeout,
}
if pwd, err := os.Getwd(); err != nil {
log.Warningf("failed to determine current directory: %v", err)
} else {
k.RootDirectory = pwd // mesos sandbox dir
}
k.Address = util.IP(net.ParseIP(defaultBindingAddress()))
k.ShutdownFD = -1 // indicates unspecified FD
return k
}
func NewHyperKubeletExecutorServer() *KubeletExecutorServer {
s := NewKubeletExecutorServer()
// cache this for later use
binary, err := osext.Executable()
if err != nil {
log.Fatalf("failed to determine currently running executable: %v", err)
}
s.ProxyExec = binary
return s
}
func (s *KubeletExecutorServer) addCoreFlags(fs *pflag.FlagSet) {
s.KubeletServer.AddFlags(fs)
fs.BoolVar(&s.RunProxy, "run-proxy", s.RunProxy, "Maintain a running kube-proxy instance as a child proc of this kubelet-executor.")
fs.IntVar(&s.ProxyLogV, "proxy-logv", s.ProxyLogV, "Log verbosity of the child kube-proxy.")
fs.StringVar(&s.ProxyLogfile, "proxy-logfile", s.ProxyLogfile, "Path to the kube-proxy log file.")
fs.BoolVar(&s.ProxyBindall, "proxy-bindall", s.ProxyBindall, "When true will cause kube-proxy to bind to 0.0.0.0.")
fs.DurationVar(&s.SuicideTimeout, "suicide-timeout", s.SuicideTimeout, "Self-terminate after this period of inactivity. Zero disables suicide watch.")
fs.IntVar(&s.ShutdownFD, "shutdown-fd", s.ShutdownFD, "File descriptor used to signal shutdown to external watchers, requires shutdown-fifo flag")
fs.StringVar(&s.ShutdownFIFO, "shutdown-fifo", s.ShutdownFIFO, "FIFO used to signal shutdown to external watchers, requires shutdown-fd flag")
}
func (s *KubeletExecutorServer) AddStandaloneFlags(fs *pflag.FlagSet) {
s.addCoreFlags(fs)
fs.StringVar(&s.ProxyExec, "proxy-exec", s.ProxyExec, "Path to the kube-proxy executable.")
}
func (s *KubeletExecutorServer) AddHyperkubeFlags(fs *pflag.FlagSet) {
s.addCoreFlags(fs)
}
// returns a Closer that should be closed to signal impending shutdown, but only if ShutdownFD
// and ShutdownFIFO were specified. if they are specified, then this func blocks until there's
// a reader on the FIFO stream.
func (s *KubeletExecutorServer) syncExternalShutdownWatcher() (io.Closer, error) {
if s.ShutdownFD == -1 || s.ShutdownFIFO == "" {
return nil, nil
}
// redirfd -w n fifo ... # (blocks until the fifo is read)
log.Infof("blocked, waiting for shutdown reader for FD %d FIFO at %s", s.ShutdownFD, s.ShutdownFIFO)
return redirfd.Write.Redirect(true, false, redirfd.FileDescriptor(s.ShutdownFD), s.ShutdownFIFO)
}
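// A hypothetical external-watcher pairing (fd number and tooling are
// illustrative only, not part of this change): create a FIFO, start the
// executor with --shutdown-fd=<n> --shutdown-fifo=<path>, and open the FIFO
// for reading; the Redirect call above blocks until that reader appears, and
// closing the returned Closer later signals shutdown to the reader via EOF.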
// Run runs the specified KubeletExecutorServer.
func (s *KubeletExecutorServer) Run(hks hyperkube.Interface, _ []string) error {
rand.Seed(time.Now().UTC().UnixNano())
if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
log.Info(err)
}
var apiclient *client.Client
clientConfig, err := s.CreateAPIServerClientConfig()
if err == nil {
apiclient, err = client.New(clientConfig)
}
if err != nil {
// required for k8sm since we need to send api.Binding information
// back to the apiserver
log.Fatalf("No API client: %v", err)
}
log.Infof("Using root directory: %v", s.RootDirectory)
credentialprovider.SetPreferredDockercfgPath(s.RootDirectory)
shutdownCloser, err := s.syncExternalShutdownWatcher()
if err != nil {
return err
}
cadvisorInterface, err := cadvisor.New(s.CadvisorPort)
if err != nil {
return err
}
imageGCPolicy := kubelet.ImageGCPolicy{
HighThresholdPercent: s.ImageGCHighThresholdPercent,
LowThresholdPercent: s.ImageGCLowThresholdPercent,
}
diskSpacePolicy := kubelet.DiskSpacePolicy{
DockerFreeDiskMB: s.LowDiskSpaceThresholdMB,
RootFreeDiskMB: s.LowDiskSpaceThresholdMB,
}
//TODO(jdef) intentionally NOT initializing a cloud provider here since:
//(a) the kubelet doesn't actually use it
//(b) we don't need to create N-kubelet connections to zookeeper for no good reason
//cloud := cloudprovider.InitCloudProvider(s.CloudProvider, s.CloudConfigFile)
//log.Infof("Successfully initialized cloud provider: %q from the config file: %q\n", s.CloudProvider, s.CloudConfigFile)
hostNetworkSources, err := kubelet.GetValidatedSources(strings.Split(s.HostNetworkSources, ","))
if err != nil {
return err
}
tlsOptions, err := s.InitializeTLS()
if err != nil {
return err
}
mounter := mount.New()
if s.Containerized {
log.V(2).Info("Running kubelet in containerized mode (experimental)")
mounter = &mount.NsenterMounter{}
}
var dockerExecHandler dockertools.ExecHandler
switch s.DockerExecHandlerName {
case "native":
dockerExecHandler = &dockertools.NativeExecHandler{}
case "nsenter":
dockerExecHandler = &dockertools.NsenterExecHandler{}
default:
log.Warningf("Unknown Docker exec handler %q; defaulting to native", s.DockerExecHandlerName)
dockerExecHandler = &dockertools.NativeExecHandler{}
}
kcfg := app.KubeletConfig{
Address: s.Address,
AllowPrivileged: s.AllowPrivileged,
HostNetworkSources: hostNetworkSources,
HostnameOverride: s.HostnameOverride,
RootDirectory: s.RootDirectory,
// ConfigFile: ""
// ManifestURL: ""
// FileCheckFrequency
// HTTPCheckFrequency
PodInfraContainerImage: s.PodInfraContainerImage,
SyncFrequency: s.SyncFrequency,
RegistryPullQPS: s.RegistryPullQPS,
RegistryBurst: s.RegistryBurst,
MinimumGCAge: s.MinimumGCAge,
MaxPerPodContainerCount: s.MaxPerPodContainerCount,
MaxContainerCount: s.MaxContainerCount,
RegisterNode: s.RegisterNode,
ClusterDomain: s.ClusterDomain,
ClusterDNS: s.ClusterDNS,
Runonce: s.RunOnce,
Port: s.Port,
ReadOnlyPort: s.ReadOnlyPort,
CadvisorInterface: cadvisorInterface,
EnableServer: s.EnableServer,
EnableDebuggingHandlers: s.EnableDebuggingHandlers,
DockerClient: dockertools.ConnectToDockerOrDie(s.DockerEndpoint),
KubeClient: apiclient,
MasterServiceNamespace: s.MasterServiceNamespace,
VolumePlugins: app.ProbeVolumePlugins(),
NetworkPlugins: app.ProbeNetworkPlugins(),
NetworkPluginName: s.NetworkPluginName,
StreamingConnectionIdleTimeout: s.StreamingConnectionIdleTimeout,
TLSOptions: tlsOptions,
ImageGCPolicy: imageGCPolicy,
DiskSpacePolicy: diskSpacePolicy,
Cloud: nil, // TODO(jdef) Cloud, specifying null here because we don't want all kubelets polling mesos-master; need to account for this in the cloudprovider impl
NodeStatusUpdateFrequency: s.NodeStatusUpdateFrequency,
ResourceContainer: s.ResourceContainer,
CgroupRoot: s.CgroupRoot,
ContainerRuntime: s.ContainerRuntime,
Mounter: mounter,
DockerDaemonContainer: s.DockerDaemonContainer,
SystemContainer: s.SystemContainer,
ConfigureCBR0: s.ConfigureCBR0,
MaxPods: s.MaxPods,
DockerExecHandler: dockerExecHandler,
}
err = app.RunKubelet(&kcfg, app.KubeletBuilder(func(kc *app.KubeletConfig) (app.KubeletBootstrap, *kconfig.PodConfig, error) {
return s.createAndInitKubelet(kc, hks, clientConfig, shutdownCloser)
}))
if err != nil {
return err
}
if s.HealthzPort > 0 {
healthz.DefaultHealthz()
go util.Forever(func() {
err := http.ListenAndServe(net.JoinHostPort(s.HealthzBindAddress.String(), strconv.Itoa(s.HealthzPort)), nil)
if err != nil {
log.Errorf("Starting health server failed: %v", err)
}
}, 5*time.Second)
}
// block until executor is shut down or commits shutdown
select {}
}
func defaultBindingAddress() string {
libProcessIP := os.Getenv("LIBPROCESS_IP")
if libProcessIP == "" {
return "0.0.0.0"
} else {
return libProcessIP
}
}
func (ks *KubeletExecutorServer) createAndInitKubelet(
kc *app.KubeletConfig,
hks hyperkube.Interface,
clientConfig *client.Config,
shutdownCloser io.Closer,
) (app.KubeletBootstrap, *kconfig.PodConfig, error) {
// TODO(k8s): block until all sources have delivered at least one update to the channel, or break the sync loop
// up into "per source" synchronizations
// TODO(k8s): KubeletConfig.KubeClient should be a client interface, but client interface misses certain methods
// used by kubelet. Since NewMainKubelet expects a client interface, we need to make sure we are not passing
// a nil pointer to it when what we really want is a nil interface.
var kubeClient client.Interface
if kc.KubeClient == nil {
kubeClient = nil
} else {
kubeClient = kc.KubeClient
}
gcPolicy := kubelet.ContainerGCPolicy{
MinAge: kc.MinimumGCAge,
MaxPerPodContainer: kc.MaxPerPodContainerCount,
MaxContainers: kc.MaxContainerCount,
}
pc := kconfig.NewPodConfig(kconfig.PodConfigNotificationSnapshotAndUpdates, kc.Recorder)
updates := pc.Channel(MESOS_CFG_SOURCE)
klet, err := kubelet.NewMainKubelet(
kc.Hostname,
kc.DockerClient,
kubeClient,
kc.RootDirectory,
kc.PodInfraContainerImage,
kc.SyncFrequency,
float32(kc.RegistryPullQPS),
kc.RegistryBurst,
gcPolicy,
pc.SeenAllSources,
kc.RegisterNode,
kc.ClusterDomain,
net.IP(kc.ClusterDNS),
kc.MasterServiceNamespace,
kc.VolumePlugins,
kc.NetworkPlugins,
kc.NetworkPluginName,
kc.StreamingConnectionIdleTimeout,
kc.Recorder,
kc.CadvisorInterface,
kc.ImageGCPolicy,
kc.DiskSpacePolicy,
kc.Cloud,
kc.NodeStatusUpdateFrequency,
kc.ResourceContainer,
kc.OSInterface,
kc.CgroupRoot,
kc.ContainerRuntime,
kc.Mounter,
kc.DockerDaemonContainer,
kc.SystemContainer,
kc.ConfigureCBR0,
kc.MaxPods,
kc.DockerExecHandler,
)
if err != nil {
return nil, nil, err
}
//TODO(jdef) either configure Watch here with something useful, or else
// get rid of it from executor.Config
kubeletFinished := make(chan struct{})
exec := executor.New(executor.Config{
Kubelet: klet,
Updates: updates,
SourceName: MESOS_CFG_SOURCE,
APIClient: kc.KubeClient,
Docker: kc.DockerClient,
SuicideTimeout: ks.SuicideTimeout,
KubeletFinished: kubeletFinished,
ShutdownAlert: func() {
if shutdownCloser != nil {
if e := shutdownCloser.Close(); e != nil {
log.Warningf("failed to signal shutdown to external watcher: %v", e)
}
}
},
ExitFunc: os.Exit,
PodStatusFunc: func(kl *kubelet.Kubelet, pod *api.Pod) (*api.PodStatus, error) {
return kl.GetRuntime().GetPodStatus(pod)
},
})
k := &kubeletExecutor{
Kubelet: klet,
runProxy: ks.RunProxy,
proxyLogV: ks.ProxyLogV,
proxyExec: ks.ProxyExec,
proxyLogfile: ks.ProxyLogfile,
proxyBindall: ks.ProxyBindall,
address: ks.Address,
dockerClient: kc.DockerClient,
hks: hks,
kubeletFinished: kubeletFinished,
executorDone: exec.Done(),
clientConfig: clientConfig,
}
dconfig := bindings.DriverConfig{
Executor: exec,
HostnameOverride: ks.HostnameOverride,
BindingAddress: net.IP(ks.Address),
}
if driver, err := bindings.NewMesosExecutorDriver(dconfig); err != nil {
log.Fatalf("failed to create executor driver: %v", err)
} else {
k.driver = driver
}
log.V(2).Infof("Initialize executor driver...")
k.BirthCry()
exec.Init(k.driver)
k.StartGarbageCollection()
return k, pc, nil
}
// kubelet decorator
type kubeletExecutor struct {
*kubelet.Kubelet
initialize sync.Once
driver bindings.ExecutorDriver
runProxy bool
proxyLogV int
proxyExec string
proxyLogfile string
proxyBindall bool
address util.IP
dockerClient dockertools.DockerInterface
hks hyperkube.Interface
kubeletFinished chan struct{} // closed once kubelet.Run() returns
executorDone <-chan struct{} // from KubeletExecutor.Done()
clientConfig *client.Config
}
func (kl *kubeletExecutor) ListenAndServe(address net.IP, port uint, tlsOptions *kubelet.TLSOptions, enableDebuggingHandlers bool) {
// this func could be called many times, depending how often the HTTP server crashes,
// so only execute certain initialization procs once
kl.initialize.Do(func() {
if kl.runProxy {
go runtime.Until(kl.runProxyService, 5*time.Second, kl.executorDone)
}
go func() {
if _, err := kl.driver.Run(); err != nil {
log.Fatalf("executor driver failed: %v", err)
}
log.Info("executor Run completed")
}()
})
log.Infof("Starting kubelet server...")
kubelet.ListenAndServeKubeletServer(kl, address, port, tlsOptions, enableDebuggingHandlers)
}
// this function blocks as long as the proxy service is running; intended to be
// executed asynchronously.
func (kl *kubeletExecutor) runProxyService() {
log.Infof("Starting proxy process...")
const KM_PROXY = "proxy" //TODO(jdef) constant should be shared with km package
args := []string{}
if kl.hks.FindServer(KM_PROXY) {
args = append(args, KM_PROXY)
log.V(1).Infof("attempting to using km proxy service")
} else if _, err := os.Stat(kl.proxyExec); os.IsNotExist(err) {
log.Errorf("failed to locate proxy executable at '%v' and km not present: %v", kl.proxyExec, err)
return
}
bindAddress := "0.0.0.0"
if !kl.proxyBindall {
bindAddress = kl.address.String()
}
args = append(args,
fmt.Sprintf("--bind-address=%s", bindAddress),
fmt.Sprintf("--v=%d", kl.proxyLogV),
"--logtostderr=true",
)
// add client.Config args here. proxy still calls client.BindClientConfigFlags
appendStringArg := func(name, value string) {
if value != "" {
args = append(args, fmt.Sprintf("--%s=%s", name, value))
}
}
appendStringArg("master", kl.clientConfig.Host)
/* TODO(jdef) move these flags to a config file pointed to by --kubeconfig
appendStringArg("api-version", kl.clientConfig.Version)
appendStringArg("client-certificate", kl.clientConfig.CertFile)
appendStringArg("client-key", kl.clientConfig.KeyFile)
appendStringArg("certificate-authority", kl.clientConfig.CAFile)
args = append(args, fmt.Sprintf("--insecure-skip-tls-verify=%t", kl.clientConfig.Insecure))
*/
log.Infof("Spawning process executable %s with args '%+v'", kl.proxyExec, args)
cmd := exec.Command(kl.proxyExec, args...)
if _, err := cmd.StdoutPipe(); err != nil {
log.Fatal(err)
}
proxylogs, err := cmd.StderrPipe()
if err != nil {
log.Fatal(err)
}
//TODO(jdef) append instead of truncate? what if the disk is full?
logfile, err := os.Create(kl.proxyLogfile)
if err != nil {
log.Fatal(err)
}
defer logfile.Close()
ch := make(chan struct{})
go func() {
defer func() {
select {
case <-ch:
log.Infof("killing proxy process..")
if err = cmd.Process.Kill(); err != nil {
log.Errorf("failed to kill proxy process: %v", err)
}
default:
}
}()
writer := bufio.NewWriter(logfile)
defer writer.Flush()
<-ch
written, err := io.Copy(writer, proxylogs)
if err != nil {
log.Errorf("error writing data to proxy log: %v", err)
}
log.Infof("wrote %d bytes to proxy log", written)
}()
// if the proxy fails to start then we exit the executor, otherwise
// wait for the proxy process to end (and release resources after).
if err := cmd.Start(); err != nil {
log.Fatal(err)
}
close(ch)
if err := cmd.Wait(); err != nil {
log.Error(err)
}
}
// runs the main kubelet loop, closing the kubeletFinished chan when the loop exits.
// never returns.
func (kl *kubeletExecutor) Run(updates <-chan kubelet.PodUpdate) {
defer func() {
close(kl.kubeletFinished)
util.HandleCrash()
log.Infoln("kubelet run terminated") //TODO(jdef) turn down verbosity
// important: never return! this is in our contract
select {}
}()
// push updates through a closable pipe. when the executor indicates shutdown
// via Done() we want to stop the Kubelet from processing updates.
pipe := make(chan kubelet.PodUpdate)
go func() {
// closing pipe will cause our patched kubelet's syncLoop() to exit
defer close(pipe)
pipeLoop:
for {
select {
case <-kl.executorDone:
break pipeLoop
default:
select {
case u := <-updates:
select {
case pipe <- u: // noop
case <-kl.executorDone:
break pipeLoop
}
case <-kl.executorDone:
break pipeLoop
}
}
}
}()
// we expect that Run() will complete after the pipe is closed and the
// kubelet's syncLoop() has finished processing its backlog, which hopefully
// will not take very long. Peeking into the future (current k8s master) it
// seems that the backlog has grown from 1 to 50 -- this may negatively impact
// us going forward, time will tell.
util.Until(func() { kl.Kubelet.Run(pipe) }, 0, kl.executorDone)
//TODO(jdef) revisit this if/when executor failover lands
err := kl.SyncPods([]*api.Pod{}, nil, nil, time.Now())
if err != nil {
log.Errorf("failed to cleanly remove all pods and associated state: %v", err)
}
}

View File

@@ -0,0 +1,21 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package hyperkube facilitates the combination of multiple
// kubernetes-mesos components into a single binary form, providing a
// simple mechanism for intra-component discovery as per the original
// Kubernetes hyperkube package.
package hyperkube

View File

@@ -0,0 +1,54 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package hyperkube
import (
"github.com/spf13/pflag"
)
var (
nilKube = &nilKubeType{}
)
type Interface interface {
// FindServer will find a specific server named name.
FindServer(name string) bool
// The executable name, used for help and soft-link invocation
Name() string
// Flags returns a flagset for "global" flags.
Flags() *pflag.FlagSet
}
type nilKubeType struct{}
func (n *nilKubeType) FindServer(_ string) bool {
return false
}
func (n *nilKubeType) Name() string {
return ""
}
func (n *nilKubeType) Flags() *pflag.FlagSet {
return nil
}
func Nil() Interface {
return nilKube
}
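
// Illustrative sketch (not part of the original change): how a combined
// binary might use Interface for intra-component discovery. The runIfPresent
// helper is hypothetical.
func runIfPresent(hk Interface, name string, run func()) bool {
	if hk == nil {
		hk = Nil() // the nil implementation never finds a server
	}
	if !hk.FindServer(name) {
		return false
	}
	run()
	return true
}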

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package offers contains code that manages Mesos offers.
package offers

View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package metrics defines and exposes instrumentation metrics related to
// Mesos offers.
package metrics

View File

@@ -0,0 +1,89 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
)
const (
offerSubsystem = "mesos_offers"
)
type OfferDeclinedReason string
const (
OfferExpired = OfferDeclinedReason("expired")
OfferRescinded = OfferDeclinedReason("rescinded")
OfferCompat = OfferDeclinedReason("compat")
)
var (
OffersReceived = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: offerSubsystem,
Name: "received",
Help: "Counter of offers received from Mesos broken out by slave host.",
},
[]string{"hostname"},
)
OffersDeclined = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: offerSubsystem,
Name: "declined",
Help: "Counter of offers declined by the framework broken out by slave host.",
},
[]string{"hostname", "reason"},
)
OffersAcquired = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: offerSubsystem,
Name: "acquired",
Help: "Counter of offers acquired for task launch broken out by slave host.",
},
[]string{"hostname"},
)
OffersReleased = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: offerSubsystem,
Name: "released",
Help: "Counter of previously-acquired offers later released, broken out by slave host.",
},
[]string{"hostname"},
)
)
var registerMetrics sync.Once
func Register() {
registerMetrics.Do(func() {
prometheus.MustRegister(OffersReceived)
prometheus.MustRegister(OffersDeclined)
prometheus.MustRegister(OffersAcquired)
prometheus.MustRegister(OffersReleased)
})
}
func InMicroseconds(d time.Duration) float64 {
return float64(d.Nanoseconds() / time.Microsecond.Nanoseconds())
}
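
// Illustrative sketch (not part of the original change): typical use of the
// collectors above. Register must run once before the first Inc; the
// hostname label value here is hypothetical.
func exampleRecord() {
	Register()
	OffersReceived.WithLabelValues("slave-1.example.com").Inc()
	OffersDeclined.WithLabelValues("slave-1.example.com", string(OfferExpired)).Inc()
	_ = InMicroseconds(250 * time.Millisecond) // 250000
}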

View File

@@ -0,0 +1,570 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package offers
import (
"fmt"
"reflect"
"sync"
"sync/atomic"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers/metrics"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
)
const (
offerListenerMaxAge = 12 // max number of times we'll attempt to fit an offer to a listener before requiring them to re-register themselves
offerIdCacheTTL = 1 * time.Second // determines expiration of cached offer ids, used in listener notification
deferredDeclineTtlFactor = 2 // this factor, multiplied by the offer ttl, determines how long to wait before attempting to decline previously claimed offers that were subsequently deleted, then released. see offerStorage.Delete
notifyListenersDelay = 0 // delay between offer listener notification attempts
)
type Filter func(*mesos.Offer) bool
type Registry interface {
// Initialize the instance, spawning necessary housekeeping go routines.
Init(<-chan struct{})
// Add offers to this registry, rejecting those that are deemed incompatible.
Add([]*mesos.Offer)
// Listen for arriving offers that are acceptable to the filter, sending
// a signal on (by closing) the returned channel. A listener will only
// ever be notified once, if at all.
Listen(id string, f Filter) <-chan struct{}
// invoked when offers are rescinded or expired
Delete(string, metrics.OfferDeclinedReason)
// returns the offer registered for the given ID, if any; the boolean
// result reports whether it was found
Get(offerId string) (Perishable, bool)
// iterate through non-expired offers in this registry
Walk(Walker) error
// invalidate one or all (when offerId="") offers; offers are not declined,
// but are simply flagged as expired in the offer history
Invalidate(offerId string)
// invalidate all offers associated with the slave identified by slaveId.
InvalidateForSlave(slaveId string)
}
// callback that is invoked during a walk through a series of live offers,
// returning with stop=true (or err != nil) if the walk should stop prematurely.
type Walker func(offer Perishable) (stop bool, err error)
type RegistryConfig struct {
DeclineOffer func(offerId string) <-chan error // tell Mesos that we're declining the offer
Compat func(*mesos.Offer) bool // returns true if offer is compatible; incompatible offers are declined
TTL time.Duration // determines a perishable offer's expiration deadline: now+ttl
LingerTTL time.Duration // if zero, offers will not linger in the FIFO past their expiration deadline
ListenerDelay time.Duration // specifies the sleep time between offer listener notifications
}
type offerStorage struct {
RegistryConfig
offers *cache.FIFO // collection of Perishable, both live and expired
listeners *queue.DelayFIFO // collection of *offerListener
delayed *queue.DelayQueue // deadline-oriented offer-event queue
slaves *slaveStorage // slave to offer mappings
}
type liveOffer struct {
*mesos.Offer
expiration time.Time
acquired int32 // 1 = acquired, 0 = free
}
type expiredOffer struct {
offerSpec
deadline time.Time
}
// subset of mesos.OfferInfo useful for recordkeeping
type offerSpec struct {
id string
hostname string
}
// offers that may perish (all of them?) implement this interface.
// callers may expect to access these funcs concurrently so implementations
// must provide their own form of synchronization around mutable state.
type Perishable interface {
// returns true if this offer has expired
HasExpired() bool
// if not yet expired, return mesos offer details; otherwise nil
Details() *mesos.Offer
// mark this offer as acquired, returning true if it was previously unacquired. thread-safe.
Acquire() bool
// mark this offer as un-acquired. thread-safe.
Release()
// expire or delete this offer from storage
age(s *offerStorage)
// return a unique identifier for this offer
Id() string
// return the slave host for this offer
Host() string
addTo(*queue.DelayQueue)
}
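
// Illustrative sketch (not part of the original change): the claim/launch
// protocol that Perishable implies. tryLaunch is a hypothetical stand-in for
// a driver's launchTasks call; failed launches must Release their claim.
func claimAndLaunch(offer Perishable, tryLaunch func(*mesos.Offer) error) error {
	if !offer.Acquire() {
		return fmt.Errorf("offer %v is already claimed", offer.Id())
	}
	details := offer.Details()
	if details == nil {
		// the offer is flagged expired in storage; nothing to launch with
		offer.Release()
		return fmt.Errorf("offer %v has expired", offer.Id())
	}
	if err := tryLaunch(details); err != nil {
		offer.Release()
		return err
	}
	return nil
}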
func (e *expiredOffer) addTo(q *queue.DelayQueue) {
q.Add(e)
}
func (e *expiredOffer) Id() string {
return e.id
}
func (e *expiredOffer) Host() string {
return e.hostname
}
func (e *expiredOffer) HasExpired() bool {
return true
}
func (e *expiredOffer) Details() *mesos.Offer {
return nil
}
func (e *expiredOffer) Acquire() bool {
return false
}
func (e *expiredOffer) Release() {}
func (e *expiredOffer) age(s *offerStorage) {
log.V(3).Infof("Delete lingering offer: %v", e.id)
s.offers.Delete(e)
s.slaves.deleteOffer(e.id)
}
// return the time left to linger
func (e *expiredOffer) GetDelay() time.Duration {
return e.deadline.Sub(time.Now())
}
func (to *liveOffer) HasExpired() bool {
return time.Now().After(to.expiration)
}
func (to *liveOffer) Details() *mesos.Offer {
return to.Offer
}
func (to *liveOffer) Acquire() (acquired bool) {
if acquired = atomic.CompareAndSwapInt32(&to.acquired, 0, 1); acquired {
metrics.OffersAcquired.WithLabelValues(to.Host()).Inc()
}
return
}
func (to *liveOffer) Release() {
if released := atomic.CompareAndSwapInt32(&to.acquired, 1, 0); released {
metrics.OffersReleased.WithLabelValues(to.Host()).Inc()
}
}
func (to *liveOffer) age(s *offerStorage) {
s.Delete(to.Id(), metrics.OfferExpired)
}
func (to *liveOffer) Id() string {
return to.Offer.Id.GetValue()
}
func (to *liveOffer) Host() string {
return to.Offer.GetHostname()
}
func (to *liveOffer) addTo(q *queue.DelayQueue) {
q.Add(to)
}
// return the time remaining before the offer expires
func (to *liveOffer) GetDelay() time.Duration {
return to.expiration.Sub(time.Now())
}
func CreateRegistry(c RegistryConfig) Registry {
metrics.Register()
return &offerStorage{
RegistryConfig: c,
offers: cache.NewFIFO(cache.KeyFunc(func(v interface{}) (string, error) {
if perishable, ok := v.(Perishable); !ok {
return "", fmt.Errorf("expected perishable offer, not '%+v'", v)
} else {
return perishable.Id(), nil
}
})),
listeners: queue.NewDelayFIFO(),
delayed: queue.NewDelayQueue(),
slaves: newSlaveStorage(),
}
}
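
// Illustrative sketch (not part of the original change): minimal wiring of a
// Registry. The decline function here is a no-op stand-in for a mesos-go
// scheduler driver call; the TTL values are hypothetical.
func exampleRegistry(done <-chan struct{}) Registry {
	r := CreateRegistry(RegistryConfig{
		DeclineOffer:  func(offerId string) <-chan error { return proc.ErrorChan(nil) },
		Compat:        func(o *mesos.Offer) bool { return true },
		TTL:           30 * time.Second,
		LingerTTL:     60 * time.Second,
		ListenerDelay: time.Second,
	})
	r.Init(done)
	return r
}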
func (s *offerStorage) declineOffer(offerId, hostname string, reason metrics.OfferDeclinedReason) {
//TODO(jdef) might be nice to spec an abort chan here
runtime.Signal(proc.OnError(s.DeclineOffer(offerId), func(err error) {
log.Warningf("decline failed for offer id %v: %v", offerId, err)
}, nil)).Then(func() {
metrics.OffersDeclined.WithLabelValues(hostname, string(reason)).Inc()
})
}
func (s *offerStorage) Add(offers []*mesos.Offer) {
now := time.Now()
for _, offer := range offers {
if !s.Compat(offer) {
//TODO(jdef) would be nice to batch these up
offerId := offer.Id.GetValue()
log.V(3).Infof("Declining incompatible offer %v", offerId)
s.declineOffer(offerId, offer.GetHostname(), metrics.OfferCompat)
return
}
timed := &liveOffer{
Offer: offer,
expiration: now.Add(s.TTL),
acquired: 0,
}
log.V(3).Infof("Receiving offer %v", timed.Id())
s.offers.Add(timed)
s.delayed.Add(timed)
s.slaves.add(offer.SlaveId.GetValue(), timed.Id())
metrics.OffersReceived.WithLabelValues(timed.Host()).Inc()
}
}
// delete an offer from storage, implicitly expires the offer
func (s *offerStorage) Delete(offerId string, reason metrics.OfferDeclinedReason) {
if offer, ok := s.Get(offerId); ok {
log.V(3).Infof("Deleting offer %v", offerId)
// attempt to block others from consuming the offer. if it's already been
// claimed and is not yet lingering then don't decline it - just mark it as
// expired in the history: allow a prior claimant to attempt to launch with it
notYetClaimed := offer.Acquire()
if offer.Details() != nil {
if notYetClaimed {
log.V(3).Infof("Declining offer %v", offerId)
s.declineOffer(offerId, offer.Host(), reason)
} else {
// some pod has acquired this and may attempt to launch a task with it
// failed schedule/launch attempts are required to Release() any claims on the offer
// TODO(jdef): not sure what a good value is here. the goal is to provide a
// launchTasks (driver) operation enough time to complete so that we don't end
// up declining an offer that we're actually attempting to use.
time.AfterFunc(deferredDeclineTtlFactor*s.TTL, func() {
// at this point the offer is in one of five states:
// a) permanently deleted: expired due to timeout
// b) permanently deleted: expired due to having been rescinded
// c) lingering: expired due to timeout
// d) lingering: expired due to having been rescinded
// e) claimed: task launched and is using resources from this offer
// we want to **avoid** declining an offer that's claimed: attempt to acquire
if offer.Acquire() {
// previously claimed offer was released, perhaps due to a launch
// failure, so we should attempt to decline
log.V(3).Infof("attempting to decline (previously claimed) offer %v", offerId)
s.declineOffer(offerId, offer.Host(), reason)
}
})
}
}
s.expireOffer(offer)
} // else, ignore offers not in the history
}
func (s *offerStorage) InvalidateForSlave(slaveId string) {
offerIds := s.slaves.deleteSlave(slaveId)
for oid := range offerIds {
s.invalidateOne(oid)
}
}
// if offerId == "" then expire all known, live offers, otherwise only the offer indicated
func (s *offerStorage) Invalidate(offerId string) {
if offerId != "" {
s.invalidateOne(offerId)
return
}
obj := s.offers.List()
for _, o := range obj {
offer, ok := o.(Perishable)
if !ok {
log.Errorf("Expected perishable offer, not %v", o)
continue
}
offer.Acquire() // attempt to block others from using it
s.expireOffer(offer)
// don't decline, we already know that it's an invalid offer
}
}
func (s *offerStorage) invalidateOne(offerId string) {
if offer, ok := s.Get(offerId); ok {
offer.Acquire() // attempt to block others from using it
s.expireOffer(offer)
// don't decline, we already know that it's an invalid offer
}
}
// Walk the collection of offers. The walk stops either as indicated by the
// Walker or when the end of the offer list is reached. Expired offers are
// never passed to a Walker.
func (s *offerStorage) Walk(w Walker) error {
for _, v := range s.offers.List() {
offer, ok := v.(Perishable)
if !ok {
// offer disappeared...
continue
}
if offer.HasExpired() {
// never pass expired offers to walkers
continue
}
if stop, err := w(offer); err != nil {
return err
} else if stop {
return nil
}
}
return nil
}
func Expired(offerId, hostname string, ttl time.Duration) *expiredOffer {
return &expiredOffer{offerSpec{id: offerId, hostname: hostname}, time.Now().Add(ttl)}
}
func (s *offerStorage) expireOffer(offer Perishable) {
// the offer may or may not be expired due to TTL so check for details
// since that's a more reliable determinant of lingering status
if details := offer.Details(); details != nil {
// recently expired, should linger
offerId := details.Id.GetValue()
log.V(3).Infof("Expiring offer %v", offerId)
if s.LingerTTL > 0 {
log.V(3).Infof("offer will linger: %v", offerId)
expired := Expired(offerId, offer.Host(), s.LingerTTL)
s.offers.Update(expired)
s.delayed.Add(expired)
} else {
log.V(3).Infof("Permanently deleting offer %v", offerId)
s.offers.Delete(offerId)
s.slaves.deleteOffer(offerId)
}
} // else, it's still lingering...
}
func (s *offerStorage) Get(id string) (Perishable, bool) {
if obj, ok, _ := s.offers.GetByKey(id); !ok {
return nil, false
} else {
to, ok := obj.(Perishable)
if !ok {
log.Errorf("invalid offer object in fifo '%v'", obj)
}
return to, ok
}
}
type offerListener struct {
id string
accepts Filter
notify chan<- struct{}
age int
deadline time.Time
sawVersion uint64
}
func (l *offerListener) GetUID() string {
return l.id
}
func (l *offerListener) Deadline() (time.Time, bool) {
return l.deadline, true
}
// register a listener for new offers, whom we'll notify upon receiving such.
// notification is delivered in the form of closing the channel, nothing is ever sent.
func (s *offerStorage) Listen(id string, f Filter) <-chan struct{} {
if f == nil {
return nil
}
ch := make(chan struct{})
listen := &offerListener{
id: id,
accepts: f,
notify: ch,
deadline: time.Now().Add(s.ListenerDelay),
}
log.V(3).Infof("Registering offer listener %s", listen.id)
s.listeners.Offer(listen, queue.ReplaceExisting)
return ch
}
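
// Illustrative sketch (not part of the original change): blocking, with a
// timeout, until an acceptable offer arrives. The fits predicate is
// hypothetical; a real caller would follow up with Walk/Get to claim.
func awaitOffer(r Registry, id string, fits Filter, timeout time.Duration) bool {
	select {
	case <-r.Listen(id, fits):
		return true
	case <-time.After(timeout):
		return false
	}
}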
func (s *offerStorage) ageOffers() {
offer, ok := s.delayed.Pop().(Perishable)
if !ok {
log.Errorf("Expected Perishable, not %v", offer)
return
}
if details := offer.Details(); details != nil && !offer.HasExpired() {
// live offer has not expired yet: timed out early
// FWIW: early timeouts are more frequent when GOMAXPROCS is > 1
offer.addTo(s.delayed)
} else {
offer.age(s)
}
}
func (s *offerStorage) nextListener() *offerListener {
obj := s.listeners.Pop()
if listen, ok := obj.(*offerListener); !ok {
//programming error
panic(fmt.Sprintf("unexpected listener object %v", obj))
} else {
return listen
}
}
// notify listeners if we find an acceptable offer for them. listeners
// are garbage collected after a certain age (see offerListenerMaxAge).
// ids lists offer IDs that are retrievable from offer storage.
func (s *offerStorage) notifyListeners(ids func() (util.StringSet, uint64)) {
listener := s.nextListener() // blocking
offerIds, version := ids()
if listener.sawVersion == version {
// no changes to offer list, avoid growing older - just wait for new offers to arrive
listener.deadline = time.Now().Add(s.ListenerDelay)
s.listeners.Offer(listener, queue.KeepExisting)
return
}
listener.sawVersion = version
// notify if we find an acceptable offer
for id := range offerIds {
if offer, ok := s.Get(id); !ok || offer.HasExpired() {
continue
} else if listener.accepts(offer.Details()) {
log.V(3).Infof("Notifying offer listener %s", listener.id)
close(listener.notify)
return
}
}
// no interesting offers found, re-queue the listener
listener.age++
if listener.age < offerListenerMaxAge {
listener.deadline = time.Now().Add(s.ListenerDelay)
s.listeners.Offer(listener, queue.KeepExisting)
} else {
// garbage collection is as simple as not re-adding the listener to the queue
log.V(3).Infof("garbage collecting offer listener %s", listener.id)
}
}
func (s *offerStorage) Init(done <-chan struct{}) {
// zero delay, reap offers as soon as they expire
go runtime.Until(s.ageOffers, 0, done)
// cached offer ids for the purposes of listener notification
idCache := &stringsCache{
refill: func() util.StringSet {
result := util.NewStringSet()
for _, v := range s.offers.List() {
if offer, ok := v.(Perishable); ok {
result.Insert(offer.Id())
}
}
return result
},
ttl: offerIdCacheTTL,
}
go runtime.Until(func() { s.notifyListeners(idCache.Strings) }, notifyListenersDelay, done)
}
type stringsCache struct {
expiresAt time.Time
cached util.StringSet
ttl time.Duration
refill func() util.StringSet
version uint64
}
// not thread-safe
func (c *stringsCache) Strings() (util.StringSet, uint64) {
now := time.Now()
if c.expiresAt.Before(now) {
old := c.cached
c.cached = c.refill()
c.expiresAt = now.Add(c.ttl)
if !reflect.DeepEqual(old, c.cached) {
c.version++
}
}
return c.cached, c.version
}
type slaveStorage struct {
sync.Mutex
index map[string]string // map offerId to slaveId
}
func newSlaveStorage() *slaveStorage {
return &slaveStorage{
index: make(map[string]string),
}
}
// create a mapping between a slave and an offer
func (self *slaveStorage) add(slaveId, offerId string) {
self.Lock()
defer self.Unlock()
self.index[offerId] = slaveId
}
// delete the slave-offer mappings for slaveId, returns the IDs of the offers that were unmapped
func (self *slaveStorage) deleteSlave(slaveId string) util.StringSet {
offerIds := util.NewStringSet()
self.Lock()
defer self.Unlock()
for oid, sid := range self.index {
if sid == slaveId {
offerIds.Insert(oid)
delete(self.index, oid)
}
}
return offerIds
}
// delete the slave-offer mappings for offerId
func (self *slaveStorage) deleteOffer(offerId string) {
self.Lock()
defer self.Unlock()
delete(self.index, offerId)
}

View File

@@ -0,0 +1,391 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package offers
import (
"errors"
"sync/atomic"
"testing"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
mesos "github.com/mesos/mesos-go/mesosproto"
util "github.com/mesos/mesos-go/mesosutil"
)
func TestExpiredOffer(t *testing.T) {
t.Parallel()
ttl := 2 * time.Second
o := Expired("test", "testhost", ttl)
if o.Id() != "test" {
t.Error("expiredOffer does not return its Id")
}
if o.Host() != "testhost" {
t.Error("expiredOffer does not return its hostname")
}
if o.HasExpired() != true {
t.Error("expiredOffer is not expired")
}
if o.Details() != nil {
t.Error("expiredOffer does not return nil Details")
}
if o.Acquire() != false {
t.Error("expiredOffer must not be able to be acquired")
}
if delay := o.GetDelay(); !(0 < delay && delay <= ttl) {
t.Error("expiredOffer does not return a valid deadline")
}
} // TestExpiredOffer
func TestTimedOffer(t *testing.T) {
t.Parallel()
ttl := 2 * time.Second
now := time.Now()
o := &liveOffer{nil, now.Add(ttl), 0}
if o.HasExpired() {
t.Errorf("offer ttl was %v and should not have expired yet", ttl)
}
if !o.Acquire() {
t.Fatal("1st acquisition of offer failed")
}
o.Release()
if !o.Acquire() {
t.Fatal("2nd acquisition of offer failed")
}
if o.Acquire() {
t.Fatal("3rd acquisition of offer passed but prior claim was not released")
}
o.Release()
if !o.Acquire() {
t.Fatal("4th acquisition of offer failed")
}
o.Release()
time.Sleep(ttl)
if !o.HasExpired() {
t.Fatal("offer not expired after ttl passed")
}
if !o.Acquire() {
t.Fatal("5th acquisition of offer failed; should not be tied to expiration")
}
if o.Acquire() {
t.Fatal("6th acquisition of offer succeeded; should already be acquired")
}
} // TestTimedOffer
func TestOfferStorage(t *testing.T) {
ttl := time.Second / 4
var declinedNum int32
getDeclinedNum := func() int32 { return atomic.LoadInt32(&declinedNum) }
config := RegistryConfig{
DeclineOffer: func(offerId string) <-chan error {
atomic.AddInt32(&declinedNum, 1)
return proc.ErrorChan(nil)
},
Compat: func(o *mesos.Offer) bool {
return o.Hostname == nil || *o.Hostname != "incompatiblehost"
},
TTL: ttl,
LingerTTL: 2 * ttl,
}
storage := CreateRegistry(config)
done := make(chan struct{})
storage.Init(done)
// Add offer
id := util.NewOfferID("foo")
o := &mesos.Offer{Id: id}
storage.Add([]*mesos.Offer{o})
// Added offer should be in the storage
if obj, ok := storage.Get(id.GetValue()); obj == nil || !ok {
t.Error("offer not added")
}
if obj, _ := storage.Get(id.GetValue()); obj.Details() != o {
t.Error("added offer differs from returned offer")
}
// Not-added offer is not in storage
if obj, ok := storage.Get("bar"); obj != nil || ok {
t.Error("offer bar should not exist in storage")
}
// Deleted offer lingers in storage, is acquired and declined
offer, _ := storage.Get(id.GetValue())
declinedNumBefore := getDeclinedNum()
storage.Delete(id.GetValue(), "deleted for test")
if obj, _ := storage.Get(id.GetValue()); obj == nil {
t.Error("deleted offer is not lingering")
}
if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
t.Error("deleted offer is no expired")
}
if ok := offer.Acquire(); ok {
t.Error("deleted offer can be acquired")
}
if getDeclinedNum() <= declinedNumBefore {
t.Error("deleted offer was not declined")
}
// Acquired offer is only declined after 2*ttl
id = util.NewOfferID("foo2")
o = &mesos.Offer{Id: id}
storage.Add([]*mesos.Offer{o})
offer, _ = storage.Get(id.GetValue())
declinedNumBefore = getDeclinedNum()
offer.Acquire()
storage.Delete(id.GetValue(), "deleted for test")
if getDeclinedNum() > declinedNumBefore {
t.Error("acquired offer is declined")
}
offer.Release()
time.Sleep(3 * ttl)
if getDeclinedNum() <= declinedNumBefore {
t.Error("released offer is not declined after 2*ttl")
}
// Added offer should be expired after ttl, but lingering
id = util.NewOfferID("foo3")
o = &mesos.Offer{Id: id}
storage.Add([]*mesos.Offer{o})
time.Sleep(2 * ttl)
obj, ok := storage.Get(id.GetValue())
if obj == nil || !ok {
t.Error("offer not lingering after ttl")
}
if !obj.HasExpired() {
t.Error("offer is not expired after ttl")
}
// Should be deleted when waiting longer than LingerTTL
time.Sleep(2 * ttl)
if obj, ok := storage.Get(id.GetValue()); obj != nil || ok {
t.Error("offer not deleted after LingerTTL")
}
// Incompatible offer is declined
id = util.NewOfferID("foo4")
incompatibleHostname := "incompatiblehost"
o = &mesos.Offer{Id: id, Hostname: &incompatibleHostname}
declinedNumBefore = getDeclinedNum()
storage.Add([]*mesos.Offer{o})
if obj, ok := storage.Get(id.GetValue()); obj != nil || ok {
t.Error("incompatible offer not rejected")
}
if getDeclinedNum() <= declinedNumBefore {
t.Error("incompatible offer is not declined")
}
// Invalidated offers are not declined, but expired
id = util.NewOfferID("foo5")
o = &mesos.Offer{Id: id}
storage.Add([]*mesos.Offer{o})
offer, _ = storage.Get(id.GetValue())
declinedNumBefore = getDeclinedNum()
storage.Invalidate(id.GetValue())
if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
t.Error("invalidated offer is not expired")
}
if getDeclinedNum() > declinedNumBefore {
t.Error("invalidated offer is declined")
}
if ok := offer.Acquire(); ok {
t.Error("invalidated offer can be acquired")
}
// Invalidate "" will invalidate all offers
id = util.NewOfferID("foo6")
o = &mesos.Offer{Id: id}
storage.Add([]*mesos.Offer{o})
id2 := util.NewOfferID("foo7")
o2 := &mesos.Offer{Id: id2}
storage.Add([]*mesos.Offer{o2})
storage.Invalidate("")
if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
t.Error("invalidated offer is not expired")
}
if obj2, _ := storage.Get(id2.GetValue()); !obj2.HasExpired() {
t.Error("invalidated offer is not expired")
}
// InvalidateForSlave invalidates all offers for that slave, and only those
id = util.NewOfferID("foo8")
slaveId := util.NewSlaveID("test-slave")
o = &mesos.Offer{Id: id, SlaveId: slaveId}
storage.Add([]*mesos.Offer{o})
id2 = util.NewOfferID("foo9")
o2 = &mesos.Offer{Id: id2}
storage.Add([]*mesos.Offer{o2})
storage.InvalidateForSlave(slaveId.GetValue())
if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
t.Error("invalidated offer for test-slave is not expired")
}
if obj2, _ := storage.Get(id2.GetValue()); obj2.HasExpired() {
t.Error("invalidated offer another slave is expired")
}
close(done)
} // TestOfferStorage
func TestListen(t *testing.T) {
ttl := time.Second / 4
config := RegistryConfig{
DeclineOffer: func(offerId string) <-chan error {
return proc.ErrorChan(nil)
},
Compat: func(o *mesos.Offer) bool {
return true
},
TTL: ttl,
ListenerDelay: ttl / 2,
}
storage := CreateRegistry(config)
done := make(chan struct{})
storage.Init(done)
// Create two listeners with a hostname filter
hostname1 := "hostname1"
hostname2 := "hostname2"
listener1 := storage.Listen("listener1", func(offer *mesos.Offer) bool {
return offer.GetHostname() == hostname1
})
listener2 := storage.Listen("listener2", func(offer *mesos.Offer) bool {
return offer.GetHostname() == hostname2
})
// Add hostname1 offer
id := util.NewOfferID("foo")
o := &mesos.Offer{Id: id, Hostname: &hostname1}
storage.Add([]*mesos.Offer{o})
// listener1 is notified by closing channel
select {
case _, more := <-listener1:
if more {
t.Error("listener1 is not closed")
}
}
// listener2 is not notified within ttl
select {
case <-listener2:
t.Error("listener2 is notified")
case <-time.After(ttl):
}
close(done)
} // TestListen
func TestWalk(t *testing.T) {
t.Parallel()
config := RegistryConfig{
DeclineOffer: func(offerId string) <-chan error {
return proc.ErrorChan(nil)
},
TTL: 0 * time.Second,
LingerTTL: 0 * time.Second,
ListenerDelay: 0 * time.Second,
}
storage := CreateRegistry(config)
acceptedOfferId := ""
walked := 0
walker1 := func(p Perishable) (bool, error) {
walked++
if p.Acquire() {
acceptedOfferId = p.Details().Id.GetValue()
return true, nil
}
return false, nil
}
// sanity check
err := storage.Walk(walker1)
if err != nil {
t.Fatalf("received impossible error %v", err)
}
if walked != 0 {
t.Fatal("walked empty storage")
}
if acceptedOfferId != "" {
t.Fatal("somehow found an offer when registry was empty")
}
impl, ok := storage.(*offerStorage)
if !ok {
t.Fatal("unexpected offer storage impl")
}
// single offer
ttl := 2 * time.Second
now := time.Now()
o := &liveOffer{&mesos.Offer{Id: util.NewOfferID("foo")}, now.Add(ttl), 0}
impl.offers.Add(o)
err = storage.Walk(walker1)
if err != nil {
t.Fatalf("received impossible error %v", err)
}
if walked != 1 {
t.Fatalf("walk count %d", walked)
}
if acceptedOfferId != "foo" {
t.Fatalf("found offer %v", acceptedOfferId)
}
acceptedOfferId = ""
err = storage.Walk(walker1)
if err != nil {
t.Fatalf("received impossible error %v", err)
}
if walked != 2 {
t.Fatalf("walk count %d", walked)
}
if acceptedOfferId != "" {
t.Fatalf("found offer %v", acceptedOfferId)
}
walker2 := func(p Perishable) (bool, error) {
walked++
return true, nil
}
err = storage.Walk(walker2)
if err != nil {
t.Fatalf("received impossible error %v", err)
}
if walked != 3 {
t.Fatalf("walk count %d", walked)
}
if acceptedOfferId != "" {
t.Fatalf("found offer %v", acceptedOfferId)
}
walker3 := func(p Perishable) (bool, error) {
walked++
return true, errors.New("baz")
}
err = storage.Walk(walker3)
if err == nil {
t.Fatal("expected error")
}
if walked != 4 {
t.Fatalf("walk count %d", walked)
}
}

View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package proc provides opinionated utilities for processing background
// operations and future errors, somewhat inspired by libprocess.
package proc

View File

@@ -0,0 +1,34 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package proc
import (
"errors"
)
var (
errProcessTerminated = errors.New("cannot execute action because process has terminated")
errIllegalState = errors.New("illegal state, cannot execute action")
)
func IsProcessTerminated(err error) bool {
return err == errProcessTerminated
}
func IsIllegalState(err error) bool {
return err == errIllegalState
}

View File

@@ -0,0 +1,377 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package proc
import (
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
log "github.com/golang/glog"
)
const (
// if the action processor crashes (if some Action panics) then we
// wait this long before spinning up the action processor again.
defaultActionHandlerCrashDelay = 100 * time.Millisecond
// how many actions we can store in the backlog
defaultActionQueueDepth = 1024
)
type procImpl struct {
Config
backlog chan Action // action queue
terminate chan struct{} // signaled via close()
wg sync.WaitGroup // End() terminates when the wait is over
done runtime.Signal
state *stateType
pid uint32
writeLock sync.Mutex // avoid data race between write and close of backlog
changed *sync.Cond // wait/signal for backlog changes
engine DoerFunc // isolated this for easier unit testing later on
running chan struct{} // closes once event loop processing starts
dead chan struct{} // closes upon completion of process termination
}
type Config struct {
// cooldown period in between deferred action crashes
actionHandlerCrashDelay time.Duration
// determines the size of the deferred action backlog
actionQueueDepth uint32
}
var (
defaultConfig = Config{
actionHandlerCrashDelay: defaultActionHandlerCrashDelay,
actionQueueDepth: defaultActionQueueDepth,
}
pid uint32
closedErrChan <-chan error
)
func init() {
ch := make(chan error)
close(ch)
closedErrChan = ch
}
func New() Process {
return newConfigured(defaultConfig)
}
func newConfigured(config Config) Process {
state := stateNew
pi := &procImpl{
Config: config,
backlog: make(chan Action, config.actionQueueDepth),
terminate: make(chan struct{}),
state: &state,
pid: atomic.AddUint32(&pid, 1),
running: make(chan struct{}),
dead: make(chan struct{}),
}
pi.engine = DoerFunc(pi.doLater)
pi.changed = sync.NewCond(&pi.writeLock)
pi.wg.Add(1) // symmetrical to wg.Done() in End()
pi.done = pi.begin()
return pi
}
// returns a chan that closes upon termination of the action processing loop
func (self *procImpl) Done() <-chan struct{} {
return self.done
}
func (self *procImpl) Running() <-chan struct{} {
return self.running
}
func (self *procImpl) begin() runtime.Signal {
if !self.state.transition(stateNew, stateRunning) {
panic(fmt.Errorf("failed to transition from New to Idle state"))
}
defer log.V(2).Infof("started process %d", self.pid)
var entered runtime.Latch
// execute actions on the backlog chan
return runtime.After(func() {
runtime.Until(func() {
if entered.Acquire() {
close(self.running)
self.wg.Add(1)
}
for action := range self.backlog {
select {
case <-self.terminate:
return
default:
// signal to indicate there's room in the backlog now
self.changed.Broadcast()
// rely on Until to handle action panics
action()
}
}
}, self.actionHandlerCrashDelay, self.terminate)
}).Then(func() {
log.V(2).Infof("finished processing action backlog for process %d", self.pid)
if !entered.Acquire() {
self.wg.Done()
}
})
}
// execute some action in the context of the current process. Actions
// executed via this func are to be executed in a concurrency-safe manner:
// no two actions should execute at the same time. invocations of this func
// should not block for very long, unless the action backlog is full or the
// process is terminating.
// returns errProcessTerminated if the process already ended.
func (self *procImpl) doLater(deferredAction Action) (err <-chan error) {
a := Action(func() {
self.wg.Add(1)
defer self.wg.Done()
deferredAction()
})
scheduled := false
self.writeLock.Lock()
defer self.writeLock.Unlock()
for err == nil && !scheduled {
switch s := self.state.get(); s {
case stateRunning:
select {
case self.backlog <- a:
scheduled = true
default:
self.changed.Wait()
}
case stateTerminal:
err = ErrorChan(errProcessTerminated)
default:
err = ErrorChan(errIllegalState)
}
}
return
}
// implementation of Doer interface, schedules some action to be executed via
// the current execution engine
func (self *procImpl) Do(a Action) <-chan error {
return self.engine(a)
}
// spawn a goroutine that waits for an error. if a non-nil error is read from the
// channel then the handler func is invoked, otherwise (nil error or closed chan)
// the handler is skipped. if a nil handler is specified then it's not invoked.
// the signal chan that's returned closes once the error process logic (and handler,
// if any) has completed.
func OnError(ch <-chan error, f func(error), abort <-chan struct{}) <-chan struct{} {
return runtime.After(func() {
if ch == nil {
return
}
select {
case err, ok := <-ch:
if ok && err != nil && f != nil {
f(err)
}
case <-abort:
if f != nil {
f(errProcessTerminated)
}
}
})
}
func (self *procImpl) OnError(ch <-chan error, f func(error)) <-chan struct{} {
return OnError(ch, f, self.Done())
}
func (self *procImpl) flush() {
log.V(2).Infof("flushing action backlog for process %d", self.pid)
i := 0
//TODO: replace with `for range self.backlog` once Go 1.3 support is dropped
for {
_, open := <-self.backlog
if !open {
break
}
i++
}
log.V(2).Infof("flushed %d backlog actions for process %d", i, self.pid)
}
func (self *procImpl) End() <-chan struct{} {
if self.state.transitionTo(stateTerminal, stateTerminal) {
go func() {
defer close(self.dead)
self.writeLock.Lock()
defer self.writeLock.Unlock()
log.V(2).Infof("terminating process %d", self.pid)
close(self.backlog)
close(self.terminate)
self.wg.Done()
self.changed.Broadcast()
log.V(2).Infof("waiting for deferred actions to complete")
// wait for all pending actions to complete, then flush the backlog
self.wg.Wait()
self.flush()
}()
}
return self.dead
}
type errorOnce struct {
once sync.Once
err chan error
abort <-chan struct{}
}
func NewErrorOnce(abort <-chan struct{}) ErrorOnce {
return &errorOnce{
err: make(chan error, 1),
abort: abort,
}
}
func (b *errorOnce) Err() <-chan error {
return b.err
}
func (b *errorOnce) Reportf(msg string, args ...interface{}) {
b.Report(fmt.Errorf(msg, args...))
}
func (b *errorOnce) Report(err error) {
b.once.Do(func() {
select {
case b.err <- err:
default:
}
})
}
func (b *errorOnce) Send(errIn <-chan error) ErrorOnce {
go b.forward(errIn)
return b
}
func (b *errorOnce) forward(errIn <-chan error) {
if errIn == nil {
b.Report(nil)
return
}
select {
case err, _ := <-errIn:
b.Report(err)
case <-b.abort:
b.Report(errProcessTerminated)
}
}
type processAdapter struct {
parent Process
delegate Doer
}
func (p *processAdapter) Do(a Action) <-chan error {
if p == nil || p.parent == nil || p.delegate == nil {
return ErrorChan(errIllegalState)
}
errCh := NewErrorOnce(p.Done())
go func() {
errOuter := p.parent.Do(func() {
errInner := p.delegate.Do(a)
errCh.forward(errInner)
})
// if the outer err is !nil then either the parent failed to schedule
// the action, or else it backgrounded the scheduling task.
if errOuter != nil {
errCh.forward(errOuter)
}
}()
return errCh.Err()
}
func (p *processAdapter) End() <-chan struct{} {
if p != nil && p.parent != nil {
return p.parent.End()
}
return nil
}
func (p *processAdapter) Done() <-chan struct{} {
if p != nil && p.parent != nil {
return p.parent.Done()
}
return nil
}
func (p *processAdapter) Running() <-chan struct{} {
if p != nil && p.parent != nil {
return p.parent.Running()
}
return nil
}
func (p *processAdapter) OnError(ch <-chan error, f func(error)) <-chan struct{} {
if p != nil && p.parent != nil {
return p.parent.OnError(ch, f)
}
return nil
}
// returns a process that, within its execution context, delegates to the specified Doer.
// if the given Doer instance is nil, a valid Process is still returned though calls to its
// Do() implementation will always return errIllegalState.
// if the given Process instance is nil then in addition to the behavior in the prior sentence,
// calls to End() and Done() are effectively noops.
func DoWith(other Process, d Doer) Process {
return &processAdapter{
parent: other,
delegate: d,
}
}
func ErrorChanf(msg string, args ...interface{}) <-chan error {
return ErrorChan(fmt.Errorf(msg, args...))
}
func ErrorChan(err error) <-chan error {
if err == nil {
return closedErrChan
}
ch := make(chan error, 1)
ch <- err
return ch
}
// invoke the f on action a. returns an illegal state error if f is nil.
func (f DoerFunc) Do(a Action) <-chan error {
if f != nil {
return f(a)
}
return ErrorChan(errIllegalState)
}
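
// Illustrative sketch (not part of the original change): the expected
// lifecycle of a Process: serialize actions through Do, then End and wait
// for the backlog to drain.
func exampleLifecycle() {
	p := New()
	errCh := p.Do(func() {
		log.V(2).Infof("running inside the process context")
	})
	p.OnError(errCh, func(err error) {
		log.Errorf("action was not scheduled: %v", err)
	})
	<-p.End() // closes once termination, including the backlog flush, completes
}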

View File

@@ -0,0 +1,373 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package proc
import (
"fmt"
"sync"
"testing"
"time"
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
log "github.com/golang/glog"
)
// logs a testing.Fatalf if the elapsed time d passes before signal chan done is closed
func fatalAfter(t *testing.T, done <-chan struct{}, d time.Duration, msg string, args ...interface{}) {
select {
case <-done:
case <-time.After(d):
t.Fatalf(msg, args...)
}
}
func errorAfter(errOnce ErrorOnce, done <-chan struct{}, d time.Duration, msg string, args ...interface{}) {
select {
case <-done:
case <-time.After(d):
errOnce.Reportf(msg, args...)
}
}
// logs a testing.Fatalf if the signal chan closes before the elapsed time d passes
func fatalOn(t *testing.T, done <-chan struct{}, d time.Duration, msg string, args ...interface{}) {
select {
case <-done:
t.Fatalf(msg, args...)
case <-time.After(d):
}
}
func TestProc_manyEndings(t *testing.T) {
p := New()
const COUNT = 20
var wg sync.WaitGroup
wg.Add(COUNT)
for i := 0; i < COUNT; i++ {
runtime.On(p.End(), wg.Done)
}
fatalAfter(t, runtime.After(wg.Wait), 5*time.Second, "timed out waiting for loose End()s")
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_singleAction(t *testing.T) {
p := New()
scheduled := make(chan struct{})
called := make(chan struct{})
go func() {
log.Infof("do'ing deferred action")
defer close(scheduled)
err := p.Do(func() {
defer close(called)
log.Infof("deferred action invoked")
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}()
fatalAfter(t, scheduled, 5*time.Second, "timed out waiting for deferred action to be scheduled")
fatalAfter(t, called, 5*time.Second, "timed out waiting for deferred action to be invoked")
p.End()
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_singleActionEnd(t *testing.T) {
p := New()
scheduled := make(chan struct{})
called := make(chan struct{})
go func() {
log.Infof("do'ing deferred action")
defer close(scheduled)
err := p.Do(func() {
defer close(called)
log.Infof("deferred action invoked")
p.End()
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}()
fatalAfter(t, scheduled, 5*time.Second, "timed out waiting for deferred action to be scheduled")
fatalAfter(t, called, 5*time.Second, "timed out waiting for deferred action to be invoked")
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_multiAction(t *testing.T) {
p := New()
const COUNT = 10
var called sync.WaitGroup
called.Add(COUNT)
// test FIFO property
next := 0
for i := 0; i < COUNT; i++ {
log.Infof("do'ing deferred action %d", i)
idx := i
err := p.Do(func() {
defer called.Done()
log.Infof("deferred action invoked")
if next != idx {
t.Fatalf("expected index %d instead of %d", idx, next)
}
next++
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}
fatalAfter(t, runtime.After(called.Wait), 2*time.Second, "timed out waiting for deferred actions to be invoked")
p.End()
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_goodLifecycle(t *testing.T) {
p := New()
p.End()
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_doWithDeadProc(t *testing.T) {
p := New()
p.End()
time.Sleep(100 * time.Millisecond)
errUnexpected := fmt.Errorf("unexpected execution of delegated action")
decorated := DoWith(p, DoerFunc(func(_ Action) <-chan error {
return ErrorChan(errUnexpected)
}))
decorated.Do(func() {})
fatalAfter(t, decorated.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_doWith(t *testing.T) {
p := New()
delegated := false
decorated := DoWith(p, DoerFunc(func(a Action) <-chan error {
delegated = true
a()
return nil
}))
executed := make(chan struct{})
err := decorated.Do(func() {
defer close(executed)
if !delegated {
t.Fatalf("expected delegated execution")
}
})
if err == nil {
t.Fatalf("expected !nil error chan")
}
fatalAfter(t, executed, 5*time.Second, "timed out waiting for deferred execution")
fatalAfter(t, decorated.OnError(err, func(e error) {
t.Fatalf("unexpected error: %v", err)
}), 1*time.Second, "timed out waiting for doer result")
decorated.End()
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_doWithNestedTwice(t *testing.T) {
p := New()
delegated := false
decorated := DoWith(p, DoerFunc(func(a Action) <-chan error {
a()
return nil
}))
decorated2 := DoWith(decorated, DoerFunc(func(a Action) <-chan error {
delegated = true
a()
return nil
}))
executed := make(chan struct{})
err := decorated2.Do(func() {
defer close(executed)
if !delegated {
t.Fatalf("expected delegated execution")
}
})
if err == nil {
t.Fatalf("expected !nil error chan")
}
fatalAfter(t, executed, 5*time.Second, "timed out waiting for deferred execution")
fatalAfter(t, decorated2.OnError(err, func(e error) {
t.Fatalf("unexpected error: %v", err)
}), 1*time.Second, "timed out waiting for doer result")
decorated2.End()
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_doWithNestedErrorPropagation(t *testing.T) {
p := New()
delegated := false
decorated := DoWith(p, DoerFunc(func(a Action) <-chan error {
a()
return nil
}))
expectedErr := fmt.Errorf("expecting this")
errOnce := NewErrorOnce(p.Done())
decorated2 := DoWith(decorated, DoerFunc(func(a Action) <-chan error {
delegated = true
a()
errOnce.Reportf("unexpected error in decorator2")
return ErrorChanf("another unexpected error in decorator2")
}))
executed := make(chan struct{})
err := decorated2.Do(func() {
defer close(executed)
if !delegated {
t.Fatalf("expected delegated execution")
}
errOnce.Report(expectedErr)
})
if err == nil {
t.Fatalf("expected !nil error chan")
}
errOnce.Send(err)
foundError := false
fatalAfter(t, executed, 1*time.Second, "timed out waiting for deferred execution")
fatalAfter(t, decorated2.OnError(errOnce.Err(), func(e error) {
if e != expectedErr {
t.Fatalf("unexpected error: %v", err)
} else {
foundError = true
}
}), 1*time.Second, "timed out waiting for doer result")
if !foundError {
t.Fatalf("expected a propagated error")
}
decorated2.End()
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func runDelegationTest(t *testing.T, p Process, name string, errOnce ErrorOnce) {
defer func() {
t.Logf("runDelegationTest finished at " + time.Now().String())
}()
var decorated Process
decorated = p
const DEPTH = 100
var wg sync.WaitGroup
wg.Add(DEPTH)
y := 0
for x := 1; x <= DEPTH; x++ {
x := x
nextp := DoWith(decorated, DoerFunc(func(a Action) <-chan error {
if x == 1 {
t.Logf("delegate chain invoked for " + name)
}
y++
if y != x {
return ErrorChanf("out of order delegated execution")
}
defer wg.Done()
a()
return nil
}))
decorated = nextp
}
executed := make(chan struct{})
errCh := decorated.Do(func() {
defer close(executed)
if y != DEPTH {
errOnce.Reportf("expected delegated execution")
}
t.Logf("executing deferred action: " + name + " at " + time.Now().String())
errOnce.Send(nil) // we completed without error, let the listener know
})
if errCh == nil {
t.Fatalf("expected !nil error chan")
}
// forward any scheduling errors to the listener; NOTHING else should attempt to read
// from errCh after this point
errOnce.Send(errCh)
errorAfter(errOnce, executed, 5*time.Second, "timed out waiting for deferred execution")
t.Logf("runDelegationTest received executed signal at " + time.Now().String())
}
func TestProc_doWithNestedX(t *testing.T) {
t.Logf("starting test case at " + time.Now().String())
p := New()
errOnce := NewErrorOnce(p.Done())
runDelegationTest(t, p, "nested", errOnce)
<-p.End()
select {
case err := <-errOnce.Err():
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
case <-time.After(5 * time.Second):
t.Fatalf("timed out waiting for doer result")
}
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
// intended to be run with -race
func TestProc_doWithNestedXConcurrent(t *testing.T) {
p := New()
errOnce := NewErrorOnce(p.Done())
var wg sync.WaitGroup
const CONC = 20
wg.Add(CONC)
for i := 0; i < CONC; i++ {
i := i
runtime.After(func() { runDelegationTest(t, p, fmt.Sprintf("nested%d", i), errOnce) }).Then(wg.Done)
}
ch := runtime.After(wg.Wait)
fatalAfter(t, ch, 10*time.Second, "timed out waiting for concurrent delegates")
<-p.End()
select {
case err := <-errOnce.Err():
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
case <-time.After(5 * time.Second):
t.Fatalf("timed out waiting for doer result")
}
fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}

View File

@@ -0,0 +1,55 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package proc
import (
"sync/atomic"
)
type stateType int32
const (
stateNew stateType = iota
stateRunning
stateTerminal
)
func (s *stateType) get() stateType {
return stateType(atomic.LoadInt32((*int32)(s)))
}
func (s *stateType) transition(from, to stateType) bool {
return atomic.CompareAndSwapInt32((*int32)(s), int32(from), int32(to))
}
func (s *stateType) transitionTo(to stateType, unless ...stateType) bool {
if len(unless) == 0 {
atomic.StoreInt32((*int32)(s), int32(to))
return true
}
for {
state := s.get()
for _, x := range unless {
if state == x {
return false
}
}
if s.transition(state, to) {
return true
}
}
}
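
// Illustrative sketch (not part of the original change): how the process
// implementation uses these primitives. transition performs a one-way CAS,
// while transitionTo makes entering the terminal state idempotent.
func exampleTransitions() {
	s := stateNew
	_ = s.transition(stateNew, stateRunning) // succeeds exactly once
	// returns false only if the state is already stateTerminal, so End()
	// is safe to call from multiple goroutines.
	_ = s.transitionTo(stateTerminal, stateTerminal)
}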

View File

@@ -0,0 +1,71 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package proc
// something that executes in the context of a process
type Action func()
type Context interface {
// end (terminate) the execution context
End() <-chan struct{}
// return a signal chan that will close upon the termination of this process
Done() <-chan struct{}
}
type Doer interface {
// execute some action in some context. actions are to be executed in a
// concurrency-safe manner: no two actions should execute at the same time.
// errors are generated if the action cannot be executed (not by the execution
// of the action) and should be testable with the error API of this package,
// for example, IsProcessTerminated.
Do(Action) <-chan error
}
// adapter func for Doer interface
type DoerFunc func(Action) <-chan error
type Process interface {
Context
Doer
// see top level OnError func. this implementation will terminate upon the arrival of
// an error (and subsequently invoke the error handler, if given) or else the termination
// of the process (testable via IsProcessTerminated).
OnError(<-chan error, func(error)) <-chan struct{}
// return a signal chan that will close once the process is ready to run actions
Running() <-chan struct{}
}
// this is an error promise. if we ever start building out support for other promise types it will probably
// make sense to group them in some sort of "promises" package.
type ErrorOnce interface {
// return a chan that only ever sends one error, either obtained via Report() or Forward()
Err() <-chan error
// reports the given error via Err(), but only if no other errors have been reported or forwarded
Report(error)
Reportf(string, ...interface{})
// waits for an error on the incoming chan, the result of which is later obtained via Err() (if no
// other errors have been reported or forwarded)
forward(<-chan error)
// non-blocking, spins up a goroutine that reports an error (if any) that occurs on the error chan.
Send(<-chan error) ErrorOnce
}

View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package profile contains reusable code for profiling Go programs with pprof.
package profile

View File

@@ -0,0 +1,27 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package profile
import "net/http"
import "net/http/pprof"
func InstallHandler(m *http.ServeMux) {
// register similar endpoints as net/http/pprof.init() does
m.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index))
m.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile))
m.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol))
}
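
// Illustrative sketch (not part of the original change): serving the pprof
// endpoints from a component's debug mux. The listen address is hypothetical.
func exampleServe() {
	mux := http.NewServeMux()
	InstallHandler(mux)
	go http.ListenAndServe("127.0.0.1:6060", mux)
}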

View File

@@ -0,0 +1,373 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
import (
"container/heap"
"sync"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
)
type qitem struct {
value interface{}
priority Priority
index int
readd func(item *qitem) // re-add the value of the item to the queue
}
// A priorityQueue implements heap.Interface and holds qitems.
type priorityQueue []*qitem
func (pq priorityQueue) Len() int { return len(pq) }
func (pq priorityQueue) Less(i, j int) bool {
return pq[i].priority.ts.Before(pq[j].priority.ts)
}
func (pq priorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
pq[i].index = i
pq[j].index = j
}
func (pq *priorityQueue) Push(x interface{}) {
n := len(*pq)
item := x.(*qitem)
item.index = n
*pq = append(*pq, item)
}
func (pq *priorityQueue) Pop() interface{} {
old := *pq
n := len(old)
item := old[n-1]
item.index = -1 // for safety
*pq = old[0 : n-1]
return item
}
// DelayQueue is a concurrency-safe, deadline-oriented queue that returns items
// after their delay period has expired.
type DelayQueue struct {
queue priorityQueue
lock sync.RWMutex
cond sync.Cond
}
func NewDelayQueue() *DelayQueue {
q := &DelayQueue{}
q.cond.L = &q.lock
return q
}
func (q *DelayQueue) Add(d Delayed) {
deadline := extractFromDelayed(d)
q.lock.Lock()
defer q.lock.Unlock()
// readd using the original deadline computed from the original delay
var readd func(*qitem)
readd = func(qp *qitem) {
q.lock.Lock()
defer q.lock.Unlock()
heap.Push(&q.queue, &qitem{
value: d,
priority: deadline,
readd: readd,
})
q.cond.Broadcast()
}
heap.Push(&q.queue, &qitem{
value: d,
priority: deadline,
readd: readd,
})
q.cond.Broadcast()
}
// Offer adds `d` to the queue and returns true if d.Deadline() reports a
// deadline; otherwise the item is not enqueued and this func returns false.
func (q *DelayQueue) Offer(d Deadlined) bool {
deadline, ok := extractFromDeadlined(d)
if ok {
q.lock.Lock()
defer q.lock.Unlock()
heap.Push(&q.queue, &qitem{
value: d,
priority: deadline,
readd: func(qp *qitem) {
q.Offer(qp.value.(Deadlined))
},
})
q.cond.Broadcast()
}
return ok
}
// Pop waits for the delay of the next item in the queue to expire, blocking if
// there are no items in the queue. It does not guarantee first-come-first-served
// ordering with respect to clients.
func (q *DelayQueue) Pop() interface{} {
// doesn't implement cancellation, will always return a non-nil value
return q.pop(func() *qitem {
q.lock.Lock()
defer q.lock.Unlock()
for q.queue.Len() == 0 {
q.cond.Wait()
}
x := heap.Pop(&q.queue)
item := x.(*qitem)
return item
}, nil)
}
// returns a non-nil value from the queue, or else nil if/when cancelled; if cancel
// is nil then cancellation is disabled and this func must return a non-nil value.
func (q *DelayQueue) pop(next func() *qitem, cancel <-chan struct{}) interface{} {
var ch chan struct{}
for {
item := next()
if item == nil {
// cancelled
return nil
}
x := item.value
waitingPeriod := item.priority.ts.Sub(time.Now())
if waitingPeriod >= 0 {
// listen for calls to Add() while we're waiting for the deadline
if ch == nil {
ch = make(chan struct{}, 1)
}
go func() {
q.lock.Lock()
defer q.lock.Unlock()
q.cond.Wait()
ch <- struct{}{}
}()
select {
case <-cancel:
item.readd(item)
return nil
case <-ch:
// we may no longer have the earliest deadline, re-try
item.readd(item)
continue
case <-time.After(waitingPeriod):
// noop
case <-item.priority.notify:
// noop
}
}
return x
}
}
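// An illustrative in-package sketch of the Add/Pop contract; delayedTask is
// not part of this diff. Pop yields the item with the earliest deadline once
// that deadline expires, regardless of insertion order.
type delayedTask struct {
	name  string
	until time.Time
}

func (t *delayedTask) GetDelay() time.Duration { return t.until.Sub(time.Now()) }

func exampleDelayQueue() *delayedTask {
	q := NewDelayQueue()
	q.Add(&delayedTask{name: "later", until: time.Now().Add(2 * time.Second)})
	q.Add(&delayedTask{name: "sooner", until: time.Now().Add(time.Second)})
	return q.Pop().(*delayedTask) // blocks ~1s, then returns "sooner"
}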
// If multiple adds/updates of a single item happen while an item is in the
// queue before it has been processed, it will only be processed once, and
// when it is processed, the most recent version will be processed. Items are
// popped in order of their priority, currently controlled by a delay or
// deadline assigned to each item in the queue.
type DelayFIFO struct {
// internal deadline-based priority queue
delegate *DelayQueue
// We depend on the property that items in the set are in the queue and vice versa.
items map[string]*qitem
deadlinePolicy DeadlinePolicy
}
func (q *DelayFIFO) lock() {
q.delegate.lock.Lock()
}
func (q *DelayFIFO) unlock() {
q.delegate.lock.Unlock()
}
func (q *DelayFIFO) rlock() {
q.delegate.lock.RLock()
}
func (q *DelayFIFO) runlock() {
q.delegate.lock.RUnlock()
}
func (q *DelayFIFO) queue() *priorityQueue {
return &q.delegate.queue
}
func (q *DelayFIFO) cond() *sync.Cond {
return &q.delegate.cond
}
// Add inserts an item and puts it in the queue. If an item with the same UID
// already exists, its value and deadline are updated according to the given
// replacement policy and the queue's deadline policy.
func (q *DelayFIFO) Add(d UniqueDelayed, rp ReplacementPolicy) {
deadline := extractFromDelayed(d)
id := d.GetUID()
var adder func(*qitem)
adder = func(*qitem) {
q.add(id, deadline, d, KeepExisting, adder)
}
q.add(id, deadline, d, rp, adder)
}
func (q *DelayFIFO) Offer(d UniqueDeadlined, rp ReplacementPolicy) bool {
if deadline, ok := extractFromDeadlined(d); ok {
id := d.GetUID()
q.add(id, deadline, d, rp, func(qp *qitem) { q.Offer(qp.value.(UniqueDeadlined), KeepExisting) })
return true
}
return false
}
func (q *DelayFIFO) add(id string, deadline Priority, value interface{}, rp ReplacementPolicy, adder func(*qitem)) {
q.lock()
defer q.unlock()
if item, exists := q.items[id]; !exists {
item = &qitem{
value: value,
priority: deadline,
readd: adder,
}
heap.Push(q.queue(), item)
q.items[id] = item
} else {
// this is an update of an existing item
item.value = rp.replacementValue(item.value, value)
item.priority = q.deadlinePolicy.nextDeadline(item.priority, deadline)
heap.Fix(q.queue(), item.index)
}
q.cond().Broadcast()
}
// Delete removes an item from the set. Its queue entry is left in place and
// lazily discarded by pop(), because this implementation assumes the consumer
// only cares about the objects, not their priority order.
func (f *DelayFIFO) Delete(id string) {
f.lock()
defer f.unlock()
delete(f.items, id)
}
// List returns a list of all the items.
func (f *DelayFIFO) List() []UniqueID {
f.rlock()
defer f.runlock()
list := make([]UniqueID, 0, len(f.items))
for _, item := range f.items {
list = append(list, item.value.(UniqueDelayed))
}
return list
}
// ContainedIDs returns a util.StringSet containing all IDs of the stored items.
// This is a snapshot of a moment in time, and one should keep in mind that
// other goroutines can add or remove items after you call this.
func (c *DelayFIFO) ContainedIDs() util.StringSet {
c.rlock()
defer c.runlock()
set := util.StringSet{}
for id := range c.items {
set.Insert(id)
}
return set
}
// Get returns the requested item, or sets exists=false.
func (f *DelayFIFO) Get(id string) (UniqueID, bool) {
f.rlock()
defer f.runlock()
if item, exists := f.items[id]; exists {
return item.value.(UniqueID), true
}
return nil, false
}
// Await is a variant of Pop() that returns nil if no item can be popped within the given timeout
func (q *DelayFIFO) Await(timeout time.Duration) UniqueID {
cancel := make(chan struct{})
ch := make(chan interface{}, 1)
go func() { ch <- q.pop(cancel) }()
var x interface{}
select {
case <-time.After(timeout):
close(cancel)
x = <-ch
case x = <-ch:
// noop
}
if x != nil {
return x.(UniqueID)
}
return nil
}
// Variant of DelayQueue.Pop() for UniqueDelayed items
func (q *DelayFIFO) Pop() UniqueID {
return q.pop(nil).(UniqueID)
}
// variant of DelayQueue.Pop that implements optional cancellation
func (q *DelayFIFO) pop(cancel chan struct{}) interface{} {
next := func() *qitem {
q.lock()
defer q.unlock()
for {
for q.queue().Len() == 0 {
signal := make(chan struct{})
go func() {
defer close(signal)
q.cond().Wait()
}()
select {
case <-cancel:
// we may not have the lock yet, so
// broadcast to abort Wait, then
// return after lock re-acquisition
q.cond().Broadcast()
<-signal
return nil
case <-signal:
// we have the lock, re-check
// the queue for data...
}
}
x := heap.Pop(q.queue())
item := x.(*qitem)
unique := item.value.(UniqueID)
uid := unique.GetUID()
if _, ok := q.items[uid]; !ok {
// item was deleted, keep looking
continue
}
delete(q.items, uid)
return item
}
}
return q.delegate.pop(next, cancel)
}
func NewDelayFIFO() *DelayFIFO {
f := &DelayFIFO{
delegate: NewDelayQueue(),
items: map[string]*qitem{},
}
return f
}
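// An illustrative sketch of DelayFIFO's coalescing semantics; uniqueTask is
// not part of this diff. Re-adding an item with the same UID before it pops
// updates the queued entry rather than enqueueing a duplicate.
type uniqueTask struct {
	uid string
	d   time.Duration
}

func (t *uniqueTask) GetUID() string          { return t.uid }
func (t *uniqueTask) GetDelay() time.Duration { return t.d }

func exampleDelayFIFO() {
	q := NewDelayFIFO()
	q.Add(&uniqueTask{uid: "a", d: time.Second}, ReplaceExisting)
	q.Add(&uniqueTask{uid: "a", d: time.Second}, ReplaceExisting) // coalesced with the entry above
	first := q.Await(2 * time.Second) // pops "a" after ~1s
	second := q.Await(time.Second)    // nil: "a" was only queued once
	_, _ = first, second
}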

View File

@@ -0,0 +1,406 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
import (
"sync/atomic"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
const (
tolerance = 100 * time.Millisecond // go time delays aren't perfect, this is our tolerance for errors WRT expected timeouts
)
func timedPriority(t time.Time) Priority {
return Priority{ts: t}
}
func TestPQ(t *testing.T) {
t.Parallel()
var pq priorityQueue
if pq.Len() != 0 {
t.Fatalf("pq should be empty")
}
now := timedPriority(time.Now())
now2 := timedPriority(now.ts.Add(2 * time.Second))
pq.Push(&qitem{priority: now2})
if pq.Len() != 1 {
t.Fatalf("pq.len should be 1")
}
x := pq.Pop()
if x == nil {
t.Fatalf("x is nil")
}
if pq.Len() != 0 {
t.Fatalf("pq should be empty")
}
item := x.(*qitem)
if !item.priority.Equal(now2) {
t.Fatalf("item.priority != now2")
}
pq.Push(&qitem{priority: now2})
pq.Push(&qitem{priority: now2})
pq.Push(&qitem{priority: now2})
pq.Push(&qitem{priority: now2})
pq.Push(&qitem{priority: now2})
pq.Pop()
pq.Pop()
pq.Pop()
pq.Pop()
pq.Pop()
if pq.Len() != 0 {
t.Fatalf("pq should be empty")
}
now4 := timedPriority(now.ts.Add(4 * time.Second))
now6 := timedPriority(now.ts.Add(6 * time.Second))
pq.Push(&qitem{priority: now2})
pq.Push(&qitem{priority: now4})
pq.Push(&qitem{priority: now6})
pq.Swap(0, 2)
if !pq[0].priority.Equal(now6) || !pq[2].priority.Equal(now2) {
t.Fatalf("swap failed")
}
if pq.Less(1, 2) {
t.Fatalf("now4 < now2")
}
}
func TestPopEmptyPQ(t *testing.T) {
t.Parallel()
defer func() {
if r := recover(); r == nil {
t.Fatalf("Expected panic from popping an empty PQ")
}
}()
var pq priorityQueue
pq.Pop()
}
type testjob struct {
d time.Duration
t time.Time
deadline *time.Time
uid string
instance int
}
func (j *testjob) GetDelay() time.Duration {
return j.d
}
func (j testjob) GetUID() string {
return j.uid
}
func (td *testjob) Deadline() (deadline time.Time, ok bool) {
if td.deadline != nil {
return *td.deadline, true
} else {
return time.Now(), false
}
}
func TestDQ_sanity_check(t *testing.T) {
t.Parallel()
dq := NewDelayQueue()
delay := 2 * time.Second
dq.Add(&testjob{d: delay})
before := time.Now()
x := dq.Pop()
now := time.Now()
waitPeriod := now.Sub(before)
if waitPeriod+tolerance < delay {
t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
}
if x == nil {
t.Fatalf("x is nil")
}
item := x.(*testjob)
if item.d != delay {
t.Fatalf("d != delay")
}
}
func TestDQ_Offer(t *testing.T) {
t.Parallel()
assert := assert.New(t)
dq := NewDelayQueue()
delay := time.Second
added := dq.Offer(&testjob{})
if added {
t.Fatalf("DelayQueue should not add offered job without deadline")
}
deadline := time.Now().Add(delay)
added = dq.Offer(&testjob{deadline: &deadline})
if !added {
t.Fatalf("DelayQueue should add offered job with deadline")
}
before := time.Now()
x := dq.Pop()
now := time.Now()
waitPeriod := now.Sub(before)
if waitPeriod+tolerance < delay {
t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
}
assert.NotNil(x)
assert.Equal(x.(*testjob).deadline, &deadline)
}
func TestDQ_ordered_add_pop(t *testing.T) {
t.Parallel()
dq := NewDelayQueue()
dq.Add(&testjob{d: 2 * time.Second})
dq.Add(&testjob{d: 1 * time.Second})
dq.Add(&testjob{d: 3 * time.Second})
var finished [3]*testjob
before := time.Now()
idx := int32(-1)
ch := make(chan bool, 3)
//TODO: replace with `for range finished` once Go 1.3 support is dropped
for n := 0; n < len(finished); n++ {
go func() {
var ok bool
x := dq.Pop()
i := atomic.AddInt32(&idx, 1)
if finished[i], ok = x.(*testjob); !ok {
t.Fatalf("expected a *testjob, not %v", x)
}
finished[i].t = time.Now()
ch <- true
}()
}
<-ch
<-ch
<-ch
after := time.Now()
totalDelay := after.Sub(before)
if totalDelay+tolerance < (3 * time.Second) {
t.Fatalf("totalDelay < 3s: %v", totalDelay)
}
for i, v := range finished {
if v == nil {
t.Fatalf("task %d was nil", i)
}
expected := time.Duration(i+1) * time.Second
if v.d != expected {
t.Fatalf("task %d had delay-priority %v, expected %v", i, v.d, expected)
}
actualDelay := v.t.Sub(before)
if actualDelay+tolerance < v.d {
t.Fatalf("task %d had actual-delay %v < expected delay %v", i, actualDelay, v.d)
}
}
}
func TestDQ_always_pop_earliest_deadline(t *testing.T) {
t.Parallel()
// add a testjob with delay of 2s
// spawn a func f1 that attempts to Pop() and wait for f1 to begin
// add a testjob with a delay of 1s
// check that the func f1 actually popped the 1s task (not the 2s task)
dq := NewDelayQueue()
dq.Add(&testjob{d: 2 * time.Second})
ch := make(chan *testjob)
started := make(chan bool)
go func() {
started <- true
x := dq.Pop()
job := x.(*testjob)
job.t = time.Now()
ch <- job
}()
<-started
time.Sleep(500 * time.Millisecond) // give plenty of time for Pop() to enter
expected := 1 * time.Second
dq.Add(&testjob{d: expected})
job := <-ch
if expected != job.d {
t.Fatalf("Expected delay-prority of %v got instead got %v", expected, job.d)
}
job = dq.Pop().(*testjob)
expected = 2 * time.Second
if expected != job.d {
t.Fatalf("Expected delay-prority of %v got instead got %v", expected, job.d)
}
}
func TestDQ_always_pop_earliest_deadline_multi(t *testing.T) {
t.Parallel()
dq := NewDelayQueue()
dq.Add(&testjob{d: 2 * time.Second})
ch := make(chan *testjob)
multi := 10
started := make(chan bool, multi)
go func() {
started <- true
for i := 0; i < multi; i++ {
x := dq.Pop()
job := x.(*testjob)
job.t = time.Now()
ch <- job
}
}()
<-started
time.Sleep(500 * time.Millisecond) // give plenty of time for Pop() to enter
expected := 1 * time.Second
for i := 0; i < multi; i++ {
dq.Add(&testjob{d: expected})
}
for i := 0; i < multi; i++ {
job := <-ch
if expected != job.d {
t.Fatalf("Expected delay-prority of %v got instead got %v", expected, job.d)
}
}
job := dq.Pop().(*testjob)
expected = 2 * time.Second
if expected != job.d {
t.Fatalf("Expected delay-prority of %v got instead got %v", expected, job.d)
}
}
func TestDQ_negative_delay(t *testing.T) {
t.Parallel()
dq := NewDelayQueue()
delay := -2 * time.Second
dq.Add(&testjob{d: delay})
before := time.Now()
x := dq.Pop()
now := time.Now()
waitPeriod := now.Sub(before)
if waitPeriod > tolerance {
t.Fatalf("delay too long: %v, expected something less than: %v", waitPeriod, tolerance)
}
if x == nil {
t.Fatalf("x is nil")
}
item := x.(*testjob)
if item.d != delay {
t.Fatalf("d != delay")
}
}
func TestDFIFO_sanity_check(t *testing.T) {
t.Parallel()
assert := assert.New(t)
df := NewDelayFIFO()
delay := 2 * time.Second
df.Add(&testjob{d: delay, uid: "a", instance: 1}, ReplaceExisting)
assert.True(df.ContainedIDs().Has("a"))
// re-add with ReplaceExisting
df.Add(&testjob{d: delay, uid: "a", instance: 2}, ReplaceExisting)
assert.True(df.ContainedIDs().Has("a"))
a, ok := df.Get("a")
assert.True(ok)
assert.Equal(a.(*testjob).instance, 2)
// re-add with KeepExisting
df.Add(&testjob{d: delay, uid: "a", instance: 3}, KeepExisting)
assert.True(df.ContainedIDs().Has("a"))
a, ok = df.Get("a")
assert.True(ok)
assert.Equal(a.(*testjob).instance, 2)
// pop last
before := time.Now()
x := df.Pop()
assert.Equal(x.(*testjob).instance, 2)
now := time.Now()
waitPeriod := now.Sub(before)
if waitPeriod+tolerance < delay {
t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
}
if x == nil {
t.Fatalf("x is nil")
}
item := x.(*testjob)
if item.d != delay {
t.Fatalf("d != delay")
}
}
func TestDFIFO_Offer(t *testing.T) {
t.Parallel()
assert := assert.New(t)
dq := NewDelayFIFO()
delay := time.Second
added := dq.Offer(&testjob{instance: 1}, ReplaceExisting)
if added {
t.Fatalf("DelayFIFO should not add offered job without deadline")
}
deadline := time.Now().Add(delay)
added = dq.Offer(&testjob{deadline: &deadline, instance: 2}, ReplaceExisting)
if !added {
t.Fatalf("DelayFIFO should add offered job with deadline")
}
before := time.Now()
x := dq.Pop()
now := time.Now()
waitPeriod := now.Sub(before)
if waitPeriod+tolerance < delay {
t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
}
assert.NotNil(x)
assert.Equal(x.(*testjob).instance, 2)
}

View File

@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package queue provides several queue implementations, originally
// inspired by Kubernetes pkg/client/cache/fifo.
package queue

View File

@@ -0,0 +1,403 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
import (
"fmt"
"reflect"
"sync"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
)
type entry struct {
value UniqueCopyable
event EventType
}
type deletedEntry struct {
*entry
expiration time.Time
}
func (e *entry) Value() UniqueCopyable {
return e.value
}
func (e *entry) Copy() Copyable {
if e == nil {
return nil
}
return &entry{e.value.Copy().(UniqueCopyable), e.event}
}
func (e *entry) Is(types EventType) bool {
return types&e.event != 0
}
func (e *deletedEntry) Copy() Copyable {
if e == nil {
return nil
}
return &deletedEntry{e.entry.Copy().(*entry), e.expiration}
}
// deliver a message
type pigeon func(msg Entry)
func dead(msg Entry) {
// intentionally blank
}
// HistoricalFIFO receives adds and updates from a Reflector, and puts them in a queue for
// FIFO order processing. If multiple adds/updates of a single item happen while
// an item is in the queue before it has been processed, it will only be
// processed once, and when it is processed, the most recent version will be
// processed. This can't be done with a channel.
type HistoricalFIFO struct {
lock sync.RWMutex
cond sync.Cond
items map[string]Entry // We depend on the property that items in the queue are in the set.
queue []string
carrier pigeon // may be dead, but never nil
gcc int
lingerTTL time.Duration
}
// panics if obj doesn't implement UniqueCopyable; otherwise returns the same, typecast object
func checkType(obj interface{}) UniqueCopyable {
if v, ok := obj.(UniqueCopyable); !ok {
panic(fmt.Sprintf("Illegal object type, expected UniqueCopyable: %T", obj))
} else {
return v
}
}
// Add inserts an item, and puts it in the queue. The item is only enqueued
// if it doesn't already exist in the set.
func (f *HistoricalFIFO) Add(v interface{}) error {
obj := checkType(v)
notifications := []Entry(nil)
defer func() {
for _, e := range notifications {
f.carrier(e)
}
}()
f.lock.Lock()
defer f.lock.Unlock()
id := obj.GetUID()
if entry, exists := f.items[id]; !exists {
f.queue = append(f.queue, id)
} else {
if entry.Is(DELETE_EVENT | POP_EVENT) {
f.queue = append(f.queue, id)
}
}
notifications = f.merge(id, obj)
f.cond.Broadcast()
return nil
}
// Update is the same as Add in this implementation.
func (f *HistoricalFIFO) Update(obj interface{}) error {
return f.Add(obj)
}
// Delete removes an item. It doesn't add it to the queue, because
// this implementation assumes the consumer only cares about the objects,
// not the order in which they were created/added.
func (f *HistoricalFIFO) Delete(v interface{}) error {
obj := checkType(v)
deleteEvent := (Entry)(nil)
defer func() {
f.carrier(deleteEvent)
}()
f.lock.Lock()
defer f.lock.Unlock()
id := obj.GetUID()
item, exists := f.items[id]
if exists && !item.Is(DELETE_EVENT) {
e := item.(*entry)
e.event = DELETE_EVENT
deleteEvent = &deletedEntry{e, time.Now().Add(f.lingerTTL)}
f.items[id] = deleteEvent
}
return nil
}
// List returns a list of all the items.
func (f *HistoricalFIFO) List() []interface{} {
f.lock.RLock()
defer f.lock.RUnlock()
// TODO(jdef): slightly overallocates b/c of deleted items
list := make([]interface{}, 0, len(f.queue))
for _, entry := range f.items {
if entry.Is(DELETE_EVENT | POP_EVENT) {
continue
}
list = append(list, entry.Value().Copy())
}
return list
}
// ListKeys returns a list of the keys of all the items.
func (f *HistoricalFIFO) ListKeys() []string {
f.lock.RLock()
defer f.lock.RUnlock()
// TODO(jdef): slightly overallocates b/c of deleted items
list := make([]string, 0, len(f.queue))
for key, entry := range f.items {
if entry.Is(DELETE_EVENT | POP_EVENT) {
continue
}
list = append(list, key)
}
return list
}
// ContainedIDs returns a util.StringSet containing all IDs of the stored items.
// This is a snapshot of a moment in time, and one should keep in mind that
// other goroutines can add or remove items after you call this.
func (c *HistoricalFIFO) ContainedIDs() util.StringSet {
c.lock.RLock()
defer c.lock.RUnlock()
set := util.StringSet{}
for id, entry := range c.items {
if entry.Is(DELETE_EVENT | POP_EVENT) {
continue
}
set.Insert(id)
}
return set
}
// Get returns the requested item, or sets exists=false.
func (f *HistoricalFIFO) Get(v interface{}) (interface{}, bool, error) {
obj := checkType(v)
return f.GetByKey(obj.GetUID())
}
// Get returns the requested item, or sets exists=false.
func (f *HistoricalFIFO) GetByKey(id string) (interface{}, bool, error) {
f.lock.RLock()
defer f.lock.RUnlock()
entry, exists := f.items[id]
if exists && !entry.Is(DELETE_EVENT|POP_EVENT) {
return entry.Value().Copy(), true, nil
}
return nil, false, nil
}
// Poll reports whether an entry exists for the given id and matches the event mask.
func (f *HistoricalFIFO) Poll(id string, t EventType) bool {
f.lock.RLock()
defer f.lock.RUnlock()
entry, exists := f.items[id]
return exists && entry.Is(t)
}
// Await is a variant of Pop() that returns nil if no item can be popped within the given timeout
func (q *HistoricalFIFO) Await(timeout time.Duration) interface{} {
cancel := make(chan struct{})
ch := make(chan interface{}, 1)
go func() { ch <- q.pop(cancel) }()
select {
case <-time.After(timeout):
close(cancel)
return <-ch
case x := <-ch:
return x
}
}
func (f *HistoricalFIFO) Pop() interface{} {
return f.pop(nil)
}
func (f *HistoricalFIFO) pop(cancel chan struct{}) interface{} {
popEvent := (Entry)(nil)
defer func() {
f.carrier(popEvent)
}()
f.lock.Lock()
defer f.lock.Unlock()
for {
for len(f.queue) == 0 {
signal := make(chan struct{})
go func() {
defer close(signal)
f.cond.Wait()
}()
select {
case <-cancel:
// we may not have the lock yet, so
// broadcast to abort Wait, then
// return after lock re-acquisition
f.cond.Broadcast()
<-signal
return nil
case <-signal:
// we have the lock, re-check
// the queue for data...
}
}
id := f.queue[0]
f.queue = f.queue[1:]
item, ok := f.items[id]
if !ok || item.Is(DELETE_EVENT|POP_EVENT) {
// Item may have been deleted subsequently.
continue
}
value := item.Value()
popEvent = &entry{value, POP_EVENT}
f.items[id] = popEvent
return value.Copy()
}
}
func (f *HistoricalFIFO) Replace(objs []interface{}) error {
notifications := make([]Entry, 0, len(objs))
defer func() {
for _, e := range notifications {
f.carrier(e)
}
}()
idToObj := make(map[string]interface{})
for _, v := range objs {
obj := checkType(v)
idToObj[obj.GetUID()] = v
}
f.lock.Lock()
defer f.lock.Unlock()
f.queue = f.queue[:0]
now := time.Now()
for id, v := range f.items {
if _, exists := idToObj[id]; !exists && !v.Is(DELETE_EVENT) {
// a non-deleted entry in the items list that doesn't show up in the
// new list: mark it as deleted
ent := v.(*entry)
ent.event = DELETE_EVENT
e := &deletedEntry{ent, now.Add(f.lingerTTL)}
f.items[id] = e
notifications = append(notifications, e)
}
}
for id, v := range idToObj {
obj := checkType(v)
f.queue = append(f.queue, id)
n := f.merge(id, obj)
notifications = append(notifications, n...)
}
if len(f.queue) > 0 {
f.cond.Broadcast()
}
return nil
}
// garbage collect DELETEd items whose TTL has expired; the IDs of such items are removed
// from the queue. This impl assumes that caller has acquired state lock.
func (f *HistoricalFIFO) gc() {
now := time.Now()
deleted := make(map[string]struct{})
for id, v := range f.items {
if v.Is(DELETE_EVENT) {
ent := v.(*deletedEntry)
if ent.expiration.Before(now) {
delete(f.items, id)
deleted[id] = struct{}{}
}
}
}
// remove deleted items from the queue, will likely (slightly) overallocate here
queue := make([]string, 0, len(f.queue))
for _, id := range f.queue {
if _, exists := deleted[id]; !exists {
queue = append(queue, id)
}
}
f.queue = queue
}
// Assumes that the caller has acquired the state lock.
func (f *HistoricalFIFO) merge(id string, obj UniqueCopyable) (notifications []Entry) {
item, exists := f.items[id]
now := time.Now()
if !exists {
e := &entry{obj.Copy().(UniqueCopyable), ADD_EVENT}
f.items[id] = e
notifications = append(notifications, e)
} else {
if !item.Is(DELETE_EVENT) && item.Value().GetUID() != obj.GetUID() {
// hidden DELETE!
// (1) append a DELETE
// (2) append an ADD
// .. and notify listeners in that order
ent := item.(*entry)
ent.event = DELETE_EVENT
e1 := &deletedEntry{ent, now.Add(f.lingerTTL)}
e2 := &entry{obj.Copy().(UniqueCopyable), ADD_EVENT}
f.items[id] = e2
notifications = append(notifications, e1, e2)
} else if !reflect.DeepEqual(obj, item.Value()) {
//TODO(jdef): it would be nice if we could rely on resource versions
//instead of doing a DeepEqual. Maybe someday we'll be able to.
e := &entry{obj.Copy().(UniqueCopyable), UPDATE_EVENT}
f.items[id] = e
notifications = append(notifications, e)
}
}
// check for garbage collection
f.gcc++
if f.gcc%256 == 0 { //TODO(jdef): extract constant
f.gcc = 0
f.gc()
}
return
}
// NewHistorical returns a FIFO that can be used to queue up items to
// process. If a non-nil channel is provided, then modifications to the
// FIFO are delivered on it as copied Entry events.
func NewHistorical(ch chan<- Entry) FIFO {
carrier := dead
if ch != nil {
carrier = func(msg Entry) {
if msg != nil {
ch <- msg.Copy().(Entry)
}
}
}
f := &HistoricalFIFO{
items: map[string]Entry{},
queue: []string{},
carrier: carrier,
lingerTTL: 5 * time.Minute, // TODO(jdef): extract constant
}
f.cond.L = &f.lock
return f
}
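// An illustrative sketch of the event plumbing; record is not part of this
// diff. Each add, update, and pop is mirrored as a copied Entry on the
// channel handed to NewHistorical.
type record struct {
	uid   string
	value int
}

func (r *record) GetUID() string { return r.uid }
func (r *record) Copy() Copyable { clone := *r; return &clone }

func exampleHistorical() {
	events := make(chan Entry, 16)
	f := NewHistorical(events)
	f.Add(&record{uid: "x", value: 1}) // emits an ADD_EVENT entry
	f.Add(&record{uid: "x", value: 2}) // emits an UPDATE_EVENT entry; still one queued pop
	f.Pop()                            // emits a POP_EVENT entry
	for i := 0; i < 3; i++ {
		e := <-events
		_ = e.Is(ADD_EVENT | UPDATE_EVENT | POP_EVENT) // true for each of the three
	}
}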

View File

@@ -0,0 +1,191 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
import (
"fmt"
"testing"
"time"
)
type _int int
type _uint uint
func (i _int) Copy() Copyable {
return i
}
func (i _int) GetUID() string {
return fmt.Sprintf("INT%d", int(i))
}
func (i _uint) Copy() Copyable {
return i
}
func (i _uint) GetUID() string {
return fmt.Sprintf("UINT%d", uint64(i))
}
type testObj struct {
id string
value int
}
func (i *testObj) Copy() Copyable {
if i == nil {
return nil
} else {
return &testObj{i.id, i.value}
}
}
func (i *testObj) GetUID() string {
return i.id
}
func TestFIFO_basic(t *testing.T) {
f := NewHistorical(nil)
const amount = 500
go func() {
for i := 0; i < amount; i++ {
f.Add(_int(i + 1))
}
}()
go func() {
for u := uint(0); u < amount; u++ {
f.Add(_uint(u + 1))
}
}()
lastInt := _int(0)
lastUint := _uint(0)
for i := 0; i < amount*2; i++ {
switch obj := f.Pop().(type) {
case _int:
if obj <= lastInt {
t.Errorf("got %v (int) out of order, last was %v", obj, lastInt)
}
lastInt = obj
case _uint:
if obj <= lastUint {
t.Errorf("got %v (uint) out of order, last was %v", obj, lastUint)
} else {
lastUint = obj
}
default:
t.Fatalf("unexpected type %#v", obj)
}
}
}
func TestFIFO_addUpdate(t *testing.T) {
f := NewHistorical(nil)
f.Add(&testObj{"foo", 10})
f.Update(&testObj{"foo", 15})
got := make(chan *testObj, 2)
go func() {
for {
got <- f.Pop().(*testObj)
}
}()
first := <-got
if e, a := 15, first.value; e != a {
t.Errorf("Didn't get updated value (%v), got %v", e, a)
}
select {
case unexpected := <-got:
t.Errorf("Got second value %v", unexpected)
case <-time.After(50 * time.Millisecond):
}
_, exists, _ := f.GetByKey("foo")
if exists {
t.Errorf("item did not get removed")
}
}
func TestFIFO_addReplace(t *testing.T) {
f := NewHistorical(nil)
f.Add(&testObj{"foo", 10})
f.Replace([]interface{}{&testObj{"foo", 15}})
got := make(chan *testObj, 2)
go func() {
for {
got <- f.Pop().(*testObj)
}
}()
first := <-got
if e, a := 15, first.value; e != a {
t.Errorf("Didn't get updated value (%v), got %v", e, a)
}
select {
case unexpected := <-got:
t.Errorf("Got second value %v", unexpected)
case <-time.After(50 * time.Millisecond):
}
_, exists, _ := f.GetByKey("foo")
if exists {
t.Errorf("item did not get removed")
}
}
func TestFIFO_detectLineJumpers(t *testing.T) {
f := NewHistorical(nil)
f.Add(&testObj{"foo", 10})
f.Add(&testObj{"bar", 1})
f.Add(&testObj{"foo", 11})
f.Add(&testObj{"foo", 13})
f.Add(&testObj{"zab", 30})
err := error(nil)
done := make(chan struct{})
go func() {
defer close(done)
if e, a := 13, f.Pop().(*testObj).value; a != e {
err = fmt.Errorf("expected %d, got %d", e, a)
return
}
f.Add(&testObj{"foo", 14}) // ensure foo doesn't jump back in line
if e, a := 1, f.Pop().(*testObj).value; a != e {
err = fmt.Errorf("expected %d, got %d", e, a)
return
}
if e, a := 30, f.Pop().(*testObj).value; a != e {
err = fmt.Errorf("expected %d, got %d", e, a)
return
}
if e, a := 14, f.Pop().(*testObj).value; a != e {
err = fmt.Errorf("expected %d, got %d", e, a)
return
}
}()
select {
case <-done:
if err != nil {
t.Fatal(err)
}
case <-time.After(1 * time.Second):
t.Fatal("Deadlocked unit test")
}
}

View File

@@ -0,0 +1,103 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
import (
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
)
type EventType int
const (
ADD_EVENT EventType = 1 << iota
UPDATE_EVENT
DELETE_EVENT
POP_EVENT
)
type Entry interface {
Copyable
Value() UniqueCopyable
// types is a logically OR'd combination of EventType, e.g. ADD_EVENT|UPDATE_EVENT
Is(types EventType) bool
}
type Copyable interface {
// return an independent copy (deep clone) of the current object
Copy() Copyable
}
type UniqueID interface {
GetUID() string
}
type UniqueCopyable interface {
Copyable
UniqueID
}
type FIFO interface {
cache.Store
// Pop waits until an item is ready and returns it. If multiple items are
// ready, they are returned in the order in which they were added/updated.
// The item is removed from the queue (and the store) before it is returned,
// so if you don't successfully process it, you need to add it back with Add().
Pop() interface{}
// Await attempts to Pop within the given interval; upon success the non-nil
// item is returned, otherwise nil
Await(timeout time.Duration) interface{}
// Is there an entry for the id that matches the event mask?
Poll(id string, types EventType) bool
}
type Delayed interface {
// return the remaining delay; a non-positive value indicates no delay
GetDelay() time.Duration
}
type Deadlined interface {
// when ok, returns the time when this object should be activated/executed/evaluated
Deadline() (deadline time.Time, ok bool)
}
// No objects are ever expected to be sent over this channel. References to BreakChan
// instances may be nil (always blocking). Signalling over this channel is performed by
// closing the channel. As such there can only ever be a single signal sent over the
// lifetime of the channel.
type BreakChan <-chan struct{}
// an optional interface to be implemented by Delayed objects; returning a nil
// channel from Breaker() results in waiting the full delay duration
type Breakout interface {
// return a channel that signals early departure from a blocking delay
Breaker() BreakChan
}
type UniqueDelayed interface {
UniqueID
Delayed
}
type UniqueDeadlined interface {
UniqueID
Deadlined
}
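// An illustrative Delayed + Breakout implementation; cancellableTask is not
// part of this diff. Closing stop signals early departure from a blocking
// delay; the queue machinery is assumed to wire Breaker() into the notify
// chan it selects on while waiting out an item's delay.
type cancellableTask struct {
	delay time.Duration
	stop  chan struct{} // closed (never sent on) to signal early departure
}

func (t *cancellableTask) GetDelay() time.Duration { return t.delay }
func (t *cancellableTask) Breaker() BreakChan      { return BreakChan(t.stop) }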

View File

@@ -0,0 +1,70 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
// Decide whether a pre-existing deadline for an item in a delay-queue should be
// updated if an attempt is made to offer/add a new deadline for said item. Whether
// the deadline changes or not has zero impact on the data blob associated with the
// entry in the queue.
type DeadlinePolicy int
const (
PreferLatest DeadlinePolicy = iota
PreferEarliest
)
// Decide whether a pre-existing data blob in a delay-queue should be replaced if
// an attempt is made to add/offer a new data blob in its place. Whether the data
// is replaced has no bearing on the deadline (priority) of the item in the queue.
type ReplacementPolicy int
const (
KeepExisting ReplacementPolicy = iota
ReplaceExisting
)
func (rp ReplacementPolicy) replacementValue(original, replacement interface{}) (result interface{}) {
switch rp {
case KeepExisting:
result = original
case ReplaceExisting:
fallthrough
default:
result = replacement
}
return
}
func (dp DeadlinePolicy) nextDeadline(a, b Priority) (result Priority) {
switch dp {
case PreferEarliest:
if a.ts.Before(b.ts) {
result = a
} else {
result = b
}
case PreferLatest:
fallthrough
default:
if a.ts.After(b.ts) {
result = a
} else {
result = b
}
}
return
}
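// A small in-package sketch of how the two policies behave on re-add; it
// assumes the Priority struct (with its ts field) defined elsewhere in this
// package, and that "time" is imported in this file.
func examplePolicies() {
	older, newer := "v1", "v2"
	_ = KeepExisting.replacementValue(older, newer)    // "v1"
	_ = ReplaceExisting.replacementValue(older, newer) // "v2"

	earlier := Priority{ts: time.Now()}
	later := Priority{ts: earlier.ts.Add(time.Minute)}
	_ = PreferEarliest.nextDeadline(earlier, later) // earlier
	_ = PreferLatest.nextDeadline(earlier, later)   // later
}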

Some files were not shown because too many files have changed in this diff.