Kubernetes Mesos integration
This commit includes the fundamental components of the Kubernetes-Mesos integration:
* Kubernetes-Mesos scheduler
* Kubernetes-Mesos executor
* Supporting libs

Dependencies and upstream changes are included in a separate commit for easy review. After this initial upstream, two PRs will follow:
* km (hypercube) and k8sm-controller-manager #9265
* Static pods support #9077

Fixes applied:
- Precise metrics subsystem definitions
  - mesosphere/kubernetes-mesos#331
  - https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion_r31875232
  - https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion_r31875240
- Improve comments and add clarifications
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875208
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875226
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875227
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875228
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875239
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875243
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875234
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875256
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875255
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875251
- Clarify which Schedule function is actually called
  - Fixes https://github.com/GoogleCloudPlatform/kubernetes/pull/8882#discussion-diff-31875246
18
contrib/mesos/cmd/k8sm-executor/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package main implements the executable Kubernetes-Mesos executor.
package main
47
contrib/mesos/cmd/k8sm-executor/main.go
Normal file
@@ -0,0 +1,47 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
    "fmt"
    "os"
    "runtime"

    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/service"
    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/version/verflag"
    "github.com/spf13/pflag"
)

func main() {
    runtime.GOMAXPROCS(runtime.NumCPU())

    s := service.NewKubeletExecutorServer()
    s.AddStandaloneFlags(pflag.CommandLine)

    util.InitFlags()
    util.InitLogs()
    defer util.FlushLogs()

    verflag.PrintAndExitIfRequested()

    if err := s.Run(hyperkube.Nil(), pflag.CommandLine.Args()); err != nil {
        fmt.Fprintln(os.Stderr, err) // avoid treating the error text as a printf format string
        os.Exit(1)
    }
}
21
contrib/mesos/cmd/k8sm-redirfd/doc.go
Normal file
@@ -0,0 +1,21 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package main is used for testing the redirfd package.
// Inspired by http://skarnet.org/software/execline/redirfd.html.
// Usage:
//     k8sm-redirfd [-n] [-b] {mode} {fd} {file} {prog...}
package main
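A minimal usage sketch of driving the redirfd package programmatically, equivalent to invoking "k8sm-redirfd w 1 /tmp/out.log prog". It assumes only the API exercised by redirfd.go below (RedirectMode.Redirect and the Stdout descriptor constant); the log path is illustrative.

package main

import (
    "fmt"
    "os"

    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/redirfd"
)

func main() {
    // Open /tmp/out.log (illustrative path) for writing and obtain the file
    // intended to serve as a child process's stdout, as redirfd.go does below.
    f, err := redirfd.Write.Redirect(false, false, redirfd.Stdout, "/tmp/out.log")
    if err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
    defer f.Close()
    // f would then be wired into an exec.Cmd, as shown in redirfd.go below.
}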
105
contrib/mesos/cmd/k8sm-redirfd/redirfd.go
Normal file
@@ -0,0 +1,105 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
    "flag"
    "fmt"
    "os"
    "os/exec"
    "syscall"

    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/redirfd"
)

func main() {
    nonblock := flag.Bool("n", false, "open file in non-blocking mode")
    changemode := flag.Bool("b", false, "change mode of file after opening it: to non-blocking mode if the -n option was not given, to blocking mode if it was")
    flag.Parse()

    args := flag.Args()
    if len(args) < 4 {
        fmt.Fprintf(os.Stderr, "expected {mode} {fd} {file} {prog...} instead of: %v\n", args)
        os.Exit(1)
    }

    var mode redirfd.RedirectMode
    switch m := args[0]; m {
    case "r":
        mode = redirfd.Read
    case "w":
        mode = redirfd.Write
    case "u":
        mode = redirfd.Update
    case "a":
        mode = redirfd.Append
    case "c":
        mode = redirfd.AppendExisting
    case "x":
        mode = redirfd.WriteNew
    default:
        fmt.Fprintf(os.Stderr, "unrecognized mode %q\n", m)
        os.Exit(1)
    }

    fd, err := redirfd.ParseFileDescriptor(args[1])
    if err != nil {
        fmt.Fprintf(os.Stderr, "failed to parse file descriptor: %v\n", err)
        os.Exit(1)
    }
    file := args[2]

    f, err := mode.Redirect(*nonblock, *changemode, fd, file)
    if err != nil {
        fmt.Fprintf(os.Stderr, "redirect failed: %q, %v\n", args[1], err)
        os.Exit(1)
    }
    var pargs []string
    if len(args) > 4 {
        pargs = args[4:]
    }
    cmd := exec.Command(args[3], pargs...)
    cmd.Stdin = os.Stdin
    cmd.Stdout = os.Stdout
    cmd.Stderr = os.Stderr
    switch fd {
    case redirfd.Stdin:
        cmd.Stdin = f
    case redirfd.Stdout:
        cmd.Stdout = f
    case redirfd.Stderr:
        cmd.Stderr = f
    default:
        cmd.ExtraFiles = []*os.File{f}
    }
    defer f.Close()
    if err = cmd.Run(); err != nil {
        // only *exec.ExitError carries a ProcessState; other errors (e.g. the
        // program could not be started) fall through to the generic exit code
        if exiterr, ok := err.(*exec.ExitError); ok {
            state := exiterr.ProcessState
            if state != nil {
                sys := state.Sys()
                if waitStatus, ok := sys.(syscall.WaitStatus); ok {
                    if waitStatus.Signaled() {
                        os.Exit(256 + int(waitStatus.Signal()))
                    } else {
                        os.Exit(waitStatus.ExitStatus())
                    }
                }
            }
        }
        os.Exit(3)
    }
}
18
contrib/mesos/cmd/k8sm-scheduler/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package main implements the executable Kubernetes-Mesos scheduler.
package main
46
contrib/mesos/cmd/k8sm-scheduler/main.go
Normal file
@@ -0,0 +1,46 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
    "fmt"
    "os"
    "runtime"

    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube"
    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/service"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/version/verflag"
    "github.com/spf13/pflag"
)

func main() {
    runtime.GOMAXPROCS(runtime.NumCPU())
    s := service.NewSchedulerServer()
    s.AddStandaloneFlags(pflag.CommandLine)

    util.InitFlags()
    util.InitLogs()
    defer util.FlushLogs()

    verflag.PrintAndExitIfRequested()

    if err := s.Run(hyperkube.Nil(), pflag.CommandLine.Args()); err != nil {
        fmt.Fprintln(os.Stderr, err) // avoid treating the error text as a printf format string
        os.Exit(1)
    }
}
43
contrib/mesos/pkg/assert/assert.go
Normal file
@@ -0,0 +1,43 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package assert

import (
    "fmt"
    "testing"
    "time"

    "github.com/stretchr/testify/assert"
)

// EventuallyTrue asserts that the given predicate becomes true within the given timeout. It
// checks the predicate regularly, every 100ms.
func EventuallyTrue(t *testing.T, timeout time.Duration, fn func() bool, msgAndArgs ...interface{}) bool {
    start := time.Now()
    for {
        if fn() {
            return true
        }
        if time.Since(start) > timeout {
            if len(msgAndArgs) > 0 {
                return assert.Fail(t, msgAndArgs[0].(string), msgAndArgs[1:]...)
            }
            // pre-format the message: assert.Fail does not interpolate the failure message itself
            return assert.Fail(t, fmt.Sprintf("predicate fn has not been true after %v", timeout))
        }
        time.Sleep(100 * time.Millisecond)
    }
}
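A minimal sketch of how EventuallyTrue reads in a test; the polled condition (a hypothetical completion flag guarded by sync/atomic) is illustrative only.

package assert_test

import (
    "sync/atomic"
    "testing"
    "time"

    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/assert"
)

func TestWorkerFinishes(t *testing.T) {
    var done int32
    go func() {
        time.Sleep(50 * time.Millisecond) // stand-in for real async work
        atomic.StoreInt32(&done, 1)
    }()
    // polls every 100ms until the flag flips, failing the test after 1s
    assert.EventuallyTrue(t, time.Second, func() bool {
        return atomic.LoadInt32(&done) == 1
    }, "worker never signalled completion")
}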
19
contrib/mesos/pkg/assert/doc.go
Normal file
@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package assert is a utility package containing reusable testing functionality
// extending github.com/stretchr/testify/assert
package assert
96
contrib/mesos/pkg/backoff/backoff.go
Normal file
@@ -0,0 +1,96 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package backoff

import (
    "math/rand"
    "sync"
    "time"

    log "github.com/golang/glog"
)

type clock interface {
    Now() time.Time
}

type realClock struct{}

func (realClock) Now() time.Time {
    return time.Now()
}

type backoffEntry struct {
    backoff    time.Duration
    lastUpdate time.Time
}

type Backoff struct {
    perItemBackoff  map[string]*backoffEntry
    lock            sync.Mutex
    clock           clock
    defaultDuration time.Duration
    maxDuration     time.Duration
}

func New(initial, max time.Duration) *Backoff {
    return &Backoff{
        perItemBackoff:  map[string]*backoffEntry{},
        clock:           realClock{},
        defaultDuration: initial,
        maxDuration:     max,
    }
}

func (p *Backoff) getEntry(id string) *backoffEntry {
    p.lock.Lock()
    defer p.lock.Unlock()
    entry, ok := p.perItemBackoff[id]
    if !ok {
        entry = &backoffEntry{backoff: p.defaultDuration}
        p.perItemBackoff[id] = entry
    }
    entry.lastUpdate = p.clock.Now()
    return entry
}

func (p *Backoff) Get(id string) time.Duration {
    entry := p.getEntry(id)
    duration := entry.backoff
    entry.backoff *= 2
    if entry.backoff > p.maxDuration {
        entry.backoff = p.maxDuration
    }
    //TODO(jdef) parameterize use of jitter?
    // add jitter, get better backoff distribution
    duration = time.Duration(rand.Int63n(int64(duration)))
    log.V(3).Infof("Backing off %v for pod %s", duration, id)
    return duration
}

// GC garbage-collects records that have aged past maxDuration. Backoff users are expected
// to invoke this periodically.
func (p *Backoff) GC() {
    p.lock.Lock()
    defer p.lock.Unlock()
    now := p.clock.Now()
    for id, entry := range p.perItemBackoff {
        if now.Sub(entry.lastUpdate) > p.maxDuration {
            delete(p.perItemBackoff, id)
        }
    }
}
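A minimal sketch of the Backoff API from a caller's perspective, using only the New/Get/GC surface shown above; the pod key and durations are illustrative.

package main

import (
    "fmt"
    "time"

    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/backoff"
)

func main() {
    b := backoff.New(time.Second, 32*time.Second) // initial 1s, capped at 32s

    for i := 0; i < 3; i++ {
        // Each Get doubles the stored backoff (up to the cap) and returns
        // a jittered duration drawn from [0, the entry's previous backoff).
        d := b.Get("default/mypod")
        fmt.Printf("attempt %d: sleeping %v\n", i, d)
        time.Sleep(d)
    }

    b.GC() // drop entries that have been idle longer than maxDuration
}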
19
contrib/mesos/pkg/backoff/doc.go
Normal file
@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package backoff provides backoff functionality with a simple API.
// Originally copied from Kubernetes: plugin/pkg/scheduler/factory/factory.go
package backoff
18
contrib/mesos/pkg/election/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package election provides interfaces used for master election.
package election
185
contrib/mesos/pkg/election/etcd_master.go
Normal file
@@ -0,0 +1,185 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package election

import (
    "fmt"
    "time"

    "github.com/GoogleCloudPlatform/kubernetes/pkg/tools"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
    "github.com/coreos/go-etcd/etcd"
    "github.com/golang/glog"
)

// Master is used to announce the current elected master.
type Master string

// IsAnAPIObject is used solely so we can work with the watch package.
// TODO(k8s): Either fix watch so this isn't necessary, or make this a real API Object.
// TODO(k8s): when it becomes clear how this package will be used, move these declarations
// to the proper place.
func (Master) IsAnAPIObject() {}

// NewEtcdMasterElector returns an implementation of election.MasterElector backed by etcd.
func NewEtcdMasterElector(h tools.EtcdGetSet) MasterElector {
    return &etcdMasterElector{etcd: h}
}

type empty struct{}

// internal implementation struct
type etcdMasterElector struct {
    etcd   tools.EtcdGetSet
    done   chan empty
    events chan watch.Event
}

// Elect implements the election.MasterElector interface.
func (e *etcdMasterElector) Elect(path, id string) watch.Interface {
    e.done = make(chan empty)
    e.events = make(chan watch.Event)
    go util.Forever(func() { e.run(path, id) }, time.Second*5)
    return e
}

func (e *etcdMasterElector) run(path, id string) {
    masters := make(chan string)
    errors := make(chan error)
    go e.master(path, id, 30, masters, errors, e.done) // TODO(jdef) extract constant
    for {
        select {
        case m := <-masters:
            e.events <- watch.Event{
                Type:   watch.Modified,
                Object: Master(m),
            }
        case err := <-errors:
            glog.Errorf("error in election: %v", err)
        }
    }
}

// ResultChan implements the watch.Interface interface.
func (e *etcdMasterElector) ResultChan() <-chan watch.Event {
    return e.events
}

// extendMaster attempts to extend ownership of a master lock for TTL seconds.
// returns "", nil if extension failed
// returns id, nil if extension succeeded
// returns "", err if an error occurred
func (e *etcdMasterElector) extendMaster(path, id string, ttl uint64, res *etcd.Response) (string, error) {
    // If it matches the passed in id, extend the lease by writing a new entry.
    // Uses compare and swap, so that if we TTL out in the meantime, the write will fail.
    // We don't handle the TTL delete w/o a write case here, it's handled in the next loop
    // iteration.
    _, err := e.etcd.CompareAndSwap(path, id, ttl, "", res.Node.ModifiedIndex)
    if err != nil && !tools.IsEtcdTestFailed(err) {
        return "", err
    }
    if err != nil && tools.IsEtcdTestFailed(err) {
        return "", nil
    }
    return id, nil
}

// becomeMaster attempts to become the master for this lock.
// returns "", nil if the attempt failed
// returns id, nil if the attempt succeeded
// returns "", err if an error occurred
func (e *etcdMasterElector) becomeMaster(path, id string, ttl uint64) (string, error) {
    _, err := e.etcd.Create(path, id, ttl)
    if err != nil && !tools.IsEtcdNodeExist(err) {
        // unexpected error
        return "", err
    }
    if err != nil && tools.IsEtcdNodeExist(err) {
        return "", nil
    }
    return id, nil
}

// handleMaster performs one loop of master locking.
// on success it returns <master>, nil
// on error it returns "", err
// in situations where you should try again due to concurrent state changes (e.g. another actor simultaneously acquiring the lock)
// it returns "", nil
func (e *etcdMasterElector) handleMaster(path, id string, ttl uint64) (string, error) {
    res, err := e.etcd.Get(path, false, false)

    // Unexpected error, bail out
    if err != nil && !tools.IsEtcdNotFound(err) {
        return "", err
    }

    // There is no master, try to become the master.
    if err != nil && tools.IsEtcdNotFound(err) {
        return e.becomeMaster(path, id, ttl)
    }

    // This should never happen.
    if res.Node == nil {
        return "", fmt.Errorf("unexpected response: %#v", res)
    }

    // We're not the master, just return the current value
    if res.Node.Value != id {
        return res.Node.Value, nil
    }

    // We are the master, try to extend our lease
    return e.extendMaster(path, id, ttl, res)
}

// master provides a distributed master election lock; it maintains the lock until failure,
// or until someone sends something on the done channel.
// The basic algorithm is:
// while !done
//   Get the current master
//   If there is no current master
//     Try to become the master
//   Otherwise
//     If we are the master, extend the lease
//     If the master is different than the last time through the loop, report the master
//   Sleep 80% of TTL
func (e *etcdMasterElector) master(path, id string, ttl uint64, masters chan<- string, errors chan<- error, done <-chan empty) {
    lastMaster := ""
    for {
        master, err := e.handleMaster(path, id, ttl)
        if err != nil {
            errors <- err
        } else if len(master) == 0 {
            continue
        } else if master != lastMaster {
            lastMaster = master
            masters <- master
        }
        // TODO(k8s): Add Watch here, skip the polling for faster reactions
        // If done is closed, break out.
        select {
        case <-done:
            return
        case <-time.After(time.Duration((ttl*8)/10) * time.Second):
        }
    }
}

// Stop implements the watch.Interface interface.
func (e *etcdMasterElector) Stop() {
    close(e.done)
}
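A minimal sketch of driving the elector directly. It assumes a go-etcd client value that satisfies tools.EtcdGetSet (the fake client in the tests below satisfies the same interface); the endpoint, lock path, and id are illustrative.

package main

import (
    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/election"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
    "github.com/coreos/go-etcd/etcd"
)

func main() {
    // assumption: *etcd.Client satisfies tools.EtcdGetSet
    client := etcd.NewClient([]string{"http://127.0.0.1:4001"})
    elector := election.NewEtcdMasterElector(client)

    w := elector.Elect("/election/k8sm", "scheduler-1") // illustrative path and id
    defer w.Stop()
    for event := range w.ResultChan() {
        if event.Type == watch.Modified {
            // each event announces the currently elected master id
            current := event.Object.(election.Master)
            println("current master:", string(current))
        }
    }
}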
98
contrib/mesos/pkg/election/etcd_master_test.go
Normal file
@@ -0,0 +1,98 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package election

import (
    "testing"

    "github.com/GoogleCloudPlatform/kubernetes/pkg/tools"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
    "github.com/coreos/go-etcd/etcd"
)

func TestEtcdMasterOther(t *testing.T) {
    path := "foo"
    etcd := tools.NewFakeEtcdClient(t)
    etcd.Set(path, "baz", 0)
    master := NewEtcdMasterElector(etcd)
    w := master.Elect(path, "bar")
    result := <-w.ResultChan()
    if result.Type != watch.Modified || result.Object.(Master) != "baz" {
        t.Errorf("unexpected event: %#v", result)
    }
    w.Stop()
}

func TestEtcdMasterNoOther(t *testing.T) {
    path := "foo"
    e := tools.NewFakeEtcdClient(t)
    e.TestIndex = true
    e.Data["foo"] = tools.EtcdResponseWithError{
        R: &etcd.Response{
            Node: nil,
        },
        E: &etcd.EtcdError{
            ErrorCode: tools.EtcdErrorCodeNotFound,
        },
    }
    master := NewEtcdMasterElector(e)
    w := master.Elect(path, "bar")
    result := <-w.ResultChan()
    if result.Type != watch.Modified || result.Object.(Master) != "bar" {
        t.Errorf("unexpected event: %#v", result)
    }
    w.Stop()
}

func TestEtcdMasterNoOtherThenConflict(t *testing.T) {
    path := "foo"
    e := tools.NewFakeEtcdClient(t)
    e.TestIndex = true
    // Ok, so we set up a chain of responses from etcd:
    // 1) Nothing there
    // 2) conflict (someone else wrote)
    // 3) new value (the data they wrote)
    empty := tools.EtcdResponseWithError{
        R: &etcd.Response{
            Node: nil,
        },
        E: &etcd.EtcdError{
            ErrorCode: tools.EtcdErrorCodeNotFound,
        },
    }
    empty.N = &tools.EtcdResponseWithError{
        R: &etcd.Response{},
        E: &etcd.EtcdError{
            ErrorCode: tools.EtcdErrorCodeNodeExist,
        },
    }
    empty.N.N = &tools.EtcdResponseWithError{
        R: &etcd.Response{
            Node: &etcd.Node{
                Value: "baz",
            },
        },
    }
    e.Data["foo"] = empty
    master := NewEtcdMasterElector(e)
    w := master.Elect(path, "bar")
    result := <-w.ResultChan()
    if result.Type != watch.Modified || result.Object.(Master) != "bar" {
        t.Errorf("unexpected event: %#v", result)
    }
    w.Stop()
}
53
contrib/mesos/pkg/election/fake.go
Normal file
@@ -0,0 +1,53 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package election

import (
    "sync"

    "github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
)

// Fake allows for testing of anything consuming a MasterElector.
type Fake struct {
    mux           *watch.Broadcaster
    currentMaster Master
    lock          sync.Mutex // protects access to currentMaster
}

// NewFake makes a new fake MasterElector.
func NewFake() *Fake {
    // 0 means block for clients.
    return &Fake{mux: watch.NewBroadcaster(0, watch.WaitIfChannelFull)}
}

func (f *Fake) ChangeMaster(newMaster Master) {
    f.lock.Lock()
    defer f.lock.Unlock()
    f.mux.Action(watch.Modified, newMaster)
    f.currentMaster = newMaster
}

func (f *Fake) Elect(path, id string) watch.Interface {
    f.lock.Lock()
    defer f.lock.Unlock()
    w := f.mux.Watch()
    if f.currentMaster != "" {
        f.mux.Action(watch.Modified, f.currentMaster)
    }
    return w
}
134
contrib/mesos/pkg/election/master.go
Normal file
@@ -0,0 +1,134 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package election

import (
    "sync"

    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/watch"

    "github.com/golang/glog"
)

// MasterElector is an interface for services that can elect masters.
// Important Note: MasterElectors are not inter-operable; all participants in the election need to be
// using the same underlying implementation of this interface for correct behavior.
type MasterElector interface {
    // Elect makes the caller represented by 'id' enter into a master election for the
    // distributed lock defined by 'path'.
    // The returned watch.Interface provides a stream of Master objects which
    // contain the current master.
    // Calling Stop on the returned interface relinquishes ownership (if currently possessed)
    // and removes the caller from the election.
    Elect(path, id string) watch.Interface
}

// Service represents anything that can start and stop on demand.
type Service interface {
    Validate(desired, current Master)
    Start()
    Stop()
}

type notifier struct {
    lock sync.Mutex
    cond *sync.Cond

    // desired is updated with every change, current is updated after
    // Start()/Stop() finishes. 'cond' is used to signal that a change
    // might be needed. This handles the case where mastership flops
    // around without calling Start()/Stop() excessively.
    desired, current Master

    // for comparison, to see if we are master.
    id Master

    service Service
}

// Notify runs Elect() on m, and calls Start()/Stop() on s when the
// elected master starts/stops matching 'id'. Never returns.
func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
    n := &notifier{id: Master(id), service: s}
    n.cond = sync.NewCond(&n.lock)
    finished := runtime.After(func() {
        runtime.Until(func() {
            for {
                w := m.Elect(path, id)
            eventLoop:
                for {
                    select {
                    case <-abort:
                        return
                    case event, open := <-w.ResultChan():
                        if !open {
                            // the election watch closed: leave the inner loop
                            // and re-enter the election
                            break eventLoop
                        }
                        if event.Type != watch.Modified {
                            continue
                        }
                        electedMaster, ok := event.Object.(Master)
                        if !ok {
                            glog.Errorf("Unexpected object from election channel: %v", event.Object)
                            continue // skip the malformed event
                        }
                        func() {
                            n.lock.Lock()
                            defer n.lock.Unlock()
                            n.desired = electedMaster
                            if n.desired != n.current {
                                n.cond.Signal()
                            }
                        }()
                    }
                }
            }
        }, 0, abort)
    })
    runtime.Until(func() { n.serviceLoop(finished) }, 0, abort)
}

// serviceLoop waits for changes, and calls Start()/Stop() as needed.
func (n *notifier) serviceLoop(abort <-chan struct{}) {
    n.lock.Lock()
    defer n.lock.Unlock()
    for {
        select {
        case <-abort:
            return
        default:
            for n.desired == n.current {
                ch := runtime.After(n.cond.Wait)
                select {
                case <-abort:
                    n.cond.Signal() // ensure that Wait() returns
                    <-ch
                    return
                case <-ch:
                    // we were notified and have the lock, proceed..
                }
            }
            if n.current != n.id && n.desired == n.id {
                n.service.Validate(n.desired, n.current)
                n.service.Start()
            } else if n.current == n.id && n.desired != n.id {
                n.service.Stop()
            }
            n.current = n.desired
        }
    }
}
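A minimal sketch of wiring Notify to a Service, using the Fake elector from fake.go above; the Service implementation is a placeholder, and because Start()/Stop() run asynchronously a real caller would synchronize rather than rely on ordering.

package main

import (
    "fmt"

    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/election"
)

// loggingService is a placeholder Service implementation.
type loggingService struct{}

func (loggingService) Validate(desired, current election.Master) {}
func (loggingService) Start()                                    { fmt.Println("became master: starting") }
func (loggingService) Stop()                                     { fmt.Println("lost mastership: stopping") }

func main() {
    elector := election.NewFake()
    abort := make(chan struct{})

    // Notify never returns, so run it on its own goroutine.
    go election.Notify(elector, "/election/example", "me", loggingService{}, abort)

    elector.ChangeMaster(election.Master("me"))    // eventually triggers Start()
    elector.ChangeMaster(election.Master("other")) // eventually triggers Stop()
    close(abort)
}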
98
contrib/mesos/pkg/election/master_test.go
Normal file
@@ -0,0 +1,98 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package election

import (
    "testing"
    "time"

    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
)

type slowService struct {
    t  *testing.T
    on bool
    // We explicitly have no lock to prove that
    // Start and Stop are not called concurrently.
    changes chan<- bool
    done    <-chan struct{}
}

func (s *slowService) Validate(d, c Master) {
    // noop
}

func (s *slowService) Start() {
    select {
    case <-s.done:
        return // avoid writing to closed changes chan
    default:
    }
    if s.on {
        s.t.Errorf("started already-on service")
    }
    time.Sleep(2 * time.Millisecond)
    s.on = true
    s.changes <- true
}

func (s *slowService) Stop() {
    select {
    case <-s.done:
        return // avoid writing to closed changes chan
    default:
    }
    if !s.on {
        s.t.Errorf("stopped already-off service")
    }
    time.Sleep(2 * time.Millisecond)
    s.on = false
    s.changes <- false
}

func Test(t *testing.T) {
    m := NewFake()
    changes := make(chan bool, 1500)
    done := make(chan struct{})
    s := &slowService{t: t, changes: changes, done: done}
    notifyDone := runtime.After(func() { Notify(m, "", "me", s, done) })

    go func() {
        defer close(done)
        for i := 0; i < 500; i++ {
            for _, key := range []string{"me", "notme", "alsonotme"} {
                m.ChangeMaster(Master(key))
            }
        }
    }()

    <-notifyDone
    close(changes)

    changeList := []bool{}
    for {
        change, ok := <-changes
        if !ok {
            break
        }
        changeList = append(changeList, change)
    }

    if len(changeList) > 1000 {
        t.Errorf("unexpected number of changes: %v", len(changeList))
    }
}
29
contrib/mesos/pkg/executor/config/config.go
Normal file
@@ -0,0 +1,29 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package config

import (
    "time"
)

// default values to use when constructing mesos ExecutorInfo messages
const (
    DefaultInfoID         = "k8sm-executor"
    DefaultInfoSource     = "kubernetes"
    DefaultInfoName       = "Kubelet-Executor"
    DefaultSuicideTimeout = 20 * time.Minute
)
18
contrib/mesos/pkg/executor/config/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package config contains executor configuration constants.
package config
21
contrib/mesos/pkg/executor/doc.go
Normal file
@@ -0,0 +1,21 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

/*
Package executor includes a Mesos executor, which embeds
a kubelet to manage containers.
*/
package executor
846
contrib/mesos/pkg/executor/executor.go
Normal file
846
contrib/mesos/pkg/executor/executor.go
Normal file
@@ -0,0 +1,846 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package executor
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
|
||||
"github.com/fsouza/go-dockerclient"
|
||||
"github.com/gogo/protobuf/proto"
|
||||
log "github.com/golang/glog"
|
||||
bindings "github.com/mesos/mesos-go/executor"
|
||||
mesos "github.com/mesos/mesos-go/mesosproto"
|
||||
mutil "github.com/mesos/mesos-go/mesosutil"
|
||||
)
|
||||
|
||||
const (
|
||||
containerPollTime = 300 * time.Millisecond
|
||||
launchGracePeriod = 5 * time.Minute
|
||||
)
|
||||
|
||||
type stateType int32
|
||||
|
||||
const (
|
||||
disconnectedState stateType = iota
|
||||
connectedState
|
||||
suicidalState
|
||||
terminalState
|
||||
)
|
||||
|
||||
func (s *stateType) get() stateType {
|
||||
return stateType(atomic.LoadInt32((*int32)(s)))
|
||||
}
|
||||
|
||||
func (s *stateType) transition(from, to stateType) bool {
|
||||
return atomic.CompareAndSwapInt32((*int32)(s), int32(from), int32(to))
|
||||
}
|
||||
|
||||
func (s *stateType) transitionTo(to stateType, unless ...stateType) bool {
|
||||
if len(unless) == 0 {
|
||||
atomic.StoreInt32((*int32)(s), int32(to))
|
||||
return true
|
||||
}
|
||||
for {
|
||||
state := s.get()
|
||||
for _, x := range unless {
|
||||
if state == x {
|
||||
return false
|
||||
}
|
||||
}
|
||||
if s.transition(state, to) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type kuberTask struct {
|
||||
mesosTaskInfo *mesos.TaskInfo
|
||||
podName string
|
||||
}
|
||||
|
||||
// func that attempts suicide
|
||||
type jumper func(bindings.ExecutorDriver, <-chan struct{})
|
||||
|
||||
type suicideWatcher interface {
|
||||
Next(time.Duration, bindings.ExecutorDriver, jumper) suicideWatcher
|
||||
Reset(time.Duration) bool
|
||||
Stop() bool
|
||||
}
|
||||
|
||||
type podStatusFunc func() (*api.PodStatus, error)
|
||||
|
||||
// KubernetesExecutor is an mesos executor that runs pods
|
||||
// in a minion machine.
|
||||
type KubernetesExecutor struct {
|
||||
kl *kubelet.Kubelet // the kubelet instance.
|
||||
updateChan chan<- interface{} // to send pod config updates to the kubelet
|
||||
state stateType
|
||||
tasks map[string]*kuberTask
|
||||
pods map[string]*api.Pod
|
||||
lock sync.RWMutex
|
||||
sourcename string
|
||||
client *client.Client
|
||||
events <-chan watch.Event
|
||||
done chan struct{} // signals shutdown
|
||||
outgoing chan func() (mesos.Status, error) // outgoing queue to the mesos driver
|
||||
dockerClient dockertools.DockerInterface
|
||||
suicideWatch suicideWatcher
|
||||
suicideTimeout time.Duration
|
||||
shutdownAlert func() // invoked just prior to executor shutdown
|
||||
kubeletFinished <-chan struct{} // signals that kubelet Run() died
|
||||
initialRegistration sync.Once
|
||||
exitFunc func(int)
|
||||
podStatusFunc func(*kubelet.Kubelet, *api.Pod) (*api.PodStatus, error)
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
Kubelet *kubelet.Kubelet
|
||||
Updates chan<- interface{} // to send pod config updates to the kubelet
|
||||
SourceName string
|
||||
APIClient *client.Client
|
||||
Watch watch.Interface
|
||||
Docker dockertools.DockerInterface
|
||||
ShutdownAlert func()
|
||||
SuicideTimeout time.Duration
|
||||
KubeletFinished <-chan struct{} // signals that kubelet Run() died
|
||||
ExitFunc func(int)
|
||||
PodStatusFunc func(*kubelet.Kubelet, *api.Pod) (*api.PodStatus, error)
|
||||
}
|
||||
|
||||
func (k *KubernetesExecutor) isConnected() bool {
|
||||
return connectedState == (&k.state).get()
|
||||
}
|
||||
|
||||
// New creates a new kubernetes executor.
|
||||
func New(config Config) *KubernetesExecutor {
|
||||
k := &KubernetesExecutor{
|
||||
kl: config.Kubelet,
|
||||
updateChan: config.Updates,
|
||||
state: disconnectedState,
|
||||
tasks: make(map[string]*kuberTask),
|
||||
pods: make(map[string]*api.Pod),
|
||||
sourcename: config.SourceName,
|
||||
client: config.APIClient,
|
||||
done: make(chan struct{}),
|
||||
outgoing: make(chan func() (mesos.Status, error), 1024),
|
||||
dockerClient: config.Docker,
|
||||
suicideTimeout: config.SuicideTimeout,
|
||||
kubeletFinished: config.KubeletFinished,
|
||||
suicideWatch: &suicideTimer{},
|
||||
shutdownAlert: config.ShutdownAlert,
|
||||
exitFunc: config.ExitFunc,
|
||||
podStatusFunc: config.PodStatusFunc,
|
||||
}
|
||||
//TODO(jdef) do something real with these events..
|
||||
if config.Watch != nil {
|
||||
events := config.Watch.ResultChan()
|
||||
if events != nil {
|
||||
go func() {
|
||||
for e := range events {
|
||||
// e ~= watch.Event { ADDED, *api.Event }
|
||||
log.V(1).Info(e)
|
||||
}
|
||||
}()
|
||||
k.events = events
|
||||
}
|
||||
}
|
||||
return k
|
||||
}
|
||||
|
||||
func (k *KubernetesExecutor) Init(driver bindings.ExecutorDriver) {
|
||||
k.killKubeletContainers()
|
||||
k.resetSuicideWatch(driver)
|
||||
go k.sendLoop()
|
||||
//TODO(jdef) monitor kubeletFinished and shutdown if it happens
|
||||
}
|
||||
|
||||
func (k *KubernetesExecutor) Done() <-chan struct{} {
|
||||
return k.done
|
||||
}
|
||||
|
||||
func (k *KubernetesExecutor) isDone() bool {
|
||||
select {
|
||||
case <-k.done:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Registered is called when the executor is successfully registered with the slave.
|
||||
func (k *KubernetesExecutor) Registered(driver bindings.ExecutorDriver,
|
||||
executorInfo *mesos.ExecutorInfo, frameworkInfo *mesos.FrameworkInfo, slaveInfo *mesos.SlaveInfo) {
|
||||
if k.isDone() {
|
||||
return
|
||||
}
|
||||
log.Infof("Executor %v of framework %v registered with slave %v\n",
|
||||
executorInfo, frameworkInfo, slaveInfo)
|
||||
if !(&k.state).transition(disconnectedState, connectedState) {
|
||||
log.Errorf("failed to register/transition to a connected state")
|
||||
}
|
||||
k.initialRegistration.Do(k.onInitialRegistration)
|
||||
}
|
||||
|
||||
// Reregistered is called when the executor is successfully re-registered with the slave.
|
||||
// This can happen when the slave fails over.
|
||||
func (k *KubernetesExecutor) Reregistered(driver bindings.ExecutorDriver, slaveInfo *mesos.SlaveInfo) {
|
||||
if k.isDone() {
|
||||
return
|
||||
}
|
||||
log.Infof("Reregistered with slave %v\n", slaveInfo)
|
||||
if !(&k.state).transition(disconnectedState, connectedState) {
|
||||
log.Errorf("failed to reregister/transition to a connected state")
|
||||
}
|
||||
k.initialRegistration.Do(k.onInitialRegistration)
|
||||
}
|
||||
|
||||
func (k *KubernetesExecutor) onInitialRegistration() {
|
||||
// emit an empty update to allow the mesos "source" to be marked as seen
|
||||
k.updateChan <- kubelet.PodUpdate{
|
||||
Pods: []*api.Pod{},
|
||||
Op: kubelet.SET,
|
||||
Source: k.sourcename,
|
||||
}
|
||||
}
|
||||
|
||||
// Disconnected is called when the executor is disconnected from the slave.
|
||||
func (k *KubernetesExecutor) Disconnected(driver bindings.ExecutorDriver) {
|
||||
if k.isDone() {
|
||||
return
|
||||
}
|
||||
log.Infof("Slave is disconnected\n")
|
||||
if !(&k.state).transition(connectedState, disconnectedState) {
|
||||
log.Errorf("failed to disconnect/transition to a disconnected state")
|
||||
}
|
||||
}
|
||||
|
||||
// LaunchTask is called when the executor receives a request to launch a task.
|
||||
// The happens when the k8sm scheduler has decided to schedule the pod
|
||||
// (which corresponds to a Mesos Task) onto the node where this executor
|
||||
// is running, but the binding is not recorded in the Kubernetes store yet.
|
||||
// This function is invoked to tell the executor to record the binding in the
|
||||
// Kubernetes store and start the pod via the Kubelet.
|
||||
func (k *KubernetesExecutor) LaunchTask(driver bindings.ExecutorDriver, taskInfo *mesos.TaskInfo) {
|
||||
if k.isDone() {
|
||||
return
|
||||
}
|
||||
log.Infof("Launch task %v\n", taskInfo)
|
||||
|
||||
if !k.isConnected() {
|
||||
log.Errorf("Ignore launch task because the executor is disconnected\n")
|
||||
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
|
||||
messages.ExecutorUnregistered))
|
||||
return
|
||||
}
|
||||
|
||||
obj, err := api.Codec.Decode(taskInfo.GetData())
|
||||
if err != nil {
|
||||
log.Errorf("failed to extract yaml data from the taskInfo.data %v", err)
|
||||
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
|
||||
messages.UnmarshalTaskDataFailure))
|
||||
return
|
||||
}
|
||||
pod, ok := obj.(*api.Pod)
|
||||
if !ok {
|
||||
log.Errorf("expected *api.Pod instead of %T: %+v", pod, pod)
|
||||
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
|
||||
messages.UnmarshalTaskDataFailure))
|
||||
return
|
||||
}
|
||||
|
||||
k.lock.Lock()
|
||||
defer k.lock.Unlock()
|
||||
|
||||
taskId := taskInfo.GetTaskId().GetValue()
|
||||
if _, found := k.tasks[taskId]; found {
|
||||
log.Errorf("task already launched\n")
|
||||
// Not to send back TASK_RUNNING here, because
|
||||
// may be duplicated messages or duplicated task id.
|
||||
return
|
||||
}
|
||||
// remember this task so that:
|
||||
// (a) we ignore future launches for it
|
||||
// (b) we have a record of it so that we can kill it if needed
|
||||
// (c) we're leaving podName == "" for now, indicates we don't need to delete containers
|
||||
k.tasks[taskId] = &kuberTask{
|
||||
mesosTaskInfo: taskInfo,
|
||||
}
|
||||
k.resetSuicideWatch(driver)
|
||||
|
||||
go k.launchTask(driver, taskId, pod)
|
||||
}
|
||||
|
||||
// TODO(jdef) add metrics for this?
|
||||
type suicideTimer struct {
|
||||
timer *time.Timer
|
||||
}
|
||||
|
||||
func (w *suicideTimer) Next(d time.Duration, driver bindings.ExecutorDriver, f jumper) suicideWatcher {
|
||||
return &suicideTimer{
|
||||
timer: time.AfterFunc(d, func() {
|
||||
log.Warningf("Suicide timeout (%v) expired", d)
|
||||
f(driver, nil)
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
func (w *suicideTimer) Stop() (result bool) {
|
||||
if w != nil && w.timer != nil {
|
||||
log.Infoln("stopping suicide watch") //TODO(jdef) debug
|
||||
result = w.timer.Stop()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// return true if the timer was successfully reset
|
||||
func (w *suicideTimer) Reset(d time.Duration) bool {
|
||||
if w != nil && w.timer != nil {
|
||||
log.Infoln("resetting suicide watch") //TODO(jdef) debug
|
||||
w.timer.Reset(d)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// determine whether we need to start a suicide countdown. if so, then start
|
||||
// a timer that, upon expiration, causes this executor to commit suicide.
|
||||
// this implementation runs asynchronously. callers that wish to wait for the
|
||||
// reset to complete may wait for the returned signal chan to close.
|
||||
func (k *KubernetesExecutor) resetSuicideWatch(driver bindings.ExecutorDriver) <-chan struct{} {
|
||||
ch := make(chan struct{})
|
||||
go func() {
|
||||
defer close(ch)
|
||||
k.lock.Lock()
|
||||
defer k.lock.Unlock()
|
||||
|
||||
if k.suicideTimeout < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
if k.suicideWatch != nil {
|
||||
if len(k.tasks) > 0 {
|
||||
k.suicideWatch.Stop()
|
||||
return
|
||||
}
|
||||
if k.suicideWatch.Reset(k.suicideTimeout) {
|
||||
// valid timer, reset was successful
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
//TODO(jdef) reduce verbosity here once we're convinced that suicide watch is working properly
|
||||
log.Infof("resetting suicide watch timer for %v", k.suicideTimeout)
|
||||
|
||||
k.suicideWatch = k.suicideWatch.Next(k.suicideTimeout, driver, jumper(k.attemptSuicide))
|
||||
}()
|
||||
return ch
|
||||
}
|
||||
|
||||
func (k *KubernetesExecutor) attemptSuicide(driver bindings.ExecutorDriver, abort <-chan struct{}) {
|
||||
k.lock.Lock()
|
||||
defer k.lock.Unlock()
|
||||
|
||||
// this attempt may have been queued and since been aborted
|
||||
select {
|
||||
case <-abort:
|
||||
//TODO(jdef) reduce verbosity once suicide watch is working properly
|
||||
log.Infof("aborting suicide attempt since watch was cancelled")
|
||||
return
|
||||
default: // continue
|
||||
}
|
||||
|
||||
// fail-safe, will abort kamikaze attempts if there are tasks
|
||||
if len(k.tasks) > 0 {
|
||||
ids := []string{}
|
||||
for taskid := range k.tasks {
|
||||
ids = append(ids, taskid)
|
||||
}
|
||||
log.Errorf("suicide attempt failed, there are still running tasks: %v", ids)
|
||||
return
|
||||
}
|
||||
|
||||
log.Infoln("Attempting suicide")
|
||||
if (&k.state).transitionTo(suicidalState, suicidalState, terminalState) {
|
||||
//TODO(jdef) let the scheduler know?
|
||||
//TODO(jdef) is suicide more graceful than slave-demanded shutdown?
|
||||
k.doShutdown(driver)
|
||||
}
|
||||
}
|
||||

// async continuation of LaunchTask
func (k *KubernetesExecutor) launchTask(driver bindings.ExecutorDriver, taskId string, pod *api.Pod) {

	//HACK(jdef): cloned binding construction from k8s plugin/pkg/scheduler/scheduler.go
	binding := &api.Binding{
		ObjectMeta: api.ObjectMeta{
			Namespace:   pod.Namespace,
			Name:        pod.Name,
			Annotations: make(map[string]string),
		},
		Target: api.ObjectReference{
			Kind: "Node",
			Name: pod.Annotations[meta.BindingHostKey],
		},
	}

	// forward the annotations that the scheduler wants to apply
	for k, v := range pod.Annotations {
		binding.Annotations[k] = v
	}

	deleteTask := func() {
		k.lock.Lock()
		defer k.lock.Unlock()
		delete(k.tasks, taskId)
		k.resetSuicideWatch(driver)
	}

	log.Infof("Binding '%v/%v' to '%v' with annotations %+v...", pod.Namespace, pod.Name, binding.Target.Name, binding.Annotations)
	ctx := api.WithNamespace(api.NewContext(), binding.Namespace)
	// TODO(k8s): use Pods interface for binding once clusters are upgraded
	// return b.Pods(binding.Namespace).Bind(binding)
	err := k.client.Post().Namespace(api.NamespaceValue(ctx)).Resource("bindings").Body(binding).Do().Error()
	if err != nil {
		deleteTask()
		k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED,
			messages.CreateBindingFailure))
		return
	}
	podFullName := container.GetPodFullName(pod)

	// allow a recently failed-over scheduler the chance to recover the task/pod binding:
	// it may have failed and recovered before the apiserver is able to report the updated
	// binding information. replays of this status event will signal to the scheduler that
	// the apiserver should be up-to-date.
	data, err := json.Marshal(api.PodStatusResult{
		ObjectMeta: api.ObjectMeta{
			Name:     podFullName,
			SelfLink: "/podstatusresult",
		},
	})
	if err != nil {
		deleteTask()
		log.Errorf("failed to marshal pod status result: %v", err)
		k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED,
			err.Error()))
		return
	}

	k.lock.Lock()
	defer k.lock.Unlock()

	// Add the task.
	task, found := k.tasks[taskId]
	if !found {
		log.V(1).Infof("task %v not found, probably killed: aborting launch, reporting lost", taskId)
		k.reportLostTask(driver, taskId, messages.LaunchTaskFailed)
		return
	}

	//TODO(jdef) check for duplicate pod name, if found send TASK_ERROR

	// from here on, we need to delete containers associated with the task
	// upon it going into a terminal state
	task.podName = podFullName
	k.pods[podFullName] = pod

	// send the latest snapshot of the set of pods to the kubelet via the pod update channel
	update := kubelet.PodUpdate{Op: kubelet.SET}
	for _, p := range k.pods {
		update.Pods = append(update.Pods, p)
	}
	k.updateChan <- update

	statusUpdate := &mesos.TaskStatus{
		TaskId:  mutil.NewTaskID(taskId),
		State:   mesos.TaskState_TASK_STARTING.Enum(),
		Message: proto.String(messages.CreateBindingSuccess),
		Data:    data,
	}
	k.sendStatus(driver, statusUpdate)

	// Delay reporting 'task running' until container is up.
	psf := podStatusFunc(func() (*api.PodStatus, error) {
		return k.podStatusFunc(k.kl, pod)
	})

	go k._launchTask(driver, taskId, podFullName, psf)
}
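// NOTE: launchTask reports TASK_STARTING only after the apiserver binding
// succeeds; the TASK_RUNNING transition is deferred to _launchTask below,
// which polls the pod status until the kubelet reports api.PodRunning or
// launchGracePeriod expires.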

func (k *KubernetesExecutor) _launchTask(driver bindings.ExecutorDriver, taskId, podFullName string, psf podStatusFunc) {

	expired := make(chan struct{})
	time.AfterFunc(launchGracePeriod, func() { close(expired) })

	getMarshalledInfo := func() (data []byte, cancel bool) {
		// potentially long call..
		if podStatus, err := psf(); err == nil && podStatus != nil {
			select {
			case <-expired:
				cancel = true
			default:
				k.lock.Lock()
				defer k.lock.Unlock()
				if _, found := k.tasks[taskId]; !found {
					// don't bother with the pod status if the task is already gone
					cancel = true
					break
				} else if podStatus.Phase != api.PodRunning {
					// avoid sending back a running status before it's really running
					break
				}
				log.V(2).Infof("Found pod status: '%v'", podStatus)
				result := api.PodStatusResult{
					ObjectMeta: api.ObjectMeta{
						Name:     podFullName,
						SelfLink: "/podstatusresult",
					},
					Status: *podStatus,
				}
				if data, err = json.Marshal(result); err != nil {
					log.Errorf("failed to marshal pod status result: %v", err)
				}
			}
		}
		return
	}

waitForRunningPod:
	for {
		select {
		case <-expired:
			log.Warningf("launch grace period of '%v' expired", launchGracePeriod)
			break waitForRunningPod
		case <-time.After(containerPollTime):
			if data, cancel := getMarshalledInfo(); cancel {
				break waitForRunningPod
			} else if data == nil {
				continue waitForRunningPod
			} else {
				k.lock.Lock()
				defer k.lock.Unlock()
				if _, found := k.tasks[taskId]; !found {
					goto reportLost
				}

				statusUpdate := &mesos.TaskStatus{
					TaskId:  mutil.NewTaskID(taskId),
					State:   mesos.TaskState_TASK_RUNNING.Enum(),
					Message: proto.String(fmt.Sprintf("pod-running:%s", podFullName)),
					Data:    data,
				}

				k.sendStatus(driver, statusUpdate)

				// continue to monitor the health of the pod
				go k.__launchTask(driver, taskId, podFullName, psf)
				return
			}
		}
	}

	k.lock.Lock()
	defer k.lock.Unlock()
reportLost:
	k.reportLostTask(driver, taskId, messages.LaunchTaskFailed)
}
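// NOTE: in the wait loop above the executor lock is acquired at most once:
// every branch that locks either returns or jumps to reportLost, so the
// deferred Unlock never stacks across iterations, and the goto deliberately
// skips the second Lock to avoid self-deadlock.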

func (k *KubernetesExecutor) __launchTask(driver bindings.ExecutorDriver, taskId, podFullName string, psf podStatusFunc) {
	// TODO(nnielsen): Monitor health of pod and report if lost.
	// Should we also allow this to fail a couple of times before reporting lost?
	// What if the docker daemon is restarting and we can't connect, but it's
	// going to bring the pods back online as soon as it restarts?
	knownPod := func() bool {
		_, err := psf()
		return err == nil
	}
	// Wait for the pod to go away and stop monitoring once it does
	// TODO (jdefelice) replace with an /events watch?
	for {
		time.Sleep(containerPollTime)
		if k.checkForLostPodTask(driver, taskId, knownPod) {
			return
		}
	}
}

// Intended to be executed as part of the pod monitoring loop, this fn (ultimately) checks with Docker
// whether the pod is running. It will only return false if the task is still registered and the pod is
// registered in Docker. Otherwise it returns true. If there's still a task record on file, but no pod
// in Docker, then we'll also send a TASK_LOST event.
func (k *KubernetesExecutor) checkForLostPodTask(driver bindings.ExecutorDriver, taskId string, isKnownPod func() bool) bool {
	// TODO (jdefelice) don't send false alarms for deleted pods (KILLED tasks)
	k.lock.Lock()
	defer k.lock.Unlock()

	// TODO(jdef) we should really consider k.pods here, along with what docker is reporting, since the
	// kubelet may constantly attempt to instantiate a pod as long as it's in the pod state that we're
	// handing to it. otherwise, we're probably reporting a TASK_LOST prematurely. Should probably
	// consult RestartPolicy to determine appropriate behavior. Should probably also gracefully handle
	// docker daemon restarts.
	if _, ok := k.tasks[taskId]; ok {
		if isKnownPod() {
			return false
		} else {
			log.Warningf("Detected lost pod, reporting lost task %v", taskId)
			k.reportLostTask(driver, taskId, messages.ContainersDisappeared)
		}
	} else {
		log.V(2).Infof("Task %v no longer registered, stop monitoring for lost pods", taskId)
	}
	return true
}

// KillTask is called when the executor receives a request to kill a task.
func (k *KubernetesExecutor) KillTask(driver bindings.ExecutorDriver, taskId *mesos.TaskID) {
	if k.isDone() {
		return
	}
	log.Infof("Kill task %v\n", taskId)

	if !k.isConnected() {
		//TODO(jdefelice) send TASK_LOST here?
		log.Warningf("Ignoring kill task because the executor is disconnected\n")
		return
	}

	k.lock.Lock()
	defer k.lock.Unlock()
	k.removePodTask(driver, taskId.GetValue(), messages.TaskKilled, mesos.TaskState_TASK_KILLED)
}

// reportLostTask reports a lost task to the slave and updates internal task and pod tracking state.
// Assumes that the caller is locking around pod and task state.
func (k *KubernetesExecutor) reportLostTask(driver bindings.ExecutorDriver, tid, reason string) {
	k.removePodTask(driver, tid, reason, mesos.TaskState_TASK_LOST)
}

// removePodTask deletes the pod and task associated with the task identified by tid and sends a task
// status update to mesos. It also attempts to reset the suicide watch.
// Assumes that the caller is locking around pod and task state.
func (k *KubernetesExecutor) removePodTask(driver bindings.ExecutorDriver, tid, reason string, state mesos.TaskState) {
	task, ok := k.tasks[tid]
	if !ok {
		log.V(1).Infof("Failed to remove task, unknown task %v\n", tid)
		return
	}
	delete(k.tasks, tid)
	k.resetSuicideWatch(driver)

	pid := task.podName
	if _, found := k.pods[pid]; !found {
		log.Warningf("Cannot remove unknown pod %v for task %v", pid, tid)
	} else {
		log.V(2).Infof("deleting pod %v for task %v", pid, tid)
		delete(k.pods, pid)

		// Send the pod updates to the channel.
		update := kubelet.PodUpdate{Op: kubelet.SET}
		for _, p := range k.pods {
			update.Pods = append(update.Pods, p)
		}
		k.updateChan <- update
	}
	// TODO(jdef): ensure that the update propagates, perhaps return a signal chan?
	k.sendStatus(driver, newStatus(mutil.NewTaskID(tid), state, reason))
}
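// NOTE: removePodTask (above) and launchTask always push complete
// kubelet.SET snapshots rather than incremental ADD/REMOVE ops, so a missed
// update cannot leave the kubelet with stale pod state; the next snapshot
// fully replaces whatever it has.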

// FrameworkMessage is called when the framework sends some message to the executor
func (k *KubernetesExecutor) FrameworkMessage(driver bindings.ExecutorDriver, message string) {
	if k.isDone() {
		return
	}
	if !k.isConnected() {
		log.Warningf("Ignoring framework message because the executor is disconnected\n")
		return
	}

	log.Infof("Received message from framework %v\n", message)
	//TODO(jdef) master reported a lost task, reconcile this! @see scheduler.go:handleTaskLost
	if strings.HasPrefix(message, "task-lost:") && len(message) > 10 {
		taskId := message[10:]
		if taskId != "" {
			// clean up pod state
			k.lock.Lock()
			defer k.lock.Unlock()
			k.reportLostTask(driver, taskId, messages.TaskLostAck)
		}
	}

	switch message {
	case messages.Kamikaze:
		k.attemptSuicide(driver, nil)
	}
}
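// NOTE: a scheduler-side lost-task notification arrives as a plain framework
// message of the form "task-lost:"+taskId (the taskId value is whatever the
// scheduler assigned), and the executor acknowledges it by cleaning up and
// sending a TASK_LOST status update tagged messages.TaskLostAck.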

// Shutdown is called when the executor receives a shutdown request.
func (k *KubernetesExecutor) Shutdown(driver bindings.ExecutorDriver) {
	k.lock.Lock()
	defer k.lock.Unlock()
	k.doShutdown(driver)
}

// doShutdown assumes that the caller has obtained the state lock.
func (k *KubernetesExecutor) doShutdown(driver bindings.ExecutorDriver) {
	defer func() {
		log.Errorf("exiting with unclean shutdown: %v", recover())
		if k.exitFunc != nil {
			k.exitFunc(1)
		}
	}()

	(&k.state).transitionTo(terminalState)

	// signal to all listeners that this KubeletExecutor is done!
	close(k.done)

	if k.shutdownAlert != nil {
		func() {
			defer util.HandleCrash()
			k.shutdownAlert()
		}()
	}

	log.Infoln("Stopping executor driver")
	_, err := driver.Stop()
	if err != nil {
		log.Warningf("failed to stop executor driver: %v", err)
	}

	log.Infoln("Shutting down the executor")

	// according to the docs, mesos will generate TASK_LOST updates for us
	// if needed, so don't take extra time to do that here.
	k.tasks = map[string]*kuberTask{}

	select {
	// the main Run() func may still be running... wait for it to finish: it will
	// clear the pod configuration cleanly, telling k8s "there are no pods" and
	// clean up resources (pods, volumes, etc).
	case <-k.kubeletFinished:

	//TODO(jdef) attempt to wait for events to propagate to the API server?

	// TODO(jdef) extract constant, should be smaller than whatever the
	// slave graceful shutdown timeout period is.
	case <-time.After(15 * time.Second):
		log.Errorf("timed out waiting for kubelet Run() to die")
	}

	log.Infoln("exiting")
	if k.exitFunc != nil {
		k.exitFunc(0)
	}
}
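// NOTE: doShutdown (above) tears down in a fixed order: mark the state
// terminal, close k.done (which stops sendLoop and the status senders), fire
// the shutdown alert, stop the driver, drop all task records, then wait at
// most 15s for the kubelet's Run() to drain pod config before exiting. The
// deferred recover() turns any panic along the way into a non-zero exit.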

// killKubeletContainers destroys existing k8s containers
func (k *KubernetesExecutor) killKubeletContainers() {
	if containers, err := dockertools.GetKubeletDockerContainers(k.dockerClient, true); err == nil {
		opts := docker.RemoveContainerOptions{
			RemoveVolumes: true,
			Force:         true,
		}
		for _, container := range containers {
			opts.ID = container.ID
			log.V(2).Infof("Removing container: %v", opts.ID)
			if err := k.dockerClient.RemoveContainer(opts); err != nil {
				log.Warning(err)
			}
		}
	} else {
		log.Warningf("Failed to list kubelet docker containers: %v", err)
	}
}

// Error is called when some error happens.
func (k *KubernetesExecutor) Error(driver bindings.ExecutorDriver, message string) {
	log.Errorln(message)
}

func newStatus(taskId *mesos.TaskID, state mesos.TaskState, message string) *mesos.TaskStatus {
	return &mesos.TaskStatus{
		TaskId:  taskId,
		State:   &state,
		Message: proto.String(message),
	}
}

func (k *KubernetesExecutor) sendStatus(driver bindings.ExecutorDriver, status *mesos.TaskStatus) {
	select {
	case <-k.done:
	default:
		k.outgoing <- func() (mesos.Status, error) { return driver.SendStatusUpdate(status) }
	}
}

func (k *KubernetesExecutor) sendFrameworkMessage(driver bindings.ExecutorDriver, msg string) {
	select {
	case <-k.done:
	default:
		k.outgoing <- func() (mesos.Status, error) { return driver.SendFrameworkMessage(msg) }
	}
}

func (k *KubernetesExecutor) sendLoop() {
	defer log.V(1).Info("sender loop exiting")
	for {
		select {
		case <-k.done:
			return
		default:
			if !k.isConnected() {
				select {
				case <-k.done:
				case <-time.After(1 * time.Second):
				}
				continue
			}
			sender, ok := <-k.outgoing
			if !ok {
				// programming error
				panic("someone closed the outgoing channel")
			}
			if status, err := sender(); err == nil {
				continue
			} else {
				log.Error(err)
				if status == mesos.Status_DRIVER_ABORTED {
					return
				}
			}
			// attempt to re-queue the sender
			select {
			case <-k.done:
			case k.outgoing <- sender:
			}
		}
	}
}
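// NOTE: the outgoing channel decouples producing a status update from
// delivering it; senders enqueue closures, for example:
//
//	k.outgoing <- func() (mesos.Status, error) {
//		return driver.SendStatusUpdate(status)
//	}
//
// and sendLoop drains the queue only while connected, re-queueing a sender
// after a transient error so updates are not silently dropped.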
618
contrib/mesos/pkg/executor/executor_test.go
Normal file
@@ -0,0 +1,618 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package executor

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"reflect"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	assertext "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/assert"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
	kmruntime "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"

	"github.com/golang/glog"
	bindings "github.com/mesos/mesos-go/executor"
	"github.com/mesos/mesos-go/mesosproto"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/mock"
)

type suicideTracker struct {
	suicideWatcher
	stops  uint32
	resets uint32
	timers uint32
	jumps  *uint32
}

func (t *suicideTracker) Reset(d time.Duration) bool {
	defer func() { t.resets++ }()
	return t.suicideWatcher.Reset(d)
}

func (t *suicideTracker) Stop() bool {
	defer func() { t.stops++ }()
	return t.suicideWatcher.Stop()
}

func (t *suicideTracker) Next(d time.Duration, driver bindings.ExecutorDriver, f jumper) suicideWatcher {
	tracker := &suicideTracker{
		stops:  t.stops,
		resets: t.resets,
		jumps:  t.jumps,
		timers: t.timers + 1,
	}
	jumper := tracker.makeJumper(f)
	tracker.suicideWatcher = t.suicideWatcher.Next(d, driver, jumper)
	return tracker
}

func (t *suicideTracker) makeJumper(_ jumper) jumper {
	return jumper(func(driver bindings.ExecutorDriver, cancel <-chan struct{}) {
		glog.Warningln("jumping?!")
		if t.jumps != nil {
			atomic.AddUint32(t.jumps, 1)
		}
	})
}
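// NOTE: suicideTracker decorates the real suicideWatcher for testing: it
// counts Stop/Reset calls and timers created, and makeJumper swaps the real
// jumper out so a firing timer only bumps an atomic counter instead of
// triggering attemptSuicide; the tests below assert against those counters.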

func TestSuicide_zeroTimeout(t *testing.T) {
	defer glog.Flush()

	k := New(Config{})
	tracker := &suicideTracker{suicideWatcher: k.suicideWatch}
	k.suicideWatch = tracker

	ch := k.resetSuicideWatch(nil)

	select {
	case <-ch:
	case <-time.After(2 * time.Second):
		t.Fatalf("timeout waiting for reset of suicide watch")
	}
	if tracker.stops != 0 {
		t.Fatalf("expected no stops since suicideTimeout was never set")
	}
	if tracker.resets != 0 {
		t.Fatalf("expected no resets since suicideTimeout was never set")
	}
	if tracker.timers != 0 {
		t.Fatalf("expected no timers since suicideTimeout was never set")
	}
}

func TestSuicide_WithTasks(t *testing.T) {
	defer glog.Flush()

	k := New(Config{
		SuicideTimeout: 50 * time.Millisecond,
	})

	jumps := uint32(0)
	tracker := &suicideTracker{suicideWatcher: k.suicideWatch, jumps: &jumps}
	k.suicideWatch = tracker

	k.tasks["foo"] = &kuberTask{} // prevent suicide attempts from succeeding

	// call reset with a nil timer
	glog.Infoln("resetting suicide watch with 1 task")
	select {
	case <-k.resetSuicideWatch(nil):
		tracker = k.suicideWatch.(*suicideTracker)
		if tracker.stops != 1 {
			t.Fatalf("expected suicide attempt to Stop() since there are registered tasks")
		}
		if tracker.resets != 0 {
			t.Fatalf("expected no resets since the watch was only stopped")
		}
		if tracker.timers != 0 {
			t.Fatalf("expected no timers since no new timer should have been created")
		}
	case <-time.After(1 * time.Second):
		t.Fatalf("initial suicide watch setup failed")
	}

	delete(k.tasks, "foo") // zero remaining tasks
	k.suicideTimeout = 1500 * time.Millisecond
	suicideStart := time.Now()

	// reset the suicide watch, which should actually start a timer now
	glog.Infoln("resetting suicide watch with 0 tasks")
	select {
	case <-k.resetSuicideWatch(nil):
		tracker = k.suicideWatch.(*suicideTracker)
		if tracker.stops != 1 {
			t.Fatalf("did not expect suicide attempt to Stop() since there are no registered tasks")
		}
		if tracker.resets != 1 {
			t.Fatalf("expected 1 reset instead of %d", tracker.resets)
		}
		if tracker.timers != 1 {
			t.Fatalf("expected 1 timer instead of %d", tracker.timers)
		}
	case <-time.After(1 * time.Second):
		t.Fatalf("2nd suicide watch setup failed")
	}

	k.lock.Lock()
	k.tasks["foo"] = &kuberTask{} // prevent suicide attempts from succeeding
	k.lock.Unlock()

	// reset the suicide watch, which should stop the existing timer
	glog.Infoln("resetting suicide watch with 1 task")
	select {
	case <-k.resetSuicideWatch(nil):
		tracker = k.suicideWatch.(*suicideTracker)
		if tracker.stops != 2 {
			t.Fatalf("expected 2 stops instead of %d since there are registered tasks", tracker.stops)
		}
		if tracker.resets != 1 {
			t.Fatalf("expected 1 reset instead of %d", tracker.resets)
		}
		if tracker.timers != 1 {
			t.Fatalf("expected 1 timer instead of %d", tracker.timers)
		}
	case <-time.After(1 * time.Second):
		t.Fatalf("3rd suicide watch setup failed")
	}

	k.lock.Lock()
	delete(k.tasks, "foo") // allow suicide attempts to schedule
	k.lock.Unlock()

	// reset the suicide watch, which should reset a stopped timer
	glog.Infoln("resetting suicide watch with 0 tasks")
	select {
	case <-k.resetSuicideWatch(nil):
		tracker = k.suicideWatch.(*suicideTracker)
		if tracker.stops != 2 {
			t.Fatalf("expected 2 stops instead of %d since there are no registered tasks", tracker.stops)
		}
		if tracker.resets != 2 {
			t.Fatalf("expected 2 resets instead of %d", tracker.resets)
		}
		if tracker.timers != 1 {
			t.Fatalf("expected 1 timer instead of %d", tracker.timers)
		}
	case <-time.After(1 * time.Second):
		t.Fatalf("4th suicide watch setup failed")
	}

	sinceWatch := time.Since(suicideStart)
	time.Sleep(3*time.Second - sinceWatch) // give the first timer a chance to misfire (it shouldn't, since Stop() was called)

	if j := atomic.LoadUint32(&jumps); j != 1 {
		t.Fatalf("expected 1 jump instead of %d since stop was called", j)
	} else {
		glog.Infoln("jumps verified") // glog so we get a timestamp
	}
}

// TestExecutorRegister ensures that the executor thinks it is connected
// after Register is called.
func TestExecutorRegister(t *testing.T) {
	mockDriver := &MockExecutorDriver{}
	updates := make(chan interface{}, 1024)
	executor := New(Config{
		Docker:     dockertools.ConnectToDockerOrDie("fake://"),
		Updates:    updates,
		SourceName: "executor_test",
	})

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	initialPodUpdate := kubelet.PodUpdate{
		Pods:   []*api.Pod{},
		Op:     kubelet.SET,
		Source: executor.sourcename,
	}
	receivedInitialPodUpdate := false
	select {
	case m := <-updates:
		update, ok := m.(kubelet.PodUpdate)
		if ok {
			if reflect.DeepEqual(initialPodUpdate, update) {
				receivedInitialPodUpdate = true
			}
		}
	case <-time.After(time.Second):
	}
	assert.Equal(t, true, receivedInitialPodUpdate,
		"executor should have sent an initial PodUpdate "+
			"to the updates chan upon registration")

	assert.Equal(t, true, executor.isConnected(), "executor should be connected")
	mockDriver.AssertExpectations(t)
}

// TestExecutorDisconnect ensures that the executor thinks that it is not
// connected after a call to Disconnected has occurred.
func TestExecutorDisconnect(t *testing.T) {
	mockDriver := &MockExecutorDriver{}
	executor := NewTestKubernetesExecutor()

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)
	executor.Disconnected(mockDriver)

	assert.Equal(t, false, executor.isConnected(),
		"executor should not be connected after Disconnected")
	mockDriver.AssertExpectations(t)
}

// TestExecutorReregister ensures that the executor thinks it is connected
// after a connection problem happens, followed by a call to Reregistered.
func TestExecutorReregister(t *testing.T) {
	mockDriver := &MockExecutorDriver{}
	executor := NewTestKubernetesExecutor()

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)
	executor.Disconnected(mockDriver)
	executor.Reregistered(mockDriver, nil)

	assert.Equal(t, true, executor.isConnected(), "executor should be connected")
	mockDriver.AssertExpectations(t)
}

// TestExecutorLaunchAndKillTask ensures that the executor is able to launch
// and kill tasks while properly bookkeeping its tasks.
func TestExecutorLaunchAndKillTask(t *testing.T) {
	// create a fake pod watch; it backs the fake apiserver created below
	podListWatch := NewMockPodsListWatch(api.PodList{})

	// create fake apiserver
	testApiServer := NewTestServer(t, api.NamespaceDefault, &podListWatch.list)
	defer testApiServer.server.Close()

	mockDriver := &MockExecutorDriver{}
	updates := make(chan interface{}, 1024)
	config := Config{
		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
		Updates: updates,
		APIClient: client.NewOrDie(&client.Config{
			Host:    testApiServer.server.URL,
			Version: testapi.Version(),
		}),
		Kubelet: &kubelet.Kubelet{},
		PodStatusFunc: func(kl *kubelet.Kubelet, pod *api.Pod) (*api.PodStatus, error) {
			return &api.PodStatus{
				ContainerStatuses: []api.ContainerStatus{
					{
						Name: "foo",
						State: api.ContainerState{
							Running: &api.ContainerStateRunning{},
						},
					},
				},
				Phase: api.PodRunning,
			}, nil
		},
	}
	executor := New(config)

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	select {
	case <-updates:
	case <-time.After(time.Second):
		t.Fatalf("Executor should send an initial update on Registration")
	}

	pod := NewTestPod(1)
	podTask, err := podtask.New(api.NewDefaultContext(), "",
		*pod, &mesosproto.ExecutorInfo{})
	assert.Equal(t, nil, err, "must be able to create a task from a pod")

	taskInfo := podTask.BuildTaskInfo()
	data, err := testapi.Codec().Encode(pod)
	assert.Equal(t, nil, err, "must be able to encode a pod's spec data")
	taskInfo.Data = data
	var statusUpdateCalls sync.WaitGroup
	statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 1 && len(executor.pods) == 1
	}, "executor must be able to create a task and a pod")

	gotPodUpdate := false
	select {
	case m := <-updates:
		update, ok := m.(kubelet.PodUpdate)
		if ok && len(update.Pods) == 1 {
			gotPodUpdate = true
		}
	case <-time.After(time.Second):
	}
	assert.Equal(t, true, gotPodUpdate,
		"the executor should send an update about a new pod to "+
			"the updates chan when creating a new one.")

	// Allow some time for asynchronous requests to the driver.
	finished := kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_KILLED,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.KillTask(mockDriver, taskInfo.TaskId)

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 0 && len(executor.pods) == 0
	}, "executor must be able to kill a created task and pod")

	// Allow some time for asynchronous requests to the driver.
	finished = kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for status update calls to finish")
	}
	mockDriver.AssertExpectations(t)
}
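// NOTE: the test above synchronizes with the asynchronous driver calls by
// pairing each mocked SendStatusUpdate expectation with a sync.WaitGroup:
// Add(1) before registering the expectation, Done() in the mock's Run hook,
// and kmruntime.After(statusUpdateCalls.Wait) to turn the blocking Wait into
// a channel that can be selected on with a timeout.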

// TestExecutorFrameworkMessage ensures that the executor is able to
// handle messages from the framework, specifically about lost tasks
// and Kamikaze. When a task is lost, the executor needs to clean up
// its state. When a Kamikaze message is received, the executor should
// attempt suicide.
func TestExecutorFrameworkMessage(t *testing.T) {
	mockDriver := &MockExecutorDriver{}
	kubeletFinished := make(chan struct{})
	config := Config{
		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
		Updates: make(chan interface{}, 1024),
		APIClient: client.NewOrDie(&client.Config{
			Host:    "fakehost",
			Version: testapi.Version(),
		}),
		ShutdownAlert: func() {
			close(kubeletFinished)
		},
		KubeletFinished: kubeletFinished,
	}
	executor := New(config)

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	executor.FrameworkMessage(mockDriver, "test framework message")

	// set up a pod to then lose
	pod := NewTestPod(1)
	podTask, _ := podtask.New(api.NewDefaultContext(), "foo",
		*pod, &mesosproto.ExecutorInfo{})

	taskInfo := podTask.BuildTaskInfo()
	data, _ := testapi.Codec().Encode(pod)
	taskInfo.Data = data

	executor.LaunchTask(mockDriver, taskInfo)

	// send task-lost message for it
	called := make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_LOST,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	executor.FrameworkMessage(mockDriver, "task-lost:foo")
	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 0 && len(executor.pods) == 0
	}, "executor must clean up a lost task and pod")

	select {
	case <-called:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for SendStatusUpdate")
	}

	mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()

	executor.FrameworkMessage(mockDriver, messages.Kamikaze)
	assert.Equal(t, true, executor.isDone(),
		"executor should have shut down after receiving a Kamikaze message")

	mockDriver.AssertExpectations(t)
}

// NewTestPod creates a pod with a given index, requiring one port
func NewTestPod(i int) *api.Pod {
	name := fmt.Sprintf("pod%d", i)
	return &api.Pod{
		TypeMeta: api.TypeMeta{APIVersion: testapi.Version()},
		ObjectMeta: api.ObjectMeta{
			Name:      name,
			Namespace: api.NamespaceDefault,
			SelfLink:  testapi.SelfLink("pods", name),
		},
		Spec: api.PodSpec{
			Containers: []api.Container{
				{
					Ports: []api.ContainerPort{
						{
							ContainerPort: 8000 + i,
							Protocol:      api.ProtocolTCP,
						},
					},
				},
			},
		},
		Status: api.PodStatus{
			Conditions: []api.PodCondition{
				{
					Type:   api.PodReady,
					Status: api.ConditionTrue,
				},
			},
		},
	}
}

// MockPodsListWatch mocks a pods ListWatch, usually listening on the apiserver pods watch endpoint
type MockPodsListWatch struct {
	ListWatch   cache.ListWatch
	fakeWatcher *watch.FakeWatcher
	list        api.PodList
}

// TestServer is an apiserver mock which partially mocks the pods API
type TestServer struct {
	server *httptest.Server
	Stats  map[string]uint
	lock   sync.Mutex
}

func NewTestServer(t *testing.T, namespace string, pods *api.PodList) *TestServer {
	ts := TestServer{
		Stats: map[string]uint{},
	}
	mux := http.NewServeMux()

	mux.HandleFunc(testapi.ResourcePath("bindings", namespace, ""), func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	})

	ts.server = httptest.NewServer(mux)
	return &ts
}

func NewMockPodsListWatch(initialPodList api.PodList) *MockPodsListWatch {
	lw := MockPodsListWatch{
		fakeWatcher: watch.NewFake(),
		list:        initialPodList,
	}
	lw.ListWatch = cache.ListWatch{
		WatchFunc: func(resourceVersion string) (watch.Interface, error) {
			return lw.fakeWatcher, nil
		},
		ListFunc: func() (runtime.Object, error) {
			return &lw.list, nil
		},
	}
	return &lw
}

// TestExecutorShutdown ensures that the executor properly shuts down
// when Shutdown is called.
func TestExecutorShutdown(t *testing.T) {
	mockDriver := &MockExecutorDriver{}
	kubeletFinished := make(chan struct{})
	var exitCalled int32 = 0
	config := Config{
		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
		Updates: make(chan interface{}, 1024),
		ShutdownAlert: func() {
			close(kubeletFinished)
		},
		KubeletFinished: kubeletFinished,
		ExitFunc: func(_ int) {
			atomic.AddInt32(&exitCalled, 1)
		},
	}
	executor := New(config)

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()

	executor.Shutdown(mockDriver)

	assert.Equal(t, false, executor.isConnected(),
		"executor should not be connected after Shutdown")
	assert.Equal(t, true, executor.isDone(),
		"executor should be in Done state after Shutdown")

	select {
	case <-executor.Done():
	default:
		t.Fatal("done channel should be closed after shutdown")
	}

	assert.Equal(t, true, atomic.LoadInt32(&exitCalled) > 0,
		"the executor should call its ExitFunc when it is ready to close down")

	mockDriver.AssertExpectations(t)
}

func TestExecutorsendFrameworkMessage(t *testing.T) {
	mockDriver := &MockExecutorDriver{}
	executor := NewTestKubernetesExecutor()

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	called := make(chan struct{})
	mockDriver.On(
		"SendFrameworkMessage",
		"foo bar baz",
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()
	executor.sendFrameworkMessage(mockDriver, "foo bar baz")

	// guard against data race in mock driver between AssertExpectations and Called
	select {
	case <-called: // expected
	case <-time.After(5 * time.Second):
		t.Fatalf("expected call to SendFrameworkMessage")
	}
	mockDriver.AssertExpectations(t)
}
18
contrib/mesos/pkg/executor/messages/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package messages exposes executor event/message names as constants.
package messages
32
contrib/mesos/pkg/executor/messages/messages.go
Normal file
@@ -0,0 +1,32 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package messages

// messages that ship with TaskStatus objects

const (
	ContainersDisappeared    = "containers-disappeared"
	CreateBindingFailure     = "create-binding-failure"
	CreateBindingSuccess     = "create-binding-success"
	ExecutorUnregistered     = "executor-unregistered"
	ExecutorShutdown         = "executor-shutdown"
	LaunchTaskFailed         = "launch-task-failed"
	TaskKilled               = "task-killed"
	UnmarshalTaskDataFailure = "unmarshal-task-data-failure"
	TaskLostAck              = "task-lost-ack" // executor acknowledgement of forwarded TASK_LOST framework message
	Kamikaze                 = "kamikaze"
)
81
contrib/mesos/pkg/executor/mock_test.go
Normal file
@@ -0,0 +1,81 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package executor

import (
	"testing"

	"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
	"github.com/mesos/mesos-go/mesosproto"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/mock"
)

type MockExecutorDriver struct {
	mock.Mock
}

func (m *MockExecutorDriver) Start() (mesosproto.Status, error) {
	args := m.Called()
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func (m *MockExecutorDriver) Stop() (mesosproto.Status, error) {
	args := m.Called()
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func (m *MockExecutorDriver) Abort() (mesosproto.Status, error) {
	args := m.Called()
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func (m *MockExecutorDriver) Join() (mesosproto.Status, error) {
	args := m.Called()
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func (m *MockExecutorDriver) Run() (mesosproto.Status, error) {
	args := m.Called()
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func (m *MockExecutorDriver) SendStatusUpdate(taskStatus *mesosproto.TaskStatus) (mesosproto.Status, error) {
	args := m.Called(*taskStatus.State)
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func (m *MockExecutorDriver) SendFrameworkMessage(msg string) (mesosproto.Status, error) {
	args := m.Called(msg)
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func NewTestKubernetesExecutor() *KubernetesExecutor {
	return New(Config{
		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
		Updates: make(chan interface{}, 1024),
	})
}

func TestExecutorNew(t *testing.T) {
	mockDriver := &MockExecutorDriver{}
	executor := NewTestKubernetesExecutor()
	executor.Init(mockDriver)

	assert.Equal(t, executor.isDone(), false, "executor should not be in Done state on initialization")
	assert.Equal(t, executor.isConnected(), false, "executor should not be connected on initialization")
}
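// NOTE: SendStatusUpdate records only the TaskState (m.Called(*taskStatus.State)),
// so tests can assert "a TASK_RUNNING update was sent" without having to
// match the full TaskStatus payload.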
18
contrib/mesos/pkg/executor/service/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package service contains the cmd/k8sm-executor glue code.
package service
600
contrib/mesos/pkg/executor/service/service.go
Normal file
@@ -0,0 +1,600 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package service

import (
	"bufio"
	"fmt"
	"io"
	"math/rand"
	"net"
	"net/http"
	"os"
	"os/exec"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/cmd/kubelet/app"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/config"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/redirfd"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/credentialprovider"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/healthz"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor"
	kconfig "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/config"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/util/mount"
	log "github.com/golang/glog"
	"github.com/kardianos/osext"
	bindings "github.com/mesos/mesos-go/executor"

	"github.com/spf13/pflag"
)

const (
	// if we don't use this source then the kubelet will do funny things with mirror pods.
	// @see ConfigSourceAnnotationKey
	MESOS_CFG_SOURCE = kubelet.ApiserverSource
)

type KubeletExecutorServer struct {
	*app.KubeletServer
	RunProxy       bool
	ProxyLogV      int
	ProxyExec      string
	ProxyLogfile   string
	ProxyBindall   bool
	SuicideTimeout time.Duration
	ShutdownFD     int
	ShutdownFIFO   string
}

func NewKubeletExecutorServer() *KubeletExecutorServer {
	k := &KubeletExecutorServer{
		KubeletServer:  app.NewKubeletServer(),
		RunProxy:       true,
		ProxyExec:      "./kube-proxy",
		ProxyLogfile:   "./proxy-log",
		SuicideTimeout: config.DefaultSuicideTimeout,
	}
	if pwd, err := os.Getwd(); err != nil {
		log.Warningf("failed to determine current directory: %v", err)
	} else {
		k.RootDirectory = pwd // mesos sandbox dir
	}
	k.Address = util.IP(net.ParseIP(defaultBindingAddress()))
	k.ShutdownFD = -1 // indicates unspecified FD
	return k
}

func NewHyperKubeletExecutorServer() *KubeletExecutorServer {
	s := NewKubeletExecutorServer()

	// cache this for later use
	binary, err := osext.Executable()
	if err != nil {
		log.Fatalf("failed to determine currently running executable: %v", err)
	}

	s.ProxyExec = binary
	return s
}

func (s *KubeletExecutorServer) addCoreFlags(fs *pflag.FlagSet) {
	s.KubeletServer.AddFlags(fs)
	fs.BoolVar(&s.RunProxy, "run-proxy", s.RunProxy, "Maintain a running kube-proxy instance as a child proc of this kubelet-executor.")
	fs.IntVar(&s.ProxyLogV, "proxy-logv", s.ProxyLogV, "Log verbosity of the child kube-proxy.")
	fs.StringVar(&s.ProxyLogfile, "proxy-logfile", s.ProxyLogfile, "Path to the kube-proxy log file.")
	fs.BoolVar(&s.ProxyBindall, "proxy-bindall", s.ProxyBindall, "When true will cause kube-proxy to bind to 0.0.0.0.")
	fs.DurationVar(&s.SuicideTimeout, "suicide-timeout", s.SuicideTimeout, "Self-terminate after this period of inactivity. Zero disables suicide watch.")
	fs.IntVar(&s.ShutdownFD, "shutdown-fd", s.ShutdownFD, "File descriptor used to signal shutdown to external watchers, requires shutdown-fifo flag")
	fs.StringVar(&s.ShutdownFIFO, "shutdown-fifo", s.ShutdownFIFO, "FIFO used to signal shutdown to external watchers, requires shutdown-fd flag")
}

func (s *KubeletExecutorServer) AddStandaloneFlags(fs *pflag.FlagSet) {
	s.addCoreFlags(fs)
	fs.StringVar(&s.ProxyExec, "proxy-exec", s.ProxyExec, "Path to the kube-proxy executable.")
}

func (s *KubeletExecutorServer) AddHyperkubeFlags(fs *pflag.FlagSet) {
	s.addCoreFlags(fs)
}

// syncExternalShutdownWatcher returns a Closer that should be closed to signal impending shutdown,
// but only if ShutdownFD and ShutdownFIFO were specified. If they are specified, then this func
// blocks until there's a reader on the FIFO stream.
func (s *KubeletExecutorServer) syncExternalShutdownWatcher() (io.Closer, error) {
	if s.ShutdownFD == -1 || s.ShutdownFIFO == "" {
		return nil, nil
	}
	// redirfd -w n fifo ... # (blocks until the fifo is read)
	log.Infof("blocked, waiting for shutdown reader for FD %d FIFO at %s", s.ShutdownFD, s.ShutdownFIFO)
	return redirfd.Write.Redirect(true, false, redirfd.FileDescriptor(s.ShutdownFD), s.ShutdownFIFO)
}
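// NOTE: the FIFO acts as a shutdown handshake: the Redirect call above blocks
// until some external watcher opens the FIFO for reading, and that watcher in
// turn learns of impending shutdown when the executor closes the returned
// io.Closer (the write end) from its ShutdownAlert hook (see
// createAndInitKubelet below).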

// Run runs the specified KubeletExecutorServer.
func (s *KubeletExecutorServer) Run(hks hyperkube.Interface, _ []string) error {
	rand.Seed(time.Now().UTC().UnixNano())

	if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
		log.Info(err)
	}

	var apiclient *client.Client
	clientConfig, err := s.CreateAPIServerClientConfig()
	if err == nil {
		apiclient, err = client.New(clientConfig)
	}
	if err != nil {
		// required for k8sm since we need to send api.Binding information
		// back to the apiserver
		log.Fatalf("No API client: %v", err)
	}

	log.Infof("Using root directory: %v", s.RootDirectory)
	credentialprovider.SetPreferredDockercfgPath(s.RootDirectory)

	shutdownCloser, err := s.syncExternalShutdownWatcher()
	if err != nil {
		return err
	}

	cadvisorInterface, err := cadvisor.New(s.CadvisorPort)
	if err != nil {
		return err
	}

	imageGCPolicy := kubelet.ImageGCPolicy{
		HighThresholdPercent: s.ImageGCHighThresholdPercent,
		LowThresholdPercent:  s.ImageGCLowThresholdPercent,
	}

	diskSpacePolicy := kubelet.DiskSpacePolicy{
		DockerFreeDiskMB: s.LowDiskSpaceThresholdMB,
		RootFreeDiskMB:   s.LowDiskSpaceThresholdMB,
	}

	//TODO(jdef) intentionally NOT initializing a cloud provider here since:
	//(a) the kubelet doesn't actually use it
	//(b) we don't need to create N-kubelet connections to zookeeper for no good reason
	//cloud := cloudprovider.InitCloudProvider(s.CloudProvider, s.CloudConfigFile)
	//log.Infof("Successfully initialized cloud provider: %q from the config file: %q\n", s.CloudProvider, s.CloudConfigFile)

	hostNetworkSources, err := kubelet.GetValidatedSources(strings.Split(s.HostNetworkSources, ","))
	if err != nil {
		return err
	}

	tlsOptions, err := s.InitializeTLS()
	if err != nil {
		return err
	}
	mounter := mount.New()
	if s.Containerized {
		log.V(2).Info("Running kubelet in containerized mode (experimental)")
		mounter = &mount.NsenterMounter{}
	}

	var dockerExecHandler dockertools.ExecHandler
	switch s.DockerExecHandlerName {
	case "native":
		dockerExecHandler = &dockertools.NativeExecHandler{}
	case "nsenter":
		dockerExecHandler = &dockertools.NsenterExecHandler{}
	default:
		log.Warningf("Unknown Docker exec handler %q; defaulting to native", s.DockerExecHandlerName)
		dockerExecHandler = &dockertools.NativeExecHandler{}
	}

	kcfg := app.KubeletConfig{
		Address:            s.Address,
		AllowPrivileged:    s.AllowPrivileged,
		HostNetworkSources: hostNetworkSources,
		HostnameOverride:   s.HostnameOverride,
		RootDirectory:      s.RootDirectory,
		// ConfigFile: ""
		// ManifestURL: ""
		// FileCheckFrequency
		// HTTPCheckFrequency
		PodInfraContainerImage:         s.PodInfraContainerImage,
		SyncFrequency:                  s.SyncFrequency,
		RegistryPullQPS:                s.RegistryPullQPS,
		RegistryBurst:                  s.RegistryBurst,
		MinimumGCAge:                   s.MinimumGCAge,
		MaxPerPodContainerCount:        s.MaxPerPodContainerCount,
		MaxContainerCount:              s.MaxContainerCount,
		RegisterNode:                   s.RegisterNode,
		ClusterDomain:                  s.ClusterDomain,
		ClusterDNS:                     s.ClusterDNS,
		Runonce:                        s.RunOnce,
		Port:                           s.Port,
		ReadOnlyPort:                   s.ReadOnlyPort,
		CadvisorInterface:              cadvisorInterface,
		EnableServer:                   s.EnableServer,
		EnableDebuggingHandlers:        s.EnableDebuggingHandlers,
		DockerClient:                   dockertools.ConnectToDockerOrDie(s.DockerEndpoint),
		KubeClient:                     apiclient,
		MasterServiceNamespace:         s.MasterServiceNamespace,
		VolumePlugins:                  app.ProbeVolumePlugins(),
		NetworkPlugins:                 app.ProbeNetworkPlugins(),
		NetworkPluginName:              s.NetworkPluginName,
		StreamingConnectionIdleTimeout: s.StreamingConnectionIdleTimeout,
		TLSOptions:                     tlsOptions,
		ImageGCPolicy:                  imageGCPolicy,
		DiskSpacePolicy:                diskSpacePolicy,
		Cloud:                          nil, // TODO(jdef) Cloud, specifying null here because we don't want all kubelets polling mesos-master; need to account for this in the cloudprovider impl
		NodeStatusUpdateFrequency:      s.NodeStatusUpdateFrequency,
		ResourceContainer:              s.ResourceContainer,
		CgroupRoot:                     s.CgroupRoot,
		ContainerRuntime:               s.ContainerRuntime,
		Mounter:                        mounter,
		DockerDaemonContainer:          s.DockerDaemonContainer,
		SystemContainer:                s.SystemContainer,
		ConfigureCBR0:                  s.ConfigureCBR0,
		MaxPods:                        s.MaxPods,
		DockerExecHandler:              dockerExecHandler,
	}

	err = app.RunKubelet(&kcfg, app.KubeletBuilder(func(kc *app.KubeletConfig) (app.KubeletBootstrap, *kconfig.PodConfig, error) {
		return s.createAndInitKubelet(kc, hks, clientConfig, shutdownCloser)
	}))
	if err != nil {
		return err
	}

	if s.HealthzPort > 0 {
		healthz.DefaultHealthz()
		go util.Forever(func() {
			err := http.ListenAndServe(net.JoinHostPort(s.HealthzBindAddress.String(), strconv.Itoa(s.HealthzPort)), nil)
			if err != nil {
				log.Errorf("Starting health server failed: %v", err)
			}
		}, 5*time.Second)
	}

	// block until executor is shut down or commits shutdown
	select {}
}

func defaultBindingAddress() string {
	libProcessIP := os.Getenv("LIBPROCESS_IP")
	if libProcessIP == "" {
		return "0.0.0.0"
	} else {
		return libProcessIP
	}
}

func (ks *KubeletExecutorServer) createAndInitKubelet(
	kc *app.KubeletConfig,
	hks hyperkube.Interface,
	clientConfig *client.Config,
	shutdownCloser io.Closer,
) (app.KubeletBootstrap, *kconfig.PodConfig, error) {

	// TODO(k8s): block until all sources have delivered at least one update to the channel, or break the sync loop
	// up into "per source" synchronizations
	// TODO(k8s): KubeletConfig.KubeClient should be a client interface, but client interface misses certain methods
	// used by kubelet. Since NewMainKubelet expects a client interface, we need to make sure we are not passing
	// a nil pointer to it when what we really want is a nil interface.
	var kubeClient client.Interface
	if kc.KubeClient == nil {
		kubeClient = nil
	} else {
		kubeClient = kc.KubeClient
	}

	gcPolicy := kubelet.ContainerGCPolicy{
		MinAge:             kc.MinimumGCAge,
		MaxPerPodContainer: kc.MaxPerPodContainerCount,
		MaxContainers:      kc.MaxContainerCount,
	}

	pc := kconfig.NewPodConfig(kconfig.PodConfigNotificationSnapshotAndUpdates, kc.Recorder)
	updates := pc.Channel(MESOS_CFG_SOURCE)

	klet, err := kubelet.NewMainKubelet(
		kc.Hostname,
		kc.DockerClient,
		kubeClient,
		kc.RootDirectory,
		kc.PodInfraContainerImage,
		kc.SyncFrequency,
		float32(kc.RegistryPullQPS),
		kc.RegistryBurst,
		gcPolicy,
		pc.SeenAllSources,
		kc.RegisterNode,
		kc.ClusterDomain,
		net.IP(kc.ClusterDNS),
		kc.MasterServiceNamespace,
		kc.VolumePlugins,
		kc.NetworkPlugins,
		kc.NetworkPluginName,
		kc.StreamingConnectionIdleTimeout,
		kc.Recorder,
		kc.CadvisorInterface,
		kc.ImageGCPolicy,
		kc.DiskSpacePolicy,
		kc.Cloud,
		kc.NodeStatusUpdateFrequency,
		kc.ResourceContainer,
		kc.OSInterface,
		kc.CgroupRoot,
		kc.ContainerRuntime,
		kc.Mounter,
		kc.DockerDaemonContainer,
		kc.SystemContainer,
		kc.ConfigureCBR0,
		kc.MaxPods,
		kc.DockerExecHandler,
	)
	if err != nil {
		return nil, nil, err
	}

	//TODO(jdef) either configure Watch here with something useful, or else
	// get rid of it from executor.Config
	kubeletFinished := make(chan struct{})
	exec := executor.New(executor.Config{
		Kubelet:         klet,
		Updates:         updates,
		SourceName:      MESOS_CFG_SOURCE,
		APIClient:       kc.KubeClient,
		Docker:          kc.DockerClient,
		SuicideTimeout:  ks.SuicideTimeout,
		KubeletFinished: kubeletFinished,
		ShutdownAlert: func() {
			if shutdownCloser != nil {
				if e := shutdownCloser.Close(); e != nil {
					log.Warningf("failed to signal shutdown to external watcher: %v", e)
				}
			}
		},
		ExitFunc: os.Exit,
		PodStatusFunc: func(kl *kubelet.Kubelet, pod *api.Pod) (*api.PodStatus, error) {
			return kl.GetRuntime().GetPodStatus(pod)
		},
	})

	k := &kubeletExecutor{
		Kubelet:         klet,
		runProxy:        ks.RunProxy,
		proxyLogV:       ks.ProxyLogV,
		proxyExec:       ks.ProxyExec,
		proxyLogfile:    ks.ProxyLogfile,
		proxyBindall:    ks.ProxyBindall,
		address:         ks.Address,
		dockerClient:    kc.DockerClient,
		hks:             hks,
		kubeletFinished: kubeletFinished,
		executorDone:    exec.Done(),
		clientConfig:    clientConfig,
	}

	dconfig := bindings.DriverConfig{
		Executor:         exec,
		HostnameOverride: ks.HostnameOverride,
		BindingAddress:   net.IP(ks.Address),
	}
	if driver, err := bindings.NewMesosExecutorDriver(dconfig); err != nil {
		log.Fatalf("failed to create executor driver: %v", err)
	} else {
		k.driver = driver
	}

	log.V(2).Infof("Initialize executor driver...")

	k.BirthCry()
	exec.Init(k.driver)

	k.StartGarbageCollection()

	return k, pc, nil
}

// kubelet decorator
type kubeletExecutor struct {
	*kubelet.Kubelet
	initialize      sync.Once
	driver          bindings.ExecutorDriver
	runProxy        bool
	proxyLogV       int
	proxyExec       string
	proxyLogfile    string
	proxyBindall    bool
	address         util.IP
	dockerClient    dockertools.DockerInterface
	hks             hyperkube.Interface
	kubeletFinished chan struct{}   // closed once kubelet.Run() returns
	executorDone    <-chan struct{} // from KubeletExecutor.Done()
	clientConfig    *client.Config
}

func (kl *kubeletExecutor) ListenAndServe(address net.IP, port uint, tlsOptions *kubelet.TLSOptions, enableDebuggingHandlers bool) {
	// this func could be called many times, depending how often the HTTP server crashes,
	// so only execute certain initialization procs once
	kl.initialize.Do(func() {
		if kl.runProxy {
			go runtime.Until(kl.runProxyService, 5*time.Second, kl.executorDone)
		}
		go func() {
			if _, err := kl.driver.Run(); err != nil {
				log.Fatalf("executor driver failed: %v", err)
			}
			log.Info("executor Run completed")
|
||||
}()
|
||||
})
|
||||
log.Infof("Starting kubelet server...")
|
||||
kubelet.ListenAndServeKubeletServer(kl, address, port, tlsOptions, enableDebuggingHandlers)
|
||||
}
|
||||
|
||||
// this function blocks as long as the proxy service is running; intended to be
|
||||
// executed asynchronously.
|
||||
func (kl *kubeletExecutor) runProxyService() {
|
||||
|
||||
log.Infof("Starting proxy process...")
|
||||
|
||||
const KM_PROXY = "proxy" //TODO(jdef) constant should be shared with km package
|
||||
args := []string{}
|
||||
|
||||
if kl.hks.FindServer(KM_PROXY) {
|
||||
args = append(args, KM_PROXY)
|
||||
log.V(1).Infof("attempting to using km proxy service")
|
||||
} else if _, err := os.Stat(kl.proxyExec); os.IsNotExist(err) {
|
||||
log.Errorf("failed to locate proxy executable at '%v' and km not present: %v", kl.proxyExec, err)
|
||||
return
|
||||
}
|
||||
|
||||
bindAddress := "0.0.0.0"
|
||||
if !kl.proxyBindall {
|
||||
bindAddress = kl.address.String()
|
||||
}
|
||||
args = append(args,
|
||||
fmt.Sprintf("--bind-address=%s", bindAddress),
|
||||
fmt.Sprintf("--v=%d", kl.proxyLogV),
|
||||
"--logtostderr=true",
|
||||
)
|
||||
|
||||
// add client.Config args here. proxy still calls client.BindClientConfigFlags
|
||||
appendStringArg := func(name, value string) {
|
||||
if value != "" {
|
||||
args = append(args, fmt.Sprintf("--%s=%s", name, value))
|
||||
}
|
||||
}
|
||||
appendStringArg("master", kl.clientConfig.Host)
|
||||
/* TODO(jdef) move these flags to a config file pointed to by --kubeconfig
|
||||
appendStringArg("api-version", kl.clientConfig.Version)
|
||||
appendStringArg("client-certificate", kl.clientConfig.CertFile)
|
||||
appendStringArg("client-key", kl.clientConfig.KeyFile)
|
||||
appendStringArg("certificate-authority", kl.clientConfig.CAFile)
|
||||
args = append(args, fmt.Sprintf("--insecure-skip-tls-verify=%t", kl.clientConfig.Insecure))
|
||||
*/
|
||||
|
||||
log.Infof("Spawning process executable %s with args '%+v'", kl.proxyExec, args)
|
||||
|
||||
cmd := exec.Command(kl.proxyExec, args...)
|
||||
if _, err := cmd.StdoutPipe(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
proxylogs, err := cmd.StderrPipe()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
//TODO(jdef) append instead of truncate? what if the disk is full?
|
||||
logfile, err := os.Create(kl.proxyLogfile)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
defer logfile.Close()
|
||||
|
||||
ch := make(chan struct{})
|
||||
go func() {
|
||||
defer func() {
|
||||
select {
|
||||
case <-ch:
|
||||
log.Infof("killing proxy process..")
|
||||
if err = cmd.Process.Kill(); err != nil {
|
||||
log.Errorf("failed to kill proxy process: %v", err)
|
||||
}
|
||||
default:
|
||||
}
|
||||
}()
|
||||
|
||||
writer := bufio.NewWriter(logfile)
|
||||
defer writer.Flush()
|
||||
|
||||
<-ch
|
||||
written, err := io.Copy(writer, proxylogs)
|
||||
if err != nil {
|
||||
log.Errorf("error writing data to proxy log: %v", err)
|
||||
}
|
||||
|
||||
log.Infof("wrote %d bytes to proxy log", written)
|
||||
}()
|
||||
|
||||
// if the proxy fails to start then we exit the executor, otherwise
|
||||
// wait for the proxy process to end (and release resources after).
|
||||
if err := cmd.Start(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
close(ch)
|
||||
if err := cmd.Wait(); err != nil {
|
||||
log.Error(err)
|
||||
}
|
||||
}
|
||||
|
||||
// runs the main kubelet loop, closing the kubeletFinished chan when the loop exits.
|
||||
// never returns.
|
||||
func (kl *kubeletExecutor) Run(updates <-chan kubelet.PodUpdate) {
|
||||
defer func() {
|
||||
close(kl.kubeletFinished)
|
||||
util.HandleCrash()
|
||||
log.Infoln("kubelet run terminated") //TODO(jdef) turn down verbosity
|
||||
// important: never return! this is in our contract
|
||||
select {}
|
||||
}()
|
||||
|
||||
// push updates through a closable pipe. when the executor indicates shutdown
|
||||
// via Done() we want to stop the Kubelet from processing updates.
|
||||
pipe := make(chan kubelet.PodUpdate)
|
||||
go func() {
|
||||
// closing pipe will cause our patched kubelet's syncLoop() to exit
|
||||
defer close(pipe)
|
||||
pipeLoop:
|
||||
for {
|
||||
select {
|
||||
case <-kl.executorDone:
|
||||
break pipeLoop
|
||||
default:
|
||||
select {
|
||||
case u := <-updates:
|
||||
select {
|
||||
case pipe <- u: // noop
|
||||
case <-kl.executorDone:
|
||||
break pipeLoop
|
||||
}
|
||||
case <-kl.executorDone:
|
||||
break pipeLoop
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// we expect that Run() will complete after the pipe is closed and the
|
||||
// kubelet's syncLoop() has finished processing its backlog, which hopefully
|
||||
// will not take very long. Peeking into the future (current k8s master) it
|
||||
// seems that the backlog has grown from 1 to 50 -- this may negatively impact
|
||||
// us going forward, time will tell.
|
||||
util.Until(func() { kl.Kubelet.Run(pipe) }, 0, kl.executorDone)
|
||||
|
||||
//TODO(jdef) revisit this if/when executor failover lands
|
||||
err := kl.SyncPods([]*api.Pod{}, nil, nil, time.Now())
|
||||
if err != nil {
|
||||
log.Errorf("failed to cleanly remove all pods and associated state: %v", err)
|
||||
}
|
||||
}
|
||||
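The Run method above hinges on a small reusable idea: forwarding values through an intermediate channel that is closed when a done signal fires, so the downstream consumer's receive loop terminates cleanly instead of blocking forever. A minimal, self-contained sketch of that pattern follows; the names forward, done, and the int payload are illustrative only and not part of this codebase.

package main

import "fmt"

// forward copies values from in to a fresh output channel until either in is
// drained or done is closed; the output channel is closed on exit so that a
// consumer ranging over it terminates cleanly.
func forward(in <-chan int, done <-chan struct{}) <-chan int {
	out := make(chan int)
	go func() {
		defer close(out) // unblocks any consumer ranging over out
		for {
			select {
			case <-done:
				return
			case v, ok := <-in:
				if !ok {
					return
				}
				select {
				case out <- v:
				case <-done:
					return
				}
			}
		}
	}()
	return out
}

func main() {
	in := make(chan int)
	done := make(chan struct{})
	out := forward(in, done)
	go func() {
		in <- 1
		in <- 2
		close(done) // simulate executor shutdown
	}()
	for v := range out {
		fmt.Println("forwarded:", v)
	}
}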
21
contrib/mesos/pkg/hyperkube/doc.go
Normal file
@@ -0,0 +1,21 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package hyperkube facilitates the combination of multiple
// kubernetes-mesos components into a single binary form, providing a
// simple mechanism for intra-component discovery as per the original
// Kubernetes hyperkube package.
package hyperkube
54
contrib/mesos/pkg/hyperkube/types.go
Normal file
@@ -0,0 +1,54 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package hyperkube

import (
	"github.com/spf13/pflag"
)

var (
	nilKube = &nilKubeType{}
)

type Interface interface {
	// FindServer will find a specific server named name.
	FindServer(name string) bool

	// The executable name, used for help and soft-link invocation
	Name() string

	// Flags returns a flagset for "global" flags.
	Flags() *pflag.FlagSet
}

type nilKubeType struct{}

func (n *nilKubeType) FindServer(_ string) bool {
	return false
}

func (n *nilKubeType) Name() string {
	return ""
}

func (n *nilKubeType) Flags() *pflag.FlagSet {
	return nil
}

func Nil() Interface {
	return nilKube
}
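For callers that don't run inside a combined hyperkube binary, Nil() supplies a no-op registry so code can degrade gracefully when a named server is absent. A hedged usage sketch follows; the maybeRunProxy helper is hypothetical and only illustrates the FindServer contract.

package main

import (
	"fmt"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube"
)

// maybeRunProxy (hypothetical) falls back to an external executable when the
// named server is not compiled into the current binary. hk would normally be
// the real hyperkube registry, or hyperkube.Nil() when running standalone.
func maybeRunProxy(hk hyperkube.Interface) {
	if hk.FindServer("proxy") {
		fmt.Println("using embedded proxy server")
	} else {
		fmt.Println("falling back to external kube-proxy executable")
	}
}

func main() {
	maybeRunProxy(hyperkube.Nil()) // prints the fallback branch
}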
18
contrib/mesos/pkg/offers/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package offers contains code that manages Mesos offers.
package offers
19
contrib/mesos/pkg/offers/metrics/doc.go
Normal file
@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package metrics defines and exposes instrumentation metrics related to
// Mesos offers.
package metrics
89
contrib/mesos/pkg/offers/metrics/metrics.go
Normal file
@@ -0,0 +1,89 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

const (
	offerSubsystem = "mesos_offers"
)

type OfferDeclinedReason string

const (
	OfferExpired = OfferDeclinedReason("expired")
	OfferRescinded = OfferDeclinedReason("rescinded")
	OfferCompat = OfferDeclinedReason("compat")
)

var (
	OffersReceived = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: offerSubsystem,
			Name: "received",
			Help: "Counter of offers received from Mesos broken out by slave host.",
		},
		[]string{"hostname"},
	)

	OffersDeclined = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: offerSubsystem,
			Name: "declined",
			Help: "Counter of offers declined by the framework broken out by slave host.",
		},
		[]string{"hostname", "reason"},
	)

	OffersAcquired = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: offerSubsystem,
			Name: "acquired",
			Help: "Counter of offers acquired for task launch broken out by slave host.",
		},
		[]string{"hostname"},
	)

	OffersReleased = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: offerSubsystem,
			Name: "released",
			Help: "Counter of previously-acquired offers later released, broken out by slave host.",
		},
		[]string{"hostname"},
	)
)

var registerMetrics sync.Once

func Register() {
	registerMetrics.Do(func() {
		prometheus.MustRegister(OffersReceived)
		prometheus.MustRegister(OffersDeclined)
		prometheus.MustRegister(OffersAcquired)
		prometheus.MustRegister(OffersReleased)
	})
}

func InMicroseconds(d time.Duration) float64 {
	return float64(d.Nanoseconds() / time.Microsecond.Nanoseconds())
}
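A minimal sketch of how these counters are meant to be driven: Register() is idempotent thanks to sync.Once, and label values select the per-host child counter. The hostname, address, and HTTP wiring below are illustrative, not part of this commit.

package main

import (
	"log"
	"net/http"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers/metrics"
	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	metrics.Register() // safe to call repeatedly; guarded by sync.Once

	// record one received and one compat-declined offer for an illustrative host
	metrics.OffersReceived.WithLabelValues("slave-1.example.com").Inc()
	metrics.OffersDeclined.WithLabelValues("slave-1.example.com", string(metrics.OfferCompat)).Inc()

	// expose the default registry for Prometheus scraping
	http.Handle("/metrics", prometheus.Handler())
	log.Fatal(http.ListenAndServe(":8080", nil))
}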
570
contrib/mesos/pkg/offers/offers.go
Normal file
@@ -0,0 +1,570 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package offers

import (
	"fmt"
	"reflect"
	"sync"
	"sync/atomic"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers/metrics"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
	log "github.com/golang/glog"
	mesos "github.com/mesos/mesos-go/mesosproto"
)

const (
	offerListenerMaxAge = 12 // max number of times we'll attempt to fit an offer to a listener before requiring them to re-register themselves
	offerIdCacheTTL = 1 * time.Second // determines expiration of cached offer ids, used in listener notification
	deferredDeclineTtlFactor = 2 // this factor, multiplied by the offer ttl, determines how long to wait before attempting to decline previously claimed offers that were subsequently deleted, then released. see offerStorage.Delete
	notifyListenersDelay = 0 // delay between offer listener notification attempts
)

type Filter func(*mesos.Offer) bool

type Registry interface {
	// Initialize the instance, spawning necessary housekeeping go routines.
	Init(<-chan struct{})

	// Add offers to this registry, rejecting those that are deemed incompatible.
	Add([]*mesos.Offer)

	// Listen for arriving offers that are acceptable to the filter, sending
	// a signal on (by closing) the returned channel. A listener will only
	// ever be notified once, if at all.
	Listen(id string, f Filter) <-chan struct{}

	// invoked when offers are rescinded or expired
	Delete(string, metrics.OfferDeclinedReason)

	// when true, returns the offer that's registered for the given ID
	Get(offerId string) (Perishable, bool)

	// iterate through non-expired offers in this registry
	Walk(Walker) error

	// invalidate one or all (when offerId="") offers; offers are not declined,
	// but are simply flagged as expired in the offer history
	Invalidate(offerId string)

	// invalidate all offers associated with the slave identified by slaveId.
	InvalidateForSlave(slaveId string)
}

// callback that is invoked during a walk through a series of live offers,
// returning with stop=true (or err != nil) if the walk should stop prematurely.
type Walker func(offer Perishable) (stop bool, err error)

type RegistryConfig struct {
	DeclineOffer func(offerId string) <-chan error // tell Mesos that we're declining the offer
	Compat func(*mesos.Offer) bool // returns true if offer is compatible; incompatible offers are declined
	TTL time.Duration // determines a perishable offer's expiration deadline: now+ttl
	LingerTTL time.Duration // if zero, offers will not linger in the FIFO past their expiration deadline
	ListenerDelay time.Duration // specifies the sleep time between offer listener notifications
}

type offerStorage struct {
	RegistryConfig
	offers *cache.FIFO // collection of Perishable, both live and expired
	listeners *queue.DelayFIFO // collection of *offerListener
	delayed *queue.DelayQueue // deadline-oriented offer-event queue
	slaves *slaveStorage // slave to offer mappings
}

type liveOffer struct {
	*mesos.Offer
	expiration time.Time
	acquired int32 // 1 = acquired, 0 = free
}

type expiredOffer struct {
	offerSpec
	deadline time.Time
}

// subset of mesos.OfferInfo useful for recordkeeping
type offerSpec struct {
	id string
	hostname string
}

// offers that may perish (all of them?) implement this interface.
// callers may expect to access these funcs concurrently so implementations
// must provide their own form of synchronization around mutable state.
type Perishable interface {
	// returns true if this offer has expired
	HasExpired() bool
	// if not yet expired, return mesos offer details; otherwise nil
	Details() *mesos.Offer
	// mark this offer as acquired, returning true if it was previously unacquired. thread-safe.
	Acquire() bool
	// mark this offer as un-acquired. thread-safe.
	Release()
	// expire or delete this offer from storage
	age(s *offerStorage)
	// return a unique identifier for this offer
	Id() string
	// return the slave host for this offer
	Host() string
	addTo(*queue.DelayQueue)
}

func (e *expiredOffer) addTo(q *queue.DelayQueue) {
	q.Add(e)
}

func (e *expiredOffer) Id() string {
	return e.id
}

func (e *expiredOffer) Host() string {
	return e.hostname
}

func (e *expiredOffer) HasExpired() bool {
	return true
}

func (e *expiredOffer) Details() *mesos.Offer {
	return nil
}

func (e *expiredOffer) Acquire() bool {
	return false
}

func (e *expiredOffer) Release() {}

func (e *expiredOffer) age(s *offerStorage) {
	log.V(3).Infof("Delete lingering offer: %v", e.id)
	s.offers.Delete(e)
	s.slaves.deleteOffer(e.id)
}

// return the time left to linger
func (e *expiredOffer) GetDelay() time.Duration {
	return e.deadline.Sub(time.Now())
}

func (to *liveOffer) HasExpired() bool {
	return time.Now().After(to.expiration)
}

func (to *liveOffer) Details() *mesos.Offer {
	return to.Offer
}

func (to *liveOffer) Acquire() (acquired bool) {
	if acquired = atomic.CompareAndSwapInt32(&to.acquired, 0, 1); acquired {
		metrics.OffersAcquired.WithLabelValues(to.Host()).Inc()
	}
	return
}

func (to *liveOffer) Release() {
	if released := atomic.CompareAndSwapInt32(&to.acquired, 1, 0); released {
		metrics.OffersReleased.WithLabelValues(to.Host()).Inc()
	}
}

func (to *liveOffer) age(s *offerStorage) {
	s.Delete(to.Id(), metrics.OfferExpired)
}

func (to *liveOffer) Id() string {
	return to.Offer.Id.GetValue()
}

func (to *liveOffer) Host() string {
	return to.Offer.GetHostname()
}

func (to *liveOffer) addTo(q *queue.DelayQueue) {
	q.Add(to)
}

// return the time remaining before the offer expires
func (to *liveOffer) GetDelay() time.Duration {
	return to.expiration.Sub(time.Now())
}

func CreateRegistry(c RegistryConfig) Registry {
	metrics.Register()
	return &offerStorage{
		RegistryConfig: c,
		offers: cache.NewFIFO(cache.KeyFunc(func(v interface{}) (string, error) {
			if perishable, ok := v.(Perishable); !ok {
				return "", fmt.Errorf("expected perishable offer, not '%+v'", v)
			} else {
				return perishable.Id(), nil
			}
		})),
		listeners: queue.NewDelayFIFO(),
		delayed: queue.NewDelayQueue(),
		slaves: newSlaveStorage(),
	}
}

func (s *offerStorage) declineOffer(offerId, hostname string, reason metrics.OfferDeclinedReason) {
	//TODO(jdef) might be nice to spec an abort chan here
	runtime.Signal(proc.OnError(s.DeclineOffer(offerId), func(err error) {
		log.Warningf("decline failed for offer id %v: %v", offerId, err)
	}, nil)).Then(func() {
		metrics.OffersDeclined.WithLabelValues(hostname, string(reason)).Inc()
	})
}

func (s *offerStorage) Add(offers []*mesos.Offer) {
	now := time.Now()
	for _, offer := range offers {
		if !s.Compat(offer) {
			//TODO(jdef) would be nice to batch these up
			offerId := offer.Id.GetValue()
			log.V(3).Infof("Declining incompatible offer %v", offerId)
			s.declineOffer(offerId, offer.GetHostname(), metrics.OfferCompat)
			continue
		}
		timed := &liveOffer{
			Offer: offer,
			expiration: now.Add(s.TTL),
			acquired: 0,
		}
		log.V(3).Infof("Receiving offer %v", timed.Id())
		s.offers.Add(timed)
		s.delayed.Add(timed)
		s.slaves.add(offer.SlaveId.GetValue(), timed.Id())
		metrics.OffersReceived.WithLabelValues(timed.Host()).Inc()
	}
}

// delete an offer from storage, implicitly expires the offer
func (s *offerStorage) Delete(offerId string, reason metrics.OfferDeclinedReason) {
	if offer, ok := s.Get(offerId); ok {
		log.V(3).Infof("Deleting offer %v", offerId)
		// attempt to block others from consuming the offer. if it's already been
		// claimed and is not yet lingering then don't decline it - just mark it as
		// expired in the history: allow a prior claimant to attempt to launch with it
		notYetClaimed := offer.Acquire()
		if offer.Details() != nil {
			if notYetClaimed {
				log.V(3).Infof("Declining offer %v", offerId)
				s.declineOffer(offerId, offer.Host(), reason)
			} else {
				// some pod has acquired this and may attempt to launch a task with it
				// failed schedule/launch attempts are required to Release() any claims on the offer

				// TODO(jdef): not sure what a good value is here. the goal is to provide a
				// launchTasks (driver) operation enough time to complete so that we don't end
				// up declining an offer that we're actually attempting to use.
				time.AfterFunc(deferredDeclineTtlFactor*s.TTL, func() {
					// at this point the offer is in one of five states:
					// a) permanently deleted: expired due to timeout
					// b) permanently deleted: expired due to having been rescinded
					// c) lingering: expired due to timeout
					// d) lingering: expired due to having been rescinded
					// e) claimed: task launched and is using resources from this offer
					// we want to **avoid** declining an offer that's claimed: attempt to acquire
					if offer.Acquire() {
						// previously claimed offer was released, perhaps due to a launch
						// failure, so we should attempt to decline
						log.V(3).Infof("attempting to decline (previously claimed) offer %v", offerId)
						s.declineOffer(offerId, offer.Host(), reason)
					}
				})
			}
		}
		s.expireOffer(offer)
	} // else, ignore offers not in the history
}

func (s *offerStorage) InvalidateForSlave(slaveId string) {
	offerIds := s.slaves.deleteSlave(slaveId)
	for oid := range offerIds {
		s.invalidateOne(oid)
	}
}

// if offerId == "" then expire all known, live offers, otherwise only the offer indicated
func (s *offerStorage) Invalidate(offerId string) {
	if offerId != "" {
		s.invalidateOne(offerId)
		return
	}
	obj := s.offers.List()
	for _, o := range obj {
		offer, ok := o.(Perishable)
		if !ok {
			log.Errorf("Expected perishable offer, not %v", o)
			continue
		}
		offer.Acquire() // attempt to block others from using it
		s.expireOffer(offer)
		// don't decline, we already know that it's an invalid offer
	}
}

func (s *offerStorage) invalidateOne(offerId string) {
	if offer, ok := s.Get(offerId); ok {
		offer.Acquire() // attempt to block others from using it
		s.expireOffer(offer)
		// don't decline, we already know that it's an invalid offer
	}
}

// Walk the collection of offers. The walk stops either as indicated by the
// Walker or when the end of the offer list is reached. Expired offers are
// never passed to a Walker.
func (s *offerStorage) Walk(w Walker) error {
	for _, v := range s.offers.List() {
		offer, ok := v.(Perishable)
		if !ok {
			// offer disappeared...
			continue
		}
		if offer.HasExpired() {
			// never pass expired offers to walkers
			continue
		}
		if stop, err := w(offer); err != nil {
			return err
		} else if stop {
			return nil
		}
	}
	return nil
}

func Expired(offerId, hostname string, ttl time.Duration) *expiredOffer {
	return &expiredOffer{offerSpec{id: offerId, hostname: hostname}, time.Now().Add(ttl)}
}

func (s *offerStorage) expireOffer(offer Perishable) {
	// the offer may or may not be expired due to TTL so check for details
	// since that's a more reliable determinant of lingering status
	if details := offer.Details(); details != nil {
		// recently expired, should linger
		offerId := details.Id.GetValue()
		log.V(3).Infof("Expiring offer %v", offerId)
		if s.LingerTTL > 0 {
			log.V(3).Infof("offer will linger: %v", offerId)
			expired := Expired(offerId, offer.Host(), s.LingerTTL)
			s.offers.Update(expired)
			s.delayed.Add(expired)
		} else {
			log.V(3).Infof("Permanently deleting offer %v", offerId)
			s.offers.Delete(offerId)
			s.slaves.deleteOffer(offerId)
		}
	} // else, it's still lingering...
}

func (s *offerStorage) Get(id string) (Perishable, bool) {
	if obj, ok, _ := s.offers.GetByKey(id); !ok {
		return nil, false
	} else {
		to, ok := obj.(Perishable)
		if !ok {
			log.Errorf("invalid offer object in fifo '%v'", obj)
		}
		return to, ok
	}
}

type offerListener struct {
	id string
	accepts Filter
	notify chan<- struct{}
	age int
	deadline time.Time
	sawVersion uint64
}

func (l *offerListener) GetUID() string {
	return l.id
}

func (l *offerListener) Deadline() (time.Time, bool) {
	return l.deadline, true
}

// register a listener for new offers, whom we'll notify upon receiving such.
// notification is delivered in the form of closing the channel, nothing is ever sent.
func (s *offerStorage) Listen(id string, f Filter) <-chan struct{} {
	if f == nil {
		return nil
	}
	ch := make(chan struct{})
	listen := &offerListener{
		id: id,
		accepts: f,
		notify: ch,
		deadline: time.Now().Add(s.ListenerDelay),
	}
	log.V(3).Infof("Registering offer listener %s", listen.id)
	s.listeners.Offer(listen, queue.ReplaceExisting)
	return ch
}

func (s *offerStorage) ageOffers() {
	offer, ok := s.delayed.Pop().(Perishable)
	if !ok {
		log.Errorf("Expected Perishable, not %v", offer)
		return
	}
	if details := offer.Details(); details != nil && !offer.HasExpired() {
		// live offer has not expired yet: timed out early
		// FWIW: early timeouts are more frequent when GOMAXPROCS is > 1
		offer.addTo(s.delayed)
	} else {
		offer.age(s)
	}
}

func (s *offerStorage) nextListener() *offerListener {
	obj := s.listeners.Pop()
	if listen, ok := obj.(*offerListener); !ok {
		// programming error
		panic(fmt.Sprintf("unexpected listener object %v", obj))
	} else {
		return listen
	}
}

// notify listeners if we find an acceptable offer for them. listeners
// are garbage collected after a certain age (see offerListenerMaxAge).
// ids lists offer IDs that are retrievable from offer storage.
func (s *offerStorage) notifyListeners(ids func() (util.StringSet, uint64)) {
	listener := s.nextListener() // blocking

	offerIds, version := ids()
	if listener.sawVersion == version {
		// no changes to offer list, avoid growing older - just wait for new offers to arrive
		listener.deadline = time.Now().Add(s.ListenerDelay)
		s.listeners.Offer(listener, queue.KeepExisting)
		return
	}
	listener.sawVersion = version

	// notify if we find an acceptable offer
	for id := range offerIds {
		if offer, ok := s.Get(id); !ok || offer.HasExpired() {
			continue
		} else if listener.accepts(offer.Details()) {
			log.V(3).Infof("Notifying offer listener %s", listener.id)
			close(listener.notify)
			return
		}
	}

	// no interesting offers found, re-queue the listener
	listener.age++
	if listener.age < offerListenerMaxAge {
		listener.deadline = time.Now().Add(s.ListenerDelay)
		s.listeners.Offer(listener, queue.KeepExisting)
	} else {
		// garbage collection is as simple as not re-adding the listener to the queue
		log.V(3).Infof("garbage collecting offer listener %s", listener.id)
	}
}

func (s *offerStorage) Init(done <-chan struct{}) {
	// zero delay, reap offers as soon as they expire
	go runtime.Until(s.ageOffers, 0, done)

	// cached offer ids for the purposes of listener notification
	idCache := &stringsCache{
		refill: func() util.StringSet {
			result := util.NewStringSet()
			for _, v := range s.offers.List() {
				if offer, ok := v.(Perishable); ok {
					result.Insert(offer.Id())
				}
			}
			return result
		},
		ttl: offerIdCacheTTL,
	}

	go runtime.Until(func() { s.notifyListeners(idCache.Strings) }, notifyListenersDelay, done)
}

type stringsCache struct {
	expiresAt time.Time
	cached util.StringSet
	ttl time.Duration
	refill func() util.StringSet
	version uint64
}

// not thread-safe
func (c *stringsCache) Strings() (util.StringSet, uint64) {
	now := time.Now()
	if c.expiresAt.Before(now) {
		old := c.cached
		c.cached = c.refill()
		c.expiresAt = now.Add(c.ttl)
		if !reflect.DeepEqual(old, c.cached) {
			c.version++
		}
	}
	return c.cached, c.version
}

type slaveStorage struct {
	sync.Mutex
	index map[string]string // map offerId to slaveId
}

func newSlaveStorage() *slaveStorage {
	return &slaveStorage{
		index: make(map[string]string),
	}
}

// create a mapping between a slave and an offer
func (self *slaveStorage) add(slaveId, offerId string) {
	self.Lock()
	defer self.Unlock()
	self.index[offerId] = slaveId
}

// delete the slave-offer mappings for slaveId, returns the IDs of the offers that were unmapped
func (self *slaveStorage) deleteSlave(slaveId string) util.StringSet {
	offerIds := util.NewStringSet()
	self.Lock()
	defer self.Unlock()
	for oid, sid := range self.index {
		if sid == slaveId {
			offerIds.Insert(oid)
			delete(self.index, oid)
		}
	}
	return offerIds
}

// delete the slave-offer mappings for offerId
func (self *slaveStorage) deleteOffer(offerId string) {
	self.Lock()
	defer self.Unlock()
	delete(self.index, offerId)
}
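A hedged sketch of how a scheduler component might drive this registry end to end: create it with a decline callback, register a one-shot listener, then feed it offers. The configuration values and hostnames are illustrative; the decline callback here just reports immediate success via proc.ErrorChan, as the tests below also do.

package main

import (
	"fmt"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
	mesos "github.com/mesos/mesos-go/mesosproto"
	mesosutil "github.com/mesos/mesos-go/mesosutil"
)

func main() {
	registry := offers.CreateRegistry(offers.RegistryConfig{
		DeclineOffer: func(offerId string) <-chan error {
			fmt.Println("declining", offerId)
			return proc.ErrorChan(nil) // report success immediately
		},
		Compat: func(o *mesos.Offer) bool { return true }, // accept everything
		TTL: 30 * time.Second,
		LingerTTL: 60 * time.Second,
		ListenerDelay: time.Second,
	})

	done := make(chan struct{})
	defer close(done)
	registry.Init(done) // spawns the aging and notification loops

	// wait (at most once) for any offer from an illustrative hostname
	notified := registry.Listen("my-pod", func(o *mesos.Offer) bool {
		return o.GetHostname() == "slave-1.example.com"
	})

	hostname := "slave-1.example.com"
	registry.Add([]*mesos.Offer{{
		Id: mesosutil.NewOfferID("offer-1"),
		SlaveId: mesosutil.NewSlaveID("slave-1"),
		Hostname: &hostname,
	}})

	<-notified // channel is closed once a matching offer arrives
	fmt.Println("got a matching offer")
}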
391
contrib/mesos/pkg/offers/offers_test.go
Normal file
@@ -0,0 +1,391 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package offers

import (
	"errors"
	"sync/atomic"
	"testing"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
	mesos "github.com/mesos/mesos-go/mesosproto"
	util "github.com/mesos/mesos-go/mesosutil"
)

func TestExpiredOffer(t *testing.T) {
	t.Parallel()

	ttl := 2 * time.Second
	o := Expired("test", "testhost", ttl)

	if o.Id() != "test" {
		t.Error("expiredOffer does not return its Id")
	}
	if o.Host() != "testhost" {
		t.Error("expiredOffer does not return its hostname")
	}
	if o.HasExpired() != true {
		t.Error("expiredOffer is not expired")
	}
	if o.Details() != nil {
		t.Error("expiredOffer does not return nil Details")
	}
	if o.Acquire() != false {
		t.Error("expiredOffer must not be able to be acquired")
	}
	if delay := o.GetDelay(); !(0 < delay && delay <= ttl) {
		t.Error("expiredOffer does not return a valid deadline")
	}
} // TestExpiredOffer

func TestTimedOffer(t *testing.T) {
	t.Parallel()

	ttl := 2 * time.Second
	now := time.Now()
	o := &liveOffer{nil, now.Add(ttl), 0}

	if o.HasExpired() {
		t.Errorf("offer ttl was %v and should not have expired yet", ttl)
	}
	if !o.Acquire() {
		t.Fatal("1st acquisition of offer failed")
	}
	o.Release()
	if !o.Acquire() {
		t.Fatal("2nd acquisition of offer failed")
	}
	if o.Acquire() {
		t.Fatal("3rd acquisition of offer passed but prior claim was not released")
	}
	o.Release()
	if !o.Acquire() {
		t.Fatal("4th acquisition of offer failed")
	}
	o.Release()
	time.Sleep(ttl)
	if !o.HasExpired() {
		t.Fatal("offer not expired after ttl passed")
	}
	if !o.Acquire() {
		t.Fatal("5th acquisition of offer failed; should not be tied to expiration")
	}
	if o.Acquire() {
		t.Fatal("6th acquisition of offer succeeded; should already be acquired")
	}
} // TestTimedOffer

func TestOfferStorage(t *testing.T) {
	ttl := time.Second / 4
	var declinedNum int32
	getDeclinedNum := func() int32 { return atomic.LoadInt32(&declinedNum) }
	config := RegistryConfig{
		DeclineOffer: func(offerId string) <-chan error {
			atomic.AddInt32(&declinedNum, 1)
			return proc.ErrorChan(nil)
		},
		Compat: func(o *mesos.Offer) bool {
			return o.Hostname == nil || *o.Hostname != "incompatiblehost"
		},
		TTL: ttl,
		LingerTTL: 2 * ttl,
	}
	storage := CreateRegistry(config)

	done := make(chan struct{})
	storage.Init(done)

	// Add offer
	id := util.NewOfferID("foo")
	o := &mesos.Offer{Id: id}
	storage.Add([]*mesos.Offer{o})

	// Added offer should be in the storage
	if obj, ok := storage.Get(id.GetValue()); obj == nil || !ok {
		t.Error("offer not added")
	}
	if obj, _ := storage.Get(id.GetValue()); obj.Details() != o {
		t.Error("added offer differs from returned offer")
	}

	// Not-added offer is not in storage
	if obj, ok := storage.Get("bar"); obj != nil || ok {
		t.Error("offer bar should not exist in storage")
	}

	// Deleted offer lingers in storage, is acquired and declined
	offer, _ := storage.Get(id.GetValue())
	declinedNumBefore := getDeclinedNum()
	storage.Delete(id.GetValue(), "deleted for test")
	if obj, _ := storage.Get(id.GetValue()); obj == nil {
		t.Error("deleted offer is not lingering")
	}
	if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
		t.Error("deleted offer is not expired")
	}
	if ok := offer.Acquire(); ok {
		t.Error("deleted offer can be acquired")
	}
	if getDeclinedNum() <= declinedNumBefore {
		t.Error("deleted offer was not declined")
	}

	// Acquired offer is only declined after 2*ttl
	id = util.NewOfferID("foo2")
	o = &mesos.Offer{Id: id}
	storage.Add([]*mesos.Offer{o})
	offer, _ = storage.Get(id.GetValue())
	declinedNumBefore = getDeclinedNum()
	offer.Acquire()
	storage.Delete(id.GetValue(), "deleted for test")
	if getDeclinedNum() > declinedNumBefore {
		t.Error("acquired offer is declined")
	}

	offer.Release()
	time.Sleep(3 * ttl)
	if getDeclinedNum() <= declinedNumBefore {
		t.Error("released offer is not declined after 2*ttl")
	}

	// Added offer should be expired after ttl, but lingering
	id = util.NewOfferID("foo3")
	o = &mesos.Offer{Id: id}
	storage.Add([]*mesos.Offer{o})

	time.Sleep(2 * ttl)
	obj, ok := storage.Get(id.GetValue())
	if obj == nil || !ok {
		t.Error("offer not lingering after ttl")
	}
	if !obj.HasExpired() {
		t.Error("offer is not expired after ttl")
	}

	// Should be deleted when waiting longer than LingerTTL
	time.Sleep(2 * ttl)
	if obj, ok := storage.Get(id.GetValue()); obj != nil || ok {
		t.Error("offer not deleted after LingerTTL")
	}

	// Incompatible offer is declined
	id = util.NewOfferID("foo4")
	incompatibleHostname := "incompatiblehost"
	o = &mesos.Offer{Id: id, Hostname: &incompatibleHostname}
	declinedNumBefore = getDeclinedNum()
	storage.Add([]*mesos.Offer{o})
	if obj, ok := storage.Get(id.GetValue()); obj != nil || ok {
		t.Error("incompatible offer not rejected")
	}
	if getDeclinedNum() <= declinedNumBefore {
		t.Error("incompatible offer is not declined")
	}

	// Invalidated offers are not declined, but expired
	id = util.NewOfferID("foo5")
	o = &mesos.Offer{Id: id}
	storage.Add([]*mesos.Offer{o})
	offer, _ = storage.Get(id.GetValue())
	declinedNumBefore = getDeclinedNum()
	storage.Invalidate(id.GetValue())
	if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
		t.Error("invalidated offer is not expired")
	}
	if getDeclinedNum() > declinedNumBefore {
		t.Error("invalidated offer is declined")
	}
	if ok := offer.Acquire(); ok {
		t.Error("invalidated offer can be acquired")
	}

	// Invalidate "" will invalidate all offers
	id = util.NewOfferID("foo6")
	o = &mesos.Offer{Id: id}
	storage.Add([]*mesos.Offer{o})
	id2 := util.NewOfferID("foo7")
	o2 := &mesos.Offer{Id: id2}
	storage.Add([]*mesos.Offer{o2})
	storage.Invalidate("")
	if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
		t.Error("invalidated offer is not expired")
	}
	if obj2, _ := storage.Get(id2.GetValue()); !obj2.HasExpired() {
		t.Error("invalidated offer is not expired")
	}

	// InvalidateForSlave invalidates all offers for that slave, and only those
	id = util.NewOfferID("foo8")
	slaveId := util.NewSlaveID("test-slave")
	o = &mesos.Offer{Id: id, SlaveId: slaveId}
	storage.Add([]*mesos.Offer{o})
	id2 = util.NewOfferID("foo9")
	o2 = &mesos.Offer{Id: id2}
	storage.Add([]*mesos.Offer{o2})
	storage.InvalidateForSlave(slaveId.GetValue())
	if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
		t.Error("invalidated offer for test-slave is not expired")
	}
	if obj2, _ := storage.Get(id2.GetValue()); obj2.HasExpired() {
		t.Error("invalidated offer for another slave is expired")
	}

	close(done)
} // TestOfferStorage

func TestListen(t *testing.T) {
	ttl := time.Second / 4
	config := RegistryConfig{
		DeclineOffer: func(offerId string) <-chan error {
			return proc.ErrorChan(nil)
		},
		Compat: func(o *mesos.Offer) bool {
			return true
		},
		TTL: ttl,
		ListenerDelay: ttl / 2,
	}
	storage := CreateRegistry(config)

	done := make(chan struct{})
	storage.Init(done)

	// Create two listeners with a hostname filter
	hostname1 := "hostname1"
	hostname2 := "hostname2"
	listener1 := storage.Listen("listener1", func(offer *mesos.Offer) bool {
		return offer.GetHostname() == hostname1
	})
	listener2 := storage.Listen("listener2", func(offer *mesos.Offer) bool {
		return offer.GetHostname() == hostname2
	})

	// Add hostname1 offer
	id := util.NewOfferID("foo")
	o := &mesos.Offer{Id: id, Hostname: &hostname1}
	storage.Add([]*mesos.Offer{o})

	// listener1 is notified by closing channel
	select {
	case _, more := <-listener1:
		if more {
			t.Error("listener1 is not closed")
		}
	}

	// listener2 is not notified within ttl
	select {
	case <-listener2:
		t.Error("listener2 is notified")
	case <-time.After(ttl):
	}

	close(done)
} // TestListen

func TestWalk(t *testing.T) {
	t.Parallel()
	config := RegistryConfig{
		DeclineOffer: func(offerId string) <-chan error {
			return proc.ErrorChan(nil)
		},
		TTL: 0 * time.Second,
		LingerTTL: 0 * time.Second,
		ListenerDelay: 0 * time.Second,
	}
	storage := CreateRegistry(config)
	acceptedOfferId := ""
	walked := 0
	walker1 := func(p Perishable) (bool, error) {
		walked++
		if p.Acquire() {
			acceptedOfferId = p.Details().Id.GetValue()
			return true, nil
		}
		return false, nil
	}
	// sanity check
	err := storage.Walk(walker1)
	if err != nil {
		t.Fatalf("received impossible error %v", err)
	}
	if walked != 0 {
		t.Fatal("walked empty storage")
	}
	if acceptedOfferId != "" {
		t.Fatal("somehow found an offer when registry was empty")
	}
	impl, ok := storage.(*offerStorage)
	if !ok {
		t.Fatal("unexpected offer storage impl")
	}
	// single offer
	ttl := 2 * time.Second
	now := time.Now()
	o := &liveOffer{&mesos.Offer{Id: util.NewOfferID("foo")}, now.Add(ttl), 0}

	impl.offers.Add(o)
	err = storage.Walk(walker1)
	if err != nil {
		t.Fatalf("received impossible error %v", err)
	}
	if walked != 1 {
		t.Fatalf("walk count %d", walked)
	}
	if acceptedOfferId != "foo" {
		t.Fatalf("found offer %v", acceptedOfferId)
	}

	acceptedOfferId = ""
	err = storage.Walk(walker1)
	if err != nil {
		t.Fatalf("received impossible error %v", err)
	}
	if walked != 2 {
		t.Fatalf("walk count %d", walked)
	}
	if acceptedOfferId != "" {
		t.Fatalf("found offer %v", acceptedOfferId)
	}

	walker2 := func(p Perishable) (bool, error) {
		walked++
		return true, nil
	}
	err = storage.Walk(walker2)
	if err != nil {
		t.Fatalf("received impossible error %v", err)
	}
	if walked != 3 {
		t.Fatalf("walk count %d", walked)
	}
	if acceptedOfferId != "" {
		t.Fatalf("found offer %v", acceptedOfferId)
	}

	walker3 := func(p Perishable) (bool, error) {
		walked++
		return true, errors.New("baz")
	}
	err = storage.Walk(walker3)
	if err == nil {
		t.Fatal("expected error")
	}
	if walked != 4 {
		t.Fatalf("walk count %d", walked)
	}
}
19
contrib/mesos/pkg/proc/doc.go
Normal file
@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package proc provides opinionated utilities for processing background
// operations and future errors, somewhat inspired by libprocess.
package proc
34
contrib/mesos/pkg/proc/errors.go
Normal file
@@ -0,0 +1,34 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package proc

import (
	"errors"
)

var (
	errProcessTerminated = errors.New("cannot execute action because process has terminated")
	errIllegalState = errors.New("illegal state, cannot execute action")
)

func IsProcessTerminated(err error) bool {
	return err == errProcessTerminated
}

func IsIllegalState(err error) bool {
	return err == errIllegalState
}
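Because the sentinel errors are unexported, callers classify failures through these predicates. A hedged usage sketch follows; scheduleOrLog is hypothetical, and it assumes the Process interface exposes Do(Action) <-chan error and End(), as procImpl in proc.go below does.

package main

import (
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
	log "github.com/golang/glog"
)

// scheduleOrLog (hypothetical) schedules an action on a process and
// distinguishes "process already ended" from other failures.
func scheduleOrLog(p proc.Process, a proc.Action) {
	// a closed error chan (nil error) means the action was accepted
	if err, ok := <-p.Do(a); ok && err != nil {
		if proc.IsProcessTerminated(err) {
			log.Info("process ended before the action could run")
			return
		}
		log.Errorf("action failed: %v", err)
	}
}

func main() {
	p := proc.New()
	defer p.End()
	scheduleOrLog(p, func() { log.Info("hello from the action loop") })
}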
377
contrib/mesos/pkg/proc/proc.go
Normal file
@@ -0,0 +1,377 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package proc
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
|
||||
log "github.com/golang/glog"
|
||||
)
|
||||
|
||||
const (
|
||||
// if the action processor crashes (if some Action panics) then we
|
||||
// wait this long before spinning up the action processor again.
|
||||
defaultActionHandlerCrashDelay = 100 * time.Millisecond
|
||||
|
||||
// how many actions we can store in the backlog
|
||||
defaultActionQueueDepth = 1024
|
)

type procImpl struct {
    Config
    backlog   chan Action    // action queue
    terminate chan struct{}  // signaled via close()
    wg        sync.WaitGroup // End() terminates when the wait is over
    done      runtime.Signal
    state     *stateType
    pid       uint32
    writeLock sync.Mutex    // avoid data race between write and close of backlog
    changed   *sync.Cond    // wait/signal for backlog changes
    engine    DoerFunc      // isolated this for easier unit testing later on
    running   chan struct{} // closes once event loop processing starts
    dead      chan struct{} // closes upon completion of process termination
}

type Config struct {
    // cooldown period in between deferred action crashes
    actionHandlerCrashDelay time.Duration

    // determines the size of the deferred action backlog
    actionQueueDepth uint32
}

var (
    defaultConfig = Config{
        actionHandlerCrashDelay: defaultActionHandlerCrashDelay,
        actionQueueDepth:        defaultActionQueueDepth,
    }
    pid           uint32
    closedErrChan <-chan error
)

func init() {
    ch := make(chan error)
    close(ch)
    closedErrChan = ch
}

func New() Process {
    return newConfigured(defaultConfig)
}

func newConfigured(config Config) Process {
    state := stateNew
    pi := &procImpl{
        Config:    config,
        backlog:   make(chan Action, config.actionQueueDepth),
        terminate: make(chan struct{}),
        state:     &state,
        pid:       atomic.AddUint32(&pid, 1),
        running:   make(chan struct{}),
        dead:      make(chan struct{}),
    }
    pi.engine = DoerFunc(pi.doLater)
    pi.changed = sync.NewCond(&pi.writeLock)
    pi.wg.Add(1) // symmetrical to wg.Done() in End()
    pi.done = pi.begin()
    return pi
}

// Done returns a chan that closes upon termination of the action processing loop.
func (self *procImpl) Done() <-chan struct{} {
    return self.done
}

func (self *procImpl) Running() <-chan struct{} {
    return self.running
}

func (self *procImpl) begin() runtime.Signal {
    if !self.state.transition(stateNew, stateRunning) {
        panic(fmt.Errorf("failed to transition from New to Running state"))
    }
    defer log.V(2).Infof("started process %d", self.pid)
    var entered runtime.Latch
    // execute actions on the backlog chan
    return runtime.After(func() {
        runtime.Until(func() {
            if entered.Acquire() {
                close(self.running)
                self.wg.Add(1)
            }
            for action := range self.backlog {
                select {
                case <-self.terminate:
                    return
                default:
                    // signal to indicate there's room in the backlog now
                    self.changed.Broadcast()
                    // rely on Until to handle action panics
                    action()
                }
            }
        }, self.actionHandlerCrashDelay, self.terminate)
    }).Then(func() {
        log.V(2).Infof("finished processing action backlog for process %d", self.pid)
        if !entered.Acquire() {
            self.wg.Done()
        }
    })
}

// doLater executes some action in the context of the current process. Actions
// executed via this func are to be executed in a concurrency-safe manner:
// no two actions should execute at the same time. Invocations of this func
// should not block for very long, unless the action backlog is full or the
// process is terminating.
// Returns errProcessTerminated if the process has already ended.
func (self *procImpl) doLater(deferredAction Action) (err <-chan error) {
    a := Action(func() {
        self.wg.Add(1)
        defer self.wg.Done()
        deferredAction()
    })

    scheduled := false
    self.writeLock.Lock()
    defer self.writeLock.Unlock()

    for err == nil && !scheduled {
        switch s := self.state.get(); s {
        case stateRunning:
            select {
            case self.backlog <- a:
                scheduled = true
            default:
                self.changed.Wait()
            }
        case stateTerminal:
            err = ErrorChan(errProcessTerminated)
        default:
            err = ErrorChan(errIllegalState)
        }
    }
    return
}

// Do implements the Doer interface: it schedules some action to be executed
// via the current execution engine.
func (self *procImpl) Do(a Action) <-chan error {
    return self.engine(a)
}
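
// Illustrative usage sketch (reviewer-added, not part of this commit): how a
// caller might schedule work through Do. Per doLater above, a nil chan means
// the action was accepted; a non-nil chan carries the scheduling failure.
//
//	p := New()
//	errCh := p.Do(func() {
//		// runs serialized with every other action of this process
//	})
//	if errCh != nil {
//		// scheduling failed: errProcessTerminated or errIllegalState
//		err := <-errCh
//		log.Errorf("action was not scheduled: %v", err)
//	}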

// OnError spawns a goroutine that waits for an error. If a non-nil error is
// read from the channel then the handler func is invoked, otherwise (nil error
// or closed chan) the handler is skipped. If a nil handler is specified then
// it is never invoked. The signal chan that's returned closes once the error
// processing logic (and handler, if any) has completed.
func OnError(ch <-chan error, f func(error), abort <-chan struct{}) <-chan struct{} {
    return runtime.After(func() {
        if ch == nil {
            return
        }
        select {
        case err, ok := <-ch:
            if ok && err != nil && f != nil {
                f(err)
            }
        case <-abort:
            if f != nil {
                f(errProcessTerminated)
            }
        }
    })
}

func (self *procImpl) OnError(ch <-chan error, f func(error)) <-chan struct{} {
    return OnError(ch, f, self.Done())
}

func (self *procImpl) flush() {
    log.V(2).Infof("flushing action backlog for process %d", self.pid)
    i := 0
    //TODO: replace with `for range self.backlog` once Go 1.3 support is dropped
    for {
        _, open := <-self.backlog
        if !open {
            break
        }
        i++
    }
    log.V(2).Infof("flushed %d backlog actions for process %d", i, self.pid)
}

func (self *procImpl) End() <-chan struct{} {
    if self.state.transitionTo(stateTerminal, stateTerminal) {
        go func() {
            defer close(self.dead)
            self.writeLock.Lock()
            defer self.writeLock.Unlock()

            log.V(2).Infof("terminating process %d", self.pid)

            close(self.backlog)
            close(self.terminate)
            self.wg.Done()
            self.changed.Broadcast()

            log.V(2).Infof("waiting for deferred actions to complete")

            // wait for all pending actions to complete, then flush the backlog
            self.wg.Wait()
            self.flush()
        }()
    }
    return self.dead
}

type errorOnce struct {
    once  sync.Once
    err   chan error
    abort <-chan struct{}
}

func NewErrorOnce(abort <-chan struct{}) ErrorOnce {
    return &errorOnce{
        err:   make(chan error, 1),
        abort: abort,
    }
}

func (b *errorOnce) Err() <-chan error {
    return b.err
}

func (b *errorOnce) Reportf(msg string, args ...interface{}) {
    b.Report(fmt.Errorf(msg, args...))
}

func (b *errorOnce) Report(err error) {
    b.once.Do(func() {
        select {
        case b.err <- err:
        default:
        }
    })
}

func (b *errorOnce) Send(errIn <-chan error) ErrorOnce {
    go b.forward(errIn)
    return b
}

func (b *errorOnce) forward(errIn <-chan error) {
    if errIn == nil {
        b.Report(nil)
        return
    }
    select {
    case err := <-errIn:
        b.Report(err)
    case <-b.abort:
        b.Report(errProcessTerminated)
    }
}
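
// ErrorOnce usage sketch (reviewer-added, not part of this commit): fan one or
// more error sources into a single one-shot result, aborting if the process
// dies first. `someAction` is a hypothetical Action. Note that Send tolerates
// the nil chan that Do returns on success (forward reports a nil error).
//
//	eo := NewErrorOnce(p.Done())
//	eo.Send(p.Do(someAction)) // forward the scheduling result, if any
//	if err := <-eo.Err(); err != nil {
//		log.Errorf("action failed to schedule: %v", err)
//	}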

type processAdapter struct {
    parent   Process
    delegate Doer
}

func (p *processAdapter) Do(a Action) <-chan error {
    if p == nil || p.parent == nil || p.delegate == nil {
        return ErrorChan(errIllegalState)
    }
    errCh := NewErrorOnce(p.Done())
    go func() {
        errOuter := p.parent.Do(func() {
            errInner := p.delegate.Do(a)
            errCh.forward(errInner)
        })
        // if the outer err is !nil then either the parent failed to schedule
        // the action, or else it backgrounded the scheduling task.
        if errOuter != nil {
            errCh.forward(errOuter)
        }
    }()
    return errCh.Err()
}

func (p *processAdapter) End() <-chan struct{} {
    if p != nil && p.parent != nil {
        return p.parent.End()
    }
    return nil
}

func (p *processAdapter) Done() <-chan struct{} {
    if p != nil && p.parent != nil {
        return p.parent.Done()
    }
    return nil
}

func (p *processAdapter) Running() <-chan struct{} {
    if p != nil && p.parent != nil {
        return p.parent.Running()
    }
    return nil
}

func (p *processAdapter) OnError(ch <-chan error, f func(error)) <-chan struct{} {
    if p != nil && p.parent != nil {
        return p.parent.OnError(ch, f)
    }
    return nil
}

// DoWith returns a process that, within its execution context, delegates to
// the specified Doer. If the given Doer instance is nil, a valid Process is
// still returned, though calls to its Do() implementation will always return
// errIllegalState. If the given Process instance is nil then, in addition to
// the behavior in the prior sentence, calls to End() and Done() are
// effectively noops.
func DoWith(other Process, d Doer) Process {
    return &processAdapter{
        parent:   other,
        delegate: d,
    }
}

func ErrorChanf(msg string, args ...interface{}) <-chan error {
    return ErrorChan(fmt.Errorf(msg, args...))
}

func ErrorChan(err error) <-chan error {
    if err == nil {
        return closedErrChan
    }
    ch := make(chan error, 1)
    ch <- err
    return ch
}

// Do invokes the func f on action a. Returns an illegal state error if f is nil.
func (f DoerFunc) Do(a Action) <-chan error {
    if f != nil {
        return f(a)
    }
    return ErrorChan(errIllegalState)
}
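
// DoWith sketch (reviewer-added, not part of this commit): wrap a parent
// process so that every action also passes through a delegate Doer. When
// DoWith calls are nested, delegates run in the order they were wrapped,
// innermost first (see runDelegationTest in the tests below).
//
//	logged := DoWith(p, DoerFunc(func(a Action) <-chan error {
//		log.V(1).Infof("running delegated action")
//		a()
//		return nil // nil means the delegate itself ran the action
//	}))
//	errCh := logged.Do(func() { /* ... */ })
//	// unlike the plain process, the adapter always returns a non-nil chan;
//	// read it via OnError or an ErrorOnce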
373
contrib/mesos/pkg/proc/proc_test.go
Normal file
@@ -0,0 +1,373 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package proc

import (
    "fmt"
    "sync"
    "testing"
    "time"

    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
    log "github.com/golang/glog"
)

// fatalAfter logs a testing.Fatalf if the elapsed time d passes before the signal chan done is closed.
func fatalAfter(t *testing.T, done <-chan struct{}, d time.Duration, msg string, args ...interface{}) {
    select {
    case <-done:
    case <-time.After(d):
        t.Fatalf(msg, args...)
    }
}

func errorAfter(errOnce ErrorOnce, done <-chan struct{}, d time.Duration, msg string, args ...interface{}) {
    select {
    case <-done:
    case <-time.After(d):
        errOnce.Reportf(msg, args...)
    }
}

// fatalOn logs a testing.Fatalf if the signal chan closes before the elapsed time d passes.
func fatalOn(t *testing.T, done <-chan struct{}, d time.Duration, msg string, args ...interface{}) {
    select {
    case <-done:
        t.Fatalf(msg, args...)
    case <-time.After(d):
    }
}

func TestProc_manyEndings(t *testing.T) {
    p := New()
    const COUNT = 20
    var wg sync.WaitGroup
    wg.Add(COUNT)
    for i := 0; i < COUNT; i++ {
        runtime.On(p.End(), wg.Done)
    }
    fatalAfter(t, runtime.After(wg.Wait), 5*time.Second, "timed out waiting for loose End()s")
    fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}

func TestProc_singleAction(t *testing.T) {
    p := New()
    scheduled := make(chan struct{})
    called := make(chan struct{})

    go func() {
        log.Infof("do'ing deferred action")
        defer close(scheduled)
        err := p.Do(func() {
            defer close(called)
            log.Infof("deferred action invoked")
        })
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
    }()

    fatalAfter(t, scheduled, 5*time.Second, "timed out waiting for deferred action to be scheduled")
    fatalAfter(t, called, 5*time.Second, "timed out waiting for deferred action to be invoked")

    p.End()

    fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}

func TestProc_singleActionEnd(t *testing.T) {
    p := New()
    scheduled := make(chan struct{})
    called := make(chan struct{})

    go func() {
        log.Infof("do'ing deferred action")
        defer close(scheduled)
        err := p.Do(func() {
            defer close(called)
            log.Infof("deferred action invoked")
            p.End()
        })
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
    }()

    fatalAfter(t, scheduled, 5*time.Second, "timed out waiting for deferred action to be scheduled")
    fatalAfter(t, called, 5*time.Second, "timed out waiting for deferred action to be invoked")
    fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}

func TestProc_multiAction(t *testing.T) {
    p := New()
    const COUNT = 10
    var called sync.WaitGroup
    called.Add(COUNT)

    // test FIFO property
    next := 0
    for i := 0; i < COUNT; i++ {
        log.Infof("do'ing deferred action %d", i)
        idx := i
        err := p.Do(func() {
            defer called.Done()
            log.Infof("deferred action invoked")
            if next != idx {
                t.Fatalf("expected index %d instead of %d", idx, next)
            }
            next++
        })
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
    }

    fatalAfter(t, runtime.After(called.Wait), 2*time.Second, "timed out waiting for deferred actions to be invoked")

    p.End()

    fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}

func TestProc_goodLifecycle(t *testing.T) {
    p := New()
    p.End()
    fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}

func TestProc_doWithDeadProc(t *testing.T) {
    p := New()
    p.End()
    time.Sleep(100 * time.Millisecond)

    errUnexpected := fmt.Errorf("unexpected execution of delegated action")
    decorated := DoWith(p, DoerFunc(func(_ Action) <-chan error {
        return ErrorChan(errUnexpected)
    }))

    decorated.Do(func() {})
    fatalAfter(t, decorated.Done(), 5*time.Second, "timed out waiting for process death")
}

func TestProc_doWith(t *testing.T) {
    p := New()

    delegated := false
    decorated := DoWith(p, DoerFunc(func(a Action) <-chan error {
        delegated = true
        a()
        return nil
    }))

    executed := make(chan struct{})
    err := decorated.Do(func() {
        defer close(executed)
        if !delegated {
            t.Fatalf("expected delegated execution")
        }
    })
    if err == nil {
        t.Fatalf("expected !nil error chan")
    }

    fatalAfter(t, executed, 5*time.Second, "timed out waiting for deferred execution")
    fatalAfter(t, decorated.OnError(err, func(e error) {
        t.Fatalf("unexpected error: %v", e)
    }), 1*time.Second, "timed out waiting for doer result")

    decorated.End()
    fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}

func TestProc_doWithNestedTwice(t *testing.T) {
    p := New()

    delegated := false
    decorated := DoWith(p, DoerFunc(func(a Action) <-chan error {
        a()
        return nil
    }))

    decorated2 := DoWith(decorated, DoerFunc(func(a Action) <-chan error {
        delegated = true
        a()
        return nil
    }))

    executed := make(chan struct{})
    err := decorated2.Do(func() {
        defer close(executed)
        if !delegated {
            t.Fatalf("expected delegated execution")
        }
    })
    if err == nil {
        t.Fatalf("expected !nil error chan")
    }

    fatalAfter(t, executed, 5*time.Second, "timed out waiting for deferred execution")
    fatalAfter(t, decorated2.OnError(err, func(e error) {
        t.Fatalf("unexpected error: %v", e)
    }), 1*time.Second, "timed out waiting for doer result")

    decorated2.End()
    fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}

func TestProc_doWithNestedErrorPropagation(t *testing.T) {
    p := New()

    delegated := false
    decorated := DoWith(p, DoerFunc(func(a Action) <-chan error {
        a()
        return nil
    }))

    expectedErr := fmt.Errorf("expecting this")
    errOnce := NewErrorOnce(p.Done())
    decorated2 := DoWith(decorated, DoerFunc(func(a Action) <-chan error {
        delegated = true
        a()
        errOnce.Reportf("unexpected error in decorator2")
        return ErrorChanf("another unexpected error in decorator2")
    }))

    executed := make(chan struct{})
    err := decorated2.Do(func() {
        defer close(executed)
        if !delegated {
            t.Fatalf("expected delegated execution")
        }
        errOnce.Report(expectedErr)
    })
    if err == nil {
        t.Fatalf("expected !nil error chan")
    }
    errOnce.Send(err)

    foundError := false
    fatalAfter(t, executed, 1*time.Second, "timed out waiting for deferred execution")
    fatalAfter(t, decorated2.OnError(errOnce.Err(), func(e error) {
        if e != expectedErr {
            t.Fatalf("unexpected error: %v", e)
        } else {
            foundError = true
        }
    }), 1*time.Second, "timed out waiting for doer result")

    if !foundError {
        t.Fatalf("expected a propagated error")
    }

    decorated2.End()
    fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}

func runDelegationTest(t *testing.T, p Process, name string, errOnce ErrorOnce) {
    defer func() {
        t.Logf("runDelegationTest finished at " + time.Now().String())
    }()
    decorated := p

    const DEPTH = 100
    var wg sync.WaitGroup
    wg.Add(DEPTH)
    y := 0

    for x := 1; x <= DEPTH; x++ {
        x := x
        nextp := DoWith(decorated, DoerFunc(func(a Action) <-chan error {
            if x == 1 {
                t.Logf("delegate chain invoked for " + name)
            }
            y++
            if y != x {
                return ErrorChanf("out of order delegated execution")
            }
            defer wg.Done()
            a()
            return nil
        }))
        decorated = nextp
    }

    executed := make(chan struct{})
    errCh := decorated.Do(func() {
        defer close(executed)
        if y != DEPTH {
            errOnce.Reportf("expected delegated execution")
        }
        t.Logf("executing deferred action: " + name + " at " + time.Now().String())
        errOnce.Send(nil) // we completed without error, let the listener know
    })
    if errCh == nil {
        t.Fatalf("expected !nil error chan")
    }

    // forward any scheduling errors to the listener; NOTHING else should attempt to read
    // from errCh after this point
    errOnce.Send(errCh)

    errorAfter(errOnce, executed, 5*time.Second, "timed out waiting for deferred execution")
    t.Logf("runDelegationTest received executed signal at " + time.Now().String())
}

func TestProc_doWithNestedX(t *testing.T) {
    t.Logf("starting test case at " + time.Now().String())
    p := New()
    errOnce := NewErrorOnce(p.Done())
    runDelegationTest(t, p, "nested", errOnce)
    <-p.End()
    select {
    case err := <-errOnce.Err():
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
    case <-time.After(5 * time.Second):
        t.Fatalf("timed out waiting for doer result")
    }
    fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}

// intended to be run with -race
func TestProc_doWithNestedXConcurrent(t *testing.T) {
    p := New()
    errOnce := NewErrorOnce(p.Done())
    var wg sync.WaitGroup
    const CONC = 20
    wg.Add(CONC)
    for i := 0; i < CONC; i++ {
        i := i
        runtime.After(func() { runDelegationTest(t, p, fmt.Sprintf("nested%d", i), errOnce) }).Then(wg.Done)
    }
    ch := runtime.After(wg.Wait)
    fatalAfter(t, ch, 10*time.Second, "timed out waiting for concurrent delegates")

    <-p.End()

    select {
    case err := <-errOnce.Err():
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
    case <-time.After(5 * time.Second):
        t.Fatalf("timed out waiting for doer result")
    }

    fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
55
contrib/mesos/pkg/proc/state.go
Normal file
@@ -0,0 +1,55 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package proc

import (
    "sync/atomic"
)

type stateType int32

const (
    stateNew stateType = iota
    stateRunning
    stateTerminal
)

func (s *stateType) get() stateType {
    return stateType(atomic.LoadInt32((*int32)(s)))
}

func (s *stateType) transition(from, to stateType) bool {
    return atomic.CompareAndSwapInt32((*int32)(s), int32(from), int32(to))
}

func (s *stateType) transitionTo(to stateType, unless ...stateType) bool {
    if len(unless) == 0 {
        atomic.StoreInt32((*int32)(s), int32(to))
        return true
    }
    for {
        state := s.get()
        for _, x := range unless {
            if state == x {
                return false
            }
        }
        if s.transition(state, to) {
            return true
        }
    }
}
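
// Usage sketch (reviewer-added, not part of this commit): the zero value of
// stateType is stateNew, so a freshly allocated value can transition straight
// into stateRunning, while transitionTo guards against racing terminations.
//
//	var s stateType                          // == stateNew
//	_ = s.transition(stateNew, stateRunning) // CAS: true on success
//	if s.transitionTo(stateTerminal, stateTerminal) {
//		// first (and only) terminator wins; repeat calls return false
//	}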
71
contrib/mesos/pkg/proc/types.go
Normal file
@@ -0,0 +1,71 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package proc

// Action is something that executes in the context of a process.
type Action func()

type Context interface {
    // End terminates the execution context.
    End() <-chan struct{}

    // Done returns a signal chan that will close upon the termination of this process.
    Done() <-chan struct{}
}

type Doer interface {
    // Do executes some action in some context. Actions are to be executed in a
    // concurrency-safe manner: no two actions should execute at the same time.
    // Errors are generated if the action cannot be executed (not by the execution
    // of the action) and should be testable with the error API of this package,
    // for example, IsProcessTerminated.
    Do(Action) <-chan error
}

// DoerFunc is an adapter func for the Doer interface.
type DoerFunc func(Action) <-chan error

type Process interface {
    Context
    Doer

    // OnError: see the top-level OnError func. This implementation will terminate
    // upon the arrival of an error (and subsequently invoke the error handler, if
    // given) or else upon the termination of the process (testable via
    // IsProcessTerminated).
    OnError(<-chan error, func(error)) <-chan struct{}

    // Running returns a signal chan that will close once the process is ready to run actions.
    Running() <-chan struct{}
}

// ErrorOnce is an error promise. If we ever start building out support for other
// promise types it will probably make sense to group them in some sort of
// "promises" package.
type ErrorOnce interface {
    // Err returns a chan that only ever sends one error, either obtained via
    // Report() or forward().
    Err() <-chan error

    // Report reports the given error via Err(), but only if no other errors
    // have been reported or forwarded.
    Report(error)
    Reportf(string, ...interface{})

    // forward waits for an error on the incoming chan, the result of which is
    // later obtained via Err() (if no other errors have been reported or forwarded).
    forward(<-chan error)

    // Send is non-blocking: it spins up a goroutine that reports an error (if
    // any) that occurs on the error chan.
    Send(<-chan error) ErrorOnce
}
18
contrib/mesos/pkg/profile/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package profile contains reusable code for profiling Go programs with pprof.
package profile
27
contrib/mesos/pkg/profile/profile.go
Normal file
@@ -0,0 +1,27 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package profile

import (
    "net/http"
    "net/http/pprof"
)

func InstallHandler(m *http.ServeMux) {
    // register similar endpoints as net/http/pprof.init() does
    m.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index))
    m.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile))
    m.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol))
}
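
// Usage sketch (reviewer-added, not part of this commit): wire the pprof
// handlers into a mux served from a standalone HTTP endpoint; the address here
// is only an example.
//
//	mux := http.NewServeMux()
//	profile.InstallHandler(mux)
//	go http.ListenAndServe("127.0.0.1:6060", mux)
//	// then e.g.: go tool pprof http://127.0.0.1:6060/debug/pprof/profile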
373
contrib/mesos/pkg/queue/delay.go
Normal file
@@ -0,0 +1,373 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package queue

import (
    "container/heap"
    "sync"
    "time"

    "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
)

type qitem struct {
    value    interface{}
    priority Priority
    index    int
    readd    func(item *qitem) // re-add the value of the item to the queue
}

// A priorityQueue implements heap.Interface and holds qitems.
type priorityQueue []*qitem

func (pq priorityQueue) Len() int { return len(pq) }

func (pq priorityQueue) Less(i, j int) bool {
    return pq[i].priority.ts.Before(pq[j].priority.ts)
}

func (pq priorityQueue) Swap(i, j int) {
    pq[i], pq[j] = pq[j], pq[i]
    pq[i].index = i
    pq[j].index = j
}

func (pq *priorityQueue) Push(x interface{}) {
    n := len(*pq)
    item := x.(*qitem)
    item.index = n
    *pq = append(*pq, item)
}

func (pq *priorityQueue) Pop() interface{} {
    old := *pq
    n := len(old)
    item := old[n-1]
    item.index = -1 // for safety
    *pq = old[0 : n-1]
    return item
}

// DelayQueue is a concurrency-safe, deadline-oriented queue that returns items
// after their delay period has expired.
type DelayQueue struct {
    queue priorityQueue
    lock  sync.RWMutex
    cond  sync.Cond
}

func NewDelayQueue() *DelayQueue {
    q := &DelayQueue{}
    q.cond.L = &q.lock
    return q
}

func (q *DelayQueue) Add(d Delayed) {
    deadline := extractFromDelayed(d)

    q.lock.Lock()
    defer q.lock.Unlock()

    // readd using the original deadline computed from the original delay
    var readd func(*qitem)
    readd = func(qp *qitem) {
        q.lock.Lock()
        defer q.lock.Unlock()
        heap.Push(&q.queue, &qitem{
            value:    d,
            priority: deadline,
            readd:    readd,
        })
        q.cond.Broadcast()
    }
    heap.Push(&q.queue, &qitem{
        value:    d,
        priority: deadline,
        readd:    readd,
    })
    q.cond.Broadcast()
}

// Offer adds `d` to the queue and returns true, but only if there is a
// deadline reported by d.Deadline().
func (q *DelayQueue) Offer(d Deadlined) bool {
    deadline, ok := extractFromDeadlined(d)
    if ok {
        q.lock.Lock()
        defer q.lock.Unlock()
        heap.Push(&q.queue, &qitem{
            value:    d,
            priority: deadline,
            readd: func(qp *qitem) {
                q.Offer(qp.value.(Deadlined))
            },
        })
        q.cond.Broadcast()
    }
    return ok
}

// Pop waits for the delay of the next item in the queue to expire, blocking if
// there are no items in the queue. It does not guarantee first-come-first-serve
// ordering with respect to clients.
func (q *DelayQueue) Pop() interface{} {
    // doesn't implement cancellation, will always return a non-nil value
    return q.pop(func() *qitem {
        q.lock.Lock()
        defer q.lock.Unlock()
        for q.queue.Len() == 0 {
            q.cond.Wait()
        }
        x := heap.Pop(&q.queue)
        item := x.(*qitem)
        return item
    }, nil)
}

// pop returns a non-nil value from the queue, or else nil if/when cancelled; if
// cancel is nil then cancellation is disabled and this func must return a
// non-nil value.
func (q *DelayQueue) pop(next func() *qitem, cancel <-chan struct{}) interface{} {
    var ch chan struct{}
    for {
        item := next()
        if item == nil {
            // cancelled
            return nil
        }
        x := item.value
        waitingPeriod := item.priority.ts.Sub(time.Now())
        if waitingPeriod >= 0 {
            // listen for calls to Add() while we're waiting for the deadline
            if ch == nil {
                ch = make(chan struct{}, 1)
            }
            go func() {
                q.lock.Lock()
                defer q.lock.Unlock()
                q.cond.Wait()
                ch <- struct{}{}
            }()
            select {
            case <-cancel:
                item.readd(item)
                return nil
            case <-ch:
                // we may no longer have the earliest deadline, re-try
                item.readd(item)
                continue
            case <-time.After(waitingPeriod):
                // noop
            case <-item.priority.notify:
                // noop
            }
        }
        return x
    }
}
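
// Usage sketch (reviewer-added, not part of this commit): any type satisfying
// Delayed (a GetDelay method, per the testjob helper in delay_test.go) can ride
// the queue; Pop blocks until the earliest deadline expires, even if an item
// with an earlier deadline arrives mid-wait. retryTask is hypothetical.
//
//	type retryTask struct{ backoff time.Duration }
//
//	func (r *retryTask) GetDelay() time.Duration { return r.backoff }
//
//	dq := NewDelayQueue()
//	dq.Add(&retryTask{backoff: 3 * time.Second})
//	task := dq.Pop().(*retryTask) // returns after ~3s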

// DelayFIFO is a deadline-oriented FIFO: if multiple adds/updates of a single
// item happen while an item is in the queue before it has been processed, it
// will only be processed once, and when it is processed, the most recent
// version will be processed. Items are popped in order of their priority,
// currently controlled by a delay or deadline assigned to each item in the queue.
type DelayFIFO struct {
    // internal deadline-based priority queue
    delegate *DelayQueue
    // We depend on the property that items in the set are in the queue and vice versa.
    items          map[string]*qitem
    deadlinePolicy DeadlinePolicy
}

func (q *DelayFIFO) lock() {
    q.delegate.lock.Lock()
}

func (q *DelayFIFO) unlock() {
    q.delegate.lock.Unlock()
}

func (q *DelayFIFO) rlock() {
    q.delegate.lock.RLock()
}

func (q *DelayFIFO) runlock() {
    q.delegate.lock.RUnlock()
}

func (q *DelayFIFO) queue() *priorityQueue {
    return &q.delegate.queue
}

func (q *DelayFIFO) cond() *sync.Cond {
    return &q.delegate.cond
}

// Add inserts an item, and puts it in the queue. The item is only enqueued
// if it doesn't already exist in the set.
func (q *DelayFIFO) Add(d UniqueDelayed, rp ReplacementPolicy) {
    deadline := extractFromDelayed(d)
    id := d.GetUID()
    var adder func(*qitem)
    adder = func(*qitem) {
        q.add(id, deadline, d, KeepExisting, adder)
    }
    q.add(id, deadline, d, rp, adder)
}

func (q *DelayFIFO) Offer(d UniqueDeadlined, rp ReplacementPolicy) bool {
    if deadline, ok := extractFromDeadlined(d); ok {
        id := d.GetUID()
        q.add(id, deadline, d, rp, func(qp *qitem) { q.Offer(qp.value.(UniqueDeadlined), KeepExisting) })
        return true
    }
    return false
}

func (q *DelayFIFO) add(id string, deadline Priority, value interface{}, rp ReplacementPolicy, adder func(*qitem)) {
    q.lock()
    defer q.unlock()
    if item, exists := q.items[id]; !exists {
        item = &qitem{
            value:    value,
            priority: deadline,
            readd:    adder,
        }
        heap.Push(q.queue(), item)
        q.items[id] = item
    } else {
        // this is an update of an existing item
        item.value = rp.replacementValue(item.value, value)
        item.priority = q.deadlinePolicy.nextDeadline(item.priority, deadline)
        heap.Fix(q.queue(), item.index)
    }
    q.cond().Broadcast()
}

// Delete removes an item. It doesn't add it to the queue, because
// this implementation assumes the consumer only cares about the objects,
// not their priority order.
func (f *DelayFIFO) Delete(id string) {
    f.lock()
    defer f.unlock()
    delete(f.items, id)
}

// List returns a list of all the items.
func (f *DelayFIFO) List() []UniqueID {
    f.rlock()
    defer f.runlock()
    list := make([]UniqueID, 0, len(f.items))
    for _, item := range f.items {
        list = append(list, item.value.(UniqueDelayed))
    }
    return list
}

// ContainedIDs returns a util.StringSet containing all IDs of the stored items.
// This is a snapshot of a moment in time, and one should keep in mind that
// other go routines can add or remove items after you call this.
func (c *DelayFIFO) ContainedIDs() util.StringSet {
    c.rlock()
    defer c.runlock()
    set := util.StringSet{}
    for id := range c.items {
        set.Insert(id)
    }
    return set
}

// Get returns the requested item, or sets exists=false.
func (f *DelayFIFO) Get(id string) (UniqueID, bool) {
    f.rlock()
    defer f.runlock()
    if item, exists := f.items[id]; exists {
        return item.value.(UniqueID), true
    }
    return nil, false
}

// Await is a variant of DelayQueue.Pop() for UniqueDelayed items: it waits at
// most `timeout` for an item, returning nil if none became available in time.
func (q *DelayFIFO) Await(timeout time.Duration) UniqueID {
    cancel := make(chan struct{})
    ch := make(chan interface{}, 1)
    go func() { ch <- q.pop(cancel) }()
    var x interface{}
    select {
    case <-time.After(timeout):
        close(cancel)
        x = <-ch
    case x = <-ch:
        // noop
    }
    if x != nil {
        return x.(UniqueID)
    }
    return nil
}

// Pop is a variant of DelayQueue.Pop() for UniqueDelayed items.
func (q *DelayFIFO) Pop() UniqueID {
    return q.pop(nil).(UniqueID)
}

// pop is a variant of DelayQueue.pop that implements optional cancellation.
func (q *DelayFIFO) pop(cancel chan struct{}) interface{} {
    next := func() *qitem {
        q.lock()
        defer q.unlock()
        for {
            for q.queue().Len() == 0 {
                signal := make(chan struct{})
                go func() {
                    defer close(signal)
                    q.cond().Wait()
                }()
                select {
                case <-cancel:
                    // we may not have the lock yet, so
                    // broadcast to abort Wait, then
                    // return after lock re-acquisition
                    q.cond().Broadcast()
                    <-signal
                    return nil
                case <-signal:
                    // we have the lock, re-check
                    // the queue for data...
                }
            }
            x := heap.Pop(q.queue())
            item := x.(*qitem)
            unique := item.value.(UniqueID)
            uid := unique.GetUID()
            if _, ok := q.items[uid]; !ok {
                // item was deleted, keep looking
                continue
            }
            delete(q.items, uid)
            return item
        }
    }
    return q.delegate.pop(next, cancel)
}

func NewDelayFIFO() *DelayFIFO {
    f := &DelayFIFO{
        delegate: NewDelayQueue(),
        items:    map[string]*qitem{},
    }
    return f
}
406
contrib/mesos/pkg/queue/delay_test.go
Normal file
@@ -0,0 +1,406 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package queue

import (
    "sync/atomic"
    "testing"
    "time"

    "github.com/stretchr/testify/assert"
)

const (
    tolerance = 100 * time.Millisecond // go time delays aren't perfect, this is our tolerance for errors w.r.t. expected timeouts
)

func timedPriority(t time.Time) Priority {
    return Priority{ts: t}
}

func TestPQ(t *testing.T) {
    t.Parallel()

    var pq priorityQueue
    if pq.Len() != 0 {
        t.Fatalf("pq should be empty")
    }

    now := timedPriority(time.Now())
    now2 := timedPriority(now.ts.Add(2 * time.Second))
    pq.Push(&qitem{priority: now2})
    if pq.Len() != 1 {
        t.Fatalf("pq.len should be 1")
    }
    x := pq.Pop()
    if x == nil {
        t.Fatalf("x is nil")
    }
    if pq.Len() != 0 {
        t.Fatalf("pq should be empty")
    }
    item := x.(*qitem)
    if !item.priority.Equal(now2) {
        t.Fatalf("item.priority != now2")
    }

    pq.Push(&qitem{priority: now2})
    pq.Push(&qitem{priority: now2})
    pq.Push(&qitem{priority: now2})
    pq.Push(&qitem{priority: now2})
    pq.Push(&qitem{priority: now2})
    pq.Pop()
    pq.Pop()
    pq.Pop()
    pq.Pop()
    pq.Pop()
    if pq.Len() != 0 {
        t.Fatalf("pq should be empty")
    }
    now4 := timedPriority(now.ts.Add(4 * time.Second))
    now6 := timedPriority(now.ts.Add(6 * time.Second))
    pq.Push(&qitem{priority: now2})
    pq.Push(&qitem{priority: now4})
    pq.Push(&qitem{priority: now6})
    pq.Swap(0, 2)
    if !pq[0].priority.Equal(now6) || !pq[2].priority.Equal(now2) {
        t.Fatalf("swap failed")
    }
    if pq.Less(1, 2) {
        t.Fatalf("now4 < now2")
    }
}

func TestPopEmptyPQ(t *testing.T) {
    t.Parallel()
    defer func() {
        if r := recover(); r == nil {
            t.Fatalf("Expected panic from popping an empty PQ")
        }
    }()
    var pq priorityQueue
    pq.Pop()
}

type testjob struct {
    d        time.Duration
    t        time.Time
    deadline *time.Time
    uid      string
    instance int
}

func (j *testjob) GetDelay() time.Duration {
    return j.d
}

func (j testjob) GetUID() string {
    return j.uid
}

func (td *testjob) Deadline() (deadline time.Time, ok bool) {
    if td.deadline != nil {
        return *td.deadline, true
    } else {
        return time.Now(), false
    }
}

func TestDQ_sanity_check(t *testing.T) {
    t.Parallel()

    dq := NewDelayQueue()
    delay := 2 * time.Second
    dq.Add(&testjob{d: delay})

    before := time.Now()
    x := dq.Pop()

    now := time.Now()
    waitPeriod := now.Sub(before)

    if waitPeriod+tolerance < delay {
        t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
    }
    if x == nil {
        t.Fatalf("x is nil")
    }
    item := x.(*testjob)
    if item.d != delay {
        t.Fatalf("d != delay")
    }
}

func TestDQ_Offer(t *testing.T) {
    t.Parallel()
    assert := assert.New(t)

    dq := NewDelayQueue()
    delay := time.Second

    added := dq.Offer(&testjob{})
    if added {
        t.Fatalf("DelayQueue should not add offered job without deadline")
    }

    deadline := time.Now().Add(delay)
    added = dq.Offer(&testjob{deadline: &deadline})
    if !added {
        t.Fatalf("DelayQueue should add offered job with deadline")
    }

    before := time.Now()
    x := dq.Pop()

    now := time.Now()
    waitPeriod := now.Sub(before)

    if waitPeriod+tolerance < delay {
        t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
    }
    assert.NotNil(x)
    assert.Equal(x.(*testjob).deadline, &deadline)
}

func TestDQ_ordered_add_pop(t *testing.T) {
    t.Parallel()

    dq := NewDelayQueue()
    dq.Add(&testjob{d: 2 * time.Second})
    dq.Add(&testjob{d: 1 * time.Second})
    dq.Add(&testjob{d: 3 * time.Second})

    var finished [3]*testjob
    before := time.Now()
    idx := int32(-1)
    ch := make(chan bool, 3)
    //TODO: replace with `for range finished` once Go 1.3 support is dropped
    for n := 0; n < len(finished); n++ {
        go func() {
            var ok bool
            x := dq.Pop()
            i := atomic.AddInt32(&idx, 1)
            if finished[i], ok = x.(*testjob); !ok {
                t.Fatalf("expected a *testjob, not %v", x)
            }
            finished[i].t = time.Now()
            ch <- true
        }()
    }
    <-ch
    <-ch
    <-ch

    after := time.Now()
    totalDelay := after.Sub(before)
    if totalDelay+tolerance < (3 * time.Second) {
        t.Fatalf("totalDelay < 3s: %v", totalDelay)
    }
    for i, v := range finished {
        if v == nil {
            t.Fatalf("task %d was nil", i)
        }
        expected := time.Duration(i+1) * time.Second
        if v.d != expected {
            t.Fatalf("task %d had delay-priority %v, expected %v", i, v.d, expected)
        }
        actualDelay := v.t.Sub(before)
        if actualDelay+tolerance < v.d {
            t.Fatalf("task %d had actual-delay %v < expected delay %v", i, actualDelay, v.d)
        }
    }
}

func TestDQ_always_pop_earliest_deadline(t *testing.T) {
    t.Parallel()

    // add a testjob with delay of 2s
    // spawn a func f1 that attempts to Pop() and wait for f1 to begin
    // add a testjob with a delay of 1s
    // check that the func f1 actually popped the 1s task (not the 2s task)

    dq := NewDelayQueue()
    dq.Add(&testjob{d: 2 * time.Second})
    ch := make(chan *testjob)
    started := make(chan bool)

    go func() {
        started <- true
        x := dq.Pop()
        job := x.(*testjob)
        job.t = time.Now()
        ch <- job
    }()

    <-started
    time.Sleep(500 * time.Millisecond) // give plenty of time for Pop() to enter
    expected := 1 * time.Second
    dq.Add(&testjob{d: expected})
    job := <-ch

    if expected != job.d {
        t.Fatalf("expected delay-priority of %v, instead got %v", expected, job.d)
    }

    job = dq.Pop().(*testjob)
    expected = 2 * time.Second
    if expected != job.d {
        t.Fatalf("expected delay-priority of %v, instead got %v", expected, job.d)
    }
}

func TestDQ_always_pop_earliest_deadline_multi(t *testing.T) {
    t.Parallel()

    dq := NewDelayQueue()
    dq.Add(&testjob{d: 2 * time.Second})

    ch := make(chan *testjob)
    multi := 10
    started := make(chan bool, multi)

    go func() {
        started <- true
        for i := 0; i < multi; i++ {
            x := dq.Pop()
            job := x.(*testjob)
            job.t = time.Now()
            ch <- job
        }
    }()

    <-started
    time.Sleep(500 * time.Millisecond) // give plenty of time for Pop() to enter
    expected := 1 * time.Second

    for i := 0; i < multi; i++ {
        dq.Add(&testjob{d: expected})
    }
    for i := 0; i < multi; i++ {
        job := <-ch
        if expected != job.d {
            t.Fatalf("expected delay-priority of %v, instead got %v", expected, job.d)
        }
    }

    job := dq.Pop().(*testjob)
    expected = 2 * time.Second
    if expected != job.d {
        t.Fatalf("expected delay-priority of %v, instead got %v", expected, job.d)
    }
}

func TestDQ_negative_delay(t *testing.T) {
    t.Parallel()

    dq := NewDelayQueue()
    delay := -2 * time.Second
    dq.Add(&testjob{d: delay})

    before := time.Now()
    x := dq.Pop()

    now := time.Now()
    waitPeriod := now.Sub(before)

    if waitPeriod > tolerance {
        t.Fatalf("delay too long: %v, expected something less than: %v", waitPeriod, tolerance)
    }
    if x == nil {
        t.Fatalf("x is nil")
    }
    item := x.(*testjob)
    if item.d != delay {
        t.Fatalf("d != delay")
    }
}

func TestDFIFO_sanity_check(t *testing.T) {
    t.Parallel()
    assert := assert.New(t)

    df := NewDelayFIFO()
    delay := 2 * time.Second
    df.Add(&testjob{d: delay, uid: "a", instance: 1}, ReplaceExisting)
    assert.True(df.ContainedIDs().Has("a"))

    // re-add by ReplaceExisting
    df.Add(&testjob{d: delay, uid: "a", instance: 2}, ReplaceExisting)
    assert.True(df.ContainedIDs().Has("a"))

    a, ok := df.Get("a")
    assert.True(ok)
    assert.Equal(a.(*testjob).instance, 2)

    // re-add by KeepExisting
    df.Add(&testjob{d: delay, uid: "a", instance: 3}, KeepExisting)
    assert.True(df.ContainedIDs().Has("a"))

    a, ok = df.Get("a")
    assert.True(ok)
    assert.Equal(a.(*testjob).instance, 2)

    // pop last
    before := time.Now()
    x := df.Pop()
    assert.Equal(a.(*testjob).instance, 2)

    now := time.Now()
    waitPeriod := now.Sub(before)

    if waitPeriod+tolerance < delay {
        t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
    }
    if x == nil {
        t.Fatalf("x is nil")
    }
    item := x.(*testjob)
    if item.d != delay {
        t.Fatalf("d != delay")
    }
}

func TestDFIFO_Offer(t *testing.T) {
    t.Parallel()
    assert := assert.New(t)

    dq := NewDelayFIFO()
    delay := time.Second

    added := dq.Offer(&testjob{instance: 1}, ReplaceExisting)
    if added {
        t.Fatalf("DelayFIFO should not add offered job without deadline")
    }

    deadline := time.Now().Add(delay)
    added = dq.Offer(&testjob{deadline: &deadline, instance: 2}, ReplaceExisting)
    if !added {
        t.Fatalf("DelayFIFO should add offered job with deadline")
    }

    before := time.Now()
    x := dq.Pop()

    now := time.Now()
    waitPeriod := now.Sub(before)

    if waitPeriod+tolerance < delay {
        t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
    }
    assert.NotNil(x)
    assert.Equal(x.(*testjob).instance, 2)
}
19
contrib/mesos/pkg/queue/doc.go
Normal file
@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package queue provides several queue implementations, originally
// inspired by Kubernetes pkg/client/cache/fifo.
package queue
403
contrib/mesos/pkg/queue/historical.go
Normal file
@@ -0,0 +1,403 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package queue

import (
    "fmt"
    "reflect"
    "sync"
    "time"

    "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
)

type entry struct {
    value UniqueCopyable
    event EventType
}

type deletedEntry struct {
    *entry
    expiration time.Time
}

func (e *entry) Value() UniqueCopyable {
    return e.value
}

func (e *entry) Copy() Copyable {
    if e == nil {
        return nil
    }
    return &entry{e.value.Copy().(UniqueCopyable), e.event}
}

func (e *entry) Is(types EventType) bool {
    return types&e.event != 0
}

func (e *deletedEntry) Copy() Copyable {
    if e == nil {
        return nil
    }
    return &deletedEntry{e.entry.Copy().(*entry), e.expiration}
}

// pigeon delivers a message
type pigeon func(msg Entry)

func dead(msg Entry) {
    // intentionally blank
}

// HistoricalFIFO receives adds and updates from a Reflector, and puts them in a queue for
// FIFO order processing. If multiple adds/updates of a single item happen while
// an item is in the queue before it has been processed, it will only be
// processed once, and when it is processed, the most recent version will be
// processed. This can't be done with a channel.
type HistoricalFIFO struct {
    lock      sync.RWMutex
    cond      sync.Cond
    items     map[string]Entry // We depend on the property that items in the queue are in the set.
    queue     []string
    carrier   pigeon // may be dead, but never nil
    gcc       int
    lingerTTL time.Duration
}

// checkType panics if obj doesn't implement UniqueCopyable; otherwise it
// returns the same object, typecast.
func checkType(obj interface{}) UniqueCopyable {
    if v, ok := obj.(UniqueCopyable); !ok {
        panic(fmt.Sprintf("Illegal object type, expected UniqueCopyable: %T", obj))
    } else {
        return v
    }
}

// Add inserts an item, and puts it in the queue. The item is only enqueued
// if it doesn't already exist in the set.
func (f *HistoricalFIFO) Add(v interface{}) error {
    obj := checkType(v)
    notifications := []Entry(nil)
    defer func() {
        for _, e := range notifications {
            f.carrier(e)
        }
    }()

    f.lock.Lock()
    defer f.lock.Unlock()

    id := obj.GetUID()
    if entry, exists := f.items[id]; !exists {
        f.queue = append(f.queue, id)
    } else {
        if entry.Is(DELETE_EVENT | POP_EVENT) {
            f.queue = append(f.queue, id)
        }
    }
    notifications = f.merge(id, obj)
    f.cond.Broadcast()
    return nil
}

// Update is the same as Add in this implementation.
func (f *HistoricalFIFO) Update(obj interface{}) error {
    return f.Add(obj)
}

// Delete removes an item. It doesn't add it to the queue, because
// this implementation assumes the consumer only cares about the objects,
// not the order in which they were created/added.
func (f *HistoricalFIFO) Delete(v interface{}) error {
    obj := checkType(v)
    deleteEvent := (Entry)(nil)
    defer func() {
        f.carrier(deleteEvent)
    }()

    f.lock.Lock()
    defer f.lock.Unlock()
    id := obj.GetUID()
    item, exists := f.items[id]
    if exists && !item.Is(DELETE_EVENT) {
        e := item.(*entry)
        e.event = DELETE_EVENT
        deleteEvent = &deletedEntry{e, time.Now().Add(f.lingerTTL)}
        f.items[id] = deleteEvent
    }
    return nil
}

// List returns a list of all the items.
func (f *HistoricalFIFO) List() []interface{} {
    f.lock.RLock()
    defer f.lock.RUnlock()

    // TODO(jdef): slightly overallocates b/c of deleted items
    list := make([]interface{}, 0, len(f.queue))

    for _, entry := range f.items {
        if entry.Is(DELETE_EVENT | POP_EVENT) {
            continue
        }
        list = append(list, entry.Value().Copy())
    }
    return list
}

// ListKeys returns a list of the keys of all the items.
func (f *HistoricalFIFO) ListKeys() []string {
    f.lock.RLock()
    defer f.lock.RUnlock()

    // TODO(jdef): slightly overallocates b/c of deleted items
    list := make([]string, 0, len(f.queue))

    for key, entry := range f.items {
        if entry.Is(DELETE_EVENT | POP_EVENT) {
            continue
        }
        list = append(list, key)
    }
    return list
}

// ContainedIDs returns a util.StringSet containing all IDs of the stored items.
// This is a snapshot of a moment in time, and one should keep in mind that
// other go routines can add or remove items after you call this.
func (c *HistoricalFIFO) ContainedIDs() util.StringSet {
    c.lock.RLock()
    defer c.lock.RUnlock()
    set := util.StringSet{}
    for id, entry := range c.items {
        if entry.Is(DELETE_EVENT | POP_EVENT) {
            continue
        }
        set.Insert(id)
    }
    return set
}

// Get returns the requested item, or sets exists=false.
func (f *HistoricalFIFO) Get(v interface{}) (interface{}, bool, error) {
    obj := checkType(v)
    return f.GetByKey(obj.GetUID())
}

// GetByKey returns the requested item, or sets exists=false.
func (f *HistoricalFIFO) GetByKey(id string) (interface{}, bool, error) {
    f.lock.RLock()
    defer f.lock.RUnlock()
    entry, exists := f.items[id]
    if exists && !entry.Is(DELETE_EVENT|POP_EVENT) {
        return entry.Value().Copy(), true, nil
    }
    return nil, false, nil
}

// Poll returns true if the item identified by id exists and its most recent
// event matches the given event type.
func (f *HistoricalFIFO) Poll(id string, t EventType) bool {
    f.lock.RLock()
    defer f.lock.RUnlock()
    entry, exists := f.items[id]
    return exists && entry.Is(t)
}

// Await is a variant of Pop() that waits at most `timeout` for an item,
// returning nil if none became available in time.
func (q *HistoricalFIFO) Await(timeout time.Duration) interface{} {
    cancel := make(chan struct{})
    ch := make(chan interface{}, 1)
    go func() { ch <- q.pop(cancel) }()
    select {
    case <-time.After(timeout):
        close(cancel)
        return <-ch
    case x := <-ch:
        return x
    }
}

func (f *HistoricalFIFO) Pop() interface{} {
    return f.pop(nil)
}

func (f *HistoricalFIFO) pop(cancel chan struct{}) interface{} {
    popEvent := (Entry)(nil)
    defer func() {
        f.carrier(popEvent)
    }()

    f.lock.Lock()
    defer f.lock.Unlock()
    for {
        for len(f.queue) == 0 {
            signal := make(chan struct{})
            go func() {
                defer close(signal)
                f.cond.Wait()
            }()
            select {
            case <-cancel:
                // we may not have the lock yet, so
                // broadcast to abort Wait, then
                // return after lock re-acquisition
                f.cond.Broadcast()
                <-signal
                return nil
            case <-signal:
                // we have the lock, re-check
                // the queue for data...
            }
        }
        id := f.queue[0]
        f.queue = f.queue[1:]
        item, ok := f.items[id]
        if !ok || item.Is(DELETE_EVENT|POP_EVENT) {
            // Item may have been deleted subsequently.
            continue
        }
        value := item.Value()
        popEvent = &entry{value, POP_EVENT}
        f.items[id] = popEvent
        return value.Copy()
    }
}
|
||||
func (f *HistoricalFIFO) Replace(objs []interface{}) error {
|
||||
notifications := make([]Entry, 0, len(objs))
|
||||
defer func() {
|
||||
for _, e := range notifications {
|
||||
f.carrier(e)
|
||||
}
|
||||
}()
|
||||
|
||||
idToObj := make(map[string]interface{})
|
||||
for _, v := range objs {
|
||||
obj := checkType(v)
|
||||
idToObj[obj.GetUID()] = v
|
||||
}
|
||||
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
f.queue = f.queue[:0]
|
||||
now := time.Now()
|
||||
for id, v := range f.items {
|
||||
if _, exists := idToObj[id]; !exists && !v.Is(DELETE_EVENT) {
|
||||
// a non-deleted entry in the items list that doesn't show up in the
|
||||
// new list: mark it as deleted
|
||||
ent := v.(*entry)
|
||||
ent.event = DELETE_EVENT
|
||||
e := &deletedEntry{ent, now.Add(f.lingerTTL)}
|
||||
f.items[id] = e
|
||||
notifications = append(notifications, e)
|
||||
}
|
||||
}
|
||||
for id, v := range idToObj {
|
||||
obj := checkType(v)
|
||||
f.queue = append(f.queue, id)
|
||||
n := f.merge(id, obj)
|
||||
notifications = append(notifications, n...)
|
||||
}
|
||||
if len(f.queue) > 0 {
|
||||
f.cond.Broadcast()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// garbage collect DELETEd items whose TTL has expired; the IDs of such items are removed
|
||||
// from the queue. This impl assumes that caller has acquired state lock.
|
||||
func (f *HistoricalFIFO) gc() {
|
||||
now := time.Now()
|
||||
deleted := make(map[string]struct{})
|
||||
for id, v := range f.items {
|
||||
if v.Is(DELETE_EVENT) {
|
||||
ent := v.(*deletedEntry)
|
||||
if ent.expiration.Before(now) {
|
||||
delete(f.items, id)
|
||||
deleted[id] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
// remove deleted items from the queue, will likely (slightly) overallocate here
|
||||
queue := make([]string, 0, len(f.queue))
|
||||
for _, id := range f.queue {
|
||||
if _, exists := deleted[id]; !exists {
|
||||
queue = append(queue, id)
|
||||
}
|
||||
}
|
||||
f.queue = queue
|
||||
}
|
||||
|
||||
// Assumes that the caller has acquired the state lock.
|
||||
func (f *HistoricalFIFO) merge(id string, obj UniqueCopyable) (notifications []Entry) {
|
||||
item, exists := f.items[id]
|
||||
now := time.Now()
|
||||
if !exists {
|
||||
e := &entry{obj.Copy().(UniqueCopyable), ADD_EVENT}
|
||||
f.items[id] = e
|
||||
notifications = append(notifications, e)
|
||||
} else {
|
||||
if !item.Is(DELETE_EVENT) && item.Value().GetUID() != obj.GetUID() {
|
||||
// hidden DELETE!
|
||||
// (1) append a DELETE
|
||||
// (2) append an ADD
|
||||
// .. and notify listeners in that order
|
||||
ent := item.(*entry)
|
||||
ent.event = DELETE_EVENT
|
||||
e1 := &deletedEntry{ent, now.Add(f.lingerTTL)}
|
||||
e2 := &entry{obj.Copy().(UniqueCopyable), ADD_EVENT}
|
||||
f.items[id] = e2
|
||||
notifications = append(notifications, e1, e2)
|
||||
} else if !reflect.DeepEqual(obj, item.Value()) {
|
||||
//TODO(jdef): it would be nice if we could rely on resource versions
|
||||
//instead of doing a DeepEqual. Maybe someday we'll be able to.
|
||||
e := &entry{obj.Copy().(UniqueCopyable), UPDATE_EVENT}
|
||||
f.items[id] = e
|
||||
notifications = append(notifications, e)
|
||||
}
|
||||
}
|
||||
// check for garbage collection
|
||||
f.gcc++
|
||||
if f.gcc%256 == 0 { //TODO(jdef): extract constant
|
||||
f.gcc = 0
|
||||
f.gc()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// NewHistorical returns a Store which can be used to queue up items to
|
||||
// process. If a non-nil Mux is provided, then modifications to the
|
||||
// the FIFO are delivered on a channel specific to this fifo.
|
||||
func NewHistorical(ch chan<- Entry) FIFO {
|
||||
carrier := dead
|
||||
if ch != nil {
|
||||
carrier = func(msg Entry) {
|
||||
if msg != nil {
|
||||
ch <- msg.Copy().(Entry)
|
||||
}
|
||||
}
|
||||
}
|
||||
f := &HistoricalFIFO{
|
||||
items: map[string]Entry{},
|
||||
queue: []string{},
|
||||
carrier: carrier,
|
||||
lingerTTL: 5 * time.Minute, // TODO(jdef): extract constant
|
||||
}
|
||||
f.cond.L = &f.lock
|
||||
return f
|
||||
}
|
||||
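For review context, here is a minimal consumer sketch of the HistoricalFIFO above. The import path, the demoPod type, and the timings are illustrative assumptions, not part of this commit:

package main

import (
	"fmt"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue" // assumed import path
)

// demoPod is a hypothetical UniqueCopyable payload.
type demoPod struct{ id string }

func (p *demoPod) Copy() queue.Copyable { return &demoPod{p.id} }
func (p *demoPod) GetUID() string       { return p.id }

func main() {
	// Every Add/Update/Delete/Pop is mirrored as an Entry on this channel,
	// letting an observer reconstruct the event history of each object.
	events := make(chan queue.Entry, 16)
	f := queue.NewHistorical(events)

	go func() {
		for e := range events {
			fmt.Printf("event for %s, popped=%v\n", e.Value().GetUID(), e.Is(queue.POP_EVENT))
		}
	}()

	f.Add(&demoPod{"pod-1"})
	popped := f.Pop().(*demoPod) // blocks until an item is ready
	fmt.Println("popped:", popped.id)
	time.Sleep(50 * time.Millisecond) // give the observer a moment to drain
}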
191
contrib/mesos/pkg/queue/historical_test.go
Normal file
@@ -0,0 +1,191 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package queue

import (
	"fmt"
	"testing"
	"time"
)

type _int int
type _uint uint

func (i _int) Copy() Copyable {
	return i
}

func (i _int) GetUID() string {
	return fmt.Sprintf("INT%d", int(i))
}

func (i _uint) Copy() Copyable {
	return i
}

func (i _uint) GetUID() string {
	return fmt.Sprintf("UINT%d", uint64(i))
}

type testObj struct {
	id    string
	value int
}

func (i *testObj) Copy() Copyable {
	if i == nil {
		return nil
	} else {
		return &testObj{i.id, i.value}
	}
}

func (i *testObj) GetUID() string {
	return i.id
}

func TestFIFO_basic(t *testing.T) {
	f := NewHistorical(nil)
	const amount = 500
	go func() {
		for i := 0; i < amount; i++ {
			f.Add(_int(i + 1))
		}
	}()
	go func() {
		for u := uint(0); u < amount; u++ {
			f.Add(_uint(u + 1))
		}
	}()

	lastInt := _int(0)
	lastUint := _uint(0)
	for i := 0; i < amount*2; i++ {
		switch obj := f.Pop().(type) {
		case _int:
			if obj <= lastInt {
				t.Errorf("got %v (int) out of order, last was %v", obj, lastInt)
			}
			lastInt = obj
		case _uint:
			if obj <= lastUint {
				t.Errorf("got %v (uint) out of order, last was %v", obj, lastUint)
			} else {
				lastUint = obj
			}
		default:
			t.Fatalf("unexpected type %#v", obj)
		}
	}
}

func TestFIFO_addUpdate(t *testing.T) {
	f := NewHistorical(nil)
	f.Add(&testObj{"foo", 10})
	f.Update(&testObj{"foo", 15})
	got := make(chan *testObj, 2)
	go func() {
		for {
			got <- f.Pop().(*testObj)
		}
	}()

	first := <-got
	if e, a := 15, first.value; e != a {
		t.Errorf("Didn't get updated value (%v), got %v", e, a)
	}
	select {
	case unexpected := <-got:
		t.Errorf("Got second value %v", unexpected)
	case <-time.After(50 * time.Millisecond):
	}
	_, exists, _ := f.GetByKey("foo")
	if exists {
		t.Errorf("item did not get removed")
	}
}

func TestFIFO_addReplace(t *testing.T) {
	f := NewHistorical(nil)
	f.Add(&testObj{"foo", 10})
	f.Replace([]interface{}{&testObj{"foo", 15}})
	got := make(chan *testObj, 2)
	go func() {
		for {
			got <- f.Pop().(*testObj)
		}
	}()

	first := <-got
	if e, a := 15, first.value; e != a {
		t.Errorf("Didn't get updated value (%v), got %v", e, a)
	}
	select {
	case unexpected := <-got:
		t.Errorf("Got second value %v", unexpected)
	case <-time.After(50 * time.Millisecond):
	}
	_, exists, _ := f.GetByKey("foo")
	if exists {
		t.Errorf("item did not get removed")
	}
}

func TestFIFO_detectLineJumpers(t *testing.T) {
	f := NewHistorical(nil)

	f.Add(&testObj{"foo", 10})
	f.Add(&testObj{"bar", 1})
	f.Add(&testObj{"foo", 11})
	f.Add(&testObj{"foo", 13})
	f.Add(&testObj{"zab", 30})

	err := error(nil)
	done := make(chan struct{})
	go func() {
		defer close(done)
		if e, a := 13, f.Pop().(*testObj).value; a != e {
			err = fmt.Errorf("expected %d, got %d", e, a)
			return
		}

		f.Add(&testObj{"foo", 14}) // ensure foo doesn't jump back in line

		if e, a := 1, f.Pop().(*testObj).value; a != e {
			err = fmt.Errorf("expected %d, got %d", e, a)
			return
		}

		if e, a := 30, f.Pop().(*testObj).value; a != e {
			err = fmt.Errorf("expected %d, got %d", e, a)
			return
		}

		if e, a := 14, f.Pop().(*testObj).value; a != e {
			err = fmt.Errorf("expected %d, got %d", e, a)
			return
		}
	}()
	select {
	case <-done:
		if err != nil {
			t.Fatal(err)
		}
	case <-time.After(1 * time.Second):
		t.Fatal("Deadlocked unit test")
	}
}
103
contrib/mesos/pkg/queue/interface.go
Normal file
@@ -0,0 +1,103 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package queue

import (
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
)

type EventType int

const (
	ADD_EVENT EventType = 1 << iota
	UPDATE_EVENT
	DELETE_EVENT
	POP_EVENT
)

type Entry interface {
	Copyable
	Value() UniqueCopyable
	// types is a logically OR'd combination of EventType, e.g. ADD_EVENT|UPDATE_EVENT
	Is(types EventType) bool
}

type Copyable interface {
	// return an independent copy (deep clone) of the current object
	Copy() Copyable
}

type UniqueID interface {
	GetUID() string
}

type UniqueCopyable interface {
	Copyable
	UniqueID
}

type FIFO interface {
	cache.Store

	// Pop waits until an item is ready and returns it. If multiple items are
	// ready, they are returned in the order in which they were added/updated.
	// The item is removed from the queue (and the store) before it is returned,
	// so if you don't successfully process it, you need to add it back with Add().
	Pop() interface{}

	// Await attempts to Pop within the given interval; upon success the non-nil
	// item is returned, otherwise nil
	Await(timeout time.Duration) interface{}

	// Poll returns true if there is an entry for the id that matches the event mask.
	Poll(id string, types EventType) bool
}

type Delayed interface {
	// return the remaining delay; a non-positive value indicates no delay
	GetDelay() time.Duration
}

type Deadlined interface {
	// when ok, returns the time when this object should be activated/executed/evaluated
	Deadline() (deadline time.Time, ok bool)
}

// No objects are ever expected to be sent over this channel. References to BreakChan
// instances may be nil (always blocking). Signalling over this channel is performed by
// closing the channel. As such there can only ever be a single signal sent over the
// lifetime of the channel.
type BreakChan <-chan struct{}

// an optional interface to be implemented by Delayed objects; returning a nil
// channel from Breaker() results in waiting the full delay duration
type Breakout interface {
	// return a channel that signals early departure from a blocking delay
	Breaker() BreakChan
}

type UniqueDelayed interface {
	UniqueID
	Delayed
}

type UniqueDeadlined interface {
	UniqueID
	Deadlined
}
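A short sketch of the OR'd event-mask semantics defined by this interface, using only the exported API; the import path, ids, and timeout are illustrative assumptions:

package demo

import (
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue" // assumed import path
)

// drainOrSkip illustrates the event-mask semantics: Poll reports whether the
// entry for id currently matches ANY of the masked event types, and Await is
// a Pop with a deadline (a nil result means the timeout elapsed).
func drainOrSkip(f queue.FIFO, id string) (interface{}, bool) {
	if f.Poll(id, queue.DELETE_EVENT|queue.POP_EVENT) {
		return nil, false // id is a tombstone: already popped or deleted
	}
	if v := f.Await(100 * time.Millisecond); v != nil {
		return v, true
	}
	return nil, false // nothing became ready within the timeout
}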
70
contrib/mesos/pkg/queue/policy.go
Normal file
@@ -0,0 +1,70 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package queue

// Decide whether a pre-existing deadline for an item in a delay-queue should be
// updated if an attempt is made to offer/add a new deadline for said item. Whether
// the deadline changes or not has zero impact on the data blob associated with the
// entry in the queue.
type DeadlinePolicy int

const (
	PreferLatest DeadlinePolicy = iota
	PreferEarliest
)

// Decide whether a pre-existing data blob in a delay-queue should be replaced if
// an attempt is made to add/offer a new data blob in its place. Whether the data is
// replaced has no bearing on the deadline (priority) of the item in the queue.
type ReplacementPolicy int

const (
	KeepExisting ReplacementPolicy = iota
	ReplaceExisting
)

func (rp ReplacementPolicy) replacementValue(original, replacement interface{}) (result interface{}) {
	switch rp {
	case KeepExisting:
		result = original
	case ReplaceExisting:
		fallthrough
	default:
		result = replacement
	}
	return
}

func (dp DeadlinePolicy) nextDeadline(a, b Priority) (result Priority) {
	switch dp {
	case PreferEarliest:
		if a.ts.Before(b.ts) {
			result = a
		} else {
			result = b
		}
	case PreferLatest:
		fallthrough
	default:
		if a.ts.After(b.ts) {
			result = a
		} else {
			result = b
		}
	}
	return
}
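A sketch of the two policy axes, written as an in-package test since both helpers are unexported; the test name and values are illustrative, not part of this commit:

package queue

import (
	"testing"
	"time"
)

// TestPolicySketch demonstrates that the two policies are independent:
// ReplacementPolicy governs only the data blob, DeadlinePolicy only the deadline.
func TestPolicySketch(t *testing.T) {
	if v := KeepExisting.replacementValue("old", "new"); v != "old" {
		t.Fatalf("KeepExisting should keep the original, got %v", v)
	}
	if v := ReplaceExisting.replacementValue("old", "new"); v != "new" {
		t.Fatalf("ReplaceExisting should take the offered value, got %v", v)
	}

	early := Priority{ts: time.Now()}
	late := Priority{ts: early.ts.Add(time.Minute)}
	if d := PreferEarliest.nextDeadline(early, late); !d.Equal(early) {
		t.Fatal("PreferEarliest should pick the earlier deadline")
	}
	if d := PreferLatest.nextDeadline(early, late); !d.Equal(late) {
		t.Fatal("PreferLatest should pick the later deadline")
	}
}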
56
contrib/mesos/pkg/queue/priority.go
Normal file
@@ -0,0 +1,56 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package queue

import (
	"time"
)

type Priority struct {
	ts     time.Time // timestamp
	notify BreakChan // notification channel
}

func (p Priority) Equal(other Priority) bool {
	return p.ts.Equal(other.ts) && p.notify == other.notify
}

func extractFromDelayed(d Delayed) Priority {
	deadline := time.Now().Add(d.GetDelay())
	breaker := BreakChan(nil)
	if breakout, good := d.(Breakout); good {
		breaker = breakout.Breaker()
	}
	return Priority{
		ts:     deadline,
		notify: breaker,
	}
}

func extractFromDeadlined(d Deadlined) (Priority, bool) {
	if ts, ok := d.Deadline(); ok {
		breaker := BreakChan(nil)
		if breakout, good := d.(Breakout); good {
			breaker = breakout.Breaker()
		}
		return Priority{
			ts:     ts,
			notify: breaker,
		}, true
	}
	return Priority{}, false
}
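A sketch of how a queued item would opt into early departure: implement Delayed for the deadline and Breakout for the escape hatch. The delayedTask type is a hypothetical example, not part of this commit:

package queue

import "time"

// delayedTask is an illustrative Delayed item that also implements Breakout,
// so a blocked delay can be released early by closing abort.
type delayedTask struct {
	deadline time.Time
	abort    chan struct{} // close to break out of the delay early
}

func (d *delayedTask) GetDelay() time.Duration { return d.deadline.Sub(time.Now()) }
func (d *delayedTask) Breaker() BreakChan      { return BreakChan(d.abort) }

// compile-time checks: extractFromDelayed derives a Priority whose ts is
// now+GetDelay() and whose notify member is the Breaker channel.
var _ Delayed = &delayedTask{}
var _ Breakout = &delayedTask{}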
19
contrib/mesos/pkg/redirfd/doc.go
Normal file
@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Some file descriptor manipulation funcs (Unix-Only), inspired by
// https://github.com/skarnet/execline/blob/master/src/execline/redirfd.c
package redirfd
41
contrib/mesos/pkg/redirfd/file_descriptor.go
Normal file
@@ -0,0 +1,41 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package redirfd

import (
	"fmt"
	"strconv"
)

// FileDescriptor mirrors unix-specific indexes for cross-platform use
type FileDescriptor int

const (
	InvalidFD FileDescriptor = -1
	Stdin     FileDescriptor = 0
	Stdout    FileDescriptor = 1
	Stderr    FileDescriptor = 2
)

// ParseFileDescriptor parses a string formatted file descriptor
func ParseFileDescriptor(fdstr string) (FileDescriptor, error) {
	fdint, err := strconv.Atoi(fdstr)
	if err != nil {
		return InvalidFD, fmt.Errorf("file descriptor must be an integer: %q", fdstr)
	}
	return FileDescriptor(fdint), nil
}
54
contrib/mesos/pkg/redirfd/file_descriptor_test.go
Normal file
@@ -0,0 +1,54 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package redirfd

import (
	"testing"

	. "github.com/onsi/gomega"
)

func TestParseFileDescriptor(t *testing.T) {
	RegisterTestingT(t)

	valid := map[string]FileDescriptor{
		"-1": InvalidFD,
		"0":  Stdin,
		"1":  Stdout,
		"2":  Stderr,
		"3":  FileDescriptor(3),
	}

	for input, expected := range valid {
		fd, err := ParseFileDescriptor(input)
		Expect(err).ToNot(HaveOccurred(), "Input: '%s'", input)
		Expect(fd).To(Equal(expected), "Input: '%s'", input)
	}

	invalid := []string{
		"a",
		" 1",
		"blue",
		"stderr",
		"STDERR",
	}

	for _, input := range invalid {
		_, err := ParseFileDescriptor(input)
		Expect(err).To(HaveOccurred(), "Input: '%s'", input)
	}
}
208
contrib/mesos/pkg/redirfd/redirfd_unix.go
Normal file
@@ -0,0 +1,208 @@
// +build !windows

/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package redirfd

import (
	"fmt"
	"os"
	"syscall"
)

type RedirectMode int

const (
	Read           RedirectMode = iota // open file for reading
	Write                              // open file for writing, truncating if it exists
	Update                             // open file for read & write
	Append                             // open file for append, create if it does not exist
	AppendExisting                     // open file for append, do not create if it does not already exist
	WriteNew                           // open file for writing, creating it, failing if it already exists
)

// see https://github.com/skarnet/execline/blob/master/src/execline/redirfd.c
func (mode RedirectMode) Redirect(nonblock, changemode bool, fd FileDescriptor, name string) (*os.File, error) {
	flags := 0
	what := -1

	switch mode {
	case Read:
		what = syscall.O_RDONLY
		flags &= ^(syscall.O_APPEND | syscall.O_CREAT | syscall.O_TRUNC | syscall.O_EXCL)
	case Write:
		what = syscall.O_WRONLY
		flags |= syscall.O_CREAT | syscall.O_TRUNC
		flags &= ^(syscall.O_APPEND | syscall.O_EXCL)
	case Update:
		what = syscall.O_RDWR
		flags &= ^(syscall.O_APPEND | syscall.O_CREAT | syscall.O_TRUNC | syscall.O_EXCL)
	case Append:
		what = syscall.O_WRONLY
		flags |= syscall.O_CREAT | syscall.O_APPEND
		flags &= ^(syscall.O_TRUNC | syscall.O_EXCL)
	case AppendExisting:
		what = syscall.O_WRONLY
		flags |= syscall.O_APPEND
		flags &= ^(syscall.O_CREAT | syscall.O_TRUNC | syscall.O_EXCL)
	case WriteNew:
		what = syscall.O_WRONLY
		flags |= syscall.O_CREAT | syscall.O_EXCL
		flags &= ^(syscall.O_APPEND | syscall.O_TRUNC)
	default:
		return nil, fmt.Errorf("unexpected mode %d", mode)
	}
	if nonblock {
		flags |= syscall.O_NONBLOCK
	}
	flags |= what

	fd2, e := open(name, flags, 0666)
	if (what == syscall.O_WRONLY) && (e == syscall.ENXIO) {
		// ENXIO on a write-only open typically means a fifo with no reader:
		// open it once in read-only, non-blocking mode so that the write-side
		// open can succeed, then close that temporary read descriptor.
		fdr, e2 := open(name, syscall.O_RDONLY|syscall.O_NONBLOCK, 0)
		if e2 != nil {
			return nil, &os.PathError{"open_read", name, e2}
		}
		fd2, e = open(name, flags, 0666)
		fd_close(fdr)
	}
	if e != nil {
		return nil, &os.PathError{"open", name, e}
	}
	if e = fd_move(fd, fd2); e != nil {
		return nil, &os.PathError{"fd_move", name, e}
	}
	if changemode {
		if nonblock {
			e = ndelay_off(fd)
		} else {
			e = ndelay_on(fd)
		}
		if e != nil {
			return nil, &os.PathError{"ndelay", name, e}
		}
	}
	// fd2 has been moved onto fd (and closed), so fd is the descriptor that
	// remains open
	return os.NewFile(uintptr(fd), name), nil
}

// proxy to return a FileDescriptor
func open(path string, openmode int, perm uint32) (FileDescriptor, error) {
	fdint, err := syscall.Open(path, openmode, perm)
	return FileDescriptor(fdint), err
}

// see https://github.com/skarnet/skalibs/blob/master/src/libstddjb/fd_move.c
func fd_move(to, from FileDescriptor) (err error) {
	if to == from {
		return
	}
	for {
		_, _, e1 := syscall.RawSyscall(syscall.SYS_DUP2, uintptr(from), uintptr(to), 0)
		if e1 != syscall.EINTR {
			if e1 != 0 {
				err = e1
			}
			break
		}
	}
	if err == nil {
		// dup2 succeeded: close the source descriptor, mirroring the C original
		err = fd_close(from)
	}
	return
	/*
		do
			r = dup2(from, to) ;
		while ((r == -1) && (errno == EINTR)) ;
		return (r == -1) ? -1 : fd_close(from) ;
	*/
}

// see https://github.com/skarnet/skalibs/blob/master/src/libstddjb/fd_close.c
func fd_close(fd FileDescriptor) (err error) {
	i := 0
	var e error
	for {
		if e = syscall.Close(int(fd)); e == nil {
			// close succeeded
			return nil
		}
		i++
		if e != syscall.EINTR {
			break
		}
	}
	if e == syscall.EBADF && i > 1 {
		// an earlier EINTR retry may have already closed the descriptor
		return nil
	}
	return e
}

/*
	int fd_close (int fd)
	{
		register unsigned int i = 0 ;
		doit:
		if (!close(fd)) return 0 ;
		i++ ;
		if (errno == EINTR) goto doit ;
		return ((errno == EBADF) && (i > 1)) ? 0 : -1 ;
	}
*/

// see https://github.com/skarnet/skalibs/blob/master/src/libstddjb/ndelay_on.c
func ndelay_on(fd FileDescriptor) error {
	// 32-bit will likely break because it needs SYS_FCNTL64
	got, _, e := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), uintptr(syscall.F_GETFL), 0)
	if e != 0 {
		return e
	}
	_, _, e = syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), uintptr(syscall.F_SETFL), uintptr(got|syscall.O_NONBLOCK))
	if e != 0 {
		return e
	}
	return nil
}

/*
	int ndelay_on (int fd)
	{
		register int got = fcntl(fd, F_GETFL) ;
		return (got == -1) ? -1 : fcntl(fd, F_SETFL, got | O_NONBLOCK) ;
	}
*/

// see https://github.com/skarnet/skalibs/blob/master/src/libstddjb/ndelay_off.c
func ndelay_off(fd FileDescriptor) error {
	// 32-bit will likely break because it needs SYS_FCNTL64
	got, _, e := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), uintptr(syscall.F_GETFL), 0)
	if e != 0 {
		return e
	}
	_, _, e = syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), uintptr(syscall.F_SETFL), uintptr(int(got) & ^syscall.O_NONBLOCK))
	if e != 0 {
		return e
	}
	return nil
}

/*
	int ndelay_off (int fd)
	{
		register int got = fcntl(fd, F_GETFL) ;
		return (got == -1) ? -1 : fcntl(fd, F_SETFL, got & ^O_NONBLOCK) ;
	}
*/
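A minimal usage sketch of Redirect on unix, assuming the package is importable at the path shown; the log path is illustrative, and this is roughly what `redirfd -a 2 /tmp/demo.log` does in execline terms:

// +build !windows

package main

import (
	"fmt"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/redirfd" // assumed import path
)

func main() {
	// Append this process's stderr (fd 2) onto a log file, creating the file
	// if needed; nonblock and changemode are both left off.
	f, err := redirfd.Append.Redirect(false, false, redirfd.Stderr, "/tmp/demo.log")
	if err != nil {
		fmt.Println("redirect failed:", err)
		return
	}
	// fd 2 now refers to the log file; writes through the returned *os.File
	// (or anything else writing to stderr) land in /tmp/demo.log.
	fmt.Fprintln(f, "hello from the redirected stderr")
}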
39
contrib/mesos/pkg/redirfd/redirfd_windows.go
Normal file
@@ -0,0 +1,39 @@
// +build windows

/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package redirfd

import (
	"fmt"
	"os"
)

type RedirectMode int

const (
	Read           RedirectMode = iota // open file for reading
	Write                              // open file for writing, truncating if it exists
	Update                             // open file for read & write
	Append                             // open file for append, create if it does not exist
	AppendExisting                     // open file for append, do not create if it does not already exist
	WriteNew                           // open file for writing, creating it, failing if it already exists
)

func (mode RedirectMode) Redirect(nonblock, changemode bool, fd FileDescriptor, name string) (*os.File, error) {
	return nil, fmt.Errorf("Redirect(%v, %v, %d, %q) not supported on windows", nonblock, changemode, fd, name)
}
19
contrib/mesos/pkg/runtime/doc.go
Normal file
@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package runtime provides utilities for semaphores (chan struct{}),
// a simple Latch implementation, and metrics for reporting handled panics.
package runtime
35
contrib/mesos/pkg/runtime/latch.go
Normal file
@@ -0,0 +1,35 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package runtime

import (
	"sync/atomic"
)

type Latch struct {
	int32
}

// return true if this latch was successfully acquired. concurrency safe. will only return true
// upon the first invocation, all subsequent invocations will return false. always returns false
// when self is nil.
func (self *Latch) Acquire() bool {
	if self == nil {
		return false
	}
	return atomic.CompareAndSwapInt32(&self.int32, 0, 1)
}
61
contrib/mesos/pkg/runtime/latch_test.go
Normal file
@@ -0,0 +1,61 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package runtime

import (
	"sync"
	"sync/atomic"
	"testing"
	"time"
)

func Test_LatchAcquireBasic(t *testing.T) {
	var x Latch
	if !x.Acquire() {
		t.Fatalf("expected first acquire to succeed")
	}
	if x.Acquire() {
		t.Fatalf("expected second acquire to fail")
	}
	if x.Acquire() {
		t.Fatalf("expected third acquire to fail")
	}
}

func Test_LatchAcquireConcurrent(t *testing.T) {
	var x Latch
	const NUM = 10
	ch := make(chan struct{})
	var success int32
	var wg sync.WaitGroup
	wg.Add(NUM)
	for i := 0; i < NUM; i++ {
		go func() {
			defer wg.Done()
			<-ch
			if x.Acquire() {
				atomic.AddInt32(&success, 1)
			}
		}()
	}
	time.Sleep(200 * time.Millisecond)
	close(ch)
	wg.Wait()
	if success != 1 {
		t.Fatalf("expected single acquire to succeed instead of %d", success)
	}
}
47
contrib/mesos/pkg/runtime/metrics.go
Normal file
@@ -0,0 +1,47 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package runtime

import (
	"sync"

	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
	"github.com/prometheus/client_golang/prometheus"
)

const (
	runtimeSubsystem = "runtime"
)

var (
	panicCounter = prometheus.NewCounter(
		prometheus.CounterOpts{
			Subsystem: runtimeSubsystem,
			Name:      "panics",
			Help:      "Counter of panics handled by the internal crash handler.",
		},
	)
)

var registerMetrics sync.Once

func Register() {
	registerMetrics.Do(func() {
		prometheus.MustRegister(panicCounter)
		util.PanicHandlers = append(util.PanicHandlers, func(interface{}) { panicCounter.Inc() })
	})
}
122
contrib/mesos/pkg/runtime/util.go
Normal file
@@ -0,0 +1,122 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package runtime

import (
	"os"
	"sync"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
)

type Signal <-chan struct{}

// return a func that will close the signal chan.
// multiple invocations of the returned func will not generate a panic.
// two funcs from separate invocations of Closer() (on the same sig chan) will cause a panic if both invoked.
// for example:
//   // good
//   sig := make(chan struct{})
//   f := runtime.Closer(sig)
//   f()
//   f() // no panic: the close happens only once
//
//   // bad
//   sig := make(chan struct{})
//   f := runtime.Closer(sig)
//   g := runtime.Closer(sig)
//   f()
//   g() // this will panic: second close of the same chan
func Closer(sig chan<- struct{}) func() {
	var once sync.Once
	return func() {
		once.Do(func() { close(sig) })
	}
}

// upon receiving signal sig invoke function f and immediately return a signal
// that indicates f's completion. used to chain handler funcs, for example:
//   On(job.Done(), response.Send).Then(wg.Done)
func (sig Signal) Then(f func()) Signal {
	if sig == nil {
		return nil
	}
	return On(sig, f)
}

// execute a callback function after the specified signal chan closes.
// immediately returns a signal that indicates f's completion.
func On(sig <-chan struct{}, f func()) Signal {
	if sig == nil {
		return nil
	}
	return After(func() {
		<-sig
		if f != nil {
			f()
		}
	})
}

func OnOSSignal(sig <-chan os.Signal, f func(os.Signal)) Signal {
	if sig == nil {
		return nil
	}
	return After(func() {
		if s, ok := <-sig; ok && f != nil {
			f(s)
		}
	})
}

// spawn a goroutine to execute a func, immediately returning a chan that
// closes upon completion of the func. a nil func is treated as a no-op:
// the returned chan still closes right away.
func After(f func()) Signal {
	ch := make(chan struct{})
	go func() {
		defer close(ch)
		defer util.HandleCrash()
		if f != nil {
			f()
		}
	}()
	return Signal(ch)
}

// periodically execute the given function, stopping once stopCh is closed.
// this func blocks until stopCh is closed, it's intended to be run as a goroutine.
func Until(f func(), period time.Duration, stopCh <-chan struct{}) {
	if f == nil {
		return
	}
	for {
		select {
		case <-stopCh:
			return
		default:
		}
		func() {
			defer util.HandleCrash()
			f()
		}()
		select {
		case <-stopCh:
		case <-time.After(period):
		}
	}
}
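A small sketch of chaining these completion signals together; the import path is an assumption and the printed strings are illustrative:

package main

import (
	"fmt"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime" // assumed import path
)

func main() {
	// After runs f in a goroutine and yields a Signal that closes when f
	// returns; Then chains a second func onto that completion.
	done := runtime.After(func() { fmt.Println("working...") }).Then(func() { fmt.Println("cleaning up") })
	<-done

	// Closer wraps a chan so that repeated shutdown requests are safe.
	stop := make(chan struct{})
	shutdown := runtime.Closer(stop)
	finished := runtime.On(stop, func() { fmt.Println("stopped") })
	shutdown()
	shutdown() // idempotent: the chan is only closed once
	<-finished
}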
64
contrib/mesos/pkg/runtime/util_test.go
Normal file
@@ -0,0 +1,64 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package runtime

import (
	"testing"
	"time"
)

func TestUntil(t *testing.T) {
	ch := make(chan struct{})
	close(ch)
	Until(func() {
		t.Fatal("should not have been invoked")
	}, 0, ch)

	//--
	ch = make(chan struct{})
	called := make(chan struct{})
	After(func() {
		Until(func() {
			called <- struct{}{}
		}, 0, ch)
	}).Then(func() { close(called) })

	<-called
	close(ch)
	<-called

	//--
	ch = make(chan struct{})
	called = make(chan struct{})
	running := make(chan struct{})
	After(func() {
		Until(func() {
			close(running)
			called <- struct{}{}
		}, 2*time.Second, ch)
	}).Then(func() { close(called) })

	<-running
	close(ch)
	<-called // unblock the goroutine
	now := time.Now()

	<-called
	if time.Since(now) > 1800*time.Millisecond {
		t.Fatalf("Until should not have waited the full timeout period since we closed the stop chan")
	}
}
109
contrib/mesos/pkg/scheduler/config/config.go
Normal file
@@ -0,0 +1,109 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package config

import (
	"io"
	"time"

	"code.google.com/p/gcfg"
)

const (
	DefaultOfferTTL                           = 5 * time.Second   // duration an offer is viable, prior to being expired
	DefaultOfferLingerTTL                     = 120 * time.Second // duration an expired offer lingers in history
	DefaultListenerDelay                      = 1 * time.Second   // duration between offer listener notifications
	DefaultUpdatesBacklog                     = 2048              // size of the pod updates channel
	DefaultFrameworkIdRefreshInterval         = 30 * time.Second  // interval we update the frameworkId stored in etcd
	DefaultInitialImplicitReconciliationDelay = 15 * time.Second  // wait this amount of time after initial registration before attempting implicit reconciliation
	DefaultExplicitReconciliationMaxBackoff   = 2 * time.Minute   // interval in between internal task status checks/updates
	DefaultExplicitReconciliationAbortTimeout = 30 * time.Second  // waiting period after attempting to cancel an ongoing reconciliation
	DefaultInitialPodBackoff                  = 1 * time.Second
	DefaultMaxPodBackoff                      = 60 * time.Second
	DefaultHttpHandlerTimeout                 = 10 * time.Second
	DefaultHttpBindInterval                   = 5 * time.Second
)

// Example scheduler configuration file:
//
// [scheduler]
// info-name = Kubernetes
// offer-ttl = 5s
// offer-linger-ttl = 2m

type ConfigWrapper struct {
	Scheduler Config
}

type Config struct {
	OfferTTL                           WrappedDuration `gcfg:"offer-ttl"`
	OfferLingerTTL                     WrappedDuration `gcfg:"offer-linger-ttl"`
	ListenerDelay                      WrappedDuration `gcfg:"listener-delay"`
	UpdatesBacklog                     int             `gcfg:"updates-backlog"`
	FrameworkIdRefreshInterval         WrappedDuration `gcfg:"framework-id-refresh-interval"`
	InitialImplicitReconciliationDelay WrappedDuration `gcfg:"initial-implicit-reconciliation-delay"`
	ExplicitReconciliationMaxBackoff   WrappedDuration `gcfg:"explicit-reconciliantion-max-backoff"`
	ExplicitReconciliationAbortTimeout WrappedDuration `gcfg:"explicit-reconciliantion-abort-timeout"`
	InitialPodBackoff                  WrappedDuration `gcfg:"initial-pod-backoff"`
	MaxPodBackoff                      WrappedDuration `gcfg:"max-pod-backoff"`
	HttpHandlerTimeout                 WrappedDuration `gcfg:"http-handler-timeout"`
	HttpBindInterval                   WrappedDuration `gcfg:"http-bind-interval"`
}

type WrappedDuration struct {
	time.Duration
}

func (wd *WrappedDuration) UnmarshalText(data []byte) error {
	d, err := time.ParseDuration(string(data))
	if err == nil {
		wd.Duration = d
	}
	return err
}

func (c *Config) SetDefaults() {
	c.OfferTTL = WrappedDuration{DefaultOfferTTL}
	c.OfferLingerTTL = WrappedDuration{DefaultOfferLingerTTL}
	c.ListenerDelay = WrappedDuration{DefaultListenerDelay}
	c.UpdatesBacklog = DefaultUpdatesBacklog
	c.FrameworkIdRefreshInterval = WrappedDuration{DefaultFrameworkIdRefreshInterval}
	c.InitialImplicitReconciliationDelay = WrappedDuration{DefaultInitialImplicitReconciliationDelay}
	c.ExplicitReconciliationMaxBackoff = WrappedDuration{DefaultExplicitReconciliationMaxBackoff}
	c.ExplicitReconciliationAbortTimeout = WrappedDuration{DefaultExplicitReconciliationAbortTimeout}
	c.InitialPodBackoff = WrappedDuration{DefaultInitialPodBackoff}
	c.MaxPodBackoff = WrappedDuration{DefaultMaxPodBackoff}
	c.HttpHandlerTimeout = WrappedDuration{DefaultHttpHandlerTimeout}
	c.HttpBindInterval = WrappedDuration{DefaultHttpBindInterval}
}

func CreateDefaultConfig() *Config {
	c := &Config{}
	c.SetDefaults()
	return c
}

func (c *Config) Read(configReader io.Reader) error {
	wrapper := &ConfigWrapper{Scheduler: *c}
	if configReader != nil {
		if err := gcfg.ReadInto(wrapper, configReader); err != nil {
			return err
		}
		*c = wrapper.Scheduler
	}
	return nil
}
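A sketch of the intended load pattern, using only the exported API above: start from compiled-in defaults and overlay whatever the gcfg file provides. The import path and the config file path are illustrative assumptions:

package main

import (
	"fmt"
	"os"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config" // assumed import path
)

func main() {
	c := config.CreateDefaultConfig()
	if f, err := os.Open("/etc/k8sm/scheduler.conf"); err == nil {
		defer f.Close()
		// Read parses the [scheduler] section and overwrites only the keys
		// present in the file; everything else keeps its default.
		if err := c.Read(f); err != nil {
			fmt.Println("bad scheduler config:", err)
			os.Exit(1)
		}
	}
	fmt.Println("offer TTL:", c.OfferTTL.Duration)
}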
112
contrib/mesos/pkg/scheduler/config/config_test.go
Normal file
@@ -0,0 +1,112 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package config

import (
	"strings"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
)

func is_default(c *Config, t *testing.T) {
	assert := assert.New(t)

	assert.Equal(DefaultOfferTTL, c.OfferTTL.Duration)
	assert.Equal(DefaultOfferLingerTTL, c.OfferLingerTTL.Duration)
	assert.Equal(DefaultListenerDelay, c.ListenerDelay.Duration)
	assert.Equal(DefaultUpdatesBacklog, c.UpdatesBacklog)
	assert.Equal(DefaultFrameworkIdRefreshInterval, c.FrameworkIdRefreshInterval.Duration)
	assert.Equal(DefaultInitialImplicitReconciliationDelay, c.InitialImplicitReconciliationDelay.Duration)
	assert.Equal(DefaultExplicitReconciliationMaxBackoff, c.ExplicitReconciliationMaxBackoff.Duration)
	assert.Equal(DefaultExplicitReconciliationAbortTimeout, c.ExplicitReconciliationAbortTimeout.Duration)
	assert.Equal(DefaultInitialPodBackoff, c.InitialPodBackoff.Duration)
	assert.Equal(DefaultMaxPodBackoff, c.MaxPodBackoff.Duration)
	assert.Equal(DefaultHttpHandlerTimeout, c.HttpHandlerTimeout.Duration)
	assert.Equal(DefaultHttpBindInterval, c.HttpBindInterval.Duration)
}

// Check that SetDefaults sets the default values
func TestConfig_SetDefaults(t *testing.T) {
	c := &Config{}
	c.SetDefaults()
	is_default(c, t)
}

// Check that CreateDefaultConfig returns a default config
func TestConfig_CreateDefaultConfig(t *testing.T) {
	c := CreateDefaultConfig()
	is_default(c, t)
}

// Check that a config string can be parsed
func TestConfig_Read(t *testing.T) {
	assert := assert.New(t)

	c := CreateDefaultConfig()
	reader := strings.NewReader(`
[scheduler]
offer-ttl=42s
offer-linger-ttl=42s
listener-delay=42s
updates-backlog=42
framework-id-refresh-interval=42s
initial-implicit-reconciliation-delay=42s
explicit-reconciliantion-max-backoff=42s
explicit-reconciliantion-abort-timeout=42s
initial-pod-backoff=42s
max-pod-backoff=42s
http-handler-timeout=42s
http-bind-interval=42s
`)
	err := c.Read(reader)
	if err != nil {
		t.Fatal("Cannot parse scheduler config: " + err.Error())
	}

	assert.Equal(42*time.Second, c.OfferTTL.Duration)
	assert.Equal(42*time.Second, c.OfferLingerTTL.Duration)
	assert.Equal(42*time.Second, c.ListenerDelay.Duration)
	assert.Equal(42, c.UpdatesBacklog)
	assert.Equal(42*time.Second, c.FrameworkIdRefreshInterval.Duration)
	assert.Equal(42*time.Second, c.InitialImplicitReconciliationDelay.Duration)
	assert.Equal(42*time.Second, c.ExplicitReconciliationMaxBackoff.Duration)
	assert.Equal(42*time.Second, c.ExplicitReconciliationAbortTimeout.Duration)
	assert.Equal(42*time.Second, c.InitialPodBackoff.Duration)
	assert.Equal(42*time.Second, c.MaxPodBackoff.Duration)
	assert.Equal(42*time.Second, c.HttpHandlerTimeout.Duration)
	assert.Equal(42*time.Second, c.HttpBindInterval.Duration)
}

// check that an invalid config is rejected and none of the values are overwritten
func TestConfig_ReadError(t *testing.T) {
	assert := assert.New(t)

	c := CreateDefaultConfig()
	reader := strings.NewReader(`
[scheduler]
offer-ttl = 42s
invalid-setting = 42s
`)
	err := c.Read(reader)
	if err == nil {
		t.Fatal("Invalid scheduler config should lead to an error")
	}

	assert.NotEqual(42*time.Second, c.OfferTTL.Duration)
}
18
contrib/mesos/pkg/scheduler/config/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package config provides mechanisms for low-level scheduler tuning.
package config
106
contrib/mesos/pkg/scheduler/constraint/constraint.go
Normal file
@@ -0,0 +1,106 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package constraint

import (
	"encoding/json"
	"fmt"
)

type OperatorType int

const (
	UniqueOperator OperatorType = iota
	LikeOperator
	ClusterOperator
	GroupByOperator
	UnlikeOperator
)

var (
	labels = []string{
		"UNIQUE",
		"LIKE",
		"CLUSTER",
		"GROUP_BY",
		"UNLIKE",
	}

	labelToType map[string]OperatorType
)

func init() {
	labelToType = make(map[string]OperatorType)
	for i, s := range labels {
		labelToType[s] = OperatorType(i)
	}
}

func (t OperatorType) String() string {
	switch t {
	case UniqueOperator, LikeOperator, ClusterOperator, GroupByOperator, UnlikeOperator:
		return labels[int(t)]
	default:
		panic(fmt.Sprintf("unrecognized operator type: %d", int(t)))
	}
}

func parseOperatorType(s string) (OperatorType, error) {
	t, found := labelToType[s]
	if !found {
		return UniqueOperator, fmt.Errorf("unrecognized operator %q", s)
	}
	return t, nil
}

type Constraint struct {
	Field    string       // required
	Operator OperatorType // required
	Value    string       // optional
}

func (c *Constraint) MarshalJSON() ([]byte, error) {
	var a []string
	if c != nil {
		if c.Value != "" {
			a = append(a, c.Field, c.Operator.String(), c.Value)
		} else {
			a = append(a, c.Field, c.Operator.String())
		}
	}
	return json.Marshal(a)
}

func (c *Constraint) UnmarshalJSON(buf []byte) (err error) {
	var a []string
	if err = json.Unmarshal(buf, &a); err != nil {
		return err
	}
	switch x := len(a); {
	case x < 2:
		err = fmt.Errorf("not enough arguments to form constraint")
	case x > 3:
		err = fmt.Errorf("too many arguments to form constraint")
	case x == 3:
		c.Value = a[2]
		fallthrough
	case x == 2:
		c.Field = a[0]
		c.Operator, err = parseOperatorType(a[1])
	}
	return err
}
79
contrib/mesos/pkg/scheduler/constraint/constraint_test.go
Normal file
79
contrib/mesos/pkg/scheduler/constraint/constraint_test.go
Normal file
@@ -0,0 +1,79 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package constraint

import (
	"encoding/json"
	"testing"
)

func TestDeserialize(t *testing.T) {
	shouldMatch := func(js string, field string, operator OperatorType, value string) (err error) {
		constraint := Constraint{}
		if err = json.Unmarshal(([]byte)(js), &constraint); err != nil {
			return
		}
		if field != constraint.Field {
			t.Fatalf("expected field %q instead of %q", field, constraint.Field)
		}
		if operator != constraint.Operator {
			t.Fatalf("expected operator %v instead of %v", operator, constraint.Operator)
		}
		if value != constraint.Value {
			t.Fatalf("expected value %q instead of %q", value, constraint.Value)
		}
		return
	}
	failOnError := func(err error) {
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
	}
	failOnError(shouldMatch(`["hostname","UNIQUE"]`, "hostname", UniqueOperator, ""))
	failOnError(shouldMatch(`["rackid","GROUP_BY","1"]`, "rackid", GroupByOperator, "1"))
	failOnError(shouldMatch(`["jdk","LIKE","7"]`, "jdk", LikeOperator, "7"))
	failOnError(shouldMatch(`["jdk","UNLIKE","7"]`, "jdk", UnlikeOperator, "7"))
	failOnError(shouldMatch(`["bob","CLUSTER","foo"]`, "bob", ClusterOperator, "foo"))
	err := shouldMatch(`["bill","NOT_REALLY_AN_OPERATOR","pete"]`, "bill", ClusterOperator, "pete")
	if err == nil {
		t.Fatalf("expected unmarshalling error for invalid operator")
	}
}

func TestSerialize(t *testing.T) {
	shouldMatch := func(expected string, constraint *Constraint) error {
		data, err := json.Marshal(constraint)
		if err != nil {
			return err
		}
		js := string(data)
		if js != expected {
			t.Fatalf("expected json %q instead of %q", expected, js)
		}
		return nil
	}
	failOnError := func(err error) {
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
	}
	failOnError(shouldMatch(`["hostname","UNIQUE"]`, &Constraint{"hostname", UniqueOperator, ""}))
	failOnError(shouldMatch(`["rackid","GROUP_BY","1"]`, &Constraint{"rackid", GroupByOperator, "1"}))
	failOnError(shouldMatch(`["jdk","LIKE","7"]`, &Constraint{"jdk", LikeOperator, "7"}))
	failOnError(shouldMatch(`["jdk","UNLIKE","7"]`, &Constraint{"jdk", UnlikeOperator, "7"}))
	failOnError(shouldMatch(`["bob","CLUSTER","foo"]`, &Constraint{"bob", ClusterOperator, "foo"}))
}
19
contrib/mesos/pkg/scheduler/constraint/doc.go
Normal file
@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package constraint exposes Marathon-like constraints for scheduling pods.
// Incomplete.
package constraint
18
contrib/mesos/pkg/scheduler/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package scheduler implements the Kubernetes Mesos scheduler.
package scheduler
57
contrib/mesos/pkg/scheduler/fcfs.go
Normal file
@@ -0,0 +1,57 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduler

import (
	"fmt"

	log "github.com/golang/glog"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
)

// FCFSScheduleFunc is a first-come-first-served scheduler: it acquires the
// first offer that can support the task.
func FCFSScheduleFunc(r offers.Registry, unused SlaveIndex, task *podtask.T) (offers.Perishable, error) {
	podName := fmt.Sprintf("%s/%s", task.Pod.Namespace, task.Pod.Name)
	var acceptedOffer offers.Perishable
	err := r.Walk(func(p offers.Perishable) (bool, error) {
		offer := p.Details()
		if offer == nil {
			return false, fmt.Errorf("nil offer while scheduling task %v", task.ID)
		}
		if task.AcceptOffer(offer) {
			if p.Acquire() {
				acceptedOffer = p
				log.V(3).Infof("Pod %s accepted offer %v", podName, offer.Id.GetValue())
				return true, nil // stop, we found an offer
			}
		}
		return false, nil // continue
	})
	if acceptedOffer != nil {
		if err != nil {
			log.Warningf("problems walking the offer registry: %v, attempting to continue", err)
		}
		return acceptedOffer, nil
	}
	if err != nil {
		log.V(2).Infof("failed to find a fit for pod: %s, err = %v", podName, err)
		return nil, err
	}
	log.V(2).Infof("failed to find a fit for pod: %s", podName)
	return nil, noSuitableOffersErr
}
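The walk-and-acquire flow above is easiest to see in isolation. The following is a minimal sketch, not the project's offers.Registry: the toyOffer type and firstFit helper are hypothetical stand-ins that show the same first-fit contract, namely stop on the first offer that both fits and can be acquired.

package main

import "fmt"

// toyOffer is a hypothetical stand-in for offers.Perishable.
type toyOffer struct {
	id       string
	cpu      float64
	acquired bool
}

// acquire emulates Perishable.Acquire: it fails if the offer was
// already claimed, and succeeds exactly once otherwise.
func (o *toyOffer) acquire() bool {
	if o.acquired {
		return false
	}
	o.acquired = true
	return true
}

// firstFit mirrors the shape of FCFSScheduleFunc: walk offers in order and
// take the first one that fits the requested cpu and can be acquired.
func firstFit(offers []*toyOffer, wantCPU float64) (*toyOffer, error) {
	for _, o := range offers {
		if o.cpu >= wantCPU && o.acquire() {
			return o, nil // stop, we found an offer
		}
	}
	return nil, fmt.Errorf("no suitable offers for cpu=%v", wantCPU)
}

func main() {
	offers := []*toyOffer{{id: "o1", cpu: 0.5}, {id: "o2", cpu: 2}}
	o, err := firstFit(offers, 1)
	fmt.Println(o.id, err) // picks o2, the first offer large enough
}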
18
contrib/mesos/pkg/scheduler/ha/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package ha encapsulates high-availability scheduler concerns.
package ha
73
contrib/mesos/pkg/scheduler/ha/election.go
Normal file
@@ -0,0 +1,73 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ha

import (
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/election"
	log "github.com/golang/glog"
)

type roleType int

const (
	followerRole roleType = iota
	masterRole
	retiredRole
)

type candidateService struct {
	sched     *SchedulerProcess
	newDriver DriverFactory
	role      roleType
	valid     ValidationFunc
}

type ValidationFunc func(desiredUid, currentUid string)

func NewCandidate(s *SchedulerProcess, f DriverFactory, v ValidationFunc) election.Service {
	return &candidateService{
		sched:     s,
		newDriver: f,
		role:      followerRole,
		valid:     v,
	}
}

func (self *candidateService) Validate(desired, current election.Master) {
	if self.valid != nil {
		self.valid(string(desired), string(current))
	}
}

func (self *candidateService) Start() {
	if self.role == followerRole {
		log.Info("elected as master")
		self.role = masterRole
		self.sched.Elect(self.newDriver)
	}
}

func (self *candidateService) Stop() {
	if self.role == masterRole {
		log.Info("retiring from master")
		self.role = retiredRole
		// order is important here, watchers of a SchedulerProcess will
		// check SchedulerProcess.Failover() once Done() is closed.
		close(self.sched.failover)
		self.sched.End()
	}
}
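To make the role transitions concrete, here is a hedged, self-contained sketch of the call sequence an election driver is expected to issue against a Service like candidateService: Start() when the candidate wins, Stop() when it loses. The Service interface and toyCandidate below are stand-ins, not the actual pkg/election API.

package main

import "fmt"

// Service is a stand-in mirroring the Start/Stop shape used above.
type Service interface {
	Start() // called when this candidate wins the election
	Stop()  // called when mastership is lost; the candidate retires
}

type toyCandidate struct{ role string }

func (c *toyCandidate) Start() {
	if c.role == "follower" {
		c.role = "master"
		fmt.Println("elected as master")
	}
}

func (c *toyCandidate) Stop() {
	if c.role == "master" {
		c.role = "retired" // one-shot: a retired candidate never runs again
		fmt.Println("retiring from master")
	}
}

func main() {
	var s Service = &toyCandidate{role: "follower"}
	s.Start() // win the election
	s.Stop()  // lose mastership; the process would then fail over
}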
285
contrib/mesos/pkg/scheduler/ha/ha.go
Normal file
@@ -0,0 +1,285 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ha

import (
	"fmt"
	"sync/atomic"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
	log "github.com/golang/glog"
	mesos "github.com/mesos/mesos-go/mesosproto"
	bindings "github.com/mesos/mesos-go/scheduler"
)

type DriverFactory func() (bindings.SchedulerDriver, error)

type stageType int32

const (
	initStage stageType = iota
	standbyStage
	masterStage
	finStage
)

func (stage *stageType) transition(from, to stageType) bool {
	return atomic.CompareAndSwapInt32((*int32)(stage), int32(from), int32(to))
}

func (s *stageType) transitionTo(to stageType, unless ...stageType) bool {
	if len(unless) == 0 {
		atomic.StoreInt32((*int32)(s), int32(to))
		return true
	}
	for {
		state := s.get()
		for _, x := range unless {
			if state == x {
				return false
			}
		}
		if s.transition(state, to) {
			return true
		}
	}
}

func (stage *stageType) get() stageType {
	return stageType(atomic.LoadInt32((*int32)(stage)))
}
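The atomic stage machine above is a small pattern worth isolating: a typed int32 whose transitions are only valid from a known prior stage. A minimal sketch using the same CompareAndSwap technique; the stage names here are illustrative, not the package's exported API.

package main

import (
	"fmt"
	"sync/atomic"
)

type stage int32

const (
	initS stage = iota
	standbyS
	masterS
)

// transition succeeds only if the stage currently equals from,
// so concurrent racers cannot both win the same transition.
func (s *stage) transition(from, to stage) bool {
	return atomic.CompareAndSwapInt32((*int32)(s), int32(from), int32(to))
}

// get loads the current stage without racing concurrent transitions.
func (s *stage) get() stage {
	return stage(atomic.LoadInt32((*int32)(s)))
}

func main() {
	var s stage = initS
	fmt.Println(s.transition(initS, standbyS))   // true: init -> standby
	fmt.Println(s.transition(initS, masterS))    // false: no longer in init
	fmt.Println(s.transition(standbyS, masterS)) // true: standby -> master
	fmt.Println(s.get() == masterS)              // true
}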
// execute some action in the deferred context of the process, but only if we
// match the stage of the process at the time the action is executed.
func (stage stageType) Do(p *SchedulerProcess, a proc.Action) <-chan error {
	errOnce := proc.NewErrorOnce(p.fin)
	errOuter := p.Do(proc.Action(func() {
		switch stage {
		case standbyStage:
			// await standby signal or death
			select {
			case <-p.standby:
			case <-p.Done():
			}
		case masterStage:
			// await elected signal or death
			select {
			case <-p.elected:
			case <-p.Done():
			}
		case finStage:
			errOnce.Reportf("scheduler process is dying, dropping action")
			return
		default:
		}
		errOnce.Report(stage.When(p, a))
	}))
	return errOnce.Send(errOuter).Err()
}

// execute some action only if we match the stage of the scheduler process
func (stage stageType) When(p *SchedulerProcess, a proc.Action) (err error) {
	if stage != (&p.stage).get() {
		err = fmt.Errorf("failed to execute deferred action, expected lifecycle stage %v instead of %v", stage, p.stage)
	} else {
		a()
	}
	return
}

type SchedulerProcess struct {
	proc.Process
	bindings.Scheduler
	stage    stageType
	elected  chan struct{} // upon close we've been elected
	failover chan struct{} // closed indicates that we should failover upon End()
	standby  chan struct{}
	fin      chan struct{}
}

func New(sched bindings.Scheduler) *SchedulerProcess {
	p := &SchedulerProcess{
		Process:   proc.New(),
		Scheduler: sched,
		stage:     initStage,
		elected:   make(chan struct{}),
		failover:  make(chan struct{}),
		standby:   make(chan struct{}),
		fin:       make(chan struct{}),
	}
	runtime.On(p.Running(), p.begin)
	return p
}

func (self *SchedulerProcess) begin() {
	if (&self.stage).transition(initStage, standbyStage) {
		close(self.standby)
		log.Infoln("scheduler process entered standby stage")
	} else {
		log.Errorf("failed to transition from init to standby stage")
	}
}

func (self *SchedulerProcess) End() <-chan struct{} {
	if (&self.stage).transitionTo(finStage, finStage) {
		defer close(self.fin)
		log.Infoln("scheduler process entered fin stage")
	}
	return self.Process.End()
}

func (self *SchedulerProcess) Elect(newDriver DriverFactory) {
	errOnce := proc.NewErrorOnce(self.fin)
	proc.OnError(errOnce.Send(standbyStage.Do(self, proc.Action(func() {
		if !(&self.stage).transition(standbyStage, masterStage) {
			log.Errorf("failed to transition from standby to master stage, aborting")
			self.End()
			return
		}
		log.Infoln("scheduler process entered master stage")
		drv, err := newDriver()
		if err != nil {
			log.Errorf("failed to fetch scheduler driver: %v", err)
			self.End()
			return
		}
		log.V(1).Infoln("starting driver...")
		stat, err := drv.Start()
		if stat == mesos.Status_DRIVER_RUNNING && err == nil {
			log.Infoln("driver started successfully and is running")
			close(self.elected)
			go func() {
				defer self.End()
				_, err := drv.Join()
				if err != nil {
					log.Errorf("driver failed with error: %v", err)
				}
				errOnce.Report(err)
			}()
			return
		}
		defer self.End()
		if err != nil {
			log.Errorf("failed to start scheduler driver: %v", err)
		} else {
			log.Errorf("expected RUNNING status, not %v", stat)
		}
	}))).Err(), func(err error) {
		defer self.End()
		log.Errorf("failed to handle election event, aborting: %v", err)
	}, self.fin)
}

func (self *SchedulerProcess) Terminal() <-chan struct{} {
	return self.fin
}

func (self *SchedulerProcess) Elected() <-chan struct{} {
	return self.elected
}

func (self *SchedulerProcess) Failover() <-chan struct{} {
	return self.failover
}

type masterProcess struct {
	*SchedulerProcess
	doer proc.Doer
}

func (self *masterProcess) Done() <-chan struct{} {
	return self.SchedulerProcess.Terminal()
}

func (self *masterProcess) Do(a proc.Action) <-chan error {
	return self.doer.Do(a)
}

// returns a Process instance that will only execute a proc.Action if the scheduler is the elected master
func (self *SchedulerProcess) Master() proc.Process {
	return &masterProcess{
		SchedulerProcess: self,
		doer: proc.DoWith(self, proc.DoerFunc(func(a proc.Action) <-chan error {
			return proc.ErrorChan(masterStage.When(self, a))
		})),
	}
}

func (self *SchedulerProcess) logError(ch <-chan error) {
	self.OnError(ch, func(err error) {
		log.Errorf("failed to execute scheduler action: %v", err)
	})
}

func (self *SchedulerProcess) Registered(drv bindings.SchedulerDriver, fid *mesos.FrameworkID, mi *mesos.MasterInfo) {
	self.logError(self.Master().Do(proc.Action(func() {
		self.Scheduler.Registered(drv, fid, mi)
	})))
}

func (self *SchedulerProcess) Reregistered(drv bindings.SchedulerDriver, mi *mesos.MasterInfo) {
	self.logError(self.Master().Do(proc.Action(func() {
		self.Scheduler.Reregistered(drv, mi)
	})))
}

func (self *SchedulerProcess) Disconnected(drv bindings.SchedulerDriver) {
	self.logError(self.Master().Do(proc.Action(func() {
		self.Scheduler.Disconnected(drv)
	})))
}

func (self *SchedulerProcess) ResourceOffers(drv bindings.SchedulerDriver, off []*mesos.Offer) {
	self.logError(self.Master().Do(proc.Action(func() {
		self.Scheduler.ResourceOffers(drv, off)
	})))
}

func (self *SchedulerProcess) OfferRescinded(drv bindings.SchedulerDriver, oid *mesos.OfferID) {
	self.logError(self.Master().Do(proc.Action(func() {
		self.Scheduler.OfferRescinded(drv, oid)
	})))
}

func (self *SchedulerProcess) StatusUpdate(drv bindings.SchedulerDriver, ts *mesos.TaskStatus) {
	self.logError(self.Master().Do(proc.Action(func() {
		self.Scheduler.StatusUpdate(drv, ts)
	})))
}

func (self *SchedulerProcess) FrameworkMessage(drv bindings.SchedulerDriver, eid *mesos.ExecutorID, sid *mesos.SlaveID, m string) {
	self.logError(self.Master().Do(proc.Action(func() {
		self.Scheduler.FrameworkMessage(drv, eid, sid, m)
	})))
}

func (self *SchedulerProcess) SlaveLost(drv bindings.SchedulerDriver, sid *mesos.SlaveID) {
	self.logError(self.Master().Do(proc.Action(func() {
		self.Scheduler.SlaveLost(drv, sid)
	})))
}

func (self *SchedulerProcess) ExecutorLost(drv bindings.SchedulerDriver, eid *mesos.ExecutorID, sid *mesos.SlaveID, x int) {
	self.logError(self.Master().Do(proc.Action(func() {
		self.Scheduler.ExecutorLost(drv, eid, sid, x)
	})))
}

func (self *SchedulerProcess) Error(drv bindings.SchedulerDriver, msg string) {
	self.Scheduler.Error(drv, msg)
}
30
contrib/mesos/pkg/scheduler/meta/annotations.go
Normal file
@@ -0,0 +1,30 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package meta

// kubernetes api object annotations
const (
	BindingHostKey           = "k8s.mesosphere.io/bindingHost"
	TaskIdKey                = "k8s.mesosphere.io/taskId"
	SlaveIdKey               = "k8s.mesosphere.io/slaveId"
	OfferIdKey               = "k8s.mesosphere.io/offerId"
	ExecutorIdKey            = "k8s.mesosphere.io/executorId"
	PortMappingKeyPrefix     = "k8s.mesosphere.io/port_"
	PortMappingKeyFormat     = PortMappingKeyPrefix + "%s_%d"
	PortNameMappingKeyPrefix = "k8s.mesosphere.io/portName_"
	PortNameMappingKeyFormat = PortNameMappingKeyPrefix + "%s_%s"
)
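The two *Format constants are consumed with fmt.Sprintf; the scheduler plugin does this when binding a pod (see prepareTaskForLaunch later in this commit). A short sketch of the resulting keys, with the constants mirrored locally and the protocol/port values purely illustrative:

package main

import "fmt"

const (
	portMappingKeyFormat     = "k8s.mesosphere.io/port_%s_%d"      // protocol, container port
	portNameMappingKeyFormat = "k8s.mesosphere.io/portName_%s_%s"  // protocol, port name
)

func main() {
	// e.g. a TCP container port 8080 with a named port "web"
	fmt.Println(fmt.Sprintf(portMappingKeyFormat, "TCP", 8080))      // k8s.mesosphere.io/port_TCP_8080
	fmt.Println(fmt.Sprintf(portNameMappingKeyFormat, "TCP", "web")) // k8s.mesosphere.io/portName_TCP_web
}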
19
contrib/mesos/pkg/scheduler/meta/doc.go
Normal file
@@ -0,0 +1,19 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package meta defines framework constants used as keys in k8s annotations
// that are attached to k8s pods.
package meta
24
contrib/mesos/pkg/scheduler/meta/store.go
Normal file
@@ -0,0 +1,24 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package meta

// keys for things that we store
const (
	//TODO(jdef) this should also be a format instead of a fixed path
	FrameworkIDKey        = "/mesos/k8sm/frameworkid"
	DefaultElectionFormat = "/mesos/k8sm/framework/%s/leader"
)
18
contrib/mesos/pkg/scheduler/metrics/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package metrics defines and exposes instrumentation metrics of the scheduler.
package metrics
102
contrib/mesos/pkg/scheduler/metrics/metrics.go
Normal file
@@ -0,0 +1,102 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

const (
	schedulerSubsystem = "k8sm_scheduler"
)

var (
	QueueWaitTime = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Subsystem: schedulerSubsystem,
			Name:      "queue_wait_time_microseconds",
			Help:      "Launch queue wait time in microseconds",
		},
	)
	BindLatency = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Subsystem: schedulerSubsystem,
			Name:      "bind_latency_microseconds",
			Help:      "Latency in microseconds between pod-task launch and pod binding.",
		},
	)
	StatusUpdates = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: schedulerSubsystem,
			Name:      "status_updates",
			Help:      "Counter of TaskStatus updates, broken out by source, reason, state.",
		},
		[]string{"source", "reason", "state"},
	)
	ReconciliationLatency = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Subsystem: schedulerSubsystem,
			Name:      "reconciliation_latency_microseconds",
			Help:      "Latency in microseconds to execute explicit task reconciliation.",
		},
	)
	ReconciliationRequested = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: schedulerSubsystem,
			Name:      "reconciliation_requested",
			Help:      "Counter of requested task reconciliations, broken out by kind.",
		},
		[]string{"kind"},
	)
	ReconciliationExecuted = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: schedulerSubsystem,
			Name:      "reconciliation_executed",
			Help:      "Counter of executed task reconciliation requests, broken out by kind.",
		},
		[]string{"kind"},
	)
	ReconciliationCancelled = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: schedulerSubsystem,
			Name:      "reconciliation_cancelled",
			Help:      "Counter of cancelled task reconciliation requests, broken out by kind.",
		},
		[]string{"kind"},
	)
)

var registerMetrics sync.Once

func Register() {
	registerMetrics.Do(func() {
		prometheus.MustRegister(QueueWaitTime)
		prometheus.MustRegister(BindLatency)
		prometheus.MustRegister(StatusUpdates)
		prometheus.MustRegister(ReconciliationLatency)
		prometheus.MustRegister(ReconciliationRequested)
		prometheus.MustRegister(ReconciliationExecuted)
		prometheus.MustRegister(ReconciliationCancelled)
	})
}

func InMicroseconds(d time.Duration) float64 {
	return float64(d.Nanoseconds() / time.Microsecond.Nanoseconds())
}
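A minimal sketch of the intended call pattern for these collectors, assuming this metrics package is importable at the path used elsewhere in the commit; the label values are illustrative, not a fixed vocabulary:

package main

import (
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/metrics"
)

func main() {
	metrics.Register() // safe to call more than once: guarded by sync.Once

	// time a queue wait and record it in microseconds
	start := time.Now()
	time.Sleep(5 * time.Millisecond) // stand-in for real work
	metrics.QueueWaitTime.Observe(metrics.InMicroseconds(time.Since(start)))

	// count a status update, broken out by the declared label dimensions
	metrics.StatusUpdates.WithLabelValues("master", "reconciliation", "TASK_LOST").Inc()
}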
203
contrib/mesos/pkg/scheduler/mock_test.go
Normal file
@@ -0,0 +1,203 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduler

import (
	"sync"
	"testing"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	mesos "github.com/mesos/mesos-go/mesosproto"
	"github.com/stretchr/testify/mock"
)

// implements SchedulerInterface
type MockScheduler struct {
	sync.RWMutex
	mock.Mock
}

func (m *MockScheduler) slaveFor(id string) (slave *Slave, ok bool) {
	args := m.Called(id)
	x := args.Get(0)
	if x != nil {
		slave = x.(*Slave)
	}
	ok = args.Bool(1)
	return
}
func (m *MockScheduler) algorithm() (f PodScheduleFunc) {
	args := m.Called()
	x := args.Get(0)
	if x != nil {
		f = x.(PodScheduleFunc)
	}
	return
}
func (m *MockScheduler) createPodTask(ctx api.Context, pod *api.Pod) (task *podtask.T, err error) {
	args := m.Called(ctx, pod)
	x := args.Get(0)
	if x != nil {
		task = x.(*podtask.T)
	}
	err = args.Error(1)
	return
}
func (m *MockScheduler) offers() (f offers.Registry) {
	args := m.Called()
	x := args.Get(0)
	if x != nil {
		f = x.(offers.Registry)
	}
	return
}
func (m *MockScheduler) tasks() (f podtask.Registry) {
	args := m.Called()
	x := args.Get(0)
	if x != nil {
		f = x.(podtask.Registry)
	}
	return
}
func (m *MockScheduler) killTask(taskId string) error {
	args := m.Called(taskId)
	return args.Error(0)
}
func (m *MockScheduler) launchTask(task *podtask.T) error {
	args := m.Called(task)
	return args.Error(0)
}

// @deprecated this is a placeholder for me to test the mock package
func TestNoSlavesYet(t *testing.T) {
	obj := &MockScheduler{}
	obj.On("slaveFor", "foo").Return(nil, false)
	obj.slaveFor("foo")
	obj.AssertExpectations(t)
}

/*-----------------------------------------------------------------------------
|
| this really belongs in the mesos-go package, but that's being updated soon
| anyway so just keep it here for now unless we *really* need it there.
|
\-----------------------------------------------------------------------------

// Scheduler defines the interfaces that need to be implemented.
type Scheduler interface {
	Registered(SchedulerDriver, *FrameworkID, *MasterInfo)
	Reregistered(SchedulerDriver, *MasterInfo)
	Disconnected(SchedulerDriver)
	ResourceOffers(SchedulerDriver, []*Offer)
	OfferRescinded(SchedulerDriver, *OfferID)
	StatusUpdate(SchedulerDriver, *TaskStatus)
	FrameworkMessage(SchedulerDriver, *ExecutorID, *SlaveID, string)
	SlaveLost(SchedulerDriver, *SlaveID)
	ExecutorLost(SchedulerDriver, *ExecutorID, *SlaveID, int)
	Error(SchedulerDriver, string)
}
*/

func status(args mock.Arguments, at int) (val mesos.Status) {
	if x := args.Get(at); x != nil {
		val = x.(mesos.Status)
	}
	return
}

type extendedMock struct {
	mock.Mock
}

// Upon returns a chan that closes upon the execution of the most recently registered call.
func (m *extendedMock) Upon() <-chan struct{} {
	ch := make(chan struct{})
	call := &m.ExpectedCalls[len(m.ExpectedCalls)-1]
	f := call.Run
	call.Run = func(args mock.Arguments) {
		defer close(ch)
		if f != nil {
			f(args)
		}
	}
	return ch
}

type MockSchedulerDriver struct {
	extendedMock
}

func (m *MockSchedulerDriver) Init() error {
	args := m.Called()
	return args.Error(0)
}
func (m *MockSchedulerDriver) Start() (mesos.Status, error) {
	args := m.Called()
	return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) Stop(b bool) (mesos.Status, error) {
	args := m.Called(b)
	return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) Abort() (mesos.Status, error) {
	args := m.Called()
	return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) Join() (mesos.Status, error) {
	args := m.Called()
	return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) Run() (mesos.Status, error) {
	args := m.Called()
	return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) RequestResources(r []*mesos.Request) (mesos.Status, error) {
	args := m.Called(r)
	return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) ReconcileTasks(statuses []*mesos.TaskStatus) (mesos.Status, error) {
	args := m.Called(statuses)
	return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) LaunchTasks(offerIds []*mesos.OfferID, ti []*mesos.TaskInfo, f *mesos.Filters) (mesos.Status, error) {
	args := m.Called(offerIds, ti, f)
	return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) KillTask(tid *mesos.TaskID) (mesos.Status, error) {
	args := m.Called(tid)
	return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) DeclineOffer(oid *mesos.OfferID, f *mesos.Filters) (mesos.Status, error) {
	args := m.Called(oid, f)
	return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) ReviveOffers() (mesos.Status, error) {
	args := m.Called()
	return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) SendFrameworkMessage(eid *mesos.ExecutorID, sid *mesos.SlaveID, s string) (mesos.Status, error) {
	args := m.Called(eid, sid, s)
	return status(args, 0), args.Error(1)
}
func (m *MockSchedulerDriver) Destroy() {
	m.Called()
}
func (m *MockSchedulerDriver) Wait() {
	m.Called()
}
875
contrib/mesos/pkg/scheduler/plugin.go
Normal file
@@ -0,0 +1,875 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduler

import (
	"fmt"
	"io"
	"net/http"
	"strconv"
	"sync"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/backoff"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
	annotation "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
	plugin "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler"
	"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm"
	log "github.com/golang/glog"
	mesos "github.com/mesos/mesos-go/mesosproto"
	mutil "github.com/mesos/mesos-go/mesosutil"
)

const (
	enqueuePopTimeout   = 200 * time.Millisecond
	enqueueWaitTimeout  = 1 * time.Second
	yieldPopTimeout     = 200 * time.Millisecond
	yieldWaitTimeout    = 1 * time.Second
	pluginRecoveryDelay = 100 * time.Millisecond // delay after scheduler plugin crashes, before we resume scheduling
)

// scheduler abstraction to allow for easier unit testing
type schedulerInterface interface {
	sync.Locker // synchronize scheduler plugin operations
	SlaveIndex
	algorithm() PodScheduleFunc
	offers() offers.Registry
	tasks() podtask.Registry

	// driver calls

	killTask(taskId string) error
	launchTask(*podtask.T) error

	// convenience

	createPodTask(api.Context, *api.Pod) (*podtask.T, error)
}

type k8smScheduler struct {
	sync.Mutex
	internal *KubernetesScheduler
}

func (k *k8smScheduler) algorithm() PodScheduleFunc {
	return k.internal.scheduleFunc
}

func (k *k8smScheduler) offers() offers.Registry {
	return k.internal.offers
}

func (k *k8smScheduler) tasks() podtask.Registry {
	return k.internal.taskRegistry
}

func (k *k8smScheduler) createPodTask(ctx api.Context, pod *api.Pod) (*podtask.T, error) {
	return podtask.New(ctx, "", *pod, k.internal.executor)
}

func (k *k8smScheduler) slaveFor(id string) (slave *Slave, ok bool) {
	slave, ok = k.internal.slaves.getSlave(id)
	return
}

func (k *k8smScheduler) killTask(taskId string) error {
	killTaskId := mutil.NewTaskID(taskId)
	_, err := k.internal.driver.KillTask(killTaskId)
	return err
}

func (k *k8smScheduler) launchTask(task *podtask.T) error {
	// assume caller is holding scheduler lock
	taskList := []*mesos.TaskInfo{task.BuildTaskInfo()}
	offerIds := []*mesos.OfferID{task.Offer.Details().Id}
	filters := &mesos.Filters{}
	_, err := k.internal.driver.LaunchTasks(offerIds, taskList, filters)
	return err
}

type binder struct {
	api schedulerInterface
}

// Bind implements binding.Registry; it launches the pod-associated task in mesos.
func (b *binder) Bind(binding *api.Binding) error {
	ctx := api.WithNamespace(api.NewContext(), binding.Namespace)

	// default upstream scheduler passes pod.Name as binding.Name
	podKey, err := podtask.MakePodKey(ctx, binding.Name)
	if err != nil {
		return err
	}

	b.api.Lock()
	defer b.api.Unlock()

	switch task, state := b.api.tasks().ForPod(podKey); state {
	case podtask.StatePending:
		return b.bind(ctx, binding, task)
	default:
		// in this case it's likely that the pod has been deleted between Schedule
		// and Bind calls
		log.Infof("No pending task for pod %s", podKey)
		return noSuchPodErr //TODO(jdef) this error is somewhat misleading since the task could be running?!
	}
}

func (b *binder) rollback(task *podtask.T, err error) error {
	task.Offer.Release()
	task.Reset()
	if err2 := b.api.tasks().Update(task); err2 != nil {
		log.Errorf("failed to update pod task: %v", err2)
	}
	return err
}

// assumes that: caller has acquired scheduler lock and that the task is still pending
func (b *binder) bind(ctx api.Context, binding *api.Binding, task *podtask.T) (err error) {
	// sanity check: ensure that the task HasAcceptedOffer(); it's possible that between
	// Schedule() and now the offer for this task was rescinded or invalidated
	// ((we should never see this here))
	if !task.HasAcceptedOffer() {
		return fmt.Errorf("task has not accepted a valid offer %v", task.ID)
	}

	// By this time, there is a chance that the slave is disconnected.
	offerId := task.GetOfferId()
	if offer, ok := b.api.offers().Get(offerId); !ok || offer.HasExpired() {
		// already rescinded or timed out or otherwise invalidated
		return b.rollback(task, fmt.Errorf("failed prior to launchTask due to expired offer for task %v", task.ID))
	}

	if err = b.prepareTaskForLaunch(ctx, binding.Target.Name, task, offerId); err == nil {
		log.V(2).Infof("launching task: %q on target %q slave %q for pod \"%v/%v\"",
			task.ID, binding.Target.Name, task.Spec.SlaveID, task.Pod.Namespace, task.Pod.Name)
		if err = b.api.launchTask(task); err == nil {
			b.api.offers().Invalidate(offerId)
			task.Set(podtask.Launched)
			if err = b.api.tasks().Update(task); err != nil {
				// this should only happen if the task has been removed or has changed status,
				// which SHOULD NOT HAPPEN as long as we're synchronizing correctly
				log.Errorf("failed to update task w/ Launched status: %v", err)
			}
			return
		}
	}
	return b.rollback(task, fmt.Errorf("Failed to launch task %v: %v", task.ID, err))
}

//TODO(jdef) unit test this, ensure that task's copy of api.Pod is not modified
func (b *binder) prepareTaskForLaunch(ctx api.Context, machine string, task *podtask.T, offerId string) error {
	pod := task.Pod

	// we make an effort here to avoid making changes to the task's copy of the pod, since
	// we want that to reflect the initial user spec, and not the modified spec that we
	// build for the executor to consume.
	oemCt := pod.Spec.Containers
	pod.Spec.Containers = append([]api.Container{}, oemCt...) // (shallow) clone before mod

	if pod.Annotations == nil {
		pod.Annotations = make(map[string]string)
	} else {
		oemAnn := pod.Annotations
		pod.Annotations = make(map[string]string)
		for k, v := range oemAnn {
			pod.Annotations[k] = v
		}
	}
	pod.Annotations[annotation.BindingHostKey] = machine
	task.SaveRecoveryInfo(pod.Annotations)

	for _, entry := range task.Spec.PortMap {
		oemPorts := pod.Spec.Containers[entry.ContainerIdx].Ports
		ports := append([]api.ContainerPort{}, oemPorts...)
		p := &ports[entry.PortIdx]
		p.HostPort = int(entry.OfferPort)
		op := strconv.FormatUint(entry.OfferPort, 10)
		pod.Annotations[fmt.Sprintf(annotation.PortMappingKeyFormat, p.Protocol, p.ContainerPort)] = op
		if p.Name != "" {
			pod.Annotations[fmt.Sprintf(annotation.PortNameMappingKeyFormat, p.Protocol, p.Name)] = op
		}
		pod.Spec.Containers[entry.ContainerIdx].Ports = ports
	}

	// the kubelet-executor uses this to instantiate the pod
	log.V(3).Infof("prepared pod spec: %+v", pod)

	data, err := api.Codec.Encode(&pod)
	if err != nil {
		log.V(2).Infof("Failed to marshal the pod spec: %v", err)
		return err
	}
	task.Spec.Data = data
	return nil
}
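prepareTaskForLaunch goes out of its way to clone the containers slice (and the annotations map) before mutating them, so that the task's stored pod keeps the user's original spec. The aliasing hazard it avoids is easy to demonstrate in isolation; a minimal sketch with a hypothetical container type:

package main

import "fmt"

type container struct{ hostPort int }

func main() {
	original := []container{{hostPort: 0}}

	// WRONG: slice assignment aliases the backing array, so
	// mutating the "copy" also mutates the original
	aliased := original
	aliased[0].hostPort = 31000
	fmt.Println(original[0].hostPort) // 31000: original corrupted

	original[0].hostPort = 0

	// RIGHT: shallow clone first, as prepareTaskForLaunch does
	cloned := append([]container{}, original...)
	cloned[0].hostPort = 31000
	fmt.Println(original[0].hostPort) // 0: original preserved
}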
type kubeScheduler struct {
	api        schedulerInterface
	podUpdates queue.FIFO
}

// Schedule implements the Scheduler interface of Kubernetes.
// It returns the selectedMachine's name and error (if there's any).
func (k *kubeScheduler) Schedule(pod *api.Pod, unused algorithm.MinionLister) (string, error) {
	log.Infof("Try to schedule pod %v\n", pod.Name)
	ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)

	// default upstream scheduler passes pod.Name as binding.PodID
	podKey, err := podtask.MakePodKey(ctx, pod.Name)
	if err != nil {
		return "", err
	}

	k.api.Lock()
	defer k.api.Unlock()

	switch task, state := k.api.tasks().ForPod(podKey); state {
	case podtask.StateUnknown:
		// There's a bit of a potential race here: a pod could have been yielded() and
		// then deleted before we get *here*.
		// We use meta to index the pod in the store since that's what the k8s reflector does.
		podName, err := cache.MetaNamespaceKeyFunc(pod)
		if err != nil {
			log.Warningf("aborting Schedule, unable to understand pod object %+v", pod)
			return "", noSuchPodErr
		}
		if deleted := k.podUpdates.Poll(podName, queue.DELETE_EVENT); deleted {
			// avoid scheduling a pod that's been deleted between yieldPod() and Schedule()
			log.Infof("aborting Schedule, pod has been deleted %+v", pod)
			return "", noSuchPodErr
		}
		return k.doSchedule(k.api.tasks().Register(k.api.createPodTask(ctx, pod)))

	//TODO(jdef) it's possible that the pod state has diverged from what
	//we knew previously, we should probably update the task.Pod state here
	//before proceeding with scheduling
	case podtask.StatePending:
		if pod.UID != task.Pod.UID {
			// we're dealing with a brand new pod spec here, so the old one must have been
			// deleted -- and so our task store is out of sync w/ respect to reality
			//TODO(jdef) reconcile task
			return "", fmt.Errorf("task %v spec is out of sync with pod %v spec, aborting schedule", task.ID, pod.Name)
		} else if task.Has(podtask.Launched) {
			// task has been marked as "launched" but the pod binding creation may have failed in k8s,
			// but we're going to let someone else handle it, probably the mesos task error handler
			return "", fmt.Errorf("task %s has already been launched, aborting schedule", task.ID)
		} else {
			return k.doSchedule(task, nil)
		}

	default:
		return "", fmt.Errorf("task %s is not pending, nothing to schedule", task.ID)
	}
}

// doSchedule calls the configured ScheduleFunc and reserves resources, returning
// the name of the machine the task is scheduled on.
func (k *kubeScheduler) doSchedule(task *podtask.T, err error) (string, error) {
	var offer offers.Perishable
	if task.HasAcceptedOffer() {
		// verify that the offer is still on the table
		offerId := task.GetOfferId()
		if o, ok := k.api.offers().Get(offerId); ok && !o.HasExpired() {
			// skip tasks that already have assigned offers
			offer = task.Offer
		} else {
			task.Offer.Release()
			task.Reset()
			if err = k.api.tasks().Update(task); err != nil {
				return "", err
			}
		}
	}
	if err == nil && offer == nil {
		offer, err = k.api.algorithm()(k.api.offers(), k.api, task)
	}
	if err != nil {
		return "", err
	}
	details := offer.Details()
	if details == nil {
		return "", fmt.Errorf("offer already invalid/expired for task %v", task.ID)
	}
	slaveId := details.GetSlaveId().GetValue()
	if slave, ok := k.api.slaveFor(slaveId); !ok {
		// not much sense in Release()ing the offer here since its owner died
		offer.Release()
		k.api.offers().Invalidate(details.Id.GetValue())
		return "", fmt.Errorf("Slave disappeared (%v) while scheduling task %v", slaveId, task.ID)
	} else {
		if task.Offer != nil && task.Offer != offer {
			return "", fmt.Errorf("task.offer assignment must be idempotent, task %+v: offer %+v", task, offer)
		}
		task.Offer = offer
		task.FillFromDetails(details)
		if err := k.api.tasks().Update(task); err != nil {
			offer.Release()
			return "", err
		}
		return slave.HostName, nil
	}
}

type queuer struct {
	lock            sync.Mutex       // shared by condition variables of this struct
	podUpdates      queue.FIFO       // queue of pod updates to be processed
	podQueue        *queue.DelayFIFO // queue of pods to be scheduled
	deltaCond       sync.Cond        // pod changes are available for processing
	unscheduledCond sync.Cond        // there are unscheduled pods for processing
}

func newQueuer(store queue.FIFO) *queuer {
	q := &queuer{
		podQueue:   queue.NewDelayFIFO(),
		podUpdates: store,
	}
	q.deltaCond.L = &q.lock
	q.unscheduledCond.L = &q.lock
	return q
}

func (q *queuer) installDebugHandlers(mux *http.ServeMux) {
	mux.HandleFunc("/debug/scheduler/podqueue", func(w http.ResponseWriter, r *http.Request) {
		for _, x := range q.podQueue.List() {
			if _, err := io.WriteString(w, fmt.Sprintf("%+v\n", x)); err != nil {
				break
			}
		}
	})
	mux.HandleFunc("/debug/scheduler/podstore", func(w http.ResponseWriter, r *http.Request) {
		for _, x := range q.podUpdates.List() {
			if _, err := io.WriteString(w, fmt.Sprintf("%+v\n", x)); err != nil {
				break
			}
		}
	})
}

// signal that there are probably pod updates waiting to be processed
func (q *queuer) updatesAvailable() {
	q.deltaCond.Broadcast()
}

// delete a pod from the to-be-scheduled queue
func (q *queuer) dequeue(id string) {
	q.podQueue.Delete(id)
}

// re-add a pod to the to-be-scheduled queue; will not overwrite existing pod data (that
// may have already changed).
func (q *queuer) requeue(pod *Pod) {
	// use KeepExisting in case the pod has already been updated (can happen if binding fails
	// due to constraint violations); we don't want to overwrite a newer entry with stale data.
	q.podQueue.Add(pod, queue.KeepExisting)
	q.unscheduledCond.Broadcast()
}

// same as requeue but calls podQueue.Offer instead of podQueue.Add
func (q *queuer) reoffer(pod *Pod) {
	// use KeepExisting in case the pod has already been updated (can happen if binding fails
	// due to constraint violations); we don't want to overwrite a newer entry with stale data.
	if q.podQueue.Offer(pod, queue.KeepExisting) {
		q.unscheduledCond.Broadcast()
	}
}

// spawns a go-routine to watch for unscheduled pods and queue them up
// for scheduling. returns immediately.
func (q *queuer) Run(done <-chan struct{}) {
	go runtime.Until(func() {
		log.Info("Watching for newly created pods")
		q.lock.Lock()
		defer q.lock.Unlock()

		for {
			// limit blocking here for short intervals so that scheduling
			// may proceed even if there have been no recent pod changes
			p := q.podUpdates.Await(enqueuePopTimeout)
			if p == nil {
				signalled := runtime.After(q.deltaCond.Wait)
				// we've yielded the lock
				select {
				case <-time.After(enqueueWaitTimeout):
					q.deltaCond.Broadcast() // abort Wait()
					<-signalled             // wait for lock re-acquisition
					log.V(4).Infoln("timed out waiting for a pod update")
				case <-signalled:
					// we've acquired the lock and there may be
					// changes for us to process now
				}
				continue
			}

			pod := p.(*Pod)
			if pod.Spec.NodeName != "" {
				log.V(3).Infof("dequeuing pod for scheduling: %v", pod.Pod.Name)
				q.dequeue(pod.GetUID())
			} else {
				// use ReplaceExisting because we are always pushing the latest state
				now := time.Now()
				pod.deadline = &now
				if q.podQueue.Offer(pod, queue.ReplaceExisting) {
					q.unscheduledCond.Broadcast()
					log.V(3).Infof("queued pod for scheduling: %v", pod.Pod.Name)
				} else {
					log.Warningf("failed to queue pod for scheduling: %v", pod.Pod.Name)
				}
			}
		}
	}, 1*time.Second, done)
}

// implementation of scheduling plugin's NextPod func; see k8s plugin/pkg/scheduler
func (q *queuer) yield() *api.Pod {
	log.V(2).Info("attempting to yield a pod")
	q.lock.Lock()
	defer q.lock.Unlock()

	for {
		// limit blocking here to short intervals so that we don't block the
		// enqueuer Run() routine for very long
		kpod := q.podQueue.Await(yieldPopTimeout)
		if kpod == nil {
			signalled := runtime.After(q.unscheduledCond.Wait)
			// lock is yielded at this point and we're going to wait for either
			// a timeout, or a signal that there's data
			select {
			case <-time.After(yieldWaitTimeout):
				q.unscheduledCond.Broadcast() // abort Wait()
				<-signalled                   // wait for the go-routine, and the lock
				log.V(4).Infoln("timed out waiting for a pod to yield")
			case <-signalled:
				// we have acquired the lock, and there
				// may be a pod for us to pop now
			}
			continue
		}

		pod := kpod.(*Pod).Pod
		if podName, err := cache.MetaNamespaceKeyFunc(pod); err != nil {
			log.Warningf("yield unable to understand pod object %+v, will skip: %v", pod, err)
		} else if !q.podUpdates.Poll(podName, queue.POP_EVENT) {
			log.V(1).Infof("yield popped a transitioning pod, skipping: %+v", pod)
		} else if pod.Spec.NodeName != "" {
			// should never happen if enqueuePods is filtering properly
			log.Warningf("yield popped an already-scheduled pod, skipping: %+v", pod)
		} else {
			return pod
		}
	}
}
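Both Run() and yield() use the same trick to bound a sync.Cond wait: a helper goroutine performs the Wait and a channel is closed on wakeup, while the caller selects between that channel and a timer, issuing a Broadcast to abort the Wait on timeout. A distilled, self-contained sketch, with the runtime.After helper approximated inline; this is an illustration of the pattern, not the project's runtime package:

package main

import (
	"fmt"
	"sync"
	"time"
)

// after runs f in a goroutine and closes the returned chan when f returns,
// approximating the runtime.After helper used above.
func after(f func()) <-chan struct{} {
	ch := make(chan struct{})
	go func() {
		defer close(ch)
		f()
	}()
	return ch
}

func main() {
	var mu sync.Mutex
	cond := sync.Cond{L: &mu}

	mu.Lock()
	defer mu.Unlock()

	// Wait releases mu while blocked and re-acquires it on wakeup; running
	// it in a helper goroutine lets us select against a timer here.
	signalled := after(cond.Wait)
	select {
	case <-time.After(100 * time.Millisecond):
		cond.Broadcast() // abort Wait()
		<-signalled      // wait for lock re-acquisition
		fmt.Println("timed out waiting for a signal")
	case <-signalled:
		fmt.Println("woken by a producer")
	}
}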
||||
|
||||
type errorHandler struct {
    api     schedulerInterface
    backoff *backoff.Backoff
    qr      *queuer
}

// implementation of scheduling plugin's Error func; see plugin/pkg/scheduler
func (k *errorHandler) handleSchedulingError(pod *api.Pod, schedulingErr error) {
    if schedulingErr == noSuchPodErr {
        log.V(2).Infof("Not rescheduling non-existent pod %v", pod.Name)
        return
    }

    log.Infof("Error scheduling %v: %v; retrying", pod.Name, schedulingErr)
    defer util.HandleCrash()

    // default upstream scheduler passes pod.Name as binding.PodID
    ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)
    podKey, err := podtask.MakePodKey(ctx, pod.Name)
    if err != nil {
        log.Errorf("Failed to construct pod key, aborting scheduling for pod %v: %v", pod.Name, err)
        return
    }

    k.backoff.GC()
    k.api.Lock()
    defer k.api.Unlock()

    switch task, state := k.api.tasks().ForPod(podKey); state {
    case podtask.StateUnknown:
        // if we don't have a mapping here any more then someone deleted the pod
        log.V(2).Infof("Could not resolve pod to task, aborting pod reschedule: %s", podKey)
        return

    case podtask.StatePending:
        if task.Has(podtask.Launched) {
            log.V(2).Infof("Skipping re-scheduling for already-launched pod %v", podKey)
            return
        }
        breakoutEarly := queue.BreakChan(nil)
        if schedulingErr == noSuitableOffersErr {
            log.V(3).Infof("adding backoff breakout handler for pod %v", podKey)
            breakoutEarly = queue.BreakChan(k.api.offers().Listen(podKey, func(offer *mesos.Offer) bool {
                k.api.Lock()
                defer k.api.Unlock()
                switch task, state := k.api.tasks().Get(task.ID); state {
                case podtask.StatePending:
                    return !task.Has(podtask.Launched) && task.AcceptOffer(offer)
                default:
                    // no point in continuing to check for matching offers
                    return true
                }
            }))
        }
        delay := k.backoff.Get(podKey)
        log.V(3).Infof("requeuing pod %v with delay %v", podKey, delay)
        k.qr.requeue(&Pod{Pod: pod, delay: &delay, notify: breakoutEarly})

    default:
        log.V(2).Infof("Task is no longer pending, aborting reschedule for pod %v", podKey)
    }
}
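
// The breakout channel registered above lets a requeue delay end early as
// soon as a matching offer arrives. A minimal sketch of how such a delay can
// be consumed (hypothetical helper; assumes queue.BreakChan is usable as a
// receive-only channel, per its use with offers().Listen above):
func sleepOrBreak(delay time.Duration, breaker queue.BreakChan) {
    select {
    case <-time.After(delay):
        // backoff period elapsed without a matching offer
    case <-breaker:
        // a matching offer arrived; stop backing off early
    }
}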
type deleter struct {
    api schedulerInterface
    qr  *queuer
}

// currently monitors for "pod deleted" events, upon which handle()
// is invoked.
func (k *deleter) Run(updates <-chan queue.Entry, done <-chan struct{}) {
    go runtime.Until(func() {
        for {
            entry := <-updates
            pod := entry.Value().(*Pod)
            if entry.Is(queue.DELETE_EVENT) {
                if err := k.deleteOne(pod); err != nil {
                    log.Error(err)
                }
            } else if !entry.Is(queue.POP_EVENT) {
                k.qr.updatesAvailable()
            }
        }
    }, 1*time.Second, done)
}

func (k *deleter) deleteOne(pod *Pod) error {
    ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)
    podKey, err := podtask.MakePodKey(ctx, pod.Name)
    if err != nil {
        return err
    }

    log.V(2).Infof("pod deleted: %v", podKey)

    // order is important here: we want to make sure we have the lock before
    // removing the pod from the scheduling queue. this makes the concurrent
    // execution of scheduler-error-handling and delete-handling easier to
    // reason about.
    k.api.Lock()
    defer k.api.Unlock()

    // prevent the scheduler from attempting to pop this; it's also possible that
    // it's concurrently being scheduled (somewhere between pod scheduling and
    // binding) - if so, then we'll end up removing it from taskRegistry which
    // will abort Bind()ing
    k.qr.dequeue(pod.GetUID())

    switch task, state := k.api.tasks().ForPod(podKey); state {
    case podtask.StateUnknown:
        log.V(2).Infof("Could not resolve pod '%s' to task id", podKey)
        return noSuchPodErr

    // determine if the task has already been launched to mesos, if not then
    // cleanup is easier (unregister) since there's no state to sync
    case podtask.StatePending:
        if !task.Has(podtask.Launched) {
            // we've been invoked in between Schedule() and Bind()
            if task.HasAcceptedOffer() {
                task.Offer.Release()
                task.Reset()
                task.Set(podtask.Deleted)
                //TODO(jdef) probably want better handling here
                if err := k.api.tasks().Update(task); err != nil {
                    return err
                }
            }
            k.api.tasks().Unregister(task)
            return nil
        }
        fallthrough

    case podtask.StateRunning:
        // signal to watchers that the related pod is going down
        task.Set(podtask.Deleted)
        if err := k.api.tasks().Update(task); err != nil {
            log.Errorf("failed to update task w/ Deleted status: %v", err)
        }
        return k.api.killTask(task.ID)

    default:
        log.Infof("cannot kill pod '%s': non-terminal task not found %v", podKey, task.ID)
        return noSuchTaskErr
    }
}

// NewDefaultPluginConfig creates a scheduler plugin config, along with all
// supporting background functions, watching all pods via the client.
func (k *KubernetesScheduler) NewDefaultPluginConfig(terminate <-chan struct{}, mux *http.ServeMux) *PluginConfig {
    // use ListWatch watching pods using the client by default
    return k.NewPluginConfig(terminate, mux, createAllPodsLW(k.client))
}

func (k *KubernetesScheduler) NewPluginConfig(terminate <-chan struct{}, mux *http.ServeMux,
    podsWatcher *cache.ListWatch) *PluginConfig {

    // Watch and queue pods that need scheduling.
    updates := make(chan queue.Entry, k.schedcfg.UpdatesBacklog)
    podUpdates := &podStoreAdapter{queue.NewHistorical(updates)}
    reflector := cache.NewReflector(podsWatcher, &api.Pod{}, podUpdates, 0)

    // lock that guards critical sections that involve transferring pods from
    // the store (cache) to the scheduling queue; its purpose is to maintain
    // an ordering (vs interleaving) of operations that's easier to reason about.
    kapi := &k8smScheduler{internal: k}
    q := newQueuer(podUpdates)
    podDeleter := &deleter{
        api: kapi,
        qr:  q,
    }
    eh := &errorHandler{
        api:     kapi,
        backoff: backoff.New(k.schedcfg.InitialPodBackoff.Duration, k.schedcfg.MaxPodBackoff.Duration),
        qr:      q,
    }
    startLatch := make(chan struct{})
    eventBroadcaster := record.NewBroadcaster()
    runtime.On(startLatch, func() {
        eventBroadcaster.StartRecordingToSink(k.client.Events(""))
        reflector.Run() // TODO(jdef) should listen for termination
        podDeleter.Run(updates, terminate)
        q.Run(terminate)

        q.installDebugHandlers(mux)
        podtask.InstallDebugHandlers(k.taskRegistry, mux)
    })
    return &PluginConfig{
        Config: &plugin.Config{
            MinionLister: nil,
            Algorithm: &kubeScheduler{
                api:        kapi,
                podUpdates: podUpdates,
            },
            Binder:   &binder{api: kapi},
            NextPod:  q.yield,
            Error:    eh.handleSchedulingError,
            Recorder: eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"}),
        },
        api:      kapi,
        client:   k.client,
        qr:       q,
        deleter:  podDeleter,
        starting: startLatch,
    }
}

type PluginConfig struct {
    *plugin.Config
    api      schedulerInterface
    client   *client.Client
    qr       *queuer
    deleter  *deleter
    starting chan struct{} // startup latch
}

func NewPlugin(c *PluginConfig) PluginInterface {
    return &schedulingPlugin{
        config:   c.Config,
        api:      c.api,
        client:   c.client,
        qr:       c.qr,
        deleter:  c.deleter,
        starting: c.starting,
    }
}

type schedulingPlugin struct {
    config   *plugin.Config
    api      schedulerInterface
    client   *client.Client
    qr       *queuer
    deleter  *deleter
    starting chan struct{}
}

func (s *schedulingPlugin) Run(done <-chan struct{}) {
    defer close(s.starting)
    go runtime.Until(s.scheduleOne, pluginRecoveryDelay, done)
}

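// Typical wiring of the pieces above, sketched as a comment because the
// surrounding variables are assumptions: `k` is a *KubernetesScheduler,
// `terminate` a termination channel and `mux` an *http.ServeMux.
//
//    c := k.NewDefaultPluginConfig(terminate, mux)
//    p := NewPlugin(c)
//    p.Run(terminate) // closes c.starting once the scheduling loop is started
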
// hacked from GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/scheduler.go,
// with the Modeler stuff removed since we don't use it because we have mesos.
func (s *schedulingPlugin) scheduleOne() {
    pod := s.config.NextPod()
    log.V(3).Infof("Attempting to schedule: %v", pod)
    dest, err := s.config.Algorithm.Schedule(pod, s.config.MinionLister) // call kubeScheduler.Schedule
    if err != nil {
        log.V(1).Infof("Failed to schedule: %v", pod)
        s.config.Recorder.Eventf(pod, "failedScheduling", "Error scheduling: %v", err)
        s.config.Error(pod, err)
        return
    }
    b := &api.Binding{
        ObjectMeta: api.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name},
        Target: api.ObjectReference{
            Kind: "Node",
            Name: dest,
        },
    }
    if err := s.config.Binder.Bind(b); err != nil {
        log.V(1).Infof("Failed to bind pod: %v", err)
        s.config.Recorder.Eventf(pod, "failedScheduling", "Binding rejected: %v", err)
        s.config.Error(pod, err)
        return
    }
    s.config.Recorder.Eventf(pod, "scheduled", "Successfully assigned %v to %v", pod.Name, dest)
}

// this pod may be out of sync with respect to the API server registry:
//
//    this pod   | apiserver registry
//    -----------|----------------------
//    host=.*    | 404         ; pod was deleted
//    host=.*    | 5xx         ; failed to sync, try again later?
//    host=""    | host=""     ; perhaps no updates to process?
//    host=""    | host="..."  ; pod has been scheduled and assigned, is there a task assigned? (check TaskIdKey in binding?)
//    host="..." | host=""     ; pod is no longer scheduled, does it need to be re-queued?
//    host="..." | host="..."  ; perhaps no updates to process?
//
// TODO(jdef) this needs an integration test
func (s *schedulingPlugin) reconcilePod(oldPod api.Pod) {
    log.V(1).Infof("reconcile pod %v", oldPod.Name)
    ctx := api.WithNamespace(api.NewDefaultContext(), oldPod.Namespace)
    pod, err := s.client.Pods(api.NamespaceValue(ctx)).Get(oldPod.Name)
    if err != nil {
        if errors.IsNotFound(err) {
            // attempt to delete
            if err = s.deleter.deleteOne(&Pod{Pod: &oldPod}); err != nil && err != noSuchPodErr && err != noSuchTaskErr {
                log.Errorf("failed to delete pod: %v: %v", oldPod.Name, err)
            }
        } else {
            //TODO(jdef) other errors should probably trigger a retry (w/ backoff).
            //For now, drop the pod on the floor
            log.Warningf("aborting reconciliation for pod %v: %v", oldPod.Name, err)
        }
        return
    }
    if oldPod.Spec.NodeName != pod.Spec.NodeName {
        if pod.Spec.NodeName == "" {
            // pod is unscheduled.
            // it's possible that we dropped the pod in the scheduler error handler
            // because of task misalignment with the pod (task.Has(podtask.Launched) == true)

            podKey, err := podtask.MakePodKey(ctx, pod.Name)
            if err != nil {
                log.Error(err)
                return
            }

            s.api.Lock()
            defer s.api.Unlock()

            if _, state := s.api.tasks().ForPod(podKey); state != podtask.StateUnknown {
                //TODO(jdef) reconcile the task
                log.Errorf("task already registered for pod %v", pod.Name)
                return
            }

            now := time.Now()
            log.V(3).Infof("reoffering pod %v", podKey)
            s.qr.reoffer(&Pod{
                Pod:      pod,
                deadline: &now,
            })
        } else {
            // pod is scheduled.
            // not sure how this happened behind our backs. attempt to reconstruct
            // at least a partial podtask.T record.
            //TODO(jdef) reconcile the task
            log.Errorf("pod already scheduled: %v", pod.Name)
        }
    } else {
        //TODO(jdef) for now, ignore the fact that the rest of the spec may be different
        //and assume that our knowledge of the pod aligns with that of the apiserver
        log.Error("pod reconciliation does not support updates; not yet implemented")
    }
}

func parseSelectorOrDie(s string) fields.Selector {
    selector, err := fields.ParseSelector(s)
    if err != nil {
        panic(err)
    }
    return selector
}

// createAllPodsLW returns a listWatch that finds all pods
func createAllPodsLW(cl *client.Client) *cache.ListWatch {
    return cache.NewListWatchFromClient(cl, "pods", api.NamespaceAll, parseSelectorOrDie(""))
}

// Consumes *api.Pod, produces *Pod; the k8s reflector wants to push *api.Pod
// objects at us, but we want to store the more flexible (Pod) type defined in
// this package. The adapter implementation facilitates this. It's a little
// hackish since the object type going in is different than the object type
// coming out -- you've been warned.
type podStoreAdapter struct {
    queue.FIFO
}

func (psa *podStoreAdapter) Add(obj interface{}) error {
    pod := obj.(*api.Pod)
    return psa.FIFO.Add(&Pod{Pod: pod})
}

func (psa *podStoreAdapter) Update(obj interface{}) error {
    pod := obj.(*api.Pod)
    return psa.FIFO.Update(&Pod{Pod: pod})
}

func (psa *podStoreAdapter) Delete(obj interface{}) error {
    pod := obj.(*api.Pod)
    return psa.FIFO.Delete(&Pod{Pod: pod})
}

func (psa *podStoreAdapter) Get(obj interface{}) (interface{}, bool, error) {
    pod := obj.(*api.Pod)
    return psa.FIFO.Get(&Pod{Pod: pod})
}

// Replace will delete the contents of the store, using instead the
// given list. This store implementation does NOT take ownership of the list.
func (psa *podStoreAdapter) Replace(objs []interface{}) error {
    newobjs := make([]interface{}, len(objs))
    for i, v := range objs {
        pod := v.(*api.Pod)
        newobjs[i] = &Pod{Pod: pod}
    }
    return psa.FIFO.Replace(newobjs)
}
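
// Sketch of how the adapter above is consumed (mirrors NewPluginConfig): the
// reflector pushes *api.Pod objects in, and *Pod values come back out of the
// FIFO. The backlog size here is illustrative only.
//
//    updates := make(chan queue.Entry, 100)
//    podUpdates := &podStoreAdapter{queue.NewHistorical(updates)}
//    reflector := cache.NewReflector(podsWatcher, &api.Pod{}, podUpdates, 0)
//    reflector.Run()
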
700
contrib/mesos/pkg/scheduler/plugin_test.go
Normal file
@@ -0,0 +1,700 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduler

import (
    "fmt"
    "net/http"
    "net/http/httptest"
    "sync"
    "testing"
    "time"

    "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/client"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
    kutil "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/watch"

    assertext "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/assert"
    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue"
    schedcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config"
    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/ha"
    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
    log "github.com/golang/glog"
    mesos "github.com/mesos/mesos-go/mesosproto"
    util "github.com/mesos/mesos-go/mesosutil"
    bindings "github.com/mesos/mesos-go/scheduler"
    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/mock"
)

// An apiserver mock which partially mocks the pods API
type TestServer struct {
    server *httptest.Server
    stats  map[string]uint
    lock   sync.Mutex
}

func NewTestServer(t *testing.T, namespace string, mockPodListWatch *MockPodsListWatch) *TestServer {
    ts := TestServer{
        stats: map[string]uint{},
    }
    mux := http.NewServeMux()

    mux.HandleFunc(testapi.ResourcePath("pods", namespace, ""), func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusOK)
        pods := mockPodListWatch.Pods()
        w.Write([]byte(runtime.EncodeOrDie(testapi.Codec(), &pods)))
    })

    podsPrefix := testapi.ResourcePath("pods", namespace, "") + "/"
    mux.HandleFunc(podsPrefix, func(w http.ResponseWriter, r *http.Request) {
        name := r.URL.Path[len(podsPrefix):]

        // update statistics for this pod
        ts.lock.Lock()
        defer ts.lock.Unlock()
        ts.stats[name] = ts.stats[name] + 1

        p := mockPodListWatch.GetPod(name)
        if p != nil {
            w.WriteHeader(http.StatusOK)
            w.Write([]byte(runtime.EncodeOrDie(testapi.Codec(), p)))
            return
        }
        w.WriteHeader(http.StatusNotFound)
    })

    mux.HandleFunc(testapi.ResourcePath("events", namespace, ""), func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusOK)
    })

    mux.HandleFunc("/", func(res http.ResponseWriter, req *http.Request) {
        t.Errorf("unexpected request: %v", req.RequestURI)
        res.WriteHeader(http.StatusNotFound)
    })

    ts.server = httptest.NewServer(mux)
    return &ts
}

func (ts *TestServer) Stats(name string) uint {
    ts.lock.Lock()
    defer ts.lock.Unlock()

    return ts.stats[name]
}

// MockPodsListWatch mocks the pods ListWatch that would normally be backed by
// the apiserver pods watch endpoint
type MockPodsListWatch struct {
    ListWatch   cache.ListWatch
    fakeWatcher *watch.FakeWatcher
    list        api.PodList
    lock        sync.Mutex
}

func NewMockPodsListWatch(initialPodList api.PodList) *MockPodsListWatch {
    lw := MockPodsListWatch{
        fakeWatcher: watch.NewFake(),
        list:        initialPodList,
    }
    lw.ListWatch = cache.ListWatch{
        WatchFunc: func(resourceVersion string) (watch.Interface, error) {
            return lw.fakeWatcher, nil
        },
        ListFunc: func() (runtime.Object, error) {
            return &lw.list, nil
        },
    }
    return &lw
}

func (lw *MockPodsListWatch) Pods() api.PodList {
    lw.lock.Lock()
    defer lw.lock.Unlock()

    return lw.list
}

func (lw *MockPodsListWatch) GetPod(name string) *api.Pod {
    lw.lock.Lock()
    defer lw.lock.Unlock()

    for _, p := range lw.list.Items {
        if p.Name == name {
            return &p
        }
    }

    return nil
}

func (lw *MockPodsListWatch) Add(pod *api.Pod, notify bool) {
    lw.lock.Lock()
    defer lw.lock.Unlock()

    lw.list.Items = append(lw.list.Items, *pod)
    if notify {
        lw.fakeWatcher.Add(pod)
    }
}

func (lw *MockPodsListWatch) Modify(pod *api.Pod, notify bool) {
    lw.lock.Lock()
    defer lw.lock.Unlock()

    for i, otherPod := range lw.list.Items {
        if otherPod.Name == pod.Name {
            lw.list.Items[i] = *pod
            if notify {
                lw.fakeWatcher.Modify(pod)
            }
            return
        }
    }
    log.Fatalf("Cannot find pod %v to modify in MockPodsListWatch", pod.Name)
}

func (lw *MockPodsListWatch) Delete(pod *api.Pod, notify bool) {
    lw.lock.Lock()
    defer lw.lock.Unlock()

    for i, otherPod := range lw.list.Items {
        if otherPod.Name == pod.Name {
            lw.list.Items = append(lw.list.Items[:i], lw.list.Items[i+1:]...)
            if notify {
                lw.fakeWatcher.Delete(&otherPod)
            }
            return
        }
    }
    log.Fatalf("Cannot find pod %v to delete in MockPodsListWatch", pod.Name)
}

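// Sketch of driving the mock from a test (mirrors TestPlugin_LifeCycle below):
//
//    podListWatch := NewMockPodsListWatch(api.PodList{})
//    pod := NewTestPod(1)
//    podListWatch.Add(pod, true)     // append to the list and notify watchers
//    podListWatch.Modify(pod, false) // update the list without notifying
//    podListWatch.Delete(pod, true)  // remove from the list and notify watchers
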
// Create a pod with a given index, requiring one port
func NewTestPod(i int) *api.Pod {
    name := fmt.Sprintf("pod%d", i)
    return &api.Pod{
        TypeMeta: api.TypeMeta{APIVersion: testapi.Version()},
        ObjectMeta: api.ObjectMeta{
            Name:      name,
            Namespace: "default",
            SelfLink:  fmt.Sprintf("http://1.2.3.4/api/v1beta1/pods/%s", name),
        },
        Spec: api.PodSpec{
            Containers: []api.Container{
                {
                    Ports: []api.ContainerPort{
                        {
                            ContainerPort: 8000 + i,
                            Protocol:      api.ProtocolTCP,
                        },
                    },
                },
            },
        },
        Status: api.PodStatus{
            PodIP: fmt.Sprintf("1.2.3.%d", 4+i),
            Conditions: []api.PodCondition{
                {
                    Type:   api.PodReady,
                    Status: api.ConditionTrue,
                },
            },
        },
    }
}

// Offering some cpus and memory and the 8000-9000 port range
func NewTestOffer(i int) *mesos.Offer {
    hostname := fmt.Sprintf("h%d", i)
    cpus := util.NewScalarResource("cpus", 3.75)
    mem := util.NewScalarResource("mem", 940)
    var port8000 uint64 = 8000
    var port9000 uint64 = 9000
    ports8000to9000 := mesos.Value_Range{Begin: &port8000, End: &port9000}
    ports := util.NewRangesResource("ports", []*mesos.Value_Range{&ports8000to9000})
    return &mesos.Offer{
        Id:        util.NewOfferID(fmt.Sprintf("offer%d", i)),
        Hostname:  &hostname,
        SlaveId:   util.NewSlaveID(hostname),
        Resources: []*mesos.Resource{cpus, mem, ports},
    }
}

// Add assertions to reason about event streams
type Event struct {
    Object  runtime.Object
    Reason  string
    Message string
}

type EventPredicate func(e Event) bool

type EventAssertions struct {
    assert.Assertions
}

// EventObserver implements record.EventRecorder for the purposes of validation via EventAssertions.
type EventObserver struct {
    fifo chan Event
}

func NewEventObserver() *EventObserver {
    return &EventObserver{
        fifo: make(chan Event, 1000),
    }
}

func (o *EventObserver) Event(object runtime.Object, reason, message string) {
    o.fifo <- Event{Object: object, Reason: reason, Message: message}
}

func (o *EventObserver) Eventf(object runtime.Object, reason, messageFmt string, args ...interface{}) {
    o.fifo <- Event{Object: object, Reason: reason, Message: fmt.Sprintf(messageFmt, args...)}
}

func (o *EventObserver) PastEventf(object runtime.Object, timestamp kutil.Time, reason, messageFmt string, args ...interface{}) {
    o.fifo <- Event{Object: object, Reason: reason, Message: fmt.Sprintf(messageFmt, args...)}
}

func (a *EventAssertions) Event(observer *EventObserver, pred EventPredicate, msgAndArgs ...interface{}) bool {
    // parse msgAndArgs: first possibly a duration, otherwise a format string with further args
    timeout := time.Second * 2
    msg := "event not received"
    msgArgStart := 0
    if len(msgAndArgs) > 0 {
        switch msgAndArgs[0].(type) {
        case time.Duration:
            timeout = msgAndArgs[0].(time.Duration)
            msgArgStart += 1
        }
    }
    if len(msgAndArgs) > msgArgStart {
        msg = fmt.Sprintf(msgAndArgs[msgArgStart].(string), msgAndArgs[msgArgStart+1:]...)
    }

    // watch events
    result := make(chan bool)
    stop := make(chan struct{})
    go func() {
        for {
            select {
            case e, ok := <-observer.fifo:
                if !ok {
                    result <- false
                    return
                } else if pred(e) {
                    log.V(3).Infof("found asserted event for reason '%v': %v", e.Reason, e.Message)
                    result <- true
                    return
                } else {
                    log.V(5).Infof("ignoring not-asserted event for reason '%v': %v", e.Reason, e.Message)
                }
            case _, ok := <-stop:
                if !ok {
                    return
                }
            }
        }
    }()
    defer close(stop)

    // wait for watch to match or timeout
    select {
    case matched := <-result:
        return matched
    case <-time.After(timeout):
        return a.Fail(msg)
    }
}

func (a *EventAssertions) EventWithReason(observer *EventObserver, reason string, msgAndArgs ...interface{}) bool {
    return a.Event(observer, func(e Event) bool {
        return e.Reason == reason
    }, msgAndArgs...)
}

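// Sketch of asserting on recorded events in a test, as done in
// TestPlugin_LifeCycle below; the leading duration overrides the default
// two-second timeout and is illustrative only:
//
//    assert := &EventAssertions{*assert.New(t)}
//    obs := NewEventObserver()
//    c.Recorder = obs
//    assert.EventWithReason(obs, "scheduled", 5*time.Second, "pod %v was never scheduled", "pod1")
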
type joinableDriver struct {
    MockSchedulerDriver
    joinFunc func() (mesos.Status, error)
}

// Join invokes joinFunc if it has been set, otherwise blocks forever
func (m *joinableDriver) Join() (mesos.Status, error) {
    if m.joinFunc != nil {
        return m.joinFunc()
    }
    select {}
}

// Create mesos.TaskStatus for a given task
func newTaskStatusForTask(task *mesos.TaskInfo, state mesos.TaskState) *mesos.TaskStatus {
    healthy := state == mesos.TaskState_TASK_RUNNING
    ts := float64(time.Now().Nanosecond()) / 1000000000.0
    source := mesos.TaskStatus_SOURCE_EXECUTOR
    return &mesos.TaskStatus{
        TaskId:     task.TaskId,
        State:      &state,
        SlaveId:    task.SlaveId,
        ExecutorId: task.Executor.ExecutorId,
        Timestamp:  &ts,
        Healthy:    &healthy,
        Source:     &source,
        Data:       task.Data,
    }
}

// Test to create the scheduler plugin with an empty plugin config
func TestPlugin_New(t *testing.T) {
    assert := assert.New(t)

    c := PluginConfig{}
    p := NewPlugin(&c)
    assert.NotNil(p)
}

// Test to create the scheduler plugin with the config returned by the scheduler,
// and play through the whole life cycle of the plugin while creating pods, deleting
// and failing them.
func TestPlugin_LifeCycle(t *testing.T) {
    assert := &EventAssertions{*assert.New(t)}

    // create a fake pod watch. We use that below to submit new pods to the scheduler
    podListWatch := NewMockPodsListWatch(api.PodList{})

    // create fake apiserver
    testApiServer := NewTestServer(t, api.NamespaceDefault, podListWatch)
    defer testApiServer.server.Close()

    // create scheduler
    testScheduler := New(Config{
        Executor: util.NewExecutorInfo(
            util.NewExecutorID("executor-id"),
            util.NewCommandInfo("executor-cmd"),
        ),
        Client:       client.NewOrDie(&client.Config{Host: testApiServer.server.URL, Version: testapi.Version()}),
        ScheduleFunc: FCFSScheduleFunc,
        Schedcfg:     *schedcfg.CreateDefaultConfig(),
    })

    assert.NotNil(testScheduler.client, "client is nil")
    assert.NotNil(testScheduler.executor, "executor is nil")
    assert.NotNil(testScheduler.offers, "offer registry is nil")

    // create scheduler process
    schedulerProcess := ha.New(testScheduler)

    // get plugin config from it
    c := testScheduler.NewPluginConfig(schedulerProcess.Terminal(), http.DefaultServeMux, &podListWatch.ListWatch)
    assert.NotNil(c)

    // make events observable
    eventObserver := NewEventObserver()
    c.Recorder = eventObserver

    // create plugin
    p := NewPlugin(c)
    assert.NotNil(p)

    // run plugin
    p.Run(schedulerProcess.Terminal())
    defer schedulerProcess.End()

    // init scheduler
    err := testScheduler.Init(schedulerProcess.Master(), p, http.DefaultServeMux)
    assert.NoError(err)

    // create mock mesos scheduler driver
    mockDriver := &joinableDriver{}
    mockDriver.On("Start").Return(mesos.Status_DRIVER_RUNNING, nil).Once()
    started := mockDriver.Upon()

    mAny := mock.AnythingOfType
    mockDriver.On("ReconcileTasks", mAny("[]*mesosproto.TaskStatus")).Return(mesos.Status_DRIVER_RUNNING, nil)
    mockDriver.On("SendFrameworkMessage", mAny("*mesosproto.ExecutorID"), mAny("*mesosproto.SlaveID"), mAny("string")).
        Return(mesos.Status_DRIVER_RUNNING, nil)

    launchedTasks := make(chan *mesos.TaskInfo, 1)
    launchTasksCalledFunc := func(args mock.Arguments) {
        taskInfos := args.Get(1).([]*mesos.TaskInfo)
        assert.Equal(1, len(taskInfos))
        launchedTasks <- taskInfos[0]
    }
    mockDriver.On("LaunchTasks", mAny("[]*mesosproto.OfferID"), mAny("[]*mesosproto.TaskInfo"), mAny("*mesosproto.Filters")).
        Return(mesos.Status_DRIVER_RUNNING, nil).Run(launchTasksCalledFunc)

    // elect master with mock driver
    driverFactory := ha.DriverFactory(func() (bindings.SchedulerDriver, error) {
        return mockDriver, nil
    })
    schedulerProcess.Elect(driverFactory)
    elected := schedulerProcess.Elected()

    // driver will be started
    <-started

    // tell scheduler to be registered
    testScheduler.Registered(
        mockDriver,
        util.NewFrameworkID("kubernetes-id"),
        util.NewMasterInfo("master-id", (192<<24)+(168<<16)+(0<<8)+1, 5050),
    )

    // wait for being elected
    <-elected

    //TODO(jdef) refactor things above here into a test suite setup of some sort

    // fake new, unscheduled pod
    pod1 := NewTestPod(1)
    podListWatch.Add(pod1, true) // notify watchers

    // wait for failedScheduling event because there is no offer
    assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")

    // add some matching offer
    offers1 := []*mesos.Offer{NewTestOffer(1)}
    testScheduler.ResourceOffers(nil, offers1)

    // and wait for scheduled pod
    assert.EventWithReason(eventObserver, "scheduled")
    select {
    case launchedTask := <-launchedTasks:
        // report back that the task has been staged, and then started by mesos
        testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_STAGING))
        testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_RUNNING))

        // report back that the task has been lost
        mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 0)
        testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_LOST))

        // and wait until the framework message has been sent to the executor
        mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 1)

    case <-time.After(5 * time.Second):
        t.Fatalf("timed out waiting for launchTasks call")
    }

    // start another pod
    podNum := 1
    startPod := func(offers []*mesos.Offer) (*api.Pod, *mesos.TaskInfo) {
        podNum = podNum + 1

        // create pod and matching offer
        pod := NewTestPod(podNum)
        podListWatch.Add(pod, true) // notify watchers
        testScheduler.ResourceOffers(mockDriver, offers)
        assert.EventWithReason(eventObserver, "scheduled")

        // wait for driver.launchTasks call
        select {
        case launchedTask := <-launchedTasks:
            testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_STAGING))
            testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_RUNNING))
            return pod, launchedTask

        case <-time.After(5 * time.Second):
            t.Fatal("timed out waiting for launchTasks")
            return nil, nil
        }
    }

    pod, launchedTask := startPod(offers1)

    // mock driver.KillTask, should be invoked when a pod is deleted
    mockDriver.On("KillTask", mAny("*mesosproto.TaskID")).Return(mesos.Status_DRIVER_RUNNING, nil).Run(func(args mock.Arguments) {
        killedTaskId := *(args.Get(0).(*mesos.TaskID))
        assert.Equal(*launchedTask.TaskId, killedTaskId, "expected same TaskID as during launch")
    })
    killTaskCalled := mockDriver.Upon()

    // stop it again via the apiserver mock
    podListWatch.Delete(pod, true) // notify watchers

    // and wait for the driver killTask call with the correct TaskId
    select {
    case <-killTaskCalled:
        // report back that the task is finished
        testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_FINISHED))

    case <-time.After(5 * time.Second):
        t.Fatal("timed out waiting for KillTask")
    }

    // start pods:
    // - which are failing while binding,
    // - leading to reconciliation
    // - with different states on the apiserver

    failPodFromExecutor := func(task *mesos.TaskInfo) {
        beforePodLookups := testApiServer.Stats(pod.Name)
        status := newTaskStatusForTask(task, mesos.TaskState_TASK_FAILED)
        message := messages.CreateBindingFailure
        status.Message = &message
        testScheduler.StatusUpdate(mockDriver, status)

        // wait until pod is looked up at the apiserver
        assertext.EventuallyTrue(t, time.Second, func() bool {
            return testApiServer.Stats(pod.Name) == beforePodLookups+1
        }, "expect that reconcilePod will access apiserver for pod %v", pod.Name)
    }

    // 1. with pod deleted from the apiserver
    pod, launchedTask = startPod(offers1)
    podListWatch.Delete(pod, false) // not notifying the watchers
    failPodFromExecutor(launchedTask)

    // 2. with pod still on the apiserver, not bound
    pod, launchedTask = startPod(offers1)
    failPodFromExecutor(launchedTask)

    // 3. with pod still on the apiserver, bound i.e. host!=""
    pod, launchedTask = startPod(offers1)
    pod.Spec.NodeName = *offers1[0].Hostname
    podListWatch.Modify(pod, false) // not notifying the watchers
    failPodFromExecutor(launchedTask)

    // 4. with pod still on the apiserver, bound i.e. host!="", notified via ListWatch
    pod, launchedTask = startPod(offers1)
    pod.Spec.NodeName = *offers1[0].Hostname
    podListWatch.Modify(pod, true) // notifying the watchers
    time.Sleep(time.Second / 2)
    failPodFromExecutor(launchedTask)
}

func TestDeleteOne_NonexistentPod(t *testing.T) {
    assert := assert.New(t)
    obj := &MockScheduler{}
    reg := podtask.NewInMemoryRegistry()
    obj.On("tasks").Return(reg)

    qr := newQueuer(nil)
    assert.Equal(0, len(qr.podQueue.List()))
    d := &deleter{
        api: obj,
        qr:  qr,
    }
    pod := &Pod{Pod: &api.Pod{
        ObjectMeta: api.ObjectMeta{
            Name:      "foo",
            Namespace: api.NamespaceDefault,
        }}}
    err := d.deleteOne(pod)
    assert.Equal(err, noSuchPodErr)
    obj.AssertExpectations(t)
}

func TestDeleteOne_PendingPod(t *testing.T) {
    assert := assert.New(t)
    obj := &MockScheduler{}
    reg := podtask.NewInMemoryRegistry()
    obj.On("tasks").Return(reg)

    pod := &Pod{Pod: &api.Pod{
        ObjectMeta: api.ObjectMeta{
            Name:      "foo",
            UID:       "foo0",
            Namespace: api.NamespaceDefault,
        }}}
    _, err := reg.Register(podtask.New(api.NewDefaultContext(), "bar", *pod.Pod, &mesos.ExecutorInfo{}))
    if err != nil {
        t.Fatalf("failed to create task: %v", err)
    }

    // preconditions
    qr := newQueuer(nil)
    qr.podQueue.Add(pod, queue.ReplaceExisting)
    assert.Equal(1, len(qr.podQueue.List()))
    _, found := qr.podQueue.Get("default/foo")
    assert.True(found)

    // exec & post conditions
    d := &deleter{
        api: obj,
        qr:  qr,
    }
    err = d.deleteOne(pod)
    assert.Nil(err)
    _, found = qr.podQueue.Get("foo0")
    assert.False(found)
    assert.Equal(0, len(qr.podQueue.List()))
    obj.AssertExpectations(t)
}

func TestDeleteOne_Running(t *testing.T) {
    assert := assert.New(t)
    obj := &MockScheduler{}
    reg := podtask.NewInMemoryRegistry()
    obj.On("tasks").Return(reg)

    pod := &Pod{Pod: &api.Pod{
        ObjectMeta: api.ObjectMeta{
            Name:      "foo",
            UID:       "foo0",
            Namespace: api.NamespaceDefault,
        }}}
    task, err := reg.Register(podtask.New(api.NewDefaultContext(), "bar", *pod.Pod, &mesos.ExecutorInfo{}))
    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    task.Set(podtask.Launched)
    err = reg.Update(task)
    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    // preconditions
    qr := newQueuer(nil)
    qr.podQueue.Add(pod, queue.ReplaceExisting)
    assert.Equal(1, len(qr.podQueue.List()))
    _, found := qr.podQueue.Get("default/foo")
    assert.True(found)

    obj.On("killTask", task.ID).Return(nil)

    // exec & post conditions
    d := &deleter{
        api: obj,
        qr:  qr,
    }
    err = d.deleteOne(pod)
    assert.Nil(err)
    _, found = qr.podQueue.Get("foo0")
    assert.False(found)
    assert.Equal(0, len(qr.podQueue.List()))
    obj.AssertExpectations(t)
}

func TestDeleteOne_badPodNaming(t *testing.T) {
    assert := assert.New(t)
    obj := &MockScheduler{}
    pod := &Pod{Pod: &api.Pod{}}
    d := &deleter{
        api: obj,
        qr:  newQueuer(nil),
    }

    err := d.deleteOne(pod)
    assert.NotNil(err)

    pod.Pod.ObjectMeta.Name = "foo"
    err = d.deleteOne(pod)
    assert.NotNil(err)

    pod.Pod.ObjectMeta.Name = ""
    pod.Pod.ObjectMeta.Namespace = "bar"
    err = d.deleteOne(pod)
    assert.NotNil(err)

    obj.AssertExpectations(t)
}
80
contrib/mesos/pkg/scheduler/pod.go
Normal file
@@ -0,0 +1,80 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduler

import (
    "fmt"
    "time"

    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
)

// wrapper for the k8s pod type so that we can define additional methods on a "pod"
type Pod struct {
    *api.Pod
    deadline *time.Time
    delay    *time.Duration
    notify   queue.BreakChan
}

// implements Copyable
func (p *Pod) Copy() queue.Copyable {
    if p == nil {
        return nil
    }
    //TODO(jdef) we may need a better "deep-copy" implementation
    pod := *(p.Pod)
    return &Pod{Pod: &pod}
}

// implements Unique
func (p *Pod) GetUID() string {
    if id, err := cache.MetaNamespaceKeyFunc(p.Pod); err != nil {
        panic(fmt.Sprintf("failed to determine pod id for '%+v'", p.Pod))
    } else {
        return id
    }
}

// implements Deadlined
func (p *Pod) Deadline() (time.Time, bool) {
    if p.deadline != nil {
        return *(p.deadline), true
    }
    return time.Time{}, false
}

func (p *Pod) GetDelay() time.Duration {
    if p.delay != nil {
        return *(p.delay)
    }
    return 0
}

func (p *Pod) Breaker() queue.BreakChan {
    return p.notify
}

func (p *Pod) String() string {
    displayDeadline := "<none>"
    if deadline, ok := p.Deadline(); ok {
        displayDeadline = deadline.String()
    }
    return fmt.Sprintf("{pod:%v, deadline:%v, delay:%v}", p.Pod.Name, displayDeadline, p.GetDelay())
}
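
// Sketch of how the scheduler populates this wrapper (cf. the error handler
// and reconcilePod in plugin.go); the values are illustrative:
//
//    delay := 5 * time.Second
//    requeued := &Pod{Pod: apiPod, delay: &delay, notify: breakout} // backoff requeue
//
//    now := time.Now()
//    reoffered := &Pod{Pod: apiPod, deadline: &now} // eligible immediately
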
54
contrib/mesos/pkg/scheduler/podtask/debug.go
Normal file
@@ -0,0 +1,54 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podtask

import (
    "fmt"
    "io"
    "net/http"

    log "github.com/golang/glog"
)

//TODO(jdef) we use a Locker to guard against concurrent task state changes, but it would be
//really, really nice to avoid doing this. Maybe someday the registry won't return data ptrs
//but plain structs instead.
func InstallDebugHandlers(reg Registry, mux *http.ServeMux) {
    mux.HandleFunc("/debug/registry/tasks", func(w http.ResponseWriter, r *http.Request) {
        //TODO(jdef) support filtering tasks based on status
        alltasks := reg.List(nil)
        io.WriteString(w, fmt.Sprintf("task_count=%d\n", len(alltasks)))
        for _, task := range alltasks {
            if err := func() (err error) {
                podName := task.Pod.Name
                podNamespace := task.Pod.Namespace
                offerId := ""
                if task.Offer != nil {
                    offerId = task.Offer.Id()
                }
                _, err = io.WriteString(w, fmt.Sprintf("%v\t%v/%v\t%v\t%v\n", task.ID, podNamespace, podName, task.State, offerId))
                return
            }(); err != nil {
                log.Warningf("aborting debug handler: %v", err)
                break // stop listing on I/O errors
            }
        }
        if flusher, ok := w.(http.Flusher); ok {
            flusher.Flush()
        }
    })
}
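
// Sketch of exercising the handler above from a test; the helper name is
// hypothetical and assumes additional "net/http/httptest" and "io/ioutil"
// imports.
func exampleQueryTasks(reg Registry) (string, error) {
    mux := http.NewServeMux()
    InstallDebugHandlers(reg, mux)
    srv := httptest.NewServer(mux)
    defer srv.Close()

    resp, err := http.Get(srv.URL + "/debug/registry/tasks")
    if err != nil {
        return "", err
    }
    defer resp.Body.Close()

    body, err := ioutil.ReadAll(resp.Body) // first line: task_count=N
    return string(body), err
}
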
18
contrib/mesos/pkg/scheduler/podtask/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package podtask maps Kubernetes pods to Mesos tasks.
package podtask
29
contrib/mesos/pkg/scheduler/podtask/leaky.go
Normal file
@@ -0,0 +1,29 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podtask

// Concepts that have leaked to where they should not have.

import (
    "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/registry/etcd"
)

// MakePodKey constructs etcd paths to pod items enforcing namespace rules.
func MakePodKey(ctx api.Context, id string) (string, error) {
    return etcd.MakeEtcdItemKey(ctx, PodPath, id)
}
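
// Example (sketch; the exact result depends on PodPath):
//
//    ctx := api.WithNamespace(api.NewDefaultContext(), "bar")
//    key, err := MakePodKey(ctx, "foo") // -> "<PodPath>/bar/foo"
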
373
contrib/mesos/pkg/scheduler/podtask/pod_task.go
Normal file
@@ -0,0 +1,373 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podtask

import (
    "fmt"
    "strings"
    "time"

    "code.google.com/p/go-uuid/uuid"
    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
    annotation "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/metrics"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
    "github.com/gogo/protobuf/proto"
    log "github.com/golang/glog"
    mesos "github.com/mesos/mesos-go/mesosproto"
    mutil "github.com/mesos/mesos-go/mesosutil"
)

const (
    containerCpus = 0.25 // initial CPU allocated for executor
    containerMem  = 64   // initial MB of memory allocated for executor
)

type StateType int

const (
    StatePending StateType = iota
    StateRunning
    StateFinished
    StateUnknown
)

type FlagType string

const (
    Launched = FlagType("launched")
    Bound    = FlagType("bound")
    Deleted  = FlagType("deleted")
)

// T describes a pod task.
type T struct {
    ID          string
    Pod         api.Pod
    Spec        Spec
    Offer       offers.Perishable // thread-safe
    State       StateType
    Flags       map[FlagType]struct{}
    CreateTime  time.Time
    UpdatedTime time.Time // time of the most recent StatusUpdate we've seen from the mesos master

    podStatus  api.PodStatus
    executor   *mesos.ExecutorInfo // readonly
    podKey     string
    launchTime time.Time
    bindTime   time.Time
    mapper     HostPortMappingType
}

type Spec struct {
    SlaveID string
    CPU     float64
    Memory  float64
    PortMap []HostPortMapping
    Ports   []uint64
    Data    []byte
}

// Clone mostly-clones this pod task; the clone will actually share some fields:
//   - executor // OK because it's read only
//   - Offer    // OK because it guarantees safe concurrent access
func (t *T) Clone() *T {
    if t == nil {
        return nil
    }

    // shallow-copy
    clone := *t

    // deep copy
    (&t.Spec).copyTo(&clone.Spec)
    clone.Flags = map[FlagType]struct{}{}
    for k := range t.Flags {
        clone.Flags[k] = struct{}{}
    }
    return &clone
}

func (old *Spec) copyTo(new *Spec) {
    if len(old.PortMap) > 0 {
        new.PortMap = append(([]HostPortMapping)(nil), old.PortMap...)
    }
    if len(old.Ports) > 0 {
        new.Ports = append(([]uint64)(nil), old.Ports...)
    }
    if len(old.Data) > 0 {
        new.Data = append(([]byte)(nil), old.Data...)
    }
}

func (t *T) HasAcceptedOffer() bool {
    return t.Spec.SlaveID != ""
}

func (t *T) GetOfferId() string {
    if t.Offer == nil {
        return ""
    }
    return t.Offer.Details().Id.GetValue()
}

func generateTaskName(pod *api.Pod) string {
    ns := pod.Namespace
    if ns == "" {
        ns = api.NamespaceDefault
    }
    return fmt.Sprintf("%s.%s.pods", pod.Name, ns)
}

func (t *T) BuildTaskInfo() *mesos.TaskInfo {
    info := &mesos.TaskInfo{
        Name:     proto.String(generateTaskName(&t.Pod)),
        TaskId:   mutil.NewTaskID(t.ID),
        SlaveId:  mutil.NewSlaveID(t.Spec.SlaveID),
        Executor: t.executor,
        Data:     t.Spec.Data,
        Resources: []*mesos.Resource{
            mutil.NewScalarResource("cpus", t.Spec.CPU),
            mutil.NewScalarResource("mem", t.Spec.Memory),
        },
    }
    if portsResource := rangeResource("ports", t.Spec.Ports); portsResource != nil {
        info.Resources = append(info.Resources, portsResource)
    }
    return info
}

// FillFromDetails fills the Spec in the T; it should be called during k8s
// scheduling, before binding.
func (t *T) FillFromDetails(details *mesos.Offer) error {
    if details == nil {
        //programming error
        panic("offer details are nil")
    }

    log.V(3).Infof("Recording offer(s) %v against pod %v", details.Id, t.Pod.Name)

    t.Spec = Spec{
        SlaveID: details.GetSlaveId().GetValue(),
        CPU:     containerCpus,
        Memory:  containerMem,
    }

    if mapping, err := t.mapper.Generate(t, details); err != nil {
        t.Reset()
        return err
    } else {
        ports := []uint64{}
        for _, entry := range mapping {
            ports = append(ports, entry.OfferPort)
        }
        t.Spec.PortMap = mapping
        t.Spec.Ports = ports
    }

    // the hostname of the executor needs to match that of the offer, otherwise
    // the kubelet node status checker/updater is very unhappy
    const HOSTNAME_OVERRIDE_FLAG = "--hostname-override="
    hostname := details.GetHostname() // required field, non-empty
    hostnameOverride := HOSTNAME_OVERRIDE_FLAG + hostname

    argv := t.executor.Command.Arguments
    overwrite := false
    for i, arg := range argv {
        if strings.HasPrefix(arg, HOSTNAME_OVERRIDE_FLAG) {
            overwrite = true
            argv[i] = hostnameOverride
            break
        }
    }
    if !overwrite {
        t.executor.Command.Arguments = append(argv, hostnameOverride)
    }
    return nil
}

// Reset clears offer-related details from the task; it should be called
// if/when an offer has already been assigned to a task but for some reason
// is no longer valid.
func (t *T) Reset() {
    log.V(3).Infof("Clearing offer(s) from pod %v", t.Pod.Name)
    t.Offer = nil
    t.Spec = Spec{}
}

func (t *T) AcceptOffer(offer *mesos.Offer) bool {
    if offer == nil {
        return false
    }
    var (
        cpus float64 = 0
        mem  float64 = 0
    )
    for _, resource := range offer.Resources {
        if resource.GetName() == "cpus" {
            cpus = *resource.GetScalar().Value
        }

        if resource.GetName() == "mem" {
            mem = *resource.GetScalar().Value
        }
    }
    if _, err := t.mapper.Generate(t, offer); err != nil {
        log.V(3).Info(err)
        return false
    }

    // for now hard-coded, constant values are used for cpus and mem. This is necessary
    // until parent-cgroup integration is finished for mesos and k8sm. Then the k8sm
    // executor can become the parent of pods and subsume their resource usage and
    // therefore be compliant with expectations of mesos executors w/ respect to
    // resource allocation and management.
    //
    // TODO(jdef): remove hardcoded values and make use of actual pod resource settings
    if (cpus < containerCpus) || (mem < containerMem) {
        log.V(3).Infof("not enough resources: cpus: %f mem: %f", cpus, mem)
        return false
    }
    return true
}

func (t *T) Set(f FlagType) {
    t.Flags[f] = struct{}{}
    if Launched == f {
        t.launchTime = time.Now()
        queueWaitTime := t.launchTime.Sub(t.CreateTime)
        metrics.QueueWaitTime.Observe(metrics.InMicroseconds(queueWaitTime))
    }
}

func (t *T) Has(f FlagType) (exists bool) {
    _, exists = t.Flags[f]
    return
}

func New(ctx api.Context, id string, pod api.Pod, executor *mesos.ExecutorInfo) (*T, error) {
    if executor == nil {
        return nil, fmt.Errorf("illegal argument: executor was nil")
    }
    key, err := MakePodKey(ctx, pod.Name)
    if err != nil {
        return nil, err
    }
    if id == "" {
        id = "pod." + uuid.NewUUID().String()
    }
    task := &T{
        ID:       id,
        Pod:      pod,
        State:    StatePending,
        podKey:   key,
        mapper:   MappingTypeForPod(&pod),
        Flags:    make(map[FlagType]struct{}),
        executor: proto.Clone(executor).(*mesos.ExecutorInfo),
    }
    task.CreateTime = time.Now()
    return task, nil
}

func (t *T) SaveRecoveryInfo(dict map[string]string) {
    dict[annotation.TaskIdKey] = t.ID
    dict[annotation.SlaveIdKey] = t.Spec.SlaveID
    dict[annotation.OfferIdKey] = t.Offer.Details().Id.GetValue()
    dict[annotation.ExecutorIdKey] = t.executor.ExecutorId.GetValue()
}

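// Sketch of the pending-task lifecycle implemented above, from construction
// to a launchable TaskInfo; `pod`, `executor` and a perishable `offer` are
// assumptions here and error handling is elided:
//
//    task, _ := New(api.NewDefaultContext(), "", pod, executor)
//    if task.AcceptOffer(offer.Details()) { // resources and ports fit?
//        task.Offer = offer
//        _ = task.FillFromDetails(offer.Details()) // fill Spec from the offer
//        info := task.BuildTaskInfo()              // hand to driver.LaunchTasks
//        _ = info
//    }
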
// RecoverFrom reconstructs a task from metadata stashed in a pod entry. There
// are limited pod states that support reconstruction: if we expect to be able
// to reconstruct state but encounter errors in the process, those errors are
// returned; if the pod is in a seemingly valid state but otherwise does not
// support task reconstruction, (nil, false, nil) is returned; if state can be
// reconstructed, the reconstructed task and true are returned.
//
// At this time task reconstruction is only supported for pods that have been
// annotated with binding metadata, which implies that they've previously been
// associated with a task and that mesos knows about it.
//
// Assumes that the pod data comes from the k8s registry and reflects the
// desired state.
func RecoverFrom(pod api.Pod) (*T, bool, error) {
    // we only expect annotations if pod has been bound, which implies that it has already
    // been scheduled and launched
    if pod.Spec.NodeName == "" && len(pod.Annotations) == 0 {
        log.V(1).Infof("skipping recovery for unbound pod %v/%v", pod.Namespace, pod.Name)
        return nil, false, nil
    }

    // only process pods that are not in a terminal state
    switch pod.Status.Phase {
    case api.PodPending, api.PodRunning, api.PodUnknown: // continue
    default:
        log.V(1).Infof("skipping recovery for terminal pod %v/%v", pod.Namespace, pod.Name)
        return nil, false, nil
    }

    ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)
    key, err := MakePodKey(ctx, pod.Name)
    if err != nil {
        return nil, false, err
    }

    //TODO(jdef) recover ports (and other resource requirements?) from the pod spec as well

    now := time.Now()
    t := &T{
        Pod:        pod,
        CreateTime: now,
        podKey:     key,
        State:      StatePending, // possibly running? mesos will tell us during reconciliation
        Flags:      make(map[FlagType]struct{}),
        mapper:     MappingTypeForPod(&pod),
        launchTime: now,
        bindTime:   now,
    }
    var (
        offerId  string
        hostname string
    )
    for _, k := range []string{
        annotation.BindingHostKey,
        annotation.TaskIdKey,
        annotation.SlaveIdKey,
        annotation.OfferIdKey,
        annotation.ExecutorIdKey,
    } {
        v, found := pod.Annotations[k]
        if !found {
            return nil, false, fmt.Errorf("incomplete metadata: missing value for pod annotation: %v", k)
        }
        switch k {
        case annotation.BindingHostKey:
            hostname = v
        case annotation.SlaveIdKey:
            t.Spec.SlaveID = v
        case annotation.OfferIdKey:
            offerId = v
        case annotation.TaskIdKey:
            t.ID = v
        case annotation.ExecutorIdKey:
            // this is nowhere near sufficient to re-launch a task, but we really just
            // want this for tracking
            t.executor = &mesos.ExecutorInfo{ExecutorId: mutil.NewExecutorID(v)}
        }
    }
    t.Offer = offers.Expired(offerId, hostname, 0)
    t.Flags[Launched] = struct{}{}
    t.Flags[Bound] = struct{}{}
    return t, true, nil
}
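
// Sketch of the recovery round trip: a bound pod carries the annotations
// written by SaveRecoveryInfo (plus BindingHostKey), from which a partial
// task can be rebuilt after scheduler failover:
//
//    task, ok, err := RecoverFrom(pod)
//    if err == nil && ok {
//        // task.ID, task.Spec.SlaveID and an expired Offer stub are restored;
//        // mesos task reconciliation later fills in the live state
//    }
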
153
contrib/mesos/pkg/scheduler/podtask/pod_task_test.go
Normal file
@@ -0,0 +1,153 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podtask

import (
    "testing"

    "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
    mesos "github.com/mesos/mesos-go/mesosproto"
    mutil "github.com/mesos/mesos-go/mesosutil"
)

const (
    t_min_cpu = 128
    t_min_mem = 128
)

func fakePodTask(id string) (*T, error) {
    return New(api.NewDefaultContext(), "", api.Pod{
        ObjectMeta: api.ObjectMeta{
            Name:      id,
            Namespace: api.NamespaceDefault,
        },
    }, &mesos.ExecutorInfo{})
}

func TestEmptyOffer(t *testing.T) {
    t.Parallel()
    task, err := fakePodTask("foo")
    if err != nil {
        t.Fatal(err)
    }
    if ok := task.AcceptOffer(nil); ok {
        t.Fatalf("accepted nil offer")
    }
    if ok := task.AcceptOffer(&mesos.Offer{}); ok {
        t.Fatalf("accepted empty offer")
    }
}

func TestNoPortsInPodOrOffer(t *testing.T) {
    t.Parallel()
    task, err := fakePodTask("foo")
    if err != nil || task == nil {
        t.Fatal(err)
    }

    offer := &mesos.Offer{
        Resources: []*mesos.Resource{
            mutil.NewScalarResource("cpus", 0.001),
            mutil.NewScalarResource("mem", 0.001),
        },
    }
    if ok := task.AcceptOffer(offer); ok {
        t.Fatalf("accepted offer %v:", offer)
    }

    offer = &mesos.Offer{
        Resources: []*mesos.Resource{
            mutil.NewScalarResource("cpus", t_min_cpu),
            mutil.NewScalarResource("mem", t_min_mem),
        },
    }
    if ok := task.AcceptOffer(offer); !ok {
        t.Fatalf("did not accept offer %v:", offer)
    }
}

func TestAcceptOfferPorts(t *testing.T) {
|
||||
t.Parallel()
|
||||
task, _ := fakePodTask("foo")
|
||||
pod := &task.Pod
|
||||
|
||||
offer := &mesos.Offer{
|
||||
Resources: []*mesos.Resource{
|
||||
mutil.NewScalarResource("cpus", t_min_cpu),
|
||||
mutil.NewScalarResource("mem", t_min_mem),
|
||||
rangeResource("ports", []uint64{1, 1}),
|
||||
},
|
||||
}
|
||||
if ok := task.AcceptOffer(offer); !ok {
|
||||
t.Fatalf("did not accepted offer %v:", offer)
|
||||
}
|
||||
|
||||
pod.Spec = api.PodSpec{
|
||||
Containers: []api.Container{{
|
||||
Ports: []api.ContainerPort{{
|
||||
HostPort: 123,
|
||||
}},
|
||||
}},
|
||||
}
|
||||
if ok := task.AcceptOffer(offer); ok {
|
||||
t.Fatalf("accepted offer %v:", offer)
|
||||
}
|
||||
|
||||
pod.Spec.Containers[0].Ports[0].HostPort = 1
|
||||
if ok := task.AcceptOffer(offer); !ok {
|
||||
t.Fatalf("did not accepted offer %v:", offer)
|
||||
}
|
||||
|
||||
pod.Spec.Containers[0].Ports[0].HostPort = 0
|
||||
if ok := task.AcceptOffer(offer); !ok {
|
||||
t.Fatalf("did not accepted offer %v:", offer)
|
||||
}
|
||||
|
||||
offer.Resources = []*mesos.Resource{
|
||||
mutil.NewScalarResource("cpus", t_min_cpu),
|
||||
mutil.NewScalarResource("mem", t_min_mem),
|
||||
}
|
||||
if ok := task.AcceptOffer(offer); ok {
|
||||
t.Fatalf("accepted offer %v:", offer)
|
||||
}
|
||||
|
||||
pod.Spec.Containers[0].Ports[0].HostPort = 1
|
||||
if ok := task.AcceptOffer(offer); ok {
|
||||
t.Fatalf("accepted offer %v:", offer)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGeneratePodName(t *testing.T) {
|
||||
p := &api.Pod{
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
Name: "foo",
|
||||
Namespace: "bar",
|
||||
},
|
||||
}
|
||||
name := generateTaskName(p)
|
||||
expected := "foo.bar.pods"
|
||||
if name != expected {
|
||||
t.Fatalf("expected %q instead of %q", expected, name)
|
||||
}
|
||||
|
||||
p.Namespace = ""
|
||||
name = generateTaskName(p)
|
||||
expected = "foo.default.pods"
|
||||
if name != expected {
|
||||
t.Fatalf("expected %q instead of %q", expected, name)
|
||||
}
|
||||
}
|
||||
185
contrib/mesos/pkg/scheduler/podtask/port_mapping.go
Normal file
@@ -0,0 +1,185 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podtask

import (
	"fmt"

	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
	log "github.com/golang/glog"
	mesos "github.com/mesos/mesos-go/mesosproto"
)

type HostPortMappingType string

const (
	// maps a Container.HostPort to the same exact offered host port, ignores .HostPort = 0
	HostPortMappingFixed HostPortMappingType = "fixed"
	// same as HostPortMappingFixed, except that a .HostPort of 0 is mapped to any port offered
	HostPortMappingWildcard = "wildcard"
)

type HostPortMapper interface {
	// abstracts the way that host ports are mapped to pod container ports
	Generate(t *T, offer *mesos.Offer) ([]HostPortMapping, error)
}

type HostPortMapping struct {
	ContainerIdx int // index of the container in the pod spec
	PortIdx      int // index of the port in a container's port spec
	OfferPort    uint64
}

func (self HostPortMappingType) Generate(t *T, offer *mesos.Offer) ([]HostPortMapping, error) {
	switch self {
	case HostPortMappingWildcard:
		return wildcardHostPortMapping(t, offer)
	case HostPortMappingFixed:
	default:
		log.Warningf("illegal host-port mapping spec %q, defaulting to %q", self, HostPortMappingFixed)
	}
	return defaultHostPortMapping(t, offer)
}

type PortAllocationError struct {
	PodId string
	Ports []uint64
}

func (err *PortAllocationError) Error() string {
	return fmt.Sprintf("Could not schedule pod %s: %d port(s) could not be allocated", err.PodId, len(err.Ports))
}

type DuplicateHostPortError struct {
	m1, m2 HostPortMapping
}

func (err *DuplicateHostPortError) Error() string {
	return fmt.Sprintf(
		"Host port %d is specified for container %d, port %d and container %d, port %d",
		err.m1.OfferPort, err.m1.ContainerIdx, err.m1.PortIdx, err.m2.ContainerIdx, err.m2.PortIdx)
}

// wildcard k8s host port mapping implementation: hostPort == 0 gets mapped to any available offer port
func wildcardHostPortMapping(t *T, offer *mesos.Offer) ([]HostPortMapping, error) {
	mapping, err := defaultHostPortMapping(t, offer)
	if err != nil {
		return nil, err
	}
	taken := make(map[uint64]struct{})
	for _, entry := range mapping {
		taken[entry.OfferPort] = struct{}{}
	}
	wildports := []HostPortMapping{}
	for i, container := range t.Pod.Spec.Containers {
		for pi, port := range container.Ports {
			if port.HostPort == 0 {
				wildports = append(wildports, HostPortMapping{
					ContainerIdx: i,
					PortIdx:      pi,
				})
			}
		}
	}
	remaining := len(wildports)
	foreachRange(offer, "ports", func(bp, ep uint64) {
		log.V(3).Infof("Searching for wildcard port in range {%d:%d}", bp, ep)
		for _, entry := range wildports {
			if entry.OfferPort != 0 {
				continue
			}
			for port := bp; port <= ep && remaining > 0; port++ {
				if _, inuse := taken[port]; inuse {
					continue
				}
				entry.OfferPort = port
				mapping = append(mapping, entry)
				remaining--
				taken[port] = struct{}{}
				break
			}
		}
	})
	if remaining > 0 {
		err := &PortAllocationError{
			PodId: t.Pod.Name,
		}
		// it doesn't make sense to include a port list here because they were all zero (wildcards)
		return nil, err
	}
	return mapping, nil
}

// default k8s host port mapping implementation: hostPort == 0 means containerPort remains pod-private, and so
// no offer ports will be mapped to such Container ports.
func defaultHostPortMapping(t *T, offer *mesos.Offer) ([]HostPortMapping, error) {
	requiredPorts := make(map[uint64]HostPortMapping)
	mapping := []HostPortMapping{}
	for i, container := range t.Pod.Spec.Containers {
		// strip all port==0 from this array; k8s already knows what to do with zero-
		// ports (it does not create 'port bindings' on the minion-host); we need to
		// remove the wildcards from this array since they don't consume host resources
		for pi, port := range container.Ports {
			if port.HostPort == 0 {
				continue // ignore
			}
			m := HostPortMapping{
				ContainerIdx: i,
				PortIdx:      pi,
				OfferPort:    uint64(port.HostPort),
			}
			if entry, inuse := requiredPorts[uint64(port.HostPort)]; inuse {
				return nil, &DuplicateHostPortError{entry, m}
			}
			requiredPorts[uint64(port.HostPort)] = m
		}
	}
	foreachRange(offer, "ports", func(bp, ep uint64) {
		for port := range requiredPorts {
			log.V(3).Infof("evaluating port range {%d:%d} %d", bp, ep, port)
			if (bp <= port) && (port <= ep) {
				mapping = append(mapping, requiredPorts[port])
				delete(requiredPorts, port)
			}
		}
	})
	unsatisfiedPorts := len(requiredPorts)
	if unsatisfiedPorts > 0 {
		err := &PortAllocationError{
			PodId: t.Pod.Name,
		}
		for p := range requiredPorts {
			err.Ports = append(err.Ports, p)
		}
		return nil, err
	}
	return mapping, nil
}

const PortMappingLabelKey = "k8s.mesosphere.io/portMapping"

func MappingTypeForPod(pod *api.Pod) HostPortMappingType {
	filter := map[string]string{
		PortMappingLabelKey: string(HostPortMappingFixed),
	}
	selector := labels.Set(filter).AsSelector()
	if selector.Matches(labels.Set(pod.Labels)) {
		return HostPortMappingFixed
	}
	return HostPortMappingWildcard
}
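The mapping type is chosen per pod via the k8s.mesosphere.io/portMapping label; any value other than "fixed" (including no label at all) falls back to wildcard. A minimal sketch, not part of this commit; the pod here is hypothetical:

func exampleMappingSelection() HostPortMappingType {
	pod := &api.Pod{
		ObjectMeta: api.ObjectMeta{
			Name:   "web", // hypothetical pod
			Labels: map[string]string{PortMappingLabelKey: string(HostPortMappingFixed)},
		},
	}
	return MappingTypeForPod(pod) // HostPortMappingFixed; unlabeled pods yield HostPortMappingWildcard
}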
205
contrib/mesos/pkg/scheduler/podtask/port_mapping_test.go
Normal file
@@ -0,0 +1,205 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podtask

import (
	"testing"

	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	mesos "github.com/mesos/mesos-go/mesosproto"
)

func TestDefaultHostPortMatching(t *testing.T) {
	t.Parallel()
	task, _ := fakePodTask("foo")
	pod := &task.Pod

	offer := &mesos.Offer{
		Resources: []*mesos.Resource{
			rangeResource("ports", []uint64{1, 1}),
		},
	}
	mapping, err := defaultHostPortMapping(task, offer)
	if err != nil {
		t.Fatal(err)
	}
	if len(mapping) > 0 {
		t.Fatalf("Found mappings for a pod without ports: %v", pod)
	}

	//--
	pod.Spec = api.PodSpec{
		Containers: []api.Container{{
			Ports: []api.ContainerPort{{
				HostPort: 123,
			}, {
				HostPort: 123,
			}},
		}},
	}
	task, err = New(api.NewDefaultContext(), "", *pod, &mesos.ExecutorInfo{})
	if err != nil {
		t.Fatal(err)
	}
	_, err = defaultHostPortMapping(task, offer)
	if err, _ := err.(*DuplicateHostPortError); err == nil {
		t.Fatal("Expected duplicate port error")
	} else if err.m1.OfferPort != 123 {
		t.Fatal("Expected duplicate host port 123")
	}
}

func TestWildcardHostPortMatching(t *testing.T) {
	t.Parallel()
	task, _ := fakePodTask("foo")
	pod := &task.Pod

	offer := &mesos.Offer{}
	mapping, err := wildcardHostPortMapping(task, offer)
	if err != nil {
		t.Fatal(err)
	}
	if len(mapping) > 0 {
		t.Fatalf("Found mappings for an empty offer and a pod without ports: %v", pod)
	}

	//--
	offer = &mesos.Offer{
		Resources: []*mesos.Resource{
			rangeResource("ports", []uint64{1, 1}),
		},
	}
	mapping, err = wildcardHostPortMapping(task, offer)
	if err != nil {
		t.Fatal(err)
	}
	if len(mapping) > 0 {
		t.Fatalf("Found mappings for a pod without ports: %v", pod)
	}

	//--
	pod.Spec = api.PodSpec{
		Containers: []api.Container{{
			Ports: []api.ContainerPort{{
				HostPort: 123,
			}},
		}},
	}
	task, err = New(api.NewDefaultContext(), "", *pod, &mesos.ExecutorInfo{})
	if err != nil {
		t.Fatal(err)
	}
	mapping, err = wildcardHostPortMapping(task, offer)
	if err == nil {
		t.Fatalf("expected error instead of mappings: %#v", mapping)
	} else if err, _ := err.(*PortAllocationError); err == nil {
		t.Fatal("Expected port allocation error")
	} else if !(len(err.Ports) == 1 && err.Ports[0] == 123) {
		t.Fatal("Expected port allocation error for host port 123")
	}

	//--
	pod.Spec = api.PodSpec{
		Containers: []api.Container{{
			Ports: []api.ContainerPort{{
				HostPort: 0,
			}, {
				HostPort: 123,
			}},
		}},
	}
	task, err = New(api.NewDefaultContext(), "", *pod, &mesos.ExecutorInfo{})
	if err != nil {
		t.Fatal(err)
	}
	mapping, err = wildcardHostPortMapping(task, offer)
	if err, _ := err.(*PortAllocationError); err == nil {
		t.Fatal("Expected port allocation error")
	} else if !(len(err.Ports) == 1 && err.Ports[0] == 123) {
		t.Fatal("Expected port allocation error for host port 123")
	}

	//--
	pod.Spec = api.PodSpec{
		Containers: []api.Container{{
			Ports: []api.ContainerPort{{
				HostPort: 0,
			}, {
				HostPort: 1,
			}},
		}},
	}
	task, err = New(api.NewDefaultContext(), "", *pod, &mesos.ExecutorInfo{})
	if err != nil {
		t.Fatal(err)
	}
	mapping, err = wildcardHostPortMapping(task, offer)
	if err, _ := err.(*PortAllocationError); err == nil {
		t.Fatal("Expected port allocation error")
	} else if len(err.Ports) != 0 {
		t.Fatal("Expected port allocation error for wildcard port")
	}

	//--
	offer = &mesos.Offer{
		Resources: []*mesos.Resource{
			rangeResource("ports", []uint64{1, 2}),
		},
	}
	mapping, err = wildcardHostPortMapping(task, offer)
	if err != nil {
		t.Fatal(err)
	} else if len(mapping) != 2 {
		t.Fatal("Expected both ports allocated")
	}
	valid := 0
	for _, entry := range mapping {
		if entry.ContainerIdx == 0 && entry.PortIdx == 0 && entry.OfferPort == 2 {
			valid++
		}
		if entry.ContainerIdx == 0 && entry.PortIdx == 1 && entry.OfferPort == 1 {
			valid++
		}
	}
	if valid < 2 {
		t.Fatalf("Expected 2 valid port mappings, not %d", valid)
	}
}

func TestMappingTypeForPod(t *testing.T) {
	pod := &api.Pod{
		ObjectMeta: api.ObjectMeta{
			Labels: map[string]string{},
		},
	}
	mt := MappingTypeForPod(pod)
	if mt != HostPortMappingWildcard {
		t.Fatalf("expected wildcard mapping")
	}

	pod.Labels[PortMappingLabelKey] = string(HostPortMappingFixed)
	mt = MappingTypeForPod(pod)
	if mt != HostPortMappingFixed {
		t.Fatalf("expected fixed mapping")
	}

	pod.Labels[PortMappingLabelKey] = string(HostPortMappingWildcard)
	mt = MappingTypeForPod(pod)
	if mt != HostPortMappingWildcard {
		t.Fatalf("expected wildcard mapping")
	}
}
57
contrib/mesos/pkg/scheduler/podtask/protobuf.go
Normal file
@@ -0,0 +1,57 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podtask

import (
	"github.com/gogo/protobuf/proto"
	mesos "github.com/mesos/mesos-go/mesosproto"
)

// create a range resource for the listed ports
func rangeResource(name string, ports []uint64) *mesos.Resource {
	if len(ports) == 0 {
		// pod may consist of a container that doesn't expose any ports on the host
		return nil
	}
	return &mesos.Resource{
		Name:   proto.String(name),
		Type:   mesos.Value_RANGES.Enum(),
		Ranges: newRanges(ports),
	}
}

// generate port ranges from a list of ports. this implementation is very naive
func newRanges(ports []uint64) *mesos.Value_Ranges {
	r := make([]*mesos.Value_Range, 0)
	for _, port := range ports {
		x := proto.Uint64(port)
		r = append(r, &mesos.Value_Range{Begin: x, End: x})
	}
	return &mesos.Value_Ranges{Range: r}
}

func foreachRange(offer *mesos.Offer, resourceName string, f func(begin, end uint64)) {
	for _, resource := range offer.Resources {
		if resource.GetName() == resourceName {
			for _, r := range resource.GetRanges().Range {
				bp := r.GetBegin()
				ep := r.GetEnd()
				f(bp, ep)
			}
		}
	}
}
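A small sketch of how these helpers round-trip (ports chosen arbitrarily): rangeResource emits one single-port range per listed port, and foreachRange walks them back out of an offer.

func examplePortRanges() int {
	offer := &mesos.Offer{
		Resources: []*mesos.Resource{rangeResource("ports", []uint64{31000, 31001})},
	}
	total := 0
	foreachRange(offer, "ports", func(bp, ep uint64) {
		total += int(ep-bp) + 1 // each range contributes ep-bp+1 ports
	})
	return total // 2 for the offer above
}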
335
contrib/mesos/pkg/scheduler/podtask/registry.go
Normal file
@@ -0,0 +1,335 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podtask

import (
	"container/ring"
	"encoding/json"
	"fmt"
	"sync"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/metrics"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	log "github.com/golang/glog"
	mesos "github.com/mesos/mesos-go/mesosproto"
)

const (
	//TODO(jdef) move this somewhere else
	PodPath = "/pods"

	// length of historical record of finished tasks
	defaultFinishedTasksSize = 1024
)

// state store for pod tasks
type Registry interface {
	// register the specified task with this registry, as long as the current error
	// condition is nil. if no errors occur then return a copy of the registered task.
	Register(*T, error) (*T, error)

	// unregister the specified task from this registry
	Unregister(*T)

	// update state for the registered task identified by task.ID; an error is
	// returned if the task is unknown or may not be updated in its current state.
	Update(task *T) error

	// return the task registered for the specified task ID and its current state.
	// if there is no such task then StateUnknown is returned.
	Get(taskId string) (task *T, currentState StateType)

	// return the non-terminal task corresponding to the specified pod ID
	ForPod(podID string) (task *T, currentState StateType)

	// update the task status given the specified mesos task status update, returning a
	// copy of the updated task (if any) and its state.
	UpdateStatus(status *mesos.TaskStatus) (*T, StateType)

	// return a list of tasks that match the given filter, or all tasks if filter == nil.
	List(filter func(*T) bool) []*T
}

type inMemoryRegistry struct {
	rw            sync.RWMutex
	taskRegistry  map[string]*T
	tasksFinished *ring.Ring
	podToTask     map[string]string
}

func NewInMemoryRegistry() Registry {
	return &inMemoryRegistry{
		taskRegistry:  make(map[string]*T),
		tasksFinished: ring.New(defaultFinishedTasksSize),
		podToTask:     make(map[string]string),
	}
}

func (k *inMemoryRegistry) List(accepts func(t *T) bool) (tasks []*T) {
	k.rw.RLock()
	defer k.rw.RUnlock()
	for _, task := range k.taskRegistry {
		if accepts == nil || accepts(task) {
			tasks = append(tasks, task.Clone())
		}
	}
	return
}

func (k *inMemoryRegistry) ForPod(podID string) (task *T, currentState StateType) {
	k.rw.RLock()
	defer k.rw.RUnlock()
	tid, ok := k.podToTask[podID]
	if !ok {
		return nil, StateUnknown
	}
	t, state := k._get(tid)
	return t.Clone(), state
}

// registers a pod task, unless the given error is non-nil; the error, if any,
// is passed through to the caller either way.
func (k *inMemoryRegistry) Register(task *T, err error) (*T, error) {
	if err == nil {
		k.rw.Lock()
		defer k.rw.Unlock()
		if _, found := k.podToTask[task.podKey]; found {
			return nil, fmt.Errorf("task already registered for pod key %q", task.podKey)
		}
		if _, found := k.taskRegistry[task.ID]; found {
			return nil, fmt.Errorf("task already registered for id %q", task.ID)
		}
		k.podToTask[task.podKey] = task.ID
		k.taskRegistry[task.ID] = task
	}
	return task.Clone(), err
}
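For orientation, a minimal sketch of the intended Register/Get/Unregister round trip; callers always receive defensive clones. The task value is assumed to come from podtask.New elsewhere:

func trackTask(r Registry, task *T) error {
	clone, err := r.Register(task, nil) // a copy of the registered task comes back
	if err != nil {
		return err
	}
	if got, state := r.Get(clone.ID); got == nil || state != StatePending {
		return fmt.Errorf("expected pending task %q", clone.ID)
	}
	r.Unregister(clone)
	return nil
}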
// updates internal task state. updates are limited to Spec, Flags, and Offer for
// StatePending tasks, and are limited to Flag updates (additive only) for StateRunning tasks.
func (k *inMemoryRegistry) Update(task *T) error {
	if task == nil {
		return nil
	}
	k.rw.Lock()
	defer k.rw.Unlock()
	switch internal, state := k._get(task.ID); state {
	case StateUnknown:
		return fmt.Errorf("no such task: %v", task.ID)
	case StatePending:
		internal.Offer = task.Offer
		internal.Spec = task.Spec
		(&task.Spec).copyTo(&internal.Spec)
		internal.Flags = map[FlagType]struct{}{}
		fallthrough
	case StateRunning:
		for k, v := range task.Flags {
			internal.Flags[k] = v
		}
		return nil
	default:
		return fmt.Errorf("may not update task %v in state %v", task.ID, state)
	}
}

func (k *inMemoryRegistry) Unregister(task *T) {
	k.rw.Lock()
	defer k.rw.Unlock()
	delete(k.podToTask, task.podKey)
	delete(k.taskRegistry, task.ID)
}

func (k *inMemoryRegistry) Get(taskId string) (*T, StateType) {
	k.rw.RLock()
	defer k.rw.RUnlock()
	t, state := k._get(taskId)
	return t.Clone(), state
}

// assumes that the caller has already locked around access to task state.
// the caller is also responsible for cloning the task object before it leaves
// the context of this registry.
func (k *inMemoryRegistry) _get(taskId string) (*T, StateType) {
	if task, found := k.taskRegistry[taskId]; found {
		return task, task.State
	}
	return nil, StateUnknown
}

func (k *inMemoryRegistry) UpdateStatus(status *mesos.TaskStatus) (*T, StateType) {
	taskId := status.GetTaskId().GetValue()

	k.rw.Lock()
	defer k.rw.Unlock()
	task, state := k._get(taskId)

	switch status.GetState() {
	case mesos.TaskState_TASK_STAGING:
		k.handleTaskStaging(task, state, status)
	case mesos.TaskState_TASK_STARTING:
		k.handleTaskStarting(task, state, status)
	case mesos.TaskState_TASK_RUNNING:
		k.handleTaskRunning(task, state, status)
	case mesos.TaskState_TASK_FINISHED:
		k.handleTaskFinished(task, state, status)
	case mesos.TaskState_TASK_FAILED:
		k.handleTaskFailed(task, state, status)
	case mesos.TaskState_TASK_KILLED:
		k.handleTaskKilled(task, state, status)
	case mesos.TaskState_TASK_LOST:
		k.handleTaskLost(task, state, status)
	default:
		log.Warningf("unhandled status update for task: %v", taskId)
	}
	return task.Clone(), state
}

func (k *inMemoryRegistry) handleTaskStaging(task *T, state StateType, status *mesos.TaskStatus) {
	if status.GetSource() != mesos.TaskStatus_SOURCE_MASTER {
		log.Errorf("received STAGING for task %v with unexpected source: %v",
			status.GetTaskId().GetValue(), status.GetSource())
	}
}

func (k *inMemoryRegistry) handleTaskStarting(task *T, state StateType, status *mesos.TaskStatus) {
	// we expect to receive this when a launched task is finally "bound"
	// via the API server. however, there's nothing specific for us to do here.
	switch state {
	case StatePending:
		task.UpdatedTime = time.Now()
		if !task.Has(Bound) {
			task.Set(Bound)
			task.bindTime = task.UpdatedTime
			timeToBind := task.bindTime.Sub(task.launchTime)
			metrics.BindLatency.Observe(metrics.InMicroseconds(timeToBind))
		}
	default:
		taskId := status.GetTaskId().GetValue()
		log.Warningf("Ignore status TASK_STARTING because the task %v is not pending", taskId)
	}
}

func (k *inMemoryRegistry) handleTaskRunning(task *T, state StateType, status *mesos.TaskStatus) {
	taskId := status.GetTaskId().GetValue()
	switch state {
	case StatePending:
		task.UpdatedTime = time.Now()
		log.Infof("Received running status for pending task: %v", taskId)
		fillRunningPodInfo(task, status)
		task.State = StateRunning
	case StateRunning:
		task.UpdatedTime = time.Now()
		log.V(2).Infof("Ignore status TASK_RUNNING because the task %v is already running", taskId)
	case StateFinished:
		log.Warningf("Ignore status TASK_RUNNING because the task %v is already finished", taskId)
	default:
		log.Warningf("Ignore status TASK_RUNNING because the task %v is discarded", taskId)
	}
}

func ParsePodStatusResult(taskStatus *mesos.TaskStatus) (result api.PodStatusResult, err error) {
	if taskStatus.Data != nil {
		err = json.Unmarshal(taskStatus.Data, &result)
	} else {
		err = fmt.Errorf("missing TaskStatus.Data")
	}
	return
}

func fillRunningPodInfo(task *T, taskStatus *mesos.TaskStatus) {
	if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
		// there is no data..
		return
	}
	//TODO(jdef) determine the usefulness of this information (if any)
	if result, err := ParsePodStatusResult(taskStatus); err != nil {
		log.Errorf("invalid TaskStatus.Data for task '%v': %v", task.ID, err)
	} else {
		task.podStatus = result.Status
		log.Infof("received pod status for task %v: %+v", task.ID, result.Status)
	}
}

func (k *inMemoryRegistry) handleTaskFinished(task *T, state StateType, status *mesos.TaskStatus) {
	taskId := status.GetTaskId().GetValue()
	switch state {
	case StatePending:
		panic(fmt.Sprintf("Pending task %v finished, this shouldn't happen", taskId))
	case StateRunning:
		log.V(2).Infof("received finished status for running task: %v", taskId)
		delete(k.podToTask, task.podKey)
		task.State = StateFinished
		task.UpdatedTime = time.Now()
		k.tasksFinished = k.recordFinishedTask(task.ID)
	case StateFinished:
		log.Warningf("Ignore status TASK_FINISHED because the task %v is already finished", taskId)
	default:
		log.Warningf("Ignore status TASK_FINISHED because the task %v is not running", taskId)
	}
}

// record that a task has finished.
// older records are expunged one at a time once the historical ring buffer is saturated.
// assumes the caller is holding the state lock.
func (k *inMemoryRegistry) recordFinishedTask(taskId string) *ring.Ring {
	slot := k.tasksFinished.Next()
	if slot.Value != nil {
		// garbage collect the older finished task from the registry
		gctaskId := slot.Value.(string)
		if gctask, found := k.taskRegistry[gctaskId]; found && gctask.State == StateFinished {
			delete(k.taskRegistry, gctaskId)
		}
	}
	slot.Value = taskId
	return slot
}
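recordFinishedTask leans on container/ring semantics: once the buffer has wrapped, Next() lands on the oldest slot, so each insert past capacity evicts exactly one historical entry. A standalone sketch with a capacity of 2:

func exampleRingEviction() {
	r := ring.New(2)
	for _, id := range []string{"t1", "t2", "t3"} {
		r = r.Next()
		r.Value = id // the third insert overwrites "t1", the oldest entry
	}
}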
func (k *inMemoryRegistry) handleTaskFailed(task *T, state StateType, status *mesos.TaskStatus) {
	switch state {
	case StatePending, StateRunning:
		delete(k.taskRegistry, task.ID)
		delete(k.podToTask, task.podKey)
	}
}

func (k *inMemoryRegistry) handleTaskKilled(task *T, state StateType, status *mesos.TaskStatus) {
	defer func() {
		msg := fmt.Sprintf("task killed: %+v, task %+v", status, task)
		if task != nil && task.Has(Deleted) {
			// we were expecting this, nothing out of the ordinary
			log.V(2).Infoln(msg)
		} else {
			log.Errorln(msg)
		}
	}()
	switch state {
	case StatePending, StateRunning:
		delete(k.taskRegistry, task.ID)
		delete(k.podToTask, task.podKey)
	}
}

func (k *inMemoryRegistry) handleTaskLost(task *T, state StateType, status *mesos.TaskStatus) {
	switch state {
	case StateRunning, StatePending:
		delete(k.taskRegistry, task.ID)
		delete(k.podToTask, task.podKey)
	}
}
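Taken together, the handlers above implement a simple terminal-state policy: FAILED, KILLED, and LOST tasks are removed from the registry immediately, while FINISHED tasks linger in the history ring until expunged. A compact restatement (sketch only, not part of this commit):

func isDroppedOutright(s mesos.TaskState) bool {
	switch s {
	case mesos.TaskState_TASK_FAILED, mesos.TaskState_TASK_KILLED, mesos.TaskState_TASK_LOST:
		return true // deleted from the registry right away
	}
	return false // FINISHED tasks are retained in the finished-task history
}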
320
contrib/mesos/pkg/scheduler/podtask/registry_test.go
Normal file
@@ -0,0 +1,320 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podtask

import (
	"testing"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
	mesos "github.com/mesos/mesos-go/mesosproto"
	"github.com/mesos/mesos-go/mesosutil"
	"github.com/stretchr/testify/assert"
)

func TestInMemoryRegistry_RegisterGetUnregister(t *testing.T) {
	assert := assert.New(t)

	registry := NewInMemoryRegistry()

	// it's empty at the beginning
	tasks := registry.List(func(t *T) bool { return true })
	assert.Empty(tasks)

	// add a task
	a, _ := fakePodTask("a")
	a_clone, err := registry.Register(a, nil)
	assert.NoError(err)
	assert.Equal(a_clone.ID, a.ID)
	assert.Equal(a_clone.podKey, a.podKey)

	// add another task
	b, _ := fakePodTask("b")
	b_clone, err := registry.Register(b, nil)
	assert.NoError(err)
	assert.Equal(b_clone.ID, b.ID)
	assert.Equal(b_clone.podKey, b.podKey)

	// find tasks in the registry
	tasks = registry.List(func(t *T) bool { return true })
	assert.Len(tasks, 2)
	assert.Contains(tasks, a_clone)
	assert.Contains(tasks, b_clone)

	tasks = registry.List(func(t *T) bool { return t.ID == a.ID })
	assert.Len(tasks, 1)
	assert.Contains(tasks, a_clone)

	task, _ := registry.ForPod(a.podKey)
	assert.NotNil(task)
	assert.Equal(task.ID, a.ID)

	task, _ = registry.ForPod(b.podKey)
	assert.NotNil(task)
	assert.Equal(task.ID, b.ID)

	task, _ = registry.ForPod("no-pod-key")
	assert.Nil(task)

	task, _ = registry.Get(a.ID)
	assert.NotNil(task)
	assert.Equal(task.ID, a.ID)

	task, _ = registry.Get("unknown-task-id")
	assert.Nil(task)

	// re-add a task
	a_clone, err = registry.Register(a, nil)
	assert.Error(err)
	assert.Nil(a_clone)

	// re-add a task with another podKey, but the same task id
	another_a := a.Clone()
	another_a.podKey = "another-pod"
	another_a_clone, err := registry.Register(another_a, nil)
	assert.Error(err)
	assert.Nil(another_a_clone)

	// re-add a task with another task ID, but the same podKey
	another_b := b.Clone()
	another_b.ID = "another-task-id"
	another_b_clone, err := registry.Register(another_b, nil)
	assert.Error(err)
	assert.Nil(another_b_clone)

	// unregister a task
	registry.Unregister(b)

	tasks = registry.List(func(t *T) bool { return true })
	assert.Len(tasks, 1)
	assert.Contains(tasks, a)

	// unregister a task that was never registered
	unregistered_task, _ := fakePodTask("unregistered-task")
	registry.Unregister(unregistered_task)
}

func fakeStatusUpdate(taskId string, state mesos.TaskState) *mesos.TaskStatus {
	status := mesosutil.NewTaskStatus(mesosutil.NewTaskID(taskId), state)
	status.Data = []byte("{}") // empty json
	masterSource := mesos.TaskStatus_SOURCE_MASTER
	status.Source = &masterSource
	return status
}

func TestInMemoryRegistry_State(t *testing.T) {
	assert := assert.New(t)

	registry := NewInMemoryRegistry()

	// add a task
	a, _ := fakePodTask("a")
	a_clone, err := registry.Register(a, nil)
	assert.NoError(err)
	assert.Equal(a.State, a_clone.State)

	// update the status
	assert.Equal(a_clone.State, StatePending)
	a_clone, state := registry.UpdateStatus(fakeStatusUpdate(a.ID, mesos.TaskState_TASK_RUNNING))
	assert.Equal(state, StatePending)         // old state
	assert.Equal(a_clone.State, StateRunning) // new state

	// update an unknown task
	unknown_clone, state := registry.UpdateStatus(fakeStatusUpdate("unknown-task-id", mesos.TaskState_TASK_RUNNING))
	assert.Nil(unknown_clone)
	assert.Equal(state, StateUnknown)
}

func TestInMemoryRegistry_Update(t *testing.T) {
	assert := assert.New(t)

	// create offers registry
	ttl := time.Second / 4
	config := offers.RegistryConfig{
		DeclineOffer: func(offerId string) <-chan error {
			return proc.ErrorChan(nil)
		},
		Compat: func(o *mesos.Offer) bool {
			return true
		},
		TTL:       ttl,
		LingerTTL: 2 * ttl,
	}
	storage := offers.CreateRegistry(config)

	// add an offer
	offerId := mesosutil.NewOfferID("foo")
	mesosOffer := &mesos.Offer{Id: offerId}
	storage.Add([]*mesos.Offer{mesosOffer})
	offer, ok := storage.Get(offerId.GetValue())
	assert.True(ok)

	// create registry
	registry := NewInMemoryRegistry()
	a, _ := fakePodTask("a")
	registry.Register(a.Clone(), nil) // clone a here because we change it below

	// state changes are ignored
	a.State = StateRunning
	err := registry.Update(a)
	assert.NoError(err)
	a_clone, _ := registry.Get(a.ID)
	assert.Equal(StatePending, a_clone.State)

	// offer is updated while pending
	a.Offer = offer
	err = registry.Update(a)
	assert.NoError(err)
	a_clone, _ = registry.Get(a.ID)
	assert.Equal(offer.Id(), a_clone.Offer.Id())

	// spec is updated while pending
	a.Spec = Spec{SlaveID: "slave-1"}
	err = registry.Update(a)
	assert.NoError(err)
	a_clone, _ = registry.Get(a.ID)
	assert.Equal("slave-1", a_clone.Spec.SlaveID)

	// flags are updated while pending
	a.Flags[Launched] = struct{}{}
	err = registry.Update(a)
	assert.NoError(err)
	a_clone, _ = registry.Get(a.ID)

	_, found_launched := a_clone.Flags[Launched]
	assert.True(found_launched)

	// flags are updated while running
	registry.UpdateStatus(fakeStatusUpdate(a.ID, mesos.TaskState_TASK_RUNNING))
	a.Flags[Bound] = struct{}{}
	err = registry.Update(a)
	assert.NoError(err)
	a_clone, _ = registry.Get(a.ID)

	_, found_launched = a_clone.Flags[Launched]
	assert.True(found_launched)
	_, found_bound := a_clone.Flags[Bound]
	assert.True(found_bound)

	// spec is ignored while running
	a.Spec = Spec{SlaveID: "slave-2"}
	err = registry.Update(a)
	assert.NoError(err)
	a_clone, _ = registry.Get(a.ID)
	assert.Equal("slave-1", a_clone.Spec.SlaveID)

	// error when finished
	registry.UpdateStatus(fakeStatusUpdate(a.ID, mesos.TaskState_TASK_FINISHED))
	err = registry.Update(a)
	assert.Error(err)

	// update an unknown task
	unknown_task, _ := fakePodTask("unknown-task")
	err = registry.Update(unknown_task)
	assert.Error(err)

	// update a nil task
	err = registry.Update(nil)
	assert.Nil(err)
}

type transition struct {
	statusUpdate  mesos.TaskState
	expectedState *StateType
	expectPanic   bool
}

func NewTransition(statusUpdate mesos.TaskState, expectedState StateType) transition {
	return transition{statusUpdate: statusUpdate, expectedState: &expectedState, expectPanic: false}
}

func NewTransitionToDeletedTask(statusUpdate mesos.TaskState) transition {
	return transition{statusUpdate: statusUpdate, expectedState: nil, expectPanic: false}
}

func NewTransitionWhichPanics(statusUpdate mesos.TaskState) transition {
	return transition{statusUpdate: statusUpdate, expectPanic: true}
}

func testStateTrace(t *testing.T, transitions []transition) *Registry {
	assert := assert.New(t)

	registry := NewInMemoryRegistry()
	a, _ := fakePodTask("a")
	a, _ = registry.Register(a, nil)

	// initial pending state
	assert.Equal(a.State, StatePending)

	for _, transition := range transitions {
		if transition.expectPanic {
			assert.Panics(func() {
				registry.UpdateStatus(fakeStatusUpdate(a.ID, transition.statusUpdate))
			})
		} else {
			a, _ = registry.UpdateStatus(fakeStatusUpdate(a.ID, transition.statusUpdate))
			if transition.expectedState == nil {
				a, _ = registry.Get(a.ID)
				assert.Nil(a, "expected task to be deleted from registry after status update to %v", transition.statusUpdate)
			} else {
				assert.Equal(a.State, *transition.expectedState)
			}
		}
	}

	return &registry
}

func TestInMemoryRegistry_TaskLifeCycle(t *testing.T) {
	testStateTrace(t, []transition{
		NewTransition(mesos.TaskState_TASK_STAGING, StatePending),
		NewTransition(mesos.TaskState_TASK_STARTING, StatePending),
		NewTransitionWhichPanics(mesos.TaskState_TASK_FINISHED),
		NewTransition(mesos.TaskState_TASK_RUNNING, StateRunning),
		NewTransition(mesos.TaskState_TASK_RUNNING, StateRunning),
		NewTransition(mesos.TaskState_TASK_STARTING, StateRunning),
		NewTransition(mesos.TaskState_TASK_FINISHED, StateFinished),
		NewTransition(mesos.TaskState_TASK_FINISHED, StateFinished),
		NewTransition(mesos.TaskState_TASK_RUNNING, StateFinished),
	})
}

func TestInMemoryRegistry_NotFinished(t *testing.T) {
	// all of these behave the same
	notFinishedStates := []mesos.TaskState{
		mesos.TaskState_TASK_FAILED,
		mesos.TaskState_TASK_KILLED,
		mesos.TaskState_TASK_LOST,
	}
	for _, notFinishedState := range notFinishedStates {
		testStateTrace(t, []transition{
			NewTransitionToDeletedTask(notFinishedState),
		})

		testStateTrace(t, []transition{
			NewTransition(mesos.TaskState_TASK_RUNNING, StateRunning),
			NewTransitionToDeletedTask(notFinishedState),
		})

		testStateTrace(t, []transition{
			NewTransition(mesos.TaskState_TASK_RUNNING, StateRunning),
			NewTransition(mesos.TaskState_TASK_FINISHED, StateFinished),
			NewTransition(notFinishedState, StateFinished),
		})
	}
}
924
contrib/mesos/pkg/scheduler/scheduler.go
Normal file
@@ -0,0 +1,924 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduler

import (
	"fmt"
	"io"
	"math"
	"net/http"
	"reflect"
	"sync"
	"time"

	execcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/config"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
	offerMetrics "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers/metrics"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
	schedcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/metrics"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/uid"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/tools"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
	log "github.com/golang/glog"
	mesos "github.com/mesos/mesos-go/mesosproto"
	mutil "github.com/mesos/mesos-go/mesosutil"
	bindings "github.com/mesos/mesos-go/scheduler"
)

type Slave struct {
	HostName string
}

func newSlave(hostName string) *Slave {
	return &Slave{
		HostName: hostName,
	}
}

type slaveStorage struct {
	sync.Mutex
	slaves map[string]*Slave // SlaveID => slave.
}

func newSlaveStorage() *slaveStorage {
	return &slaveStorage{
		slaves: make(map[string]*Slave),
	}
}

// Create a mapping between a slaveID and slave if one does not already exist.
func (self *slaveStorage) checkAndAdd(slaveId, slaveHostname string) {
	self.Lock()
	defer self.Unlock()
	_, exists := self.slaves[slaveId]
	if !exists {
		self.slaves[slaveId] = newSlave(slaveHostname)
	}
}

func (self *slaveStorage) getSlaveIds() []string {
	self.Lock()
	defer self.Unlock()
	slaveIds := make([]string, 0, len(self.slaves))
	for slaveID := range self.slaves {
		slaveIds = append(slaveIds, slaveID)
	}
	return slaveIds
}

func (self *slaveStorage) getSlave(slaveId string) (*Slave, bool) {
	self.Lock()
	defer self.Unlock()
	slave, exists := self.slaves[slaveId]
	return slave, exists
}
type PluginInterface interface {
	// the apiserver may have a different state for the pod than we do,
	// so reconcile our records, but only for this one pod
	reconcilePod(api.Pod)

	// execute the scheduling plugin; should start a goroutine and return immediately
	Run(<-chan struct{})
}

// KubernetesScheduler implements:
// 1: A mesos scheduler.
// 2: A kubernetes scheduler plugin.
// 3: A kubernetes pod.Registry.
type KubernetesScheduler struct {
	// We use a lock here to avoid races
	// between invoking the mesos callback
	// and invoking the pod registry interfaces.
	// In particular, changes to podtask.T objects are currently guarded by this lock.
	*sync.RWMutex

	// Config related, write-once

	schedcfg          *schedcfg.Config
	executor          *mesos.ExecutorInfo
	executorGroup     uint64
	scheduleFunc      PodScheduleFunc
	client            *client.Client
	etcdClient        tools.EtcdGetSet
	failoverTimeout   float64 // in seconds
	reconcileInterval int64

	// Mesos context.

	driver         bindings.SchedulerDriver // late initialization
	frameworkId    *mesos.FrameworkID
	masterInfo     *mesos.MasterInfo
	registered     bool
	registration   chan struct{} // signal chan that closes upon first successful registration
	onRegistration sync.Once
	offers         offers.Registry
	slaves         *slaveStorage

	// unsafe state, needs to be guarded

	taskRegistry podtask.Registry

	// via deferred init

	plugin             PluginInterface
	reconciler         *Reconciler
	reconcileCooldown  time.Duration
	asRegisteredMaster proc.Doer
	terminate          <-chan struct{} // signal chan, closes when we should kill background tasks
}

type Config struct {
	Schedcfg          schedcfg.Config
	Executor          *mesos.ExecutorInfo
	ScheduleFunc      PodScheduleFunc
	Client            *client.Client
	EtcdClient        tools.EtcdGetSet
	FailoverTimeout   float64
	ReconcileInterval int64
	ReconcileCooldown time.Duration
}

// New creates a new KubernetesScheduler
func New(config Config) *KubernetesScheduler {
	var k *KubernetesScheduler
	k = &KubernetesScheduler{
		schedcfg:          &config.Schedcfg,
		RWMutex:           new(sync.RWMutex),
		executor:          config.Executor,
		executorGroup:     uid.Parse(config.Executor.ExecutorId.GetValue()).Group(),
		scheduleFunc:      config.ScheduleFunc,
		client:            config.Client,
		etcdClient:        config.EtcdClient,
		failoverTimeout:   config.FailoverTimeout,
		reconcileInterval: config.ReconcileInterval,
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				// filter the offers: the executor IDs must not identify a kubelet-
				// executor with a group that doesn't match ours
				for _, eid := range o.GetExecutorIds() {
					execuid := uid.Parse(eid.GetValue())
					if execuid.Name() == execcfg.DefaultInfoID && execuid.Group() != k.executorGroup {
						return false
					}
				}
				return true
			},
			DeclineOffer: func(id string) <-chan error {
				errOnce := proc.NewErrorOnce(k.terminate)
				errOuter := k.asRegisteredMaster.Do(func() {
					var err error
					defer errOnce.Report(err)
					offerId := mutil.NewOfferID(id)
					filters := &mesos.Filters{}
					_, err = k.driver.DeclineOffer(offerId, filters)
				})
				return errOnce.Send(errOuter).Err()
			},
			// remember expired offers so that we can tell if a previous scheduling decision relied on one
			LingerTTL:     config.Schedcfg.OfferLingerTTL.Duration,
			TTL:           config.Schedcfg.OfferTTL.Duration,
			ListenerDelay: config.Schedcfg.ListenerDelay.Duration,
		}),
		slaves:            newSlaveStorage(),
		taskRegistry:      podtask.NewInMemoryRegistry(),
		reconcileCooldown: config.ReconcileCooldown,
		registration:      make(chan struct{}),
		asRegisteredMaster: proc.DoerFunc(func(proc.Action) <-chan error {
			return proc.ErrorChanf("cannot execute action with unregistered scheduler")
		}),
	}
	return k
}
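A condensed sketch of constructing the scheduler from this Config; every value below is a placeholder, and FCFSScheduleFunc is assumed to be this package's first-come-first-served PodScheduleFunc:

func newExampleScheduler(executorInfo *mesos.ExecutorInfo) *KubernetesScheduler {
	return New(Config{
		Schedcfg:          schedcfg.Config{}, // real deployments populate this from flags/config
		Executor:          executorInfo,      // must carry a uid-parseable ExecutorId
		ScheduleFunc:      FCFSScheduleFunc,  // assumed FCFS scheduling function in this package
		FailoverTimeout:   60,                // seconds; placeholder
		ReconcileInterval: 300,               // seconds; placeholder
		ReconcileCooldown: 15 * time.Second,  // placeholder
	})
}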
func (k *KubernetesScheduler) Init(electedMaster proc.Process, pl PluginInterface, mux *http.ServeMux) error {
	log.V(1).Infoln("initializing kubernetes mesos scheduler")

	k.asRegisteredMaster = proc.DoerFunc(func(a proc.Action) <-chan error {
		if !k.registered {
			return proc.ErrorChanf("failed to execute action, scheduler is disconnected")
		}
		return electedMaster.Do(a)
	})
	k.terminate = electedMaster.Done()
	k.plugin = pl
	k.offers.Init(k.terminate)
	k.InstallDebugHandlers(mux)
	return k.recoverTasks()
}

func (k *KubernetesScheduler) asMaster() proc.Doer {
	k.RLock()
	defer k.RUnlock()
	return k.asRegisteredMaster
}

func (k *KubernetesScheduler) InstallDebugHandlers(mux *http.ServeMux) {
	wrappedHandler := func(uri string, h http.Handler) {
		mux.HandleFunc(uri, func(w http.ResponseWriter, r *http.Request) {
			ch := make(chan struct{})
			closer := runtime.Closer(ch)
			proc.OnError(k.asMaster().Do(func() {
				defer closer()
				h.ServeHTTP(w, r)
			}), func(err error) {
				defer closer()
				log.Warningf("failed HTTP request for %s: %v", uri, err)
				w.WriteHeader(http.StatusServiceUnavailable)
			}, k.terminate)
			select {
			case <-time.After(k.schedcfg.HttpHandlerTimeout.Duration):
				log.Warningf("timed out waiting for request to be processed")
				w.WriteHeader(http.StatusServiceUnavailable)
				return
			case <-ch: // noop
			}
		})
	}
	requestReconciliation := func(uri string, requestAction func()) {
		wrappedHandler(uri, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			requestAction()
			w.WriteHeader(http.StatusNoContent)
		}))
	}
	requestReconciliation("/debug/actions/requestExplicit", k.reconciler.RequestExplicit)
	requestReconciliation("/debug/actions/requestImplicit", k.reconciler.RequestImplicit)

	wrappedHandler("/debug/actions/kamikaze", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		slaves := k.slaves.getSlaveIds()
		for _, slaveId := range slaves {
			_, err := k.driver.SendFrameworkMessage(
				k.executor.ExecutorId,
				mutil.NewSlaveID(slaveId),
				messages.Kamikaze)
			if err != nil {
				log.Warningf("failed to send kamikaze message to slave %s: %v", slaveId, err)
			} else {
				io.WriteString(w, fmt.Sprintf("kamikaze slave %s\n", slaveId))
			}
		}
		io.WriteString(w, "OK")
	}))
}
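The wrapped handlers above refuse requests while the scheduler is disconnected (503) and answer reconciliation requests with 204. A quick client-side sketch; the scheduler address is a placeholder:

func pokeReconciliation() {
	resp, err := http.Get("http://127.0.0.1:10251/debug/actions/requestExplicit") // placeholder address
	if err != nil {
		return
	}
	defer resp.Body.Close()
	// expect 204 No Content while registered; 503 if the scheduler is disconnected
}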
func (k *KubernetesScheduler) Registration() <-chan struct{} {
|
||||
return k.registration
|
||||
}

// Registered is called when the scheduler registered with the master successfully.
func (k *KubernetesScheduler) Registered(drv bindings.SchedulerDriver, fid *mesos.FrameworkID, mi *mesos.MasterInfo) {
	log.Infof("Scheduler registered with the master: %v with frameworkId: %v\n", mi, fid)

	k.driver = drv
	k.frameworkId = fid
	k.masterInfo = mi
	k.registered = true

	k.onRegistration.Do(func() { k.onInitialRegistration(drv) })
	k.reconciler.RequestExplicit()
}

func (k *KubernetesScheduler) storeFrameworkId() {
	// TODO(jdef): port FrameworkId store to generic Kubernetes config store as soon as available
	_, err := k.etcdClient.Set(meta.FrameworkIDKey, k.frameworkId.GetValue(), uint64(k.failoverTimeout))
	if err != nil {
		log.Errorf("failed to renew frameworkId TTL: %v", err)
	}
}

// Reregistered is called when the scheduler re-registered with the master successfully.
// This happens when the master fails over.
func (k *KubernetesScheduler) Reregistered(drv bindings.SchedulerDriver, mi *mesos.MasterInfo) {
	log.Infof("Scheduler reregistered with the master: %v\n", mi)

	k.driver = drv
	k.masterInfo = mi
	k.registered = true

	k.onRegistration.Do(func() { k.onInitialRegistration(drv) })
	k.reconciler.RequestExplicit()
}

// perform one-time initialization actions upon the first registration event received from Mesos.
func (k *KubernetesScheduler) onInitialRegistration(driver bindings.SchedulerDriver) {
	defer close(k.registration)

	if k.failoverTimeout > 0 {
		refreshInterval := k.schedcfg.FrameworkIdRefreshInterval.Duration
		if k.failoverTimeout < k.schedcfg.FrameworkIdRefreshInterval.Duration.Seconds() {
			refreshInterval = time.Duration(math.Max(1, k.failoverTimeout/2)) * time.Second
		}
		go runtime.Until(k.storeFrameworkId, refreshInterval, k.terminate)
	}

	r1 := k.makeTaskRegistryReconciler()
	r2 := k.makePodRegistryReconciler()

	k.reconciler = newReconciler(k.asRegisteredMaster, k.makeCompositeReconciler(r1, r2),
		k.reconcileCooldown, k.schedcfg.ExplicitReconciliationAbortTimeout.Duration, k.terminate)
	go k.reconciler.Run(driver)

	if k.reconcileInterval > 0 {
		ri := time.Duration(k.reconcileInterval) * time.Second
		time.AfterFunc(k.schedcfg.InitialImplicitReconciliationDelay.Duration, func() { runtime.Until(k.reconciler.RequestImplicit, ri, k.terminate) })
		log.Infof("will perform implicit task reconciliation at interval: %v after %v", ri, k.schedcfg.InitialImplicitReconciliationDelay.Duration)
	}
}

// Disconnected is called when the scheduler loses connection to the master.
func (k *KubernetesScheduler) Disconnected(driver bindings.SchedulerDriver) {
	log.Infof("Master disconnected!\n")

	k.registered = false

	// discard all cached offers to avoid unnecessary TASK_LOST updates
	k.offers.Invalidate("")
}

// ResourceOffers is called when the scheduler receives some offers from the master.
func (k *KubernetesScheduler) ResourceOffers(driver bindings.SchedulerDriver, offers []*mesos.Offer) {
	log.V(2).Infof("Received offers %+v", offers)

	// Record the offers in the global offer map as well as each slave's offer map.
	k.offers.Add(offers)
	for _, offer := range offers {
		slaveId := offer.GetSlaveId().GetValue()
		k.slaves.checkAndAdd(slaveId, offer.GetHostname())
	}
}

// OfferRescinded is called when the resources are rescinded from the scheduler.
func (k *KubernetesScheduler) OfferRescinded(driver bindings.SchedulerDriver, offerId *mesos.OfferID) {
	log.Infof("Offer rescinded %v\n", offerId)

	oid := offerId.GetValue()
	k.offers.Delete(oid, offerMetrics.OfferRescinded)
}

// StatusUpdate is called when a status update message is sent to the scheduler.
func (k *KubernetesScheduler) StatusUpdate(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {

	source, reason := "none", "none"
	if taskStatus.Source != nil {
		source = (*taskStatus.Source).String()
	}
	if taskStatus.Reason != nil {
		reason = (*taskStatus.Reason).String()
	}
	taskState := taskStatus.GetState()
	metrics.StatusUpdates.WithLabelValues(source, reason, taskState.String()).Inc()

	log.Infof(
		"task status update %q from %q for task %q on slave %q executor %q for reason %q",
		taskState.String(),
		source,
		taskStatus.TaskId.GetValue(),
		taskStatus.SlaveId.GetValue(),
		taskStatus.ExecutorId.GetValue(),
		reason)

	switch taskState {
	case mesos.TaskState_TASK_RUNNING, mesos.TaskState_TASK_FINISHED, mesos.TaskState_TASK_STARTING, mesos.TaskState_TASK_STAGING:
		if _, state := k.taskRegistry.UpdateStatus(taskStatus); state == podtask.StateUnknown {
			if taskState != mesos.TaskState_TASK_FINISHED {
				//TODO(jdef) what if I receive this after a TASK_LOST or TASK_KILLED?
				//I don't want to reincarnate then.. TASK_LOST is a special case because
				//the master is stateless and there are scenarios where I may get TASK_LOST
				//followed by TASK_RUNNING.
				//TODO(jdef) consider running this asynchronously since there are API server
				//calls that may be made
				k.reconcileNonTerminalTask(driver, taskStatus)
			} // else, we don't really care about FINISHED tasks that aren't registered
			return
		}
		if _, exists := k.slaves.getSlave(taskStatus.GetSlaveId().GetValue()); !exists {
			// a registered task has an update reported by a slave that we don't recognize.
			// this should never happen! So we don't reconcile it.
			log.Errorf("Ignore status %+v because the slave does not exist", taskStatus)
			return
		}
	case mesos.TaskState_TASK_FAILED:
		if task, _ := k.taskRegistry.UpdateStatus(taskStatus); task != nil {
			if task.Has(podtask.Launched) && !task.Has(podtask.Bound) {
				go k.plugin.reconcilePod(task.Pod)
				return
			}
		} else {
			// unknown task failed, not much we can do about it
			return
		}
		// last-ditch effort to reconcile our records
		fallthrough
	case mesos.TaskState_TASK_LOST, mesos.TaskState_TASK_KILLED:
		k.reconcileTerminalTask(driver, taskStatus)
	}
}

func (k *KubernetesScheduler) reconcileTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
	task, state := k.taskRegistry.UpdateStatus(taskStatus)

	if (state == podtask.StateRunning || state == podtask.StatePending) && taskStatus.SlaveId != nil &&
		((taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER && taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION) ||
			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED) ||
			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED)) {
		//--
		// pod-task has metadata that refers to:
		// (1) a task that Mesos no longer knows about, or else
		// (2) a pod that the Kubelet will never report as "failed"
		// For now, destroy the pod and hope that there's a replication controller backing it up.
		// TODO(jdef) for case #2 don't delete the pod, just update its status to Failed
		pod := &task.Pod
		log.Warningf("deleting rogue pod %v/%v for lost task %v", pod.Namespace, pod.Name, task.ID)
		if err := k.client.Pods(pod.Namespace).Delete(pod.Name, nil); err != nil && !errors.IsNotFound(err) {
			log.Errorf("failed to delete pod %v/%v for terminal task %v: %v", pod.Namespace, pod.Name, task.ID, err)
		}
	} else if taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED || taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED {
		// attempt to prevent dangling pods in the pod and task registries
		log.V(1).Infof("request explicit reconciliation to clean up for task %v after executor reported (terminated/unregistered)", taskStatus.TaskId.GetValue())
		k.reconciler.RequestExplicit()
	} else if taskStatus.GetState() == mesos.TaskState_TASK_LOST && state == podtask.StateRunning && taskStatus.ExecutorId != nil && taskStatus.SlaveId != nil {
		//TODO(jdef) this may not be meaningful once we have proper checkpointing and master detection
		//If we're reconciling and receive this then the executor may be
		//running a task that we need it to kill. It's possible that the framework
		//is unrecognized by the master at this point, so KillTask is not guaranteed
		//to do anything. The underlying driver transport may be able to send a
		//FrameworkMessage directly to the slave to terminate the task.
		log.V(2).Infof("forwarding TASK_LOST message to executor %v on slave %v", taskStatus.ExecutorId, taskStatus.SlaveId)
		data := fmt.Sprintf("task-lost:%s", task.ID) //TODO(jdef) use a real message type
		if _, err := driver.SendFrameworkMessage(taskStatus.ExecutorId, taskStatus.SlaveId, data); err != nil {
			log.Error(err.Error())
		}
	}
}

// reconcile an unknown (from the perspective of our registry) non-terminal task
func (k *KubernetesScheduler) reconcileNonTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
	// attempt to recover task from pod info:
	// - task data may contain an api.PodStatusResult; if status.reason == REASON_RECONCILIATION then status.data == nil
	// - the Name can be parsed by container.ParseFullName() to yield a pod Name and Namespace
	// - pull the pod metadata down from the api server
	// - perform task recovery based on pod metadata
	taskId := taskStatus.TaskId.GetValue()
	if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
		// there will be no data in the task status that we can use to determine the associated pod
		switch taskStatus.GetState() {
		case mesos.TaskState_TASK_STAGING:
			// there is still hope for this task, don't kill it just yet
			//TODO(jdef) there should probably be a limit for how long we tolerate tasks stuck in this state
			return
		default:
			// for TASK_{STARTING,RUNNING} we should have already attempted recoverTasks().
			// if the scheduler failed over before the executor fired TASK_STARTING, then we should *not*
			// be processing this reconciliation update before we process the one from the executor.
			// point: we don't know what this task is (perhaps there was unrecoverable metadata in the pod),
			// so it gets killed.
			log.Errorf("killing non-terminal, unrecoverable task %v", taskId)
		}
	} else if podStatus, err := podtask.ParsePodStatusResult(taskStatus); err != nil {
		// possible rogue pod exists at this point because we can't identify it; should kill the task
		log.Errorf("possible rogue pod; illegal task status data for task %v, expected an api.PodStatusResult: %v", taskId, err)
	} else if name, namespace, err := container.ParsePodFullName(podStatus.Name); err != nil {
		// possible rogue pod exists at this point because we can't identify it; should kill the task
		log.Errorf("possible rogue pod; illegal api.PodStatusResult, unable to parse full pod name from: '%v' for task %v: %v",
			podStatus.Name, taskId, err)
	} else if pod, err := k.client.Pods(namespace).Get(name); err == nil {
		if t, ok, err := podtask.RecoverFrom(*pod); ok {
			log.Infof("recovered task %v from metadata in pod %v/%v", taskId, namespace, name)
			_, err := k.taskRegistry.Register(t, nil)
			if err != nil {
				// someone beat us to it?!
				log.Warningf("failed to register recovered task: %v", err)
				return
			} else {
				k.taskRegistry.UpdateStatus(taskStatus)
			}
			return
		} else if err != nil {
			//should kill the pod and the task
			log.Errorf("killing pod, failed to recover task from pod %v/%v: %v", namespace, name, err)
			if err := k.client.Pods(namespace).Delete(name, nil); err != nil {
				log.Errorf("failed to delete pod %v/%v: %v", namespace, name, err)
			}
		} else {
			//this is pretty unexpected: we received a TASK_{STARTING,RUNNING} message, but the apiserver's pod
			//metadata is not appropriate for task reconstruction -- which should almost certainly never
			//be the case unless someone swapped out the pod on us (and kept the same namespace/name) while
			//we were failed over.

			//kill this task, allow the newly launched scheduler to schedule the new pod
			log.Warningf("unexpected pod metadata for task %v in apiserver, assuming new unscheduled pod spec: %+v", taskId, pod)
		}
	} else if errors.IsNotFound(err) {
		// pod lookup failed, should delete the task since the pod is no longer valid; may be redundant, that's ok
		log.Infof("killing task %v since pod %v/%v no longer exists", taskId, namespace, name)
	} else if errors.IsServerTimeout(err) {
		log.V(2).Infof("failed to reconcile task due to API server timeout: %v", err)
		return
	} else {
		log.Errorf("unexpected API server error, aborting reconcile for task %v: %v", taskId, err)
		return
	}
	if _, err := driver.KillTask(taskStatus.TaskId); err != nil {
		log.Errorf("failed to kill task %v: %v", taskId, err)
	}
}

// FrameworkMessage is called when the scheduler receives a message from the executor.
func (k *KubernetesScheduler) FrameworkMessage(driver bindings.SchedulerDriver,
	executorId *mesos.ExecutorID, slaveId *mesos.SlaveID, message string) {
	log.Infof("Received messages from executor %v of slave %v, %v\n", executorId, slaveId, message)
}

// SlaveLost is called when some slave is lost.
func (k *KubernetesScheduler) SlaveLost(driver bindings.SchedulerDriver, slaveId *mesos.SlaveID) {
	log.Infof("Slave %v is lost\n", slaveId)

	sid := slaveId.GetValue()
	k.offers.InvalidateForSlave(sid)

	// TODO(jdef): delete slave from our internal list? probably not since we may need to reconcile
	// tasks. it would be nice to somehow flag the slave as lost so that, perhaps, we can periodically
	// flush lost slaves older than X, and for which no tasks or pods reference.

	// unfinished tasks/pods will be dropped. use a replication controller if you want pods to
	// be restarted when slaves die.
}

// ExecutorLost is called when some executor is lost.
func (k *KubernetesScheduler) ExecutorLost(driver bindings.SchedulerDriver, executorId *mesos.ExecutorID, slaveId *mesos.SlaveID, status int) {
	log.Infof("Executor %v of slave %v is lost, status: %v\n", executorId, slaveId, status)
	// TODO(yifan): Restart any unfinished tasks of the executor.
}

// Error is called when there is an unrecoverable error in the scheduler or scheduler driver.
// The driver should have been aborted before this is invoked.
func (k *KubernetesScheduler) Error(driver bindings.SchedulerDriver, message string) {
	log.Fatalf("fatal scheduler error: %v\n", message)
}

// filter func used for explicit task reconciliation, selects only non-terminal tasks which
// have been communicated to mesos (read: launched).
func explicitTaskFilter(t *podtask.T) bool {
	switch t.State {
	case podtask.StateRunning:
		return true
	case podtask.StatePending:
		return t.Has(podtask.Launched)
	default:
		return false
	}
}

// invoke the given ReconcilerAction funcs in sequence, aborting the sequence if reconciliation
// is cancelled. if any other errors occur the composite reconciler will attempt to complete the
// sequence, reporting only the last generated error.
func (k *KubernetesScheduler) makeCompositeReconciler(actions ...ReconcilerAction) ReconcilerAction {
	if x := len(actions); x == 0 {
		// programming error
		panic("no actions specified for composite reconciler")
	} else if x == 1 {
		return actions[0]
	}
	chained := func(d bindings.SchedulerDriver, c <-chan struct{}, a, b ReconcilerAction) <-chan error {
		ech := a(d, c)
		ch := make(chan error, 1)
		go func() {
			select {
			case <-k.terminate:
			case <-c:
			case e := <-ech:
				if e != nil {
					ch <- e
					return
				}
				ech = b(d, c)
				select {
				case <-k.terminate:
				case <-c:
				case e := <-ech:
					if e != nil {
						ch <- e
						return
					}
					close(ch)
					return
				}
			}
			ch <- fmt.Errorf("aborting composite reconciler action")
		}()
		return ch
	}
	result := func(d bindings.SchedulerDriver, c <-chan struct{}) <-chan error {
		return chained(d, c, actions[0], actions[1])
	}
	for i := 2; i < len(actions); i++ {
		i := i
		prev := result // capture the current composite by value; closing over the mutable var would self-recurse
		next := func(d bindings.SchedulerDriver, c <-chan struct{}) <-chan error {
			return chained(d, c, ReconcilerAction(prev), actions[i])
		}
		result = next
	}
	return ReconcilerAction(result)
}
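
// Usage sketch (illustrative only): onInitialRegistration composes the two
// registry reconcilers this way; the composite runs them in sequence and
// reports only the last error unless the sequence is cancelled or terminated:
//
//	action := k.makeCompositeReconciler(
//		k.makeTaskRegistryReconciler(),
//		k.makePodRegistryReconciler(),
//	)
//	if err := <-action(driver, cancel); err != nil {
//		log.Errorf("composite reconciliation failed: %v", err)
//	}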

// reconciler action factory, performs explicit task reconciliation for non-terminal
// tasks listed in the scheduler's internal taskRegistry.
func (k *KubernetesScheduler) makeTaskRegistryReconciler() ReconcilerAction {
	return ReconcilerAction(func(drv bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error {
		taskToSlave := make(map[string]string)
		for _, t := range k.taskRegistry.List(explicitTaskFilter) {
			if t.Spec.SlaveID != "" {
				taskToSlave[t.ID] = t.Spec.SlaveID
			}
		}
		return proc.ErrorChan(k.explicitlyReconcileTasks(drv, taskToSlave, cancel))
	})
}

// reconciler action factory, performs explicit task reconciliation for non-terminal
// tasks identified by annotations in the Kubernetes pod registry.
func (k *KubernetesScheduler) makePodRegistryReconciler() ReconcilerAction {
	return ReconcilerAction(func(drv bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error {
		ctx := api.NewDefaultContext()
		podList, err := k.client.Pods(api.NamespaceValue(ctx)).List(labels.Everything(), fields.Everything())
		if err != nil {
			return proc.ErrorChanf("failed to reconcile pod registry: %v", err)
		}
		taskToSlave := make(map[string]string)
		for _, pod := range podList.Items {
			if len(pod.Annotations) == 0 {
				continue
			}
			taskId, found := pod.Annotations[meta.TaskIdKey]
			if !found {
				continue
			}
			slaveId, found := pod.Annotations[meta.SlaveIdKey]
			if !found {
				continue
			}
			taskToSlave[taskId] = slaveId
		}
		return proc.ErrorChan(k.explicitlyReconcileTasks(drv, taskToSlave, cancel))
	})
}

// execute an explicit task reconciliation, as per http://mesos.apache.org/documentation/latest/reconciliation/
func (k *KubernetesScheduler) explicitlyReconcileTasks(driver bindings.SchedulerDriver, taskToSlave map[string]string, cancel <-chan struct{}) error {
	log.Info("explicit reconcile tasks")

	// tell mesos to send us the latest status updates for all the non-terminal tasks that we know about
	statusList := []*mesos.TaskStatus{}
	remaining := util.KeySet(reflect.ValueOf(taskToSlave))
	for taskId, slaveId := range taskToSlave {
		if slaveId == "" {
			delete(taskToSlave, taskId)
			continue
		}
		statusList = append(statusList, &mesos.TaskStatus{
			TaskId:  mutil.NewTaskID(taskId),
			SlaveId: mutil.NewSlaveID(slaveId),
			State:   mesos.TaskState_TASK_RUNNING.Enum(), // req'd field, doesn't have to reflect reality
		})
	}

	select {
	case <-cancel:
		return reconciliationCancelledErr
	default:
		if _, err := driver.ReconcileTasks(statusList); err != nil {
			return err
		}
	}

	start := time.Now()
	first := true
	for backoff := 1 * time.Second; first || remaining.Len() > 0; backoff = backoff * 2 {
		first = false
		// nothing to do here other than wait for status updates..
		if backoff > k.schedcfg.ExplicitReconciliationMaxBackoff.Duration {
			backoff = k.schedcfg.ExplicitReconciliationMaxBackoff.Duration
		}
		select {
		case <-cancel:
			return reconciliationCancelledErr
		case <-time.After(backoff):
			for taskId := range remaining {
				if task, _ := k.taskRegistry.Get(taskId); task != nil && explicitTaskFilter(task) && task.UpdatedTime.Before(start) {
					// keep this task in remaining list
					continue
				}
				remaining.Delete(taskId)
			}
		}
	}
	return nil
}
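
// The wait loop above applies a capped exponential backoff between checks of
// the task registry. A minimal standalone sketch of the same pattern (done,
// cancel and maxBackoff are illustrative names, not part of this package):
//
//	backoff := 1 * time.Second
//	for !done() {
//		if backoff > maxBackoff {
//			backoff = maxBackoff
//		}
//		select {
//		case <-cancel:
//			return
//		case <-time.After(backoff):
//			// re-check state here
//		}
//		backoff *= 2
//	}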

var (
	reconciliationCancelledErr = fmt.Errorf("explicit task reconciliation cancelled")
)

type ReconcilerAction func(driver bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error

type Reconciler struct {
	proc.Doer
	Action                             ReconcilerAction
	explicit                           chan struct{}   // send an empty struct to trigger explicit reconciliation
	implicit                           chan struct{}   // send an empty struct to trigger implicit reconciliation
	done                               <-chan struct{} // close this when you want the reconciler to exit
	cooldown                           time.Duration
	explicitReconciliationAbortTimeout time.Duration
}

func newReconciler(doer proc.Doer, action ReconcilerAction,
	cooldown, explicitReconciliationAbortTimeout time.Duration, done <-chan struct{}) *Reconciler {
	return &Reconciler{
		Doer:     doer,
		explicit: make(chan struct{}, 1),
		implicit: make(chan struct{}, 1),
		cooldown: cooldown,
		explicitReconciliationAbortTimeout: explicitReconciliationAbortTimeout,
		done: done,
		Action: func(driver bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error {
			// trigger the reconciler action in the doer's execution context,
			// but it could take a while and the scheduler needs to be able to
			// process updates, the callbacks for which ALSO execute in the SAME
			// deferred execution context -- so the action MUST be executed async.
			errOnce := proc.NewErrorOnce(cancel)
			return errOnce.Send(doer.Do(func() {
				// only triggers the action if we're the currently elected,
				// registered master and runs the action async.
				go func() {
					var err <-chan error
					// use a closure so err is evaluated after the action runs,
					// not when the defer statement is registered
					defer func() { errOnce.Send(err) }()
					err = action(driver, cancel)
				}()
			})).Err()
		},
	}
}

func (r *Reconciler) RequestExplicit() {
	select {
	case r.explicit <- struct{}{}: // noop
	default: // request queue full; noop
	}
}

func (r *Reconciler) RequestImplicit() {
	select {
	case r.implicit <- struct{}{}: // noop
	default: // request queue full; noop
	}
}
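
// RequestExplicit and RequestImplicit use a common Go idiom: a buffered
// channel of capacity 1 combined with a non-blocking send coalesces any
// number of concurrent requests into at most one pending trigger. Sketch
// (illustrative only):
//
//	trigger := make(chan struct{}, 1)
//	request := func() {
//		select {
//		case trigger <- struct{}{}: // trigger queued
//		default: // one already pending; drop this request
//		}
//	}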

// execute task reconciliation, returns when r.done is closed. intended to run as a goroutine.
// if reconciliation is requested while another is in progress, the in-progress operation will be
// cancelled before the new reconciliation operation begins.
func (r *Reconciler) Run(driver bindings.SchedulerDriver) {
	var cancel, finished chan struct{}
requestLoop:
	for {
		select {
		case <-r.done:
			return
		default: // proceed
		}
		select {
		case <-r.implicit:
			metrics.ReconciliationRequested.WithLabelValues("implicit").Inc()
			select {
			case <-r.done:
				return
			case <-r.explicit:
				break // give preference to a pending request for explicit
			default: // continue
				// don't run implicit reconciliation while explicit is ongoing
				if finished != nil {
					select {
					case <-finished: // continue w/ implicit
					default:
						log.Infoln("skipping implicit reconcile because explicit reconcile is ongoing")
						continue requestLoop
					}
				}
				errOnce := proc.NewErrorOnce(r.done)
				errCh := r.Do(func() {
					var err error
					// use a closure so err is evaluated after ReconcileTasks
					// returns, not when the defer statement is registered
					defer func() { errOnce.Report(err) }()
					log.Infoln("implicit reconcile tasks")
					metrics.ReconciliationExecuted.WithLabelValues("implicit").Inc()
					if _, err = driver.ReconcileTasks([]*mesos.TaskStatus{}); err != nil {
						log.V(1).Infof("failed to request implicit reconciliation from mesos: %v", err)
					}
				})
				proc.OnError(errOnce.Send(errCh).Err(), func(err error) {
					log.Errorf("failed to run implicit reconciliation: %v", err)
				}, r.done)
				goto slowdown
			}
		case <-r.done:
			return
		case <-r.explicit: // continue
			metrics.ReconciliationRequested.WithLabelValues("explicit").Inc()
		}

		if cancel != nil {
			close(cancel)
			cancel = nil

			// play nice and wait for the prior operation to finish, complain
			// if it doesn't
			select {
			case <-r.done:
				return
			case <-finished: // noop, expected
			case <-time.After(r.explicitReconciliationAbortTimeout): // very unexpected
				log.Error("reconciler action failed to stop upon cancellation")
			}
		}
		// copy 'finished' to 'fin' here in case we end up with simultaneous go-routines,
		// if cancellation takes too long or fails - we don't want to close the same chan
		// more than once
		cancel = make(chan struct{})
		finished = make(chan struct{})
		go func(fin chan struct{}) {
			startedAt := time.Now()
			defer func() {
				metrics.ReconciliationLatency.Observe(metrics.InMicroseconds(time.Since(startedAt)))
			}()

			metrics.ReconciliationExecuted.WithLabelValues("explicit").Inc()
			defer close(fin)
			err := <-r.Action(driver, cancel)
			if err == reconciliationCancelledErr {
				metrics.ReconciliationCancelled.WithLabelValues("explicit").Inc()
				log.Infoln(err.Error())
			} else if err != nil {
				log.Errorf("reconciler action failed: %v", err)
			}
		}(finished)
	slowdown:
		// don't allow reconciliation to run very frequently, either explicit or implicit
		select {
		case <-r.done:
			return
		case <-time.After(r.cooldown): // noop
		}
	} // for
}

func (ks *KubernetesScheduler) recoverTasks() error {
	ctx := api.NewDefaultContext()
	podList, err := ks.client.Pods(api.NamespaceValue(ctx)).List(labels.Everything(), fields.Everything())
	if err != nil {
		log.V(1).Infof("failed to recover pod registry, madness may ensue: %v", err)
		return err
	}
	recoverSlave := func(t *podtask.T) {
		slaveId := t.Spec.SlaveID
		ks.slaves.checkAndAdd(slaveId, t.Offer.Host())
	}
	for _, pod := range podList.Items {
		if t, ok, err := podtask.RecoverFrom(pod); err != nil {
			log.Errorf("failed to recover task from pod, will attempt to delete '%v/%v': %v", pod.Namespace, pod.Name, err)
			err := ks.client.Pods(pod.Namespace).Delete(pod.Name, nil)
			//TODO(jdef) check for temporary or not-found errors
			if err != nil {
				log.Errorf("failed to delete pod '%v/%v': %v", pod.Namespace, pod.Name, err)
			}
		} else if ok {
			ks.taskRegistry.Register(t, nil)
			recoverSlave(t)
			log.Infof("recovered task %v from pod %v/%v", t.ID, pod.Namespace, pod.Name)
		}
	}
	return nil
}
350
contrib/mesos/pkg/scheduler/scheduler_test.go
Normal file
@@ -0,0 +1,350 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduler

import (
	"testing"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
	schedcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
	mesos "github.com/mesos/mesos-go/mesosproto"
	util "github.com/mesos/mesos-go/mesosutil"
	"github.com/stretchr/testify/assert"
)

// Check that the same slave is only added once.
func TestSlaveStorage_checkAndAdd(t *testing.T) {
	assert := assert.New(t)

	slaveStorage := newSlaveStorage()
	assert.Equal(0, len(slaveStorage.slaves))

	slaveId := "slave1"
	slaveHostname := "slave1Hostname"
	slaveStorage.checkAndAdd(slaveId, slaveHostname)
	assert.Equal(1, len(slaveStorage.getSlaveIds()))

	slaveStorage.checkAndAdd(slaveId, slaveHostname)
	assert.Equal(1, len(slaveStorage.getSlaveIds()))
}

// Check that getSlave returns notExist for a nonexistent slave.
func TestSlaveStorage_getSlave(t *testing.T) {
	assert := assert.New(t)

	slaveStorage := newSlaveStorage()
	assert.Equal(0, len(slaveStorage.slaves))

	slaveId := "slave1"
	slaveHostname := "slave1Hostname"

	_, exists := slaveStorage.getSlave(slaveId)
	assert.Equal(false, exists)

	slaveStorage.checkAndAdd(slaveId, slaveHostname)
	assert.Equal(1, len(slaveStorage.getSlaveIds()))

	_, exists = slaveStorage.getSlave(slaveId)
	assert.Equal(true, exists)
}

// Check that getSlaveIds returns a slice with all slaveIds.
func TestSlaveStorage_getSlaveIds(t *testing.T) {
	assert := assert.New(t)

	slaveStorage := newSlaveStorage()
	assert.Equal(0, len(slaveStorage.slaves))

	slaveId := "1"
	slaveHostname := "hn1"
	slaveStorage.checkAndAdd(slaveId, slaveHostname)
	assert.Equal(1, len(slaveStorage.getSlaveIds()))

	slaveId = "2"
	slaveHostname = "hn2"
	slaveStorage.checkAndAdd(slaveId, slaveHostname)
	assert.Equal(2, len(slaveStorage.getSlaveIds()))

	slaveIds := slaveStorage.getSlaveIds()

	slaveIdsMap := make(map[string]bool, len(slaveIds))
	for _, s := range slaveIds {
		slaveIdsMap[s] = true
	}

	_, ok := slaveIdsMap["1"]
	assert.Equal(ok, true)

	_, ok = slaveIdsMap["2"]
	assert.Equal(ok, true)
}

// get number of non-expired offers from the offer registry
func getNumberOffers(os offers.Registry) int {
	// walk the offers and count those stored in the registry
	walked := 0
	walker1 := func(p offers.Perishable) (bool, error) {
		walked++
		return false, nil
	}
	os.Walk(walker1)
	return walked
}

// test adding of a resource offer; it should be added to the offer registry and slaves
func TestResourceOffer_Add(t *testing.T) {
	assert := assert.New(t)

	testScheduler := &KubernetesScheduler{
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				return true
			},
			DeclineOffer: func(offerId string) <-chan error {
				return proc.ErrorChan(nil)
			},
			// remember expired offers so that we can tell if a previously scheduled offer relies on one
			LingerTTL:     schedcfg.DefaultOfferLingerTTL,
			TTL:           schedcfg.DefaultOfferTTL,
			ListenerDelay: schedcfg.DefaultListenerDelay,
		}),
		slaves: newSlaveStorage(),
	}

	hostname := "h1"
	offerID1 := util.NewOfferID("test1")
	offer1 := &mesos.Offer{Id: offerID1, Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
	offers1 := []*mesos.Offer{offer1}
	testScheduler.ResourceOffers(nil, offers1)

	assert.Equal(1, getNumberOffers(testScheduler.offers))
	// check slave hostname
	assert.Equal(1, len(testScheduler.slaves.getSlaveIds()))

	// add another offer
	hostname2 := "h2"
	offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)}
	offers2 := []*mesos.Offer{offer2}
	testScheduler.ResourceOffers(nil, offers2)

	// check it is stored in the registry
	assert.Equal(2, getNumberOffers(testScheduler.offers))

	// check slave hostnames
	assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))
}

// test adding and rescinding of resource offers
func TestResourceOffer_Add_Rescind(t *testing.T) {
	assert := assert.New(t)

	testScheduler := &KubernetesScheduler{
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				return true
			},
			DeclineOffer: func(offerId string) <-chan error {
				return proc.ErrorChan(nil)
			},
			// remember expired offers so that we can tell if a previously scheduled offer relies on one
			LingerTTL:     schedcfg.DefaultOfferLingerTTL,
			TTL:           schedcfg.DefaultOfferTTL,
			ListenerDelay: schedcfg.DefaultListenerDelay,
		}),
		slaves: newSlaveStorage(),
	}

	hostname := "h1"
	offerID1 := util.NewOfferID("test1")
	offer1 := &mesos.Offer{Id: offerID1, Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
	offers1 := []*mesos.Offer{offer1}
	testScheduler.ResourceOffers(nil, offers1)

	assert.Equal(1, getNumberOffers(testScheduler.offers))

	// check slave hostname
	assert.Equal(1, len(testScheduler.slaves.getSlaveIds()))

	// add another offer
	hostname2 := "h2"
	offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)}
	offers2 := []*mesos.Offer{offer2}
	testScheduler.ResourceOffers(nil, offers2)

	assert.Equal(2, getNumberOffers(testScheduler.offers))

	// check slave hostnames
	assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))

	// check whether offers can be rescinded
	testScheduler.OfferRescinded(nil, offerID1)
	assert.Equal(1, getNumberOffers(testScheduler.offers))

	// rescind the remaining offer
	testScheduler.OfferRescinded(nil, util.NewOfferID("test2"))
	// walk offers again and check it is removed from the registry
	assert.Equal(0, getNumberOffers(testScheduler.offers))

	// rescinding a non-existing ID must not panic
	testScheduler.OfferRescinded(nil, util.NewOfferID("notExist"))
}

// test that when a slave is lost we remove all its offers
func TestSlave_Lost(t *testing.T) {
	assert := assert.New(t)

	testScheduler := &KubernetesScheduler{
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				return true
			},
			// remember expired offers so that we can tell if a previously scheduled offer relies on one
			LingerTTL:     schedcfg.DefaultOfferLingerTTL,
			TTL:           schedcfg.DefaultOfferTTL,
			ListenerDelay: schedcfg.DefaultListenerDelay,
		}),
		slaves: newSlaveStorage(),
	}

	hostname := "h1"
	offer1 := &mesos.Offer{Id: util.NewOfferID("test1"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
	offers1 := []*mesos.Offer{offer1}
	testScheduler.ResourceOffers(nil, offers1)
	offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
	offers2 := []*mesos.Offer{offer2}
	testScheduler.ResourceOffers(nil, offers2)

	// add another offer from a different slaveID
	hostname2 := "h2"
	offer3 := &mesos.Offer{Id: util.NewOfferID("test3"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)}
	offers3 := []*mesos.Offer{offer3}
	testScheduler.ResourceOffers(nil, offers3)

	// test precondition
	assert.Equal(3, getNumberOffers(testScheduler.offers))
	assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))

	// remove the first slave
	testScheduler.SlaveLost(nil, util.NewSlaveID(hostname))

	// offers should be removed
	assert.Equal(1, getNumberOffers(testScheduler.offers))
	// slave hostnames should still be all present
	assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))

	// remove the second slave
	testScheduler.SlaveLost(nil, util.NewSlaveID(hostname2))

	// offers should be removed
	assert.Equal(0, getNumberOffers(testScheduler.offers))
	// slave hostnames should still be all present
	assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))

	// try to remove a non-existing slave
	testScheduler.SlaveLost(nil, util.NewSlaveID("notExist"))
}

// test that when we lose the connection to the master we invalidate all cached offers
func TestDisconnect(t *testing.T) {
	assert := assert.New(t)

	testScheduler := &KubernetesScheduler{
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				return true
			},
			// remember expired offers so that we can tell if a previously scheduled offer relies on one
			LingerTTL:     schedcfg.DefaultOfferLingerTTL,
			TTL:           schedcfg.DefaultOfferTTL,
			ListenerDelay: schedcfg.DefaultListenerDelay,
		}),
		slaves: newSlaveStorage(),
	}

	hostname := "h1"
	offer1 := &mesos.Offer{Id: util.NewOfferID("test1"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
	offers1 := []*mesos.Offer{offer1}
	testScheduler.ResourceOffers(nil, offers1)
	offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
	offers2 := []*mesos.Offer{offer2}
	testScheduler.ResourceOffers(nil, offers2)

	// add another offer from a different slaveID
	hostname2 := "h2"
	offer3 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)}
	offers3 := []*mesos.Offer{offer3}
	testScheduler.ResourceOffers(nil, offers3)

	// disconnect
	testScheduler.Disconnected(nil)

	// all offers should be removed
	assert.Equal(0, getNumberOffers(testScheduler.offers))
	// slave hostnames should still be all present
	assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))
}

// test we can handle different status updates, TODO check state transitions
func TestStatus_Update(t *testing.T) {

	mockdriver := MockSchedulerDriver{}
	// setup expectations
	mockdriver.On("KillTask", util.NewTaskID("test-task-001")).Return(mesos.Status_DRIVER_RUNNING, nil)

	testScheduler := &KubernetesScheduler{
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				return true
			},
			// remember expired offers so that we can tell if a previously scheduled offer relies on one
			LingerTTL:     schedcfg.DefaultOfferLingerTTL,
			TTL:           schedcfg.DefaultOfferTTL,
			ListenerDelay: schedcfg.DefaultListenerDelay,
		}),
		slaves:       newSlaveStorage(),
		driver:       &mockdriver,
		taskRegistry: podtask.NewInMemoryRegistry(),
	}

	taskStatus_task_starting := util.NewTaskStatus(
		util.NewTaskID("test-task-001"),
		mesos.TaskState_TASK_RUNNING,
	)
	testScheduler.StatusUpdate(testScheduler.driver, taskStatus_task_starting)

	taskStatus_task_running := util.NewTaskStatus(
		util.NewTaskID("test-task-001"),
		mesos.TaskState_TASK_RUNNING,
	)
	testScheduler.StatusUpdate(testScheduler.driver, taskStatus_task_running)

	taskStatus_task_failed := util.NewTaskStatus(
		util.NewTaskID("test-task-001"),
		mesos.TaskState_TASK_FAILED,
	)
	testScheduler.StatusUpdate(testScheduler.driver, taskStatus_task_failed)

	// assert that the mock was invoked
	mockdriver.AssertExpectations(t)
}
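
// MockSchedulerDriver above is assumed to be a testify-style mock defined
// elsewhere in this package's test support code; the expectation pattern is
// the usual one (illustrative sketch):
//
//	mockdriver.On("KillTask", util.NewTaskID("test-task-001")).
//		Return(mesos.Status_DRIVER_RUNNING, nil)
//	// ...exercise the scheduler under test...
//	mockdriver.AssertExpectations(t)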
32
contrib/mesos/pkg/scheduler/service/compat_testing.go
Normal file
@@ -0,0 +1,32 @@
// +build unit_test

/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package service

import (
	"os"
	"syscall"
)

func makeFailoverSigChan() <-chan os.Signal {
	return nil
}

func makeDisownedProcAttr() *syscall.SysProcAttr {
	return nil
}
38
contrib/mesos/pkg/scheduler/service/compat_unix.go
Normal file
@@ -0,0 +1,38 @@
// +build darwin dragonfly freebsd linux netbsd openbsd
// +build !unit_test

/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package service

import (
	"os"
	"os/signal"
	"syscall"
)

func makeFailoverSigChan() <-chan os.Signal {
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGUSR1)
	return ch
}

func makeDisownedProcAttr() *syscall.SysProcAttr {
	return &syscall.SysProcAttr{
		Setpgid: true, // disown the spawned scheduler
	}
}
51
contrib/mesos/pkg/scheduler/service/compat_windows.go
Normal file
@@ -0,0 +1,51 @@
// +build windows
// +build !unit_test

/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package service

import (
	"os"
	"syscall"
)

func makeFailoverSigChan() <-chan os.Signal {
	/* TODO(jdef)
	   from go's windows compatibility test, it looks like we need to provide a filtered
	   signal channel here

	   c := make(chan os.Signal, 10)
	   signal.Notify(c)
	   select {
	   case s := <-c:
	       if s != os.Interrupt {
	           log.Fatalf("Wrong signal received: got %q, want %q\n", s, os.Interrupt)
	       }
	   case <-time.After(3 * time.Second):
	       log.Fatalf("Timeout waiting for Ctrl+Break\n")
	   }
	*/
	return nil
}

func makeDisownedProcAttr() *syscall.SysProcAttr {
	//TODO(jdef) test this somehow?!?!
	return &syscall.SysProcAttr{
		CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP | syscall.CREATE_UNICODE_ENVIRONMENT,
	}
}
18
contrib/mesos/pkg/scheduler/service/doc.go
Normal file
@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package service contains the cmd/k8sm-scheduler glue code
package service
121
contrib/mesos/pkg/scheduler/service/publish.go
Normal file
@@ -0,0 +1,121 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package service

import (
	"net"
	"reflect"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/master/ports"

	"github.com/golang/glog"
)

const (
	SCHEDULER_SERVICE_NAME = "k8sm-scheduler"
)

func (m *SchedulerServer) newServiceWriter(stop <-chan struct{}) func() {
	return func() {
		for {
			// Update service & endpoint records.
			// TODO(k8s): when it becomes possible to change this stuff,
			// stop polling and start watching.
			if err := m.createSchedulerServiceIfNeeded(SCHEDULER_SERVICE_NAME, ports.SchedulerPort); err != nil {
				glog.Errorf("Can't create scheduler service: %v", err)
			}

			if err := m.setEndpoints(SCHEDULER_SERVICE_NAME, net.IP(m.Address), m.Port); err != nil {
				glog.Errorf("Can't create scheduler endpoints: %v", err)
			}

			select {
			case <-stop:
				return
			case <-time.After(10 * time.Second):
			}
		}
	}
}

// createSchedulerServiceIfNeeded will create the specified service if it
// doesn't already exist.
func (m *SchedulerServer) createSchedulerServiceIfNeeded(serviceName string, servicePort int) error {
	ctx := api.NewDefaultContext()
	if _, err := m.client.Services(api.NamespaceValue(ctx)).Get(serviceName); err == nil {
		// The service already exists.
		return nil
	}
	svc := &api.Service{
		ObjectMeta: api.ObjectMeta{
			Name:      serviceName,
			Namespace: api.NamespaceDefault,
			Labels:    map[string]string{"provider": "k8sm", "component": "scheduler"},
		},
		Spec: api.ServiceSpec{
			Ports: []api.ServicePort{{Port: servicePort, Protocol: api.ProtocolTCP}},
			// maintained by this code, not by the pod selector
			Selector:        nil,
			SessionAffinity: api.ServiceAffinityNone,
		},
	}
	if m.ServiceAddress != nil {
		svc.Spec.ClusterIP = m.ServiceAddress.String()
	}
	_, err := m.client.Services(api.NamespaceValue(ctx)).Create(svc)
	if err != nil && errors.IsAlreadyExists(err) {
		err = nil
	}
	return err
}

// setEndpoints sets the endpoints for the given service.
// in a multi-master scenario only the master will be publishing an endpoint.
// see SchedulerServer.bootstrap.
func (m *SchedulerServer) setEndpoints(serviceName string, ip net.IP, port int) error {
	// The setting we want to find.
	want := []api.EndpointSubset{{
		Addresses: []api.EndpointAddress{{IP: ip.String()}},
		Ports:     []api.EndpointPort{{Port: port, Protocol: api.ProtocolTCP}},
	}}

	ctx := api.NewDefaultContext()
	e, err := m.client.Endpoints(api.NamespaceValue(ctx)).Get(serviceName)
	createOrUpdate := m.client.Endpoints(api.NamespaceValue(ctx)).Update
	if err != nil {
		if errors.IsNotFound(err) {
			createOrUpdate = m.client.Endpoints(api.NamespaceValue(ctx)).Create
		}
		e = &api.Endpoints{
			ObjectMeta: api.ObjectMeta{
				Name:      serviceName,
				Namespace: api.NamespaceDefault,
			},
		}
	}
	if !reflect.DeepEqual(e.Subsets, want) {
		e.Subsets = want
		glog.Infof("setting endpoints for master service %q to %#v", serviceName, e)
		_, err = createOrUpdate(e)
		return err
	}
	// We didn't make any changes, no need to actually call update.
	return nil
}
751
contrib/mesos/pkg/scheduler/service/service.go
Normal file
@@ -0,0 +1,751 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package service

import (
	"bufio"
	"errors"
	"fmt"
	"io/ioutil"
	"net"
	"net/http"
	"os"
	"os/exec"
	"os/user"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/election"
	execcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/config"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/profile"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler"
	schedcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/ha"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/metrics"
	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/uid"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/clientauth"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/master/ports"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/tools"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
	"github.com/coreos/go-etcd/etcd"
	"github.com/gogo/protobuf/proto"
	log "github.com/golang/glog"
	"github.com/kardianos/osext"
	"github.com/mesos/mesos-go/auth"
	"github.com/mesos/mesos-go/auth/sasl"
	"github.com/mesos/mesos-go/auth/sasl/mech"
	mesos "github.com/mesos/mesos-go/mesosproto"
	mutil "github.com/mesos/mesos-go/mesosutil"
	bindings "github.com/mesos/mesos-go/scheduler"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/spf13/pflag"
	"golang.org/x/net/context"
)

const (
	defaultMesosMaster       = "localhost:5050"
	defaultMesosUser         = "root" // should have privs to execute docker and iptables commands
	defaultReconcileInterval = 300    // 5m default task reconciliation interval
	defaultReconcileCooldown = 15 * time.Second
	defaultFrameworkName     = "Kubernetes"
)

type SchedulerServer struct {
	Port                          int
	Address                       util.IP
	EnableProfiling               bool
	AuthPath                      string
	APIServerList                 util.StringList
	EtcdServerList                util.StringList
	EtcdConfigFile                string
	AllowPrivileged               bool
	ExecutorPath                  string
	ProxyPath                     string
	MesosMaster                   string
	MesosUser                     string
	MesosRole                     string
	MesosAuthPrincipal            string
	MesosAuthSecretFile           string
	Checkpoint                    bool
	FailoverTimeout               float64
	ExecutorBindall               bool
	ExecutorRunProxy              bool
	ExecutorProxyBindall          bool
	ExecutorLogV                  int
	ExecutorSuicideTimeout        time.Duration
	MesosAuthProvider             string
	DriverPort                    uint
	HostnameOverride              string
	ReconcileInterval             int64
	ReconcileCooldown             time.Duration
	SchedulerConfigFileName       string
	Graceful                      bool
	FrameworkName                 string
	FrameworkWebURI               string
	HA                            bool
	AdvertisedAddress             string
	ServiceAddress                util.IP
	HADomain                      string
	KMPath                        string
	ClusterDNS                    util.IP
	ClusterDomain                 string
	KubeletRootDirectory          string
	KubeletDockerEndpoint         string
	KubeletPodInfraContainerImage string
	KubeletCadvisorPort           uint
	KubeletHostNetworkSources     string
	KubeletSyncFrequency          time.Duration
	KubeletNetworkPluginName      string

	executable  string // path to the binary running this service
	client      *client.Client
	driver      bindings.SchedulerDriver
	driverMutex sync.RWMutex
	mux         *http.ServeMux
}

// useful for unit testing specific funcs
type schedulerProcessInterface interface {
	End() <-chan struct{}
	Failover() <-chan struct{}
	Terminal() <-chan struct{}
}

// NewSchedulerServer creates a new SchedulerServer with default parameters
func NewSchedulerServer() *SchedulerServer {
	s := SchedulerServer{
		Port:                   ports.SchedulerPort,
		Address:                util.IP(net.ParseIP("127.0.0.1")),
		FailoverTimeout:        time.Duration((1 << 62) - 1).Seconds(),
		ExecutorRunProxy:       true,
		ExecutorSuicideTimeout: execcfg.DefaultSuicideTimeout,
		MesosAuthProvider:      sasl.ProviderName,
		MesosMaster:            defaultMesosMaster,
		MesosUser:              defaultMesosUser,
		ReconcileInterval:      defaultReconcileInterval,
		ReconcileCooldown:      defaultReconcileCooldown,
		Checkpoint:             true,
		FrameworkName:          defaultFrameworkName,
		HA:                     false,
		mux:                    http.NewServeMux(),
		KubeletCadvisorPort:    4194, // copied from github.com/GoogleCloudPlatform/kubernetes/blob/release-0.14/cmd/kubelet/app/server.go
		KubeletSyncFrequency:   10 * time.Second,
	}
	// cache this for later use. also useful in case the original binary gets deleted, e.g.
	// during upgrades, development deployments, etc.
	if filename, err := osext.Executable(); err != nil {
		log.Fatalf("failed to determine path to currently running executable: %v", err)
	} else {
		s.executable = filename
		s.KMPath = filename
	}

	return &s
}
|
||||
|
||||
func (s *SchedulerServer) addCoreFlags(fs *pflag.FlagSet) {
    fs.IntVar(&s.Port, "port", s.Port, "The port that the scheduler's http service runs on")
    fs.Var(&s.Address, "address", "The IP address to serve on (set to 0.0.0.0 for all interfaces)")
    fs.BoolVar(&s.EnableProfiling, "profiling", s.EnableProfiling, "Enable profiling via web interface host:port/debug/pprof/")
    fs.Var(&s.APIServerList, "api-servers", "List of Kubernetes API servers for publishing events, and reading pods and services. (ip:port), comma separated.")
    fs.StringVar(&s.AuthPath, "auth-path", s.AuthPath, "Path to .kubernetes_auth file, specifying how to authenticate to API server.")
    fs.Var(&s.EtcdServerList, "etcd-servers", "List of etcd servers to watch (http://ip:port), comma separated. Mutually exclusive with --etcd-config")
    fs.StringVar(&s.EtcdConfigFile, "etcd-config", s.EtcdConfigFile, "The config file for the etcd client. Mutually exclusive with --etcd-servers.")
    fs.BoolVar(&s.AllowPrivileged, "allow-privileged", s.AllowPrivileged, "If true, allow privileged containers.")
    fs.StringVar(&s.ClusterDomain, "cluster-domain", s.ClusterDomain, "Domain for this cluster. If set, kubelet will configure all containers to search this domain in addition to the host's search domains")
    fs.Var(&s.ClusterDNS, "cluster-dns", "IP address for a cluster DNS server. If set, kubelet will configure all containers to use this for DNS resolution in addition to the host's DNS servers")

    fs.StringVar(&s.MesosMaster, "mesos-master", s.MesosMaster, "Location of the Mesos master. The format is a comma-delimited list of hosts like zk://host1:port,host2:port/mesos. If using ZooKeeper, pay particular attention to the leading zk:// and trailing /mesos! If not using ZooKeeper, standard URLs like http://localhost are also acceptable.")
    fs.StringVar(&s.MesosUser, "mesos-user", s.MesosUser, "Mesos user for this framework, defaults to root.")
    fs.StringVar(&s.MesosRole, "mesos-role", s.MesosRole, "Mesos role for this framework, defaults to none.")
    fs.StringVar(&s.MesosAuthPrincipal, "mesos-authentication-principal", s.MesosAuthPrincipal, "Mesos authentication principal.")
    fs.StringVar(&s.MesosAuthSecretFile, "mesos-authentication-secret-file", s.MesosAuthSecretFile, "Mesos authentication secret file.")
    fs.StringVar(&s.MesosAuthProvider, "mesos-authentication-provider", s.MesosAuthProvider, fmt.Sprintf("Authentication provider to use, default is SASL that supports mechanisms: %+v", mech.ListSupported()))
    fs.BoolVar(&s.Checkpoint, "checkpoint", s.Checkpoint, "Enable/disable checkpointing for the kubernetes-mesos framework.")
    fs.Float64Var(&s.FailoverTimeout, "failover-timeout", s.FailoverTimeout, "Framework failover timeout, in sec.")
    fs.UintVar(&s.DriverPort, "driver-port", s.DriverPort, "Port that the Mesos scheduler driver process should listen on.")
    fs.StringVar(&s.HostnameOverride, "hostname-override", s.HostnameOverride, "If non-empty, will use this string as identification instead of the actual hostname.")
    fs.Int64Var(&s.ReconcileInterval, "reconcile-interval", s.ReconcileInterval, "Interval at which to execute task reconciliation, in sec. Zero disables.")
    fs.DurationVar(&s.ReconcileCooldown, "reconcile-cooldown", s.ReconcileCooldown, "Minimum rest period between task reconciliation operations.")
    fs.StringVar(&s.SchedulerConfigFileName, "scheduler-config", s.SchedulerConfigFileName, "An ini-style configuration file with low-level scheduler settings.")
    fs.BoolVar(&s.Graceful, "graceful", s.Graceful, "Indicator of a graceful failover, intended for internal use only.")
    fs.BoolVar(&s.HA, "ha", s.HA, "Run the scheduler in high availability mode with leader election. All peers should be configured exactly the same.")
    fs.StringVar(&s.FrameworkName, "framework-name", s.FrameworkName, "The framework name to register with Mesos.")
    fs.StringVar(&s.FrameworkWebURI, "framework-weburi", s.FrameworkWebURI, "A URI that points to a web-based interface for interacting with the framework.")
    fs.StringVar(&s.AdvertisedAddress, "advertised-address", s.AdvertisedAddress, "host:port address that is advertised to clients. May be used to construct artifact download URIs.")
    fs.Var(&s.ServiceAddress, "service-address", "The service portal IP address that the scheduler should register with (if unset, chooses randomly)")

    fs.BoolVar(&s.ExecutorBindall, "executor-bindall", s.ExecutorBindall, "When true will set -address of the executor to 0.0.0.0.")
    fs.IntVar(&s.ExecutorLogV, "executor-logv", s.ExecutorLogV, "Logging verbosity of spawned executor processes.")
    fs.BoolVar(&s.ExecutorProxyBindall, "executor-proxy-bindall", s.ExecutorProxyBindall, "When true pass -proxy-bindall to the executor.")
    fs.BoolVar(&s.ExecutorRunProxy, "executor-run-proxy", s.ExecutorRunProxy, "Run the kube-proxy as a child process of the executor.")
    fs.DurationVar(&s.ExecutorSuicideTimeout, "executor-suicide-timeout", s.ExecutorSuicideTimeout, "Executor self-terminates after this period of inactivity. Zero disables suicide watch.")

    fs.StringVar(&s.KubeletRootDirectory, "kubelet-root-dir", s.KubeletRootDirectory, "Directory path for managing kubelet files (volume mounts, etc). Defaults to executor sandbox.")
    fs.StringVar(&s.KubeletDockerEndpoint, "kubelet-docker-endpoint", s.KubeletDockerEndpoint, "If non-empty, kubelet will use this for the docker endpoint to communicate with.")
    fs.StringVar(&s.KubeletPodInfraContainerImage, "kubelet-pod-infra-container-image", s.KubeletPodInfraContainerImage, "The image whose network/ipc namespaces containers in each pod will use.")
    fs.UintVar(&s.KubeletCadvisorPort, "kubelet-cadvisor-port", s.KubeletCadvisorPort, "The port of the kubelet's local cAdvisor endpoint")
    fs.StringVar(&s.KubeletHostNetworkSources, "kubelet-host-network-sources", s.KubeletHostNetworkSources, "Comma-separated list of sources from which the Kubelet allows pods to use the host network. For all sources use \"*\" [default=\"file\"]")
    fs.DurationVar(&s.KubeletSyncFrequency, "kubelet-sync-frequency", s.KubeletSyncFrequency, "Max period between synchronizing running containers and config")
    fs.StringVar(&s.KubeletNetworkPluginName, "kubelet-network-plugin", s.KubeletNetworkPluginName, "<Warning: Alpha feature> The name of the network plugin to be invoked for various events in kubelet/pod lifecycle")

    //TODO(jdef) support this flag once we have a better handle on mesos-dns and k8s DNS integration
    //fs.StringVar(&s.HADomain, "ha-domain", s.HADomain, "Domain of the HA scheduler service, only used in HA mode. If specified may be used to construct artifact download URIs.")
}

func (s *SchedulerServer) AddStandaloneFlags(fs *pflag.FlagSet) {
    s.addCoreFlags(fs)
    fs.StringVar(&s.ExecutorPath, "executor-path", s.ExecutorPath, "Location of the kubernetes executor executable")
    fs.StringVar(&s.ProxyPath, "proxy-path", s.ProxyPath, "Location of the kubernetes proxy executable")
}

func (s *SchedulerServer) AddHyperkubeFlags(fs *pflag.FlagSet) {
    s.addCoreFlags(fs)
    fs.StringVar(&s.KMPath, "km-path", s.KMPath, "Location of the km executable, may be a URI or an absolute file path.")
}

// serveFrameworkArtifact registers an HTTP handler for the given file and
// returns (downloadURI, basename(path)).
func (s *SchedulerServer) serveFrameworkArtifact(path string) (string, string) {
    serveFile := func(pattern string, filename string) {
        s.mux.HandleFunc(pattern, func(w http.ResponseWriter, r *http.Request) {
            http.ServeFile(w, r, filename)
        })
    }

    // Create base path (http://foobar:5000/<base>)
    pathSplit := strings.Split(path, "/")
    var base string
    if len(pathSplit) > 0 {
        base = pathSplit[len(pathSplit)-1]
    } else {
        base = path
    }
    serveFile("/"+base, path)

    hostURI := ""
    if s.AdvertisedAddress != "" {
        hostURI = fmt.Sprintf("http://%s/%s", s.AdvertisedAddress, base)
    } else if s.HA && s.HADomain != "" {
        hostURI = fmt.Sprintf("http://%s.%s:%d/%s", SCHEDULER_SERVICE_NAME, s.HADomain, ports.SchedulerPort, base)
    } else {
        hostURI = fmt.Sprintf("http://%s:%d/%s", s.Address.String(), s.Port, base)
    }
    log.V(2).Infof("Hosting artifact '%s' at '%s'", path, hostURI)

    return hostURI, base
}

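// Usage sketch for the artifact server above (illustrative only, not part of
// this commit; the path below is a hypothetical example): serving a local
// binary registers an HTTP handler on s.mux and yields the URI that Mesos
// fetches into the executor sandbox.
func exampleServeFrameworkArtifact(s *SchedulerServer) {
    uri, base := s.serveFrameworkArtifact("/opt/bin/km") // hypothetical path
    // With default --address/--port this resembles "http://127.0.0.1:<port>/km".
    log.V(2).Infof("artifact %q downloadable at %q", base, uri)
}
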
func (s *SchedulerServer) prepareExecutorInfo(hks hyperkube.Interface) (*mesos.ExecutorInfo, *uid.UID, error) {
    ci := &mesos.CommandInfo{
        Shell: proto.Bool(false),
    }

    //TODO(jdef) these should be shared constants with km
    const (
        KM_EXECUTOR = "executor"
        KM_PROXY    = "proxy"
    )

    if s.ExecutorPath != "" {
        uri, executorCmd := s.serveFrameworkArtifact(s.ExecutorPath)
        ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)})
        ci.Value = proto.String(fmt.Sprintf("./%s", executorCmd))
    } else if !hks.FindServer(KM_EXECUTOR) {
        return nil, nil, fmt.Errorf("either run this scheduler via km or else --executor-path is required")
    } else {
        if strings.Index(s.KMPath, "://") > 0 {
            // URI could point directly to an executable, e.g. hdfs:///km,
            // or else indirectly, e.g. http://acmestorage/tarball.tgz,
            // so we assume that for this case the command will always be "km"
            ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(s.KMPath), Executable: proto.Bool(true)})
            ci.Value = proto.String("./km") // TODO(jdef) extract constant
        } else if s.KMPath != "" {
            uri, kmCmd := s.serveFrameworkArtifact(s.KMPath)
            ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)})
            ci.Value = proto.String(fmt.Sprintf("./%s", kmCmd))
        } else {
            uri, kmCmd := s.serveFrameworkArtifact(s.executable)
            ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)})
            ci.Value = proto.String(fmt.Sprintf("./%s", kmCmd))
        }
        ci.Arguments = append(ci.Arguments, KM_EXECUTOR)
    }

    if s.ProxyPath != "" {
        uri, proxyCmd := s.serveFrameworkArtifact(s.ProxyPath)
        ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)})
        ci.Arguments = append(ci.Arguments, fmt.Sprintf("--proxy-exec=./%s", proxyCmd))
    } else if !hks.FindServer(KM_PROXY) {
        return nil, nil, fmt.Errorf("either run this scheduler via km or else --proxy-path is required")
    } else if s.ExecutorPath != "" {
        return nil, nil, fmt.Errorf("proxy can only use km binary if executor does the same")
    } // else, executor is smart enough to know when proxy-path is required, or to use km

    //TODO(jdef): provide some way (env var?) for users to customize executor config
    //TODO(jdef): set -address to 127.0.0.1 if `address` is 127.0.0.1
    //TODO(jdef): propagate dockercfg from RootDirectory?

    apiServerArgs := strings.Join(s.APIServerList, ",")
    ci.Arguments = append(ci.Arguments, fmt.Sprintf("--api-servers=%s", apiServerArgs))
    ci.Arguments = append(ci.Arguments, fmt.Sprintf("--v=%d", s.ExecutorLogV))
    ci.Arguments = append(ci.Arguments, fmt.Sprintf("--allow-privileged=%t", s.AllowPrivileged))
    ci.Arguments = append(ci.Arguments, fmt.Sprintf("--suicide-timeout=%v", s.ExecutorSuicideTimeout))

    if s.ExecutorBindall {
        //TODO(jdef) determine whether hostname-override is really needed for bindall,
        //because it conflicts with kubelet node status checks/updates
        //ci.Arguments = append(ci.Arguments, "--hostname-override=0.0.0.0")
        ci.Arguments = append(ci.Arguments, "--address=0.0.0.0")
    }

    ci.Arguments = append(ci.Arguments, fmt.Sprintf("--proxy-bindall=%v", s.ExecutorProxyBindall))
    ci.Arguments = append(ci.Arguments, fmt.Sprintf("--run-proxy=%v", s.ExecutorRunProxy))
    ci.Arguments = append(ci.Arguments, fmt.Sprintf("--cadvisor-port=%v", s.KubeletCadvisorPort))
    ci.Arguments = append(ci.Arguments, fmt.Sprintf("--sync-frequency=%v", s.KubeletSyncFrequency))

    if s.AuthPath != "" {
        //TODO(jdef) should probably support non-local files, e.g. hdfs:///some/config/file
        uri, basename := s.serveFrameworkArtifact(s.AuthPath)
        ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri)})
        ci.Arguments = append(ci.Arguments, fmt.Sprintf("--auth-path=%s", basename))
    }
    appendOptional := func(name string, value string) {
        if value != "" {
            ci.Arguments = append(ci.Arguments, fmt.Sprintf("--%s=%s", name, value))
        }
    }
    if s.ClusterDNS != nil {
        appendOptional("cluster-dns", s.ClusterDNS.String())
    }
    appendOptional("cluster-domain", s.ClusterDomain)
    appendOptional("root-dir", s.KubeletRootDirectory)
    appendOptional("docker-endpoint", s.KubeletDockerEndpoint)
    appendOptional("pod-infra-container-image", s.KubeletPodInfraContainerImage)
    appendOptional("host-network-sources", s.KubeletHostNetworkSources)
    appendOptional("network-plugin", s.KubeletNetworkPluginName)

    log.V(1).Infof("prepared executor command %q with args '%+v'", ci.GetValue(), ci.Arguments)

    // Assemble the ExecutorInfo that the framework will register with.
    info := &mesos.ExecutorInfo{
        Command: ci,
        Name:    proto.String(execcfg.DefaultInfoName),
        Source:  proto.String(execcfg.DefaultInfoSource),
    }

    // calculate the ExecutorInfo hash to be used for validating compatibility
    // of ExecutorInfo's generated by other HA schedulers.
    ehash := hashExecutorInfo(info)
    eid := uid.New(ehash, execcfg.DefaultInfoID)
    info.ExecutorId = &mesos.ExecutorID{Value: proto.String(eid.String())}

    return info, eid, nil
}

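// For orientation, a sketch (illustrative only, with hypothetical sample
// values) of the executor command shape that prepareExecutorInfo assembles
// for a km-based deployment: the sandbox launch ends up looking like
// "./km executor --api-servers=... --v=0 --allow-privileged=false ...".
func exampleExecutorCommandShape() {
    args := []string{"executor"}                              // the KM_EXECUTOR subcommand
    args = append(args, "--api-servers=http://10.0.0.1:8080") // assumed sample value
    args = append(args, fmt.Sprintf("--v=%d", 0))             // ExecutorLogV zero value
    args = append(args, fmt.Sprintf("--allow-privileged=%t", false))
    log.V(1).Infof("sandbox launch resembles: ./km %s", strings.Join(args, " "))
}
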
// TODO(jdef): hacked from kubelet/server/server.go
// TODO(k8s): replace this with clientcmd
func (s *SchedulerServer) createAPIServerClient() (*client.Client, error) {
    authInfo, err := clientauth.LoadFromFile(s.AuthPath)
    if err != nil {
        log.Warningf("Could not load kubernetes auth path: %v. Continuing with defaults.", err)
    }
    if authInfo == nil {
        // authInfo didn't load correctly - continue with defaults.
        authInfo = &clientauth.Info{}
    }
    clientConfig, err := authInfo.MergeWithConfig(client.Config{})
    if err != nil {
        return nil, err
    }
    if len(s.APIServerList) < 1 {
        return nil, fmt.Errorf("no api servers specified")
    }
    // TODO: adapt Kube client to support LB over several servers
    if len(s.APIServerList) > 1 {
        log.Infof("Multiple api servers specified. Picking first one")
    }
    clientConfig.Host = s.APIServerList[0]
    c, err := client.New(&clientConfig)
    if err != nil {
        return nil, err
    }
    return c, nil
}

func (s *SchedulerServer) setDriver(driver bindings.SchedulerDriver) {
    s.driverMutex.Lock()
    defer s.driverMutex.Unlock()
    s.driver = driver
}

func (s *SchedulerServer) getDriver() (driver bindings.SchedulerDriver) {
    s.driverMutex.RLock()
    defer s.driverMutex.RUnlock()
    return s.driver
}

func (s *SchedulerServer) Run(hks hyperkube.Interface, _ []string) error {
    // get scheduler low-level config
    sc := schedcfg.CreateDefaultConfig()
    if s.SchedulerConfigFileName != "" {
        f, err := os.Open(s.SchedulerConfigFileName)
        if err != nil {
            log.Fatalf("Cannot open scheduler config file: %v", err)
        }

        err = sc.Read(bufio.NewReader(f))
        if err != nil {
            log.Fatalf("Invalid scheduler config file: %v", err)
        }
    }

    schedulerProcess, driverFactory, etcdClient, eid := s.bootstrap(hks, sc)

    if s.EnableProfiling {
        profile.InstallHandler(s.mux)
    }
    go runtime.Until(func() {
        log.V(1).Info("Starting HTTP interface")
        log.Error(http.ListenAndServe(net.JoinHostPort(s.Address.String(), strconv.Itoa(s.Port)), s.mux))
    }, sc.HttpBindInterval.Duration, schedulerProcess.Terminal())

    if s.HA {
        validation := ha.ValidationFunc(validateLeadershipTransition)
        srv := ha.NewCandidate(schedulerProcess, driverFactory, validation)
        path := fmt.Sprintf(meta.DefaultElectionFormat, s.FrameworkName)
        sid := uid.New(eid.Group(), "").String()
        log.Infof("registering for election at %v with id %v", path, sid)
        go election.Notify(election.NewEtcdMasterElector(etcdClient), path, sid, srv, nil)
    } else {
        log.Infoln("self-electing in non-HA mode")
        schedulerProcess.Elect(driverFactory)
    }
    return s.awaitFailover(schedulerProcess, func() error { return s.failover(s.getDriver(), hks) })
}

// awaitFailover watches the scheduler process for failover signals and
// handles them properly; it may never return.
func (s *SchedulerServer) awaitFailover(schedulerProcess schedulerProcessInterface, handler func() error) error {

    // we only want to return the first error (if any), everyone else can block forever
    errCh := make(chan error, 1)
    doFailover := func() error {
        // we really don't expect handler to return; if it does, something went seriously wrong
        err := handler()
        if err != nil {
            defer schedulerProcess.End()
            err = fmt.Errorf("failover failed, scheduler will terminate: %v", err)
        }
        return err
    }

    // guard for failover signal processing, first signal processor wins
    failoverLatch := &runtime.Latch{}
    runtime.On(schedulerProcess.Terminal(), func() {
        if !failoverLatch.Acquire() {
            log.V(1).Infof("scheduler process ending, already failing over")
            select {}
        }
        var err error
        defer func() { errCh <- err }()
        select {
        case <-schedulerProcess.Failover():
            err = doFailover()
        default:
            if s.HA {
                err = fmt.Errorf("ha scheduler exiting instead of failing over")
            } else {
                log.Infof("exiting scheduler")
            }
        }
    })
    runtime.OnOSSignal(makeFailoverSigChan(), func(_ os.Signal) {
        if !failoverLatch.Acquire() {
            log.V(1).Infof("scheduler process signalled, already failing over")
            select {}
        }
        errCh <- doFailover()
    })
    return <-errCh
}

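// The guard above depends on runtime.Latch being acquirable exactly once.
// A minimal sketch of that contract as used here (illustrative only):
func exampleLatchFirstWins() {
    latch := &runtime.Latch{}
    if latch.Acquire() {
        log.Infof("first signal processor wins and performs the failover")
    }
    if !latch.Acquire() {
        log.Infof("later processors lose and park forever via an empty select")
    }
}
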
func validateLeadershipTransition(desired, current string) {
    log.Infof("validating leadership transition")
    d := uid.Parse(desired).Group()
    c := uid.Parse(current).Group()
    if d == 0 {
        // should *never* happen, but...
        log.Fatalf("illegal scheduler UID: %q", desired)
    }
    if d != c && c != 0 {
        log.Fatalf("desired scheduler group (%x) != current scheduler group (%x)", d, c)
    }
}

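// Sketch (illustrative only, assuming the uid API used above): scheduler UIDs
// embed the executor-config hash as their group, so comparing groups is what
// decides whether a leadership transition is safe.
func exampleGroupsCompatible(desired, current string) bool {
    return uid.Parse(desired).Group() == uid.Parse(current).Group()
}
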
// hacked from https://github.com/GoogleCloudPlatform/kubernetes/blob/release-0.14/cmd/kube-apiserver/app/server.go
func newEtcd(etcdConfigFile string, etcdServerList util.StringList) (client tools.EtcdGetSet, err error) {
    if etcdConfigFile != "" {
        client, err = etcd.NewClientFromFile(etcdConfigFile)
    } else {
        client = etcd.NewClient(etcdServerList)
    }
    return
}

func (s *SchedulerServer) bootstrap(hks hyperkube.Interface, sc *schedcfg.Config) (*ha.SchedulerProcess, ha.DriverFactory, tools.EtcdGetSet, *uid.UID) {

    s.FrameworkName = strings.TrimSpace(s.FrameworkName)
    if s.FrameworkName == "" {
        log.Fatalf("framework-name must be a non-empty string")
    }
    s.FrameworkWebURI = strings.TrimSpace(s.FrameworkWebURI)

    metrics.Register()
    runtime.Register()
    s.mux.Handle("/metrics", prometheus.Handler())

    if (s.EtcdConfigFile != "" && len(s.EtcdServerList) != 0) || (s.EtcdConfigFile == "" && len(s.EtcdServerList) == 0) {
        log.Fatalf("specify exactly one of --etcd-servers or --etcd-config")
    }

    if len(s.APIServerList) < 1 {
        log.Fatal("No api servers specified.")
    }

    client, err := s.createAPIServerClient()
    if err != nil {
        log.Fatalf("Unable to make apiserver client: %v", err)
    }
    s.client = client

    if s.ReconcileCooldown < defaultReconcileCooldown {
        s.ReconcileCooldown = defaultReconcileCooldown
        log.Warningf("user-specified reconcile cooldown too small, defaulting to %v", s.ReconcileCooldown)
    }

    executor, eid, err := s.prepareExecutorInfo(hks)
    if err != nil {
        log.Fatalf("misconfigured executor: %v", err)
    }

    // TODO(jdef): remove the dependency on etcd as soon as
    // (1) the generic config store is available for the FrameworkId storage
    // (2) the generic master election is provided by the apiserver
    // Compare docs/proposals/high-availability.md
    etcdClient, err := newEtcd(s.EtcdConfigFile, s.EtcdServerList)
    if err != nil {
        log.Fatalf("misconfigured etcd: %v", err)
    }

    mesosPodScheduler := scheduler.New(scheduler.Config{
        Schedcfg:          *sc,
        Executor:          executor,
        ScheduleFunc:      scheduler.FCFSScheduleFunc,
        Client:            client,
        EtcdClient:        etcdClient,
        FailoverTimeout:   s.FailoverTimeout,
        ReconcileInterval: s.ReconcileInterval,
        ReconcileCooldown: s.ReconcileCooldown,
    })

    masterUri := s.MesosMaster
    info, cred, err := s.buildFrameworkInfo()
    if err != nil {
        log.Fatalf("Misconfigured mesos framework: %v", err)
    }

    schedulerProcess := ha.New(mesosPodScheduler)
    dconfig := &bindings.DriverConfig{
        Scheduler:        schedulerProcess,
        Framework:        info,
        Master:           masterUri,
        Credential:       cred,
        BindingAddress:   net.IP(s.Address),
        BindingPort:      uint16(s.DriverPort),
        HostnameOverride: s.HostnameOverride,
        WithAuthContext: func(ctx context.Context) context.Context {
            ctx = auth.WithLoginProvider(ctx, s.MesosAuthProvider)
            ctx = sasl.WithBindingAddress(ctx, net.IP(s.Address))
            return ctx
        },
    }

    kpl := scheduler.NewPlugin(mesosPodScheduler.NewDefaultPluginConfig(schedulerProcess.Terminal(), s.mux))
    runtime.On(mesosPodScheduler.Registration(), func() { kpl.Run(schedulerProcess.Terminal()) })
    runtime.On(mesosPodScheduler.Registration(), s.newServiceWriter(schedulerProcess.Terminal()))

    driverFactory := ha.DriverFactory(func() (drv bindings.SchedulerDriver, err error) {
        log.V(1).Infoln("performing deferred initialization")
        if err = mesosPodScheduler.Init(schedulerProcess.Master(), kpl, s.mux); err != nil {
            return nil, fmt.Errorf("failed to initialize pod scheduler: %v", err)
        }
        log.V(1).Infoln("deferred init complete")
        // defer obtaining framework ID to prevent multiple schedulers
        // from overwriting each other's framework IDs
        dconfig.Framework.Id, err = s.fetchFrameworkID(etcdClient)
        if err != nil {
            return nil, fmt.Errorf("failed to fetch framework ID from etcd: %v", err)
        }
        log.V(1).Infoln("constructing mesos scheduler driver")
        drv, err = bindings.NewMesosSchedulerDriver(*dconfig)
        if err != nil {
            return nil, fmt.Errorf("failed to construct scheduler driver: %v", err)
        }
        log.V(1).Infoln("constructed mesos scheduler driver:", drv)
        s.setDriver(drv)
        return drv, nil
    })

    return schedulerProcess, driverFactory, etcdClient, eid
}

func (s *SchedulerServer) failover(driver bindings.SchedulerDriver, hks hyperkube.Interface) error {
    if driver != nil {
        stat, err := driver.Stop(true)
        if stat != mesos.Status_DRIVER_STOPPED {
            return fmt.Errorf("failed to stop driver for failover, received unexpected status code: %v", stat)
        } else if err != nil {
            return err
        }
    }

    // there's no guarantee that all goroutines are actually programmed intelligently with 'done'
    // signals, so we'll need to restart if we want to really stop everything

    // run the same command that we were launched with
    //TODO(jdef) assumption here is that the scheduler is the only service running in this process; we should probably validate that somehow
    args := []string{}
    flags := pflag.CommandLine
    if hks != nil {
        args = append(args, hks.Name())
        flags = hks.Flags()
    }
    flags.Visit(func(flag *pflag.Flag) {
        if flag.Name != "api-servers" && flag.Name != "etcd-servers" {
            args = append(args, fmt.Sprintf("--%s=%s", flag.Name, flag.Value.String()))
        }
    })
    if !s.Graceful {
        args = append(args, "--graceful")
    }
    if len(s.APIServerList) > 0 {
        args = append(args, "--api-servers="+strings.Join(s.APIServerList, ","))
    }
    if len(s.EtcdServerList) > 0 {
        args = append(args, "--etcd-servers="+strings.Join(s.EtcdServerList, ","))
    }
    args = append(args, flags.Args()...)

    log.V(1).Infof("spawning scheduler for graceful failover: %s %+v", s.executable, args)

    cmd := exec.Command(s.executable, args...)
    cmd.Stdin = os.Stdin
    cmd.Stdout = os.Stdout
    cmd.Stderr = os.Stderr
    cmd.SysProcAttr = makeDisownedProcAttr()

    // TODO(jdef) pass in a pipe FD so that we can block, waiting for the child proc to be ready
    //cmd.ExtraFiles = []*os.File{}

    exitcode := 0
    log.Flush() // TODO(jdef) it would be really nice to ensure that no one else in our process was still logging
    if err := cmd.Start(); err != nil {
        // log to stdout here to avoid conflicts with normal stderr logging
        fmt.Fprintf(os.Stdout, "failed to spawn failover process: %v\n", err)
        os.Exit(1)
    }
    os.Exit(exitcode)
    select {} // will never reach here
}

func (s *SchedulerServer) buildFrameworkInfo() (info *mesos.FrameworkInfo, cred *mesos.Credential, err error) {
    username, err := s.getUsername()
    if err != nil {
        return nil, nil, err
    }
    log.V(2).Infof("Framework configured with mesos user %v", username)
    info = &mesos.FrameworkInfo{
        Name:       proto.String(s.FrameworkName),
        User:       proto.String(username),
        Checkpoint: proto.Bool(s.Checkpoint),
    }
    if s.FrameworkWebURI != "" {
        info.WebuiUrl = proto.String(s.FrameworkWebURI)
    }
    if s.FailoverTimeout > 0 {
        info.FailoverTimeout = proto.Float64(s.FailoverTimeout)
    }
    if s.MesosRole != "" {
        info.Role = proto.String(s.MesosRole)
    }
    if s.MesosAuthPrincipal != "" {
        info.Principal = proto.String(s.MesosAuthPrincipal)
        if s.MesosAuthSecretFile == "" {
            return nil, nil, errors.New("authentication principal specified without the required credentials file")
        }
        secret, err := ioutil.ReadFile(s.MesosAuthSecretFile)
        if err != nil {
            return nil, nil, err
        }
        cred = &mesos.Credential{
            Principal: proto.String(s.MesosAuthPrincipal),
            Secret:    secret,
        }
    }
    return
}

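// Sketch (illustrative only, with a hypothetical principal and secret path):
// when a Mesos authentication principal is configured, buildFrameworkInfo
// returns both a FrameworkInfo carrying that principal and a Credential for
// SASL login.
func exampleBuildFrameworkInfo(s *SchedulerServer) {
    s.MesosAuthPrincipal = "k8sm"              // assumed sample principal
    s.MesosAuthSecretFile = "/etc/k8sm.secret" // assumed sample secret file
    info, cred, err := s.buildFrameworkInfo()
    if err != nil {
        log.Fatalf("framework info: %v", err)
    }
    log.Infof("principal=%q credential-present=%t", info.GetPrincipal(), cred != nil)
}
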
func (s *SchedulerServer) fetchFrameworkID(client tools.EtcdGetSet) (*mesos.FrameworkID, error) {
    if s.FailoverTimeout > 0 {
        if response, err := client.Get(meta.FrameworkIDKey, false, false); err != nil {
            if !tools.IsEtcdNotFound(err) {
                return nil, fmt.Errorf("unexpected failure attempting to load framework ID from etcd: %v", err)
            }
            log.V(1).Infof("did not find framework ID in etcd")
        } else if response.Node.Value != "" {
            log.Infof("configuring FrameworkInfo with Id found in etcd: '%s'", response.Node.Value)
            return mutil.NewFrameworkID(response.Node.Value), nil
        }
    } else {
        //TODO(jdef) this seems like a totally hackish way to clean up the framework ID
        if _, err := client.Delete(meta.FrameworkIDKey, true); err != nil {
            if !tools.IsEtcdNotFound(err) {
                return nil, fmt.Errorf("failed to delete framework ID from etcd: %v", err)
            }
            log.V(1).Infof("nothing to delete: did not find framework ID in etcd")
        }
    }
    return nil, nil
}

func (s *SchedulerServer) getUsername() (username string, err error) {
    username = s.MesosUser
    if username == "" {
        if u, err := user.Current(); err == nil {
            username = u.Username
            if username == "" {
                username = defaultMesosUser
            }
        }
    }
    return
}

108
contrib/mesos/pkg/scheduler/service/service_test.go
Normal file
108
contrib/mesos/pkg/scheduler/service/service_test.go
Normal file
@@ -0,0 +1,108 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// +build unit_test

package service

import (
    "testing"
    "time"
)

type fakeSchedulerProcess struct {
    doneFunc     func() <-chan struct{}
    failoverFunc func() <-chan struct{}
}

func (self *fakeSchedulerProcess) Terminal() <-chan struct{} {
    if self == nil || self.doneFunc == nil {
        return nil
    }
    return self.doneFunc()
}

func (self *fakeSchedulerProcess) Failover() <-chan struct{} {
    if self == nil || self.failoverFunc == nil {
        return nil
    }
    return self.failoverFunc()
}

func (self *fakeSchedulerProcess) End() <-chan struct{} {
    ch := make(chan struct{})
    close(ch)
    return ch
}

func Test_awaitFailoverDone(t *testing.T) {
    done := make(chan struct{})
    p := &fakeSchedulerProcess{
        doneFunc: func() <-chan struct{} { return done },
    }
    ss := &SchedulerServer{}
    failoverHandlerCalled := false
    failoverFailedHandler := func() error {
        failoverHandlerCalled = true
        return nil
    }
    errCh := make(chan error, 1)
    go func() {
        errCh <- ss.awaitFailover(p, failoverFailedHandler)
    }()
    close(done)
    select {
    case err := <-errCh:
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
    case <-time.After(1 * time.Second):
        t.Fatalf("timed out waiting for failover")
    }
    if failoverHandlerCalled {
        t.Fatalf("unexpected call to failover handler")
    }
}

func Test_awaitFailoverDoneFailover(t *testing.T) {
    ch := make(chan struct{})
    p := &fakeSchedulerProcess{
        doneFunc:     func() <-chan struct{} { return ch },
        failoverFunc: func() <-chan struct{} { return ch },
    }
    ss := &SchedulerServer{}
    failoverHandlerCalled := false
    failoverFailedHandler := func() error {
        failoverHandlerCalled = true
        return nil
    }
    errCh := make(chan error, 1)
    go func() {
        errCh <- ss.awaitFailover(p, failoverFailedHandler)
    }()
    close(ch)
    select {
    case err := <-errCh:
        if err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
    case <-time.After(1 * time.Second):
        t.Fatalf("timed out waiting for failover")
    }
    if !failoverHandlerCalled {
        t.Fatalf("expected call to failover handler")
    }
}
88
contrib/mesos/pkg/scheduler/service/util.go
Normal file
88
contrib/mesos/pkg/scheduler/service/util.go
Normal file
@@ -0,0 +1,88 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package service

import (
    "bytes"
    "fmt"
    "hash/crc64"
    "sort"
    "strconv"

    mesos "github.com/mesos/mesos-go/mesosproto"
)

// hashExecutorInfo computes a hashcode for ExecutorInfo that may be used as a
// reasonable litmus test with respect to compatibility across HA schedulers.
// The intent is that an HA scheduler should fail fast if it doesn't pass this
// test, rather than generating (potentially many) errors at run-time because
// a Mesos master decides that the ExecutorInfo generated by a secondary
// scheduler doesn't match that of the primary scheduler.
//
// see https://github.com/apache/mesos/blob/0.22.0/src/common/type_utils.cpp#L110
func hashExecutorInfo(info *mesos.ExecutorInfo) uint64 {
    // !!! we specifically do NOT include:
    // - Framework ID because it's a value that's initialized too late for us to use
    // - Executor ID because it's a value that includes a copy of this hash
    buf := &bytes.Buffer{}
    buf.WriteString(info.GetName())
    buf.WriteString(info.GetSource())
    buf.Write(info.Data)

    if info.Command != nil {
        buf.WriteString(info.Command.GetValue())
        buf.WriteString(info.Command.GetUser())
        buf.WriteString(strconv.FormatBool(info.Command.GetShell()))
        if sz := len(info.Command.Arguments); sz > 0 {
            x := make([]string, sz)
            copy(x, info.Command.Arguments)
            sort.Strings(x)
            for _, item := range x {
                buf.WriteString(item)
            }
        }
        if vars := info.Command.Environment.GetVariables(); len(vars) > 0 {
            names := []string{}
            e := make(map[string]string)

            for _, v := range vars {
                if name := v.GetName(); name != "" {
                    names = append(names, name)
                    e[name] = v.GetValue()
                }
            }
            sort.Strings(names)
            for _, n := range names {
                buf.WriteString(n)
                buf.WriteString("=")
                buf.WriteString(e[n])
            }
        }
        if uris := info.Command.GetUris(); len(uris) > 0 {
            su := []string{}
            for _, uri := range uris {
                su = append(su, fmt.Sprintf("%s%t%t", uri.GetValue(), uri.GetExecutable(), uri.GetExtract()))
            }
            sort.Strings(su)
            for _, uri := range su {
                buf.WriteString(uri)
            }
        }
        //TODO(jdef) add support for Resources and Container
    }
    table := crc64.MakeTable(crc64.ECMA)
    return crc64.Checksum(buf.Bytes(), table)
}
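
// Sketch (illustrative only): an HA peer can use the hash as the fail-fast
// compatibility check described above, aborting registration early when its
// generated ExecutorInfo diverges from the leader's.
func exampleExecutorInfoCompatible(mine, leaders *mesos.ExecutorInfo) bool {
    // Equal hashes imply the two schedulers produce interchangeable executors.
    return hashExecutorInfo(mine) == hashExecutorInfo(leaders)
}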