promote contrib/mesos to incubator

This commit is contained in:
deads2k 2016-09-28 09:37:24 -04:00
parent 33d29b5d6b
commit 06b1a9636d
235 changed files with 0 additions and 26615 deletions

View File

@ -566,10 +566,6 @@ function kube::build::run_build_command() {
"${DOCKER_MOUNT_ARGS[@]}"
)
if [ -n "${KUBERNETES_CONTRIB:-}" ]; then
docker_run_opts+=(-e "KUBERNETES_CONTRIB=${KUBERNETES_CONTRIB}")
fi
docker_run_opts+=(
--env "KUBE_FASTBUILD=${KUBE_FASTBUILD:-false}"
--env "KUBE_BUILDER_OS=${OSTYPE:-notdetected}"

View File

@ -168,7 +168,6 @@ function prepare-e2e {
# Execute prior to running tests to build a release if required for env
function test-build-release {
# Make a release
export KUBERNETES_CONTRIB=mesos
export KUBE_RELEASE_RUN_TESTS=N
"${KUBE_ROOT}/build/release.sh"
}

View File

@ -1,2 +0,0 @@
assignees:
- k82cn

View File

@ -1,35 +0,0 @@
# Kubernetes-Mesos
Kubernetes-Mesos modifies Kubernetes to act as an [Apache Mesos](http://mesos.apache.org/) framework.
## Features On Mesos
Kubernetes gains the following benefits when installed on Mesos:
- **Node-Level Auto-Scaling** - Kubernetes minion nodes are created automatically, up to the size of the provisioned Mesos cluster.
- **Resource Sharing** - Co-location of Kubernetes with other popular next-generation services on the same cluster (e.g. [Hadoop](https://github.com/mesos/hadoop), [Spark](http://spark.apache.org/), [Chronos](https://mesos.github.io/chronos/), and [Cassandra](http://mesosphere.github.io/cassandra-mesos/)). Resources are allocated to the frameworks based on fairness and can be claimed or passed on depending on framework load.
- **Independence from special network infrastructure** - Mesos can (but does not have to) run on networks that cannot assign a routable IP to every container. The Kubernetes on Mesos endpoint controller is specially modified to allow pods to communicate with services in such an environment.
For more information about how Kubernetes-Mesos is different from Kubernetes, see [Architecture](./docs/architecture.md).
## Release Status
Kubernetes-Mesos is alpha quality, still under active development, and not yet recommended for production systems.
For more information about development progress, see the [known issues](./docs/issues.md) or the [kubernetes-mesos repository](https://github.com/mesosphere/kubernetes-mesos) where backlog issues are tracked.
## Usage
This project combines concepts and technologies from two already-complex projects: Mesos and Kubernetes. It may help to familiarize yourself with the basics of each project before reading on:
* [Mesos Documentation](http://mesos.apache.org/documentation/latest)
* [Kubernetes Documentation](../../README.md)
To get up and running with Kubernetes-Mesos, follow:
- the [Getting started guide](../../docs/getting-started-guides/mesos.md) to launch a Kubernetes-Mesos cluster,
- the [Kubernetes-Mesos Scheduler Guide](./docs/scheduler.md) for topics concerning the custom scheduler used in this distribution.
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/contrib/mesos/README.md?pixel)]()

View File

@ -1,38 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Cleans output files/images and builds a full release from scratch
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/build-release.sh
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)
"${KUBE_ROOT}/contrib/mesos/ci/run.sh" make clean
export KUBERNETES_CONTRIB=mesos
export KUBE_RELEASE_RUN_TESTS="${KUBE_RELEASE_RUN_TESTS:-N}"
export KUBE_SKIP_CONFIRMATIONS=Y
"${KUBE_ROOT}/build/release.sh"

View File

@ -1,36 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Cleans output files/images and builds linux binaries from scratch
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/build.sh
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
TEST_ARGS="$@"
KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)
export KUBERNETES_CONTRIB=mesos
"${KUBE_ROOT}/contrib/mesos/ci/run.sh" make clean all ${TEST_ARGS}

View File

@ -1,87 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Deploys a test cluster, runs the specified command, and destroys the test cluster.
# Runs all commands inside the mesosphere/kubernetes-mesos-test docker image (built on demand).
# Uses the mesos/docker cluster provider.
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/run-with-cluster.sh ./cluster/test-smoke.sh -v=2
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
RUN_CMD="$@"
[ -z "${RUN_CMD:-}" ] && echo "No command supplied" && exit 1
KUBERNETES_PROVIDER="mesos/docker"
MESOS_DOCKER_WORK_DIR="${MESOS_DOCKER_WORK_DIR:-${HOME}/tmp/kubernetes}"
KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)
# Clean (test artifacts)
echo "Cleaning work dir"
echo "${MESOS_DOCKER_WORK_DIR}"
rm -rf "${MESOS_DOCKER_WORK_DIR}"
mkdir -p "${MESOS_DOCKER_WORK_DIR}"
echo "Detecting docker client"
# Mount docker client binary to avoid client/compose/daemon version conflicts
if [ -n "${DOCKER_MACHINE_NAME:-}" ] && which docker-machine; then
# On a Mac with docker-machine, use the binary in the VM, not the host binary
DOCKER_BIN_PATH="$(docker-machine ssh "${DOCKER_MACHINE_NAME}" which docker)"
else
DOCKER_BIN_PATH="$(which docker)"
fi
echo "${DOCKER_BIN_PATH}"
# Clean (k8s output & images), Build, Kube-Up, Test, Kube-Down
cd "${KUBE_ROOT}"
docker run \
--rm \
-v "${KUBE_ROOT}:/go/src/github.com/GoogleCloudPlatform/kubernetes" \
-v "/var/run/docker.sock:/var/run/docker.sock" \
-v "${DOCKER_BIN_PATH}:/usr/bin/docker" \
-v "${MESOS_DOCKER_WORK_DIR}/auth:${MESOS_DOCKER_WORK_DIR}/auth" \
-v "${MESOS_DOCKER_WORK_DIR}/log:${MESOS_DOCKER_WORK_DIR}/log" \
-v "${MESOS_DOCKER_WORK_DIR}/mesosslave1/mesos:${MESOS_DOCKER_WORK_DIR}/mesosslave1/mesos" \
-v "${MESOS_DOCKER_WORK_DIR}/mesosslave2/mesos:${MESOS_DOCKER_WORK_DIR}/mesosslave2/mesos" \
-v "${MESOS_DOCKER_WORK_DIR}/overlay:${MESOS_DOCKER_WORK_DIR}/overlay" \
-v "${MESOS_DOCKER_WORK_DIR}/reports:${MESOS_DOCKER_WORK_DIR}/reports" \
$(test -d /teamcity/system/git && echo "-v /teamcity/system/git:/teamcity/system/git" || true) \
-e "MESOS_DOCKER_WORK_DIR=${MESOS_DOCKER_WORK_DIR}" \
-e "MESOS_DOCKER_IMAGE_DIR=/var/tmp/kubernetes" \
-e "MESOS_DOCKER_OVERLAY_DIR=${MESOS_DOCKER_WORK_DIR}/overlay" \
-e "KUBERNETES_CONTRIB=mesos" \
-e "KUBERNETES_PROVIDER=mesos/docker" \
-e "USER=root" \
-e "E2E_REPORT_DIR=${MESOS_DOCKER_WORK_DIR}/reports" \
-t $(tty &>/dev/null && echo "-i") \
mesosphere/kubernetes-mesos-test \
-ceux "\
make clean all && \
trap 'timeout 5m ./cluster/kube-down.sh' EXIT && \
./cluster/kube-down.sh && \
./cluster/kube-up.sh && \
trap \"test \\\$? != 0 && export MESOS_DOCKER_DUMP_LOGS=true; cd \${PWD} && timeout 5m ./cluster/kube-down.sh\" EXIT && \
${RUN_CMD}
"

View File

@ -1,56 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Runs the specified command in the test container (mesosphere/kubernetes-mesos-test).
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/run.sh make test
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
RUN_CMD="$@"
[ -z "${RUN_CMD:-}" ] && echo "No command supplied" && exit 1
KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)
echo "Detecting docker client"
# Mount docker client binary to avoid client/compose/daemon version conflicts
if [ -n "${DOCKER_MACHINE_NAME:-}" ] && which docker-machine; then
# On a Mac with docker-machine, use the binary in the VM, not the host binary
DOCKER_BIN_PATH="$(docker-machine ssh "${DOCKER_MACHINE_NAME}" which docker)"
else
DOCKER_BIN_PATH="$(which docker)"
fi
echo "${DOCKER_BIN_PATH}"
# Clean (k8s output & images) & Build
cd "${KUBE_ROOT}"
exec docker run \
--rm \
-v "${KUBE_ROOT}:/go/src/github.com/GoogleCloudPlatform/kubernetes" \
-v "/var/run/docker.sock:/var/run/docker.sock" \
-v "${DOCKER_BIN_PATH}:/usr/bin/docker" \
-e "KUBERNETES_CONTRIB=mesos" \
-e "USER=root" \
-t $(tty &>/dev/null && echo "-i") \
mesosphere/kubernetes-mesos-test \
-ceux "${RUN_CMD}"

View File

@ -1,44 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Deploys a test cluster, runs the conformance tests, and destroys the test cluster.
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/test-conformance.sh -v=2
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
TEST_ARGS="$@"
KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)
TEST_CMD="KUBERNETES_CONFORMANCE_TEST=y KUBECONFIG=~/.kube/config go run hack/e2e.go --test --test_args=\"--ginkgo.focus=\\[Conformance\\]\""
if [ -n "${CONFORMANCE_BRANCH}" ]; then
# create a CONFORMANCE_BRANCH clone in a subdirectory
TEST_CMD="
git fetch https://github.com/kubernetes/kubernetes --tags -q ${CONFORMANCE_BRANCH} &&
git branch -f ${CONFORMANCE_BRANCH} FETCH_HEAD &&
git clone -s -b ${CONFORMANCE_BRANCH} . conformance &&
cd conformance && make all && ${TEST_CMD}"
fi
"${KUBE_ROOT}/contrib/mesos/ci/run-with-cluster.sh" ${TEST_CMD} ${TEST_ARGS}

View File

@ -1,34 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Deploys a test cluster, runs the e2e tests, and destroys the test cluster.
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/test-e2e.sh -v=2
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
TEST_ARGS="$@"
KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)
"${KUBE_ROOT}/contrib/mesos/ci/run-with-cluster.sh" ./cluster/test-e2e.sh ${TEST_ARGS}

View File

@ -1,34 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Cleans & runs the integration tests in the test container (mesosphere/kubernetes-mesos-test).
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/test-integration.sh
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
TEST_ARGS="$@"
KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)
"${KUBE_ROOT}/contrib/mesos/ci/run.sh" make clean test-integration ${TEST_ARGS}

View File

@ -1,34 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Deploys a test cluster, runs the smoke tests, and destroys the test cluster.
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/test-smoke.sh -v=2
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
TEST_ARGS="$@"
KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)
"${KUBE_ROOT}/contrib/mesos/ci/run-with-cluster.sh" ./cluster/test-smoke.sh ${TEST_ARGS}

View File

@ -1,34 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Cleans & runs the unit tests in the test container (mesosphere/kubernetes-mesos-test).
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/test-unit.sh
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
TEST_ARGS="$@"
KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)
"${KUBE_ROOT}/contrib/mesos/ci/run.sh" make clean test ${TEST_ARGS}

View File

@ -1,23 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This package main implements the executable Kubernetes Mesos controller manager.
//
// It is mainly a clone of the upstream cmd/hyperkube module right now because
// the upstream hyperkube module is not reusable.
//
// TODO(jdef,sttts): refactor upstream cmd/kube-controller-manager to be reusable with the necessary mesos changes
package main

View File

@ -1,52 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"fmt"
"os"
"k8s.io/kubernetes/pkg/healthz"
"k8s.io/kubernetes/pkg/util/flag"
"k8s.io/kubernetes/pkg/util/logs"
"k8s.io/kubernetes/pkg/version/verflag"
"k8s.io/kubernetes/contrib/mesos/pkg/controllermanager"
"github.com/spf13/pflag"
)
func init() {
healthz.DefaultHealthz()
}
func main() {
s := controllermanager.NewCMServer()
s.AddFlags(pflag.CommandLine)
flag.InitFlags()
logs.InitLogs()
defer logs.FlushLogs()
verflag.PrintAndExitIfRequested()
if err := s.Run(pflag.CommandLine.Args()); err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
}

View File

@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This package main implements the executable Kubernetes Mesos executor.
package main

View File

@ -1,46 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"fmt"
"os"
"github.com/spf13/pflag"
"k8s.io/kubernetes/contrib/mesos/pkg/executor/service"
"k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
"k8s.io/kubernetes/pkg/util/flag"
"k8s.io/kubernetes/pkg/util/logs"
"k8s.io/kubernetes/pkg/version/verflag"
)
func main() {
s := service.NewKubeletExecutorServer()
s.AddFlags(pflag.CommandLine)
flag.InitFlags()
logs.InitLogs()
defer logs.FlushLogs()
verflag.PrintAndExitIfRequested()
if err := s.Run(hyperkube.Nil(), pflag.CommandLine.Args()); err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
}

View File

@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This package main implements the executable Kubernetes Mesos scheduler.
package main

View File

@ -1,45 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"fmt"
"os"
"github.com/spf13/pflag"
"k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/service"
"k8s.io/kubernetes/pkg/util/flag"
"k8s.io/kubernetes/pkg/util/logs"
"k8s.io/kubernetes/pkg/version/verflag"
)
func main() {
s := service.NewSchedulerServer()
s.AddStandaloneFlags(pflag.CommandLine)
flag.InitFlags()
logs.InitLogs()
defer logs.FlushLogs()
verflag.PrintAndExitIfRequested()
if err := s.Run(hyperkube.Nil(), pflag.CommandLine.Args()); err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
}

View File

@ -1,24 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This package main morphs all binaries under cmd/ and several other stock
// Kubernetes binaries into a single executable.
//
// It is mainly a clone of the upstream cmd/hyperkube module right now because
// the upstream hyperkube module is not reusable.
//
// TODO(jdef,sttts): refactor upstream cmd/hyperkube to be reusable with the necessary mesos changes
package main // import "k8s.io/kubernetes/contrib/mesos/cmd/km"

View File

@ -1,202 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// clone of the upstream cmd/hyperkube/hyperkube.go
package main
import (
"errors"
"flag"
"fmt"
"io"
"io/ioutil"
"os"
"path"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/logs"
"k8s.io/kubernetes/pkg/version/verflag"
"github.com/spf13/pflag"
)
// HyperKube represents a single binary that can morph/manage into multiple
// servers.
type HyperKube struct {
Name string // The executable name, used for help and soft-link invocation
Long string // A long description of the binary. It will be word wrapped before output.
servers []Server
baseFlags *pflag.FlagSet
out io.Writer
helpFlagVal bool
}
// AddServer adds a server to the HyperKube object.
func (hk *HyperKube) AddServer(s *Server) {
hk.servers = append(hk.servers, *s)
hk.servers[len(hk.servers)-1].hk = hk
}
// FindServer will find a specific server named name.
func (hk *HyperKube) FindServer(name string) (*Server, error) {
for _, s := range hk.servers {
if s.Name() == name {
return &s, nil
}
}
return nil, fmt.Errorf("Server not found: %s", name)
}
// Servers returns a list of all of the registered servers
func (hk *HyperKube) Servers() []Server {
return hk.servers
}
// Flags returns a flagset for "global" flags.
func (hk *HyperKube) Flags() *pflag.FlagSet {
if hk.baseFlags == nil {
hk.baseFlags = pflag.NewFlagSet(hk.Name, pflag.ContinueOnError)
hk.baseFlags.SetOutput(ioutil.Discard)
hk.baseFlags.BoolVarP(&hk.helpFlagVal, "help", "h", false, "help for "+hk.Name)
// These will add all of the "global" flags (defined with both the
// flag and pflag packages) to the new flag set we have.
hk.baseFlags.AddGoFlagSet(flag.CommandLine)
hk.baseFlags.AddFlagSet(pflag.CommandLine)
}
return hk.baseFlags
}
// Out returns the io.Writer that is used for all usage/error information
func (hk *HyperKube) Out() io.Writer {
if hk.out == nil {
hk.out = os.Stderr
}
return hk.out
}
// SetOut sets the output writer for all usage/error information
func (hk *HyperKube) SetOut(w io.Writer) {
hk.out = w
}
// Print is a convenience method to Print to the defined output
func (hk *HyperKube) Print(i ...interface{}) {
fmt.Fprint(hk.Out(), i...)
}
// Println is a convenience method to Println to the defined output
func (hk *HyperKube) Println(i ...interface{}) {
fmt.Fprintln(hk.Out(), i...)
}
// Printf is a convenience method to Printf to the defined output
func (hk *HyperKube) Printf(format string, i ...interface{}) {
fmt.Fprintf(hk.Out(), format, i...)
}
// Run the server. This will pick the appropriate server and run it.
func (hk *HyperKube) Run(args []string) error {
// If we are called directly, parse all flags up to the first real
// argument. That should be the server to run.
baseCommand := path.Base(args[0])
serverName := baseCommand
if serverName == hk.Name {
args = args[1:]
baseFlags := hk.Flags()
baseFlags.SetInterspersed(false) // Only parse flags up to the next real command
err := baseFlags.Parse(args)
if err != nil || hk.helpFlagVal {
if err != nil {
hk.Println("Error:", err)
}
hk.Usage()
return err
}
verflag.PrintAndExitIfRequested()
args = baseFlags.Args()
if len(args) > 0 && len(args[0]) > 0 {
serverName = args[0]
baseCommand = baseCommand + " " + serverName
args = args[1:]
} else {
err = errors.New("no server specified")
hk.Printf("Error: %v\n\n", err)
hk.Usage()
return err
}
}
s, err := hk.FindServer(serverName)
if err != nil {
hk.Printf("Error: %v\n\n", err)
hk.Usage()
return err
}
s.Flags().AddFlagSet(hk.Flags())
err = s.Flags().Parse(args)
if err != nil || hk.helpFlagVal {
if err != nil {
hk.Printf("Error: %v\n\n", err)
}
s.Usage()
return err
}
verflag.PrintAndExitIfRequested()
logs.InitLogs()
defer logs.FlushLogs()
err = s.Run(s, s.Flags().Args())
if err != nil {
hk.Println("Error:", err)
}
return err
}
// RunToExit will run the hyperkube and then call os.Exit with an appropriate exit code.
func (hk *HyperKube) RunToExit(args []string) {
err := hk.Run(args)
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
os.Exit(0)
}
// Usage will write out a summary for all servers that this binary supports.
func (hk *HyperKube) Usage() {
tt := `{{if .Long}}{{.Long | trim | wrap ""}}
{{end}}Usage
{{.Name}} <server> [flags]
Servers
{{range .Servers}}
{{.Name}}
{{.Long | trim | wrap " "}}{{end}}
Call '{{.Name}} <server> --help' for help on a specific server.
`
util.ExecuteTemplate(hk.Out(), tt, hk)
}
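The soft-link invocation mentioned in the `HyperKube` field comments and handled at the top of `Run` (via `path.Base(args[0])`) can be exercised like this. A hypothetical sketch; it assumes a built `km` binary with a server registered under the name `scheduler`:
```shell
# Invoking the binary through a symlink named after a registered server runs
# that server directly, without passing the server name as the first argument.
ln -s ./bin/km ./bin/scheduler
./bin/scheduler --help    # equivalent to: ./bin/km scheduler --help
```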

View File

@ -1,144 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// clone of the upstream cmd/hyperkube/hyperkube_test.go
package main
import (
"bytes"
"errors"
"fmt"
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
type result struct {
err error
output string
}
func testServer(n string) *Server {
return &Server{
SimpleUsage: n,
Long: fmt.Sprintf("A simple server named %s", n),
Run: func(s *Server, args []string) error {
s.hk.Printf("%s Run\n", s.Name())
return nil
},
}
}
func testServerError(n string) *Server {
return &Server{
SimpleUsage: n,
Long: fmt.Sprintf("A simple server named %s that returns an error", n),
Run: func(s *Server, args []string) error {
s.hk.Printf("%s Run\n", s.Name())
return errors.New("server returning error")
},
}
}
func runFull(t *testing.T, args string) *result {
buf := new(bytes.Buffer)
hk := HyperKube{
Name: "hyperkube",
Long: "hyperkube is an all-in-one server binary.",
}
hk.SetOut(buf)
hk.AddServer(testServer("test1"))
hk.AddServer(testServer("test2"))
hk.AddServer(testServer("test3"))
hk.AddServer(testServerError("test-error"))
a := strings.Split(args, " ")
t.Logf("Running full with args: %q", a)
err := hk.Run(a)
r := &result{err, buf.String()}
t.Logf("Result err: %v, output: %q", r.err, r.output)
return r
}
func TestRun(t *testing.T) {
x := runFull(t, "hyperkube test1")
assert.Contains(t, x.output, "test1 Run")
assert.NoError(t, x.err)
}
func TestLinkRun(t *testing.T) {
x := runFull(t, "test1")
assert.Contains(t, x.output, "test1 Run")
assert.NoError(t, x.err)
}
func TestTopNoArgs(t *testing.T) {
x := runFull(t, "hyperkube")
assert.EqualError(t, x.err, "no server specified")
}
func TestBadServer(t *testing.T) {
x := runFull(t, "hyperkube bad-server")
assert.EqualError(t, x.err, "Server not found: bad-server")
assert.Contains(t, x.output, "Usage")
}
func TestTopHelp(t *testing.T) {
x := runFull(t, "hyperkube --help")
assert.NoError(t, x.err)
assert.Contains(t, x.output, "all-in-one")
assert.Contains(t, x.output, "A simple server named test1")
}
func TestTopFlags(t *testing.T) {
x := runFull(t, "hyperkube --help test1")
assert.NoError(t, x.err)
assert.Contains(t, x.output, "all-in-one")
assert.Contains(t, x.output, "A simple server named test1")
assert.NotContains(t, x.output, "test1 Run")
}
func TestTopFlagsBad(t *testing.T) {
x := runFull(t, "hyperkube --bad-flag")
assert.EqualError(t, x.err, "unknown flag: --bad-flag")
assert.Contains(t, x.output, "all-in-one")
assert.Contains(t, x.output, "A simple server named test1")
}
func TestServerHelp(t *testing.T) {
x := runFull(t, "hyperkube test1 --help")
assert.NoError(t, x.err)
assert.Contains(t, x.output, "A simple server named test1")
assert.Contains(t, x.output, "-h, --help help for hyperkube")
assert.NotContains(t, x.output, "test1 Run")
}
func TestServerFlagsBad(t *testing.T) {
x := runFull(t, "hyperkube test1 --bad-flag")
assert.EqualError(t, x.err, "unknown flag: --bad-flag")
assert.Contains(t, x.output, "A simple server named test1")
assert.Contains(t, x.output, "-h, --help help for hyperkube")
assert.NotContains(t, x.output, "test1 Run")
}
func TestServerError(t *testing.T) {
x := runFull(t, "hyperkube test-error")
assert.Contains(t, x.output, "test-error Run")
assert.EqualError(t, x.err, "server returning error")
}

View File

@ -1,39 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// clone of the upstream cmd/hyperkube/kube-controllermanager.go
package main
import (
"k8s.io/kubernetes/contrib/mesos/pkg/controllermanager"
"k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
)
// NewControllerManager creates a new hyperkube Server object that includes the
// description and flags.
func NewControllerManager() *Server {
s := controllermanager.NewCMServer()
hks := Server{
SimpleUsage: hyperkube.CommandControllerManager,
Long: "A server that runs a set of active components. This includes replication controllers, service endpoints and nodes.",
Run: func(_ *Server, args []string) error {
return s.Run(args)
},
}
s.AddFlags(hks.Flags())
return &hks
}

View File

@ -1,41 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"k8s.io/kubernetes/contrib/mesos/pkg/executor/service"
"k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
)
// NewKubeletExecutor creates a new hyperkube Server object that includes the
// description and flags.
func NewKubeletExecutor() *Server {
s := service.NewKubeletExecutorServer()
hks := Server{
SimpleUsage: hyperkube.CommandExecutor,
Long: `The kubelet-executor binary is responsible for maintaining a set of containers
on a particular node. It syncs data from a specialized Mesos source that tracks
task launches and kills. It then queries Docker to see what is currently
running. It synchronizes the configuration data with the running set of
containers by starting or stopping Docker containers.`,
Run: func(hks *Server, args []string) error {
return s.Run(hks, args)
},
}
s.AddFlags(hks.Flags())
return &hks
}

View File

@ -1,39 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
"k8s.io/kubernetes/contrib/mesos/pkg/minion"
)
// NewMinion creates a new hyperkube Server object that includes the
// description and flags.
func NewMinion() *Server {
s := minion.NewMinionServer()
hks := Server{
SimpleUsage: hyperkube.CommandMinion,
Long: `Implements a Kubernetes minion. This will launch the proxy and executor.`,
Run: func(hks *Server, args []string) error {
return s.Run(hks, args)
},
}
s.AddMinionFlags(hks.Flags())
s.AddExecutorFlags(hks.Flags())
return &hks
}

View File

@ -1,40 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// clone of the upstream cmd/hyperkube/k8sm-scheduler.go
package main
import (
"k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/service"
)
// NewScheduler creates a new hyperkube Server object that includes the
// description and flags.
func NewScheduler() *Server {
s := service.NewSchedulerServer()
hks := Server{
SimpleUsage: hyperkube.CommandScheduler,
Long: `Implements the Kubernetes-Mesos scheduler. This will launch Mesos tasks which
result in pods assigned to kubelets based on capacity and constraints.`,
Run: func(hks *Server, args []string) error {
return s.Run(hks, args)
},
}
s.AddHyperkubeFlags(hks.Flags())
return &hks
}

View File

@ -1,41 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// clone of the upstream cmd/hyperkube/main.go
package main
import (
"os"
_ "k8s.io/kubernetes/pkg/client/metrics/prometheus" // for client metric registration
_ "k8s.io/kubernetes/pkg/version/prometheus" // for version metric registration
)
func main() {
hk := HyperKube{
Name: "km",
Long: "This is an all-in-one binary that can run any of the various Kubernetes-Mesos servers.",
}
hk.AddServer(NewKubeAPIServer())
hk.AddServer(NewControllerManager())
hk.AddServer(NewScheduler())
hk.AddServer(NewKubeletExecutor())
hk.AddServer(NewKubeProxy())
hk.AddServer(NewMinion())
hk.RunToExit(os.Args)
}

View File

@ -1,40 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// clone of the upstream cmd/hyperkube/kube-apiserver.go
package main
import (
"k8s.io/kubernetes/cmd/kube-apiserver/app"
"k8s.io/kubernetes/cmd/kube-apiserver/app/options"
"k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
)
// NewKubeAPIServer creates a new hyperkube Server object that includes the
// description and flags.
func NewKubeAPIServer() *Server {
s := options.NewAPIServer()
hks := Server{
SimpleUsage: hyperkube.CommandApiserver,
Long: "The main API entrypoint and interface to the storage system. The API server is also the focal point for all authorization decisions.",
Run: func(_ *Server, _ []string) error {
return app.Run(s)
},
}
s.AddFlags(hks.Flags())
return &hks
}

View File

@ -1,52 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// clone of the upstream cmd/hyperkube/kube-proxy.go
package main
import (
"k8s.io/kubernetes/cmd/kube-proxy/app"
"k8s.io/kubernetes/cmd/kube-proxy/app/options"
"k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
)
// NewKubeProxy creates a new hyperkube Server object that includes the
// description and flags.
func NewKubeProxy() *Server {
config := options.NewProxyConfig()
hks := Server{
SimpleUsage: hyperkube.CommandProxy,
Long: `The Kubernetes proxy server is responsible for taking traffic directed at
services and forwarding it to the appropriate pods. It generally runs on
nodes next to the Kubelet and proxies traffic from local pods to remote pods.
It is also used when handling incoming external traffic.`,
}
config.AddFlags(hks.Flags())
hks.Run = func(_ *Server, _ []string) error {
s, err := app.NewProxyServerDefault(config)
if err != nil {
return err
}
return s.Run()
}
return &hks
}

View File

@ -1,82 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// clone of the upstream cmd/hyperkube/server.go
package main
import (
"io/ioutil"
"strings"
"k8s.io/kubernetes/pkg/util"
"github.com/spf13/pflag"
)
type serverRunFunc func(s *Server, args []string) error
// Server describes a server that this binary can morph into.
type Server struct {
SimpleUsage string // One line description of the server.
Long string // Longer free form description of the server
Run serverRunFunc // Run the server. This is not expected to return.
flags *pflag.FlagSet // Flags for the command (and all dependents)
name string
hk *HyperKube
}
// Usage returns the full usage string including all of the flags.
func (s *Server) Usage() error {
tt := `{{if .Long}}{{.Long | trim | wrap ""}}
{{end}}Usage:
{{.SimpleUsage}} [flags]
Available Flags:
{{.Flags.FlagUsages}}`
return util.ExecuteTemplate(s.hk.Out(), tt, s)
}
// Name returns the name of the command as derived from the usage line.
func (s *Server) Name() string {
if s.name != "" {
return s.name
}
name := s.SimpleUsage
i := strings.Index(name, " ")
if i >= 0 {
name = name[:i]
}
return name
}
// Flags returns a flagset for this server
func (s *Server) Flags() *pflag.FlagSet {
if s.flags == nil {
s.flags = pflag.NewFlagSet(s.Name(), pflag.ContinueOnError)
s.flags.SetOutput(ioutil.Discard)
}
return s.flags
}
func (s *Server) FindServer(name string) bool {
if s == nil {
return false
}
_, err := s.hk.FindServer(name)
return err == nil
}

File diff suppressed because one or more lines are too long

View File

@ -1,67 +0,0 @@
# Kubernetes-Mesos Architecture
An [Apache Mesos][1] cluster consists of one or more masters, and one or more slaves.
Kubernetes-Mesos (k8sm) operates as a Mesos framework that runs on the cluster.
As a framework, k8sm provides scheduler and executor components, both of which are hybrids of Kubernetes and Mesos:
the scheduler component integrates the Kubernetes scheduling API and the Mesos scheduler runtime, whereas
the executor component integrates Kubernetes kubelet services and the Mesos executor runtime.
Multiple Mesos masters are typically configured to coordinate leadership election via Zookeeper.
Future releases of Mesos may implement leader election protocols [differently][2].
Kubernetes maintains its internal registry (pods, replication controllers, bindings, nodes, services) in etcd.
Users typically interact with Kubernetes using the `kubectl` command to manage Kubernetes primitives.
When a pod is created in Kubernetes, the k8sm scheduler creates an associated Mesos task and queues it for scheduling.
Upon pairing the pod/task with an acceptable resource offer, the scheduler binds the pod/task to the offer's slave.
As a result of binding, the pod/task is launched and delivered to an executor (an executor is created by the Mesos slave if one is not already running).
The executor launches the pod/task, which registers the bound pod with the kubelet engine, and the kubelet begins to manage the lifecycle of the pod instance.
![Architecture Diagram](architecture.png)
## Scheduling
The scheduling of a pod on Kubernetes on Mesos is essentially a two-phase process:
1. A new pod is noticed by the k8sm-scheduler and possibly matched with a
Mesos offer. Then:
- The offer is *accepted*,
- the pod is *annotated* with a number of annotations, most notably `k8s.mesosphere.io/bindingHost`,
- the pod is *launched* on a Mesos slave.
The existence of the `bindingHost` annotation tells the k8sm-scheduler that this pod has been launched (see the sketch after this list). If it is not set, the pod is considered *new*.
2. The Mesos slave receives the task launch event and starts (if not running yet) the k8sm-executor (possibly via the km hyperkube binary). Then:
- The k8sm-executor *binds* the task to the node via the apiserver, which means that the `NodeName` field is set by the apiserver.
- The k8sm-executor sends the pod to the kubelet which is part of the k8sm-executor process.
- The kubelet launches the containers using Docker.
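To see which of the two phases a pod has reached, one can check for the annotation from step 1. A minimal sketch; `nginx` is a placeholder pod name:
```shell
# An empty result means the k8sm-scheduler still considers the pod *new*;
# otherwise the value is the host the pod/task was launched on.
kubectl get pod nginx -o yaml | grep 'k8s.mesosphere.io/bindingHost'
```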
## Networking
Kubernetes-Mesos uses "normal" Docker IPv4, host-private networking, rather than Kubernetes' SDN-based networking that assigns an IP per pod. This is mostly transparent to the user, especially when using the service abstraction to access pods. For details on some issues it creates, see [issues][3].
![Network Diagram](networking.png)
## Resource Accounting
Mesos is designed to handle resource accounting and enforcement across the cluster. Part of that enforcement involves "growing" and "shrinking" the pool of resources allocated for executor containers.
The implementation of the k8sm-executor launches pods as Docker containers (just like the upstream kubelet). The containers are resource-limited (cpu and memory) by the kubelet code via `docker run`. Moreover, all containers launched by the kubelet code are children of the k8sm-executor cgroup. This parent cgroup is assigned to the k8sm-executor by the Mesos slave.
To actually enforce the defined resource limit for the k8sm-executor and its pods, enable the cpu and memory isolator in your Mesos slaves.
The described resource allocation also applies to static pods, which run on every Mesos slave that runs a k8sm-executor.
Kubernetes allows pods to be defined without cpu and/or memory limits. The upstream kubelet will then run the containers without resource bounds. Because Mesos enforces resource accounting, Kubernetes-Mesos assigns default container cpu and memory limits to such pods. By default these are 0.25 cpu shares and 64 MB of memory. These values can be customized via the `--default-container-cpu-limit` and `--default-container-mem-limit` flags of the k8sm-scheduler.
Note that currently static pods without cpu and memory limits are not allowed and will make the k8sm-scheduler refuse to start (compare the [k8sm issues](issues.md)).
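For example, the defaults could be raised when launching the scheduler (a sketch only; the Mesos master address is a placeholder):
```shell
./bin/km scheduler \
  --mesos-master=zk://zk1:2181,zk2:2181/mesos \
  --default-container-cpu-limit=0.5 \
  --default-container-mem-limit=128
```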
[1]: http://mesos.apache.org/
[2]: https://issues.apache.org/jira/browse/MESOS-1806
[3]: issues.md#service-endpoints
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/contrib/mesos/docs/README.md?pixel)]()
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/contrib/mesos/docs/architecture.md?pixel)]()

Binary file not shown.


File diff suppressed because one or more lines are too long


View File

@ -1,91 +0,0 @@
# Discovery
## DNS
### kube-dns
[**kube-dns**](https://github.com/kubernetes/kubernetes/blob/release-1.1/docs/admin/dns.md) is a Kubernetes add-on that works out of the box with Kubernetes-Mesos.
For details on usage see the implementation in the `cluster/mesos/docker` source tree.
kube-dns provides records both for services and pods.
### mesos-dns
**NOTE:** There is still no support for publishing Kubernetes *services* in mesos-dns.
**mesos-dns** communicates with the leading Mesos master to build a DNS record set that reflects the tasks running in a Mesos cluster as documented here: http://mesosphere.github.io/mesos-dns/docs/naming.html.
As of Kubernetes-Mesos [release v0.7.2](https://github.com/mesosphere/kubernetes/releases/tag/v0.7.2-v1.1.5) there is experimental support in the scheduler to populate a task's *discovery-info* field in order to generate alternative/more friendly record names in mesos-dns, for *pods* only.
To enable this feature, set `--mesos-generate-task-discovery=true` when launching the scheduler.
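In other words, the feature is opt-in on the scheduler command line, for example (a sketch; other flags elided):
```shell
./bin/km scheduler ... --mesos-generate-task-discovery=true
```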
The following discovery-info fields may be set using labels (without a namespace prefix) or else `k8s.mesosphere.io/discovery-XXX` annotations:
* `visibility`: may be `framework`, `external`, or `cluster` (defaults to `cluster`)
* `environment`
* `location`
* `name` (this alters record set generation in *mesos-dns*)
* `version`
If both a label and an annotation are supplied, the value of the annotation is used.
The interpretation of the value of the `name` label (and the `discovery-name` annotation) is a special case: the generated Mesos `discovery-info.name` value will be `${name}.${pod-namespace}.pod`; all other discovery-info values are passed through without modification.
#### Example 1: Use a `name` label on a pod template
```yaml
apiVersion: v1
kind: ReplicationController
metadata:
  name: frontend
spec:
  replicas: 3
  template:
    metadata:
      labels:
        app: guestbook
        tier: frontend
        name: custom-name
    spec:
      containers:
      - name: php-redis
        image: gcr.io/google_samples/gb-frontend:v3
        resources:
          requests:
            cpu: 100m
            memory: 100Mi
        env:
        - name: GET_HOSTS_FROM
          value: dns
        ports:
        - containerPort: 80
```
#### Example 2: Use a `discovery-name` annotation on a pod template
```yaml
apiVersion: v1
kind: ReplicationController
metadata:
  name: frontend
spec:
  replicas: 3
  template:
    metadata:
      labels:
        app: guestbook
        tier: frontend
      annotations:
        k8s.mesosphere.io/discovery-name: custom-name
    spec:
      containers:
      - name: php-redis
        image: gcr.io/google_samples/gb-frontend:v3
        resources:
          requests:
            cpu: 100m
            memory: 100Mi
        env:
        - name: GET_HOSTS_FROM
          value: dns
        ports:
        - containerPort: 80
```
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/contrib/mesos/docs/discovery.md?pixel)]()

View File

@ -1,526 +0,0 @@
# High Availability
Kubernetes on Mesos will eventually support two HA modes:
* [Hot-standby](#hot-standby) (*work-in-progress*)
* [Cold-standby](#cold-standby)
Hot-standby mode is still a work in progress, as the controller manager is not
yet HA-aware (the work is being tracked [here][2]). Nevertheless, we will
describe how hot-standby mode is intended to work. Until that work is done, it is
recommended to use cold-standby mode for HA. In hot-standby
mode all master components (apiserver, controller manager, and scheduler)
actively run on every master node. Additional logic is added to the controller
manager and scheduler to coordinate their access to the etcd backend to deal
with concurrency issues when modifying cluster state. As apiserver does not
modify cluster state, multiple of these can run concurrently without
coordination. When the leader (i.e., the node whose scheduler is active)
crashes, other master nodes will detect the failure after some time and then
elect a new leader.
In cold-standby mode, as in hot-standby mode, the apiserver actively runs
on every master node. However, only one scheduler and one controller manager
run at any instant in time. This is coordinated by a small external program
called `podmaster` that uses etcd to perform leadership selection; only on
the leader node will the `podmaster` start the scheduler and controller
manager. Cold-standby mode is how Kubernetes itself supports HA, and more
information can be found [here][1].
## Hot-standby
### Scheduler
The implementation of the scheduler HA feature includes:
- Checkpointing by default (`--checkpoint`)
- Large failover-timeout by default (`--failover-timeout`)
- Hot-failover w/ multiple scheduler instances (`--ha`)
- Best effort task reconciliation on failover
#### Multiple Instances
Multiple scheduler instances may be run to support a warm-standby scenario in which one scheduler fails and another takes over immediately.
But at any moment in time only one scheduler is actually registered with the leading Mesos master.
Scheduler leader election is implemented using etcd, so it is important to have an HA etcd configuration established for reliable scheduler HA.
It is currently recommended that no more than 2 scheduler instances be running at the same time.
Running more than 2 schedulers at once may work but has not been extensively tested.
YMMV.
#### Failover
Scheduler failover may be triggered by either of the following events:
- loss of leadership when running in HA mode (`--ha`);
- the leading scheduler process receiving a USR1 signal.
It is currently possible to signal failover to a single, non-HA scheduler process.
In this case, if there are problems launching a replacement scheduler process, the cluster may be without a scheduler until another is started manually.
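For example, failover of the leading scheduler can be requested manually (a minimal sketch; `SCHEDULER_PID` is a placeholder for the PID of the leading `km scheduler` process):
```shell
# Sending USR1 asks the leading scheduler to fail over; in HA mode another
# instance should take over leadership shortly afterwards.
kill -USR1 "${SCHEDULER_PID}"
```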
#### How To
##### Command Line Arguments
- `--ha` is required to enable scheduler HA and multi-scheduler leader election.
- `--km-path` or else (`--executor-path` and `--proxy-path`) should reference non-local-file URIs and must be identical across schedulers.
If you have HDFS installed on your slaves then you can specify HDFS URI locations for the binaries:
```shell
$ hdfs dfs -put -f bin/km hdfs:///km
$ ./bin/km scheduler ... --mesos-master=zk://zk1:2181,zk2:2181/mesos --ha --km-path=hdfs:///km
```
**IMPORTANT:** some command line parameters specified for the scheduler process are passed to the Kubelet-executor and so are subject to compatibility tests:
- a Mesos master will not recognize differently configured executors as being compatible, and so...
- a scheduler will refuse to accept any offer for slave resources if there are incompatible executors running on the slave.
Within the scheduler, compatibility is largely determined by comparing executor configuration hashes:
a hash is calculated from a subset of the executor-related command line parameters provided to the scheduler process.
The command line parameters that affect the hash calculation are listed below.
- `--allow-privileged`
- `--api-servers`
- `--auth-path`
- `--cluster-*`
- `--executor-*`
- `--kubelet-*`
- `--km-path`
- `--mesos-cgroup-prefix`
- `--mesos-launch-grace-period`
- `--minion-*`
- `--profiling`
- `--proxy-*`
- `--static-pods-config`
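As an illustration, each scheduler instance could be started with an identical set of hash-relevant flags (a hypothetical sketch; the ZooKeeper, HDFS, and apiserver addresses are placeholders):
```shell
# Run the same command line on every master node; flags from the list above
# must not differ between instances, or their executors will be incompatible.
./bin/km scheduler \
  --ha \
  --mesos-master=zk://zk1:2181,zk2:2181/mesos \
  --km-path=hdfs:///km \
  --api-servers=http://apiserver.example.com:8080
```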
## Cold-standby
Setting up Kubernetes on Mesos in cold-standby mode is similar to Kubernetes in
standalone mode described in [Kubernetes HA][1]. However, special attention is
needed when setting up the K8sm scheduler so that when the currently active
scheduler crashes/dies, a new one can be instantiated and take over the work.
More precisely, the new scheduler needs to be compatible with the executors
that were started previously by the dead scheduler.
### Environment Variables
We will set up the K8sm master on 2 nodes in HA mode. The same steps can be
extended to set up more master nodes to deal with more concurrent failures. We
will define a few environment variables first to describe the testbed
environment.
```
MESOS_IP=192.168.0.1
MESOS_PORT=5050
ETCD_IP=192.168.0.2
ETCD_PORT=4001
K8S_1_IP=192.168.0.3
K8S_2_IP=192.168.0.4
K8S_APISERVER_PORT=8080
K8S_SCHEDULER_PORT=10251
NGINX_IP=192.168.0.5
NGINX_APISERVER_PORT=80
NGINX_SCHEDULER_PORT=81
```
In addition to the 2 K8sm master nodes (`192.168.0.3` and `192.168.0.4`), we also
define a Mesos master at `192.168.0.1`, an etcd server at `192.168.0.2`, and an
Nginx server that load balances between the 2 K8sm master nodes.
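For instance, the Nginx instance could simply reverse-proxy apiserver traffic to both masters (a minimal sketch, assuming Nginx is installed on `$NGINX_IP`; adapt as needed):
```shell
$ cat <<EOF > /etc/nginx/conf.d/k8sm-apiserver.conf
upstream k8sm_apiservers {
  server ${K8S_1_IP}:${K8S_APISERVER_PORT};
  server ${K8S_2_IP}:${K8S_APISERVER_PORT};
}
server {
  listen ${NGINX_APISERVER_PORT};
  location / {
    proxy_pass http://k8sm_apiservers;
  }
}
EOF
```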
### K8sm Container Image
We use podmaster to coordinate leadership selection amongst K8sm masters.
However, podmaster needs to run in a container (preferably in a pod), and on
the leader node the podmaster will start the scheduler and controller
manager, each in its own pod. The podmaster image is pre-built and can
be obtained from `gcr.io/google_containers/podmaster`. An official image that
contains the `km` binary to start the apiserver, scheduler, and controller
manager is not yet available, but one can be built fairly easily.
```shell
$ cat <<EOF >Dockerfile
FROM ubuntu
MAINTAINER Hai Huang <haih@us.ibm.com>
RUN mkdir -p /opt/kubernetes
COPY kubernetes/_output/dockerized/bin/linux/amd64/ /opt/kubernetes
ENTRYPOINT ["/opt/kubernetes/km"]
EOF
$ cat <<EOF >build.sh
#!/bin/bash
K8SM_IMAGE_NAME=haih/k8sm
git clone https://github.com/mesosphere/kubernetes
cd kubernetes
git checkout release-v0.7-v1.1
KUBERNETES_CONTRIB=mesos build/run.sh make
cd ..
sudo docker build -t $K8SM_IMAGE_NAME --no-cache .
EOF
$ chmod 755 build.sh
$ ./build.sh
```
Make sure the Docker engine is running locally, as Kubernetes is compiled
inside a Docker container. The image name and the Kubernetes release to compile
can be changed by modifying the script. After the script has finished running,
there should be a local Docker image called `haih/k8sm` (use `docker images` to
check).
Optionally, we can also push the image to Docker Hub (e.g., `docker push
$K8SM_IMAGE_NAME`) so the image does not have to be built on every K8sm master
node.
**IMPORTANT:** The Mesosphere team currently maintains the stable K8sm release
in a separate [fork][3]. At the time of this writing, the latest stable release
is `release-v0.7-v1.1`.
### Configure ETCD
We assume there's an etcd server on `$ETCD_IP`. Ideally this should be a
cluster of etcd servers running in HA mode backed up by redundant persistent
storage. For testing purposes, on the etcd server one can spin up an etcd
instance in a Docker container.
```shell
$ docker run -d --hostname $(uname -n) --name etcd \
-p ${ETCD_PORT}:${ETCD_PORT} \
quay.io/coreos/etcd:v2.0.12 \
--listen-client-urls http://0.0.0.0:${ETCD_PORT} \
--advertise-client-urls http://${ETCD_IP}:${ETCD_PORT}
```
### Configure Podmaster
Since we plan to run all K8sm components and podmaster in pods, we can use
`kubelet` to bootstrap these pods by specifying a manifests directory.
```shell
$ mkdir -p /etc/kubernetes/manifests/
$ mkdir -p /srv/kubernetes/manifests/
```
Once the kubelet has started, it will check the manifests directory periodically
to see if it needs to start or stop pods. Pods can be started by putting their
specification yaml files into the manifests directory, and subsequently they
can be stopped by removing these yaml files.
```shell
$ cat <<EOF > /etc/kubernetes/manifests/podmaster.yaml
apiVersion: v1
kind: Pod
metadata:
name: kube-podmaster
namespace: kube-system
spec:
hostNetwork: true
containers:
- name: scheduler-elector
image: gcr.io/google_containers/podmaster:1.1
command:
- /podmaster
- --etcd-servers=http://${ETCD_IP}:${ETCD_PORT}
- --key=scheduler
- --whoami=${MY_IP}
- --source-file=/src/manifests/scheduler.yaml
- --dest-file=/dst/manifests/scheduler.yaml
volumeMounts:
- mountPath: /src/manifests
name: manifest-src
readOnly: true
- mountPath: /dst/manifests
name: manifest-dst
- name: controller-manager-elector
image: gcr.io/google_containers/podmaster:1.1
command:
- /podmaster
- --etcd-servers=http://${ETCD_IP}:${ETCD_PORT}
- --key=controller
- --whoami=${MY_IP}
- --source-file=/src/manifests/controller-mgr.yaml
- --dest-file=/dst/manifests/controller-mgr.yaml
terminationMessagePath: /dev/termination-log
volumeMounts:
- mountPath: /src/manifests
name: manifest-src
readOnly: true
- mountPath: /dst/manifests
name: manifest-dst
volumes:
- hostPath:
path: /srv/kubernetes/manifests
name: manifest-src
- hostPath:
path: /etc/kubernetes/manifests
name: manifest-dst
EOF
```
One must set `$MY_IP` to either `$K8S_1_IP` or `$K8S_2_IP`, depending on which
master node the podmaster is being set up on. The podmasters will compete with
each other for leadership, and the winner will copy the scheduler and
controller manager pod specification yaml files from
`/srv/kubernetes/manifests/` to `/etc/kubernetes/manifests/`. When the kubelet
detects these new yaml files, it will start the corresponding pods.
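Because the manifest above is generated via an unquoted heredoc, `${MY_IP}` is expanded when the file is written. For example, on the first master one might do the following before running the `cat` command above (a sketch; use `$K8S_2_IP` on the second master):
```shell
$ export MY_IP=${K8S_1_IP}   # on the second master: export MY_IP=${K8S_2_IP}
$ # ...re-run the heredoc above, then confirm the substitution took effect:
$ grep -- "--whoami" /etc/kubernetes/manifests/podmaster.yaml
```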
### Configure Scheduler
The scheduler pod specification will be put into `/srv/kubernetes/manifests/`.
```shell
$ cat <<EOF > /srv/kubernetes/manifests/scheduler.yaml
apiVersion: v1
kind: Pod
metadata:
name: kube-scheduler
namespace: kube-system
spec:
hostNetwork: true
containers:
- name: kube-scheduler
image: haih/k8sm:latest
imagePullPolicy: IfNotPresent
command:
- /opt/kubernetes/km
- scheduler
- --address=${MY_IP}
- --advertised-address=${NGINX_IP}:${NGINX_SCHEDULER_PORT}
- --mesos-master=${MESOS_IP}:${MESOS_PORT}
- --etcd-servers=http://${ETCD_IP}:${ETCD_PORT}
- --api-servers=${NGINX_IP}:${NGINX_APISERVER_PORT}
- --v=10
EOF
```
Again, one must set `$MY_IP` to either `$K8S_1_IP` or `$K8S_2_IP`, depending
on which master node is currently being configured. Even though we have not set
up Nginx yet, we can already specify `--api-servers` and `--advertised-address`
using Nginx's address and ports (just make sure Nginx is running before
starting the scheduler). Having `--api-servers` point to Nginx allows executors
to keep talking to an apiserver even when one or more apiservers are down,
because Nginx automatically re-routes requests to a working apiserver.
It is critically important to point `--advertised-address` at Nginx so that
all schedulers are assigned the same executor ID. If we instead set
`--advertised-address=${K8S_1_IP}` on the first K8s master and
`--advertised-address=${K8S_2_IP}` on the second, they would generate different
executor IDs. During a fail-over, the new scheduler would then be unable to
reuse the executors started by the failed scheduler, and the scheduler log
would show an error such as:
> Declining incompatible offer...
### Configure Controller Manager
The controller manager pod specification will also be put into `/srv/kubernetes/manifests/`.
```shell
$ cat <<EOF > /srv/kubernetes/manifests/controller-mgr.yaml
apiVersion: v1
kind: Pod
metadata:
name: kube-controller-manager
namespace: kube-system
spec:
hostNetwork: true
containers:
- name: kube-controller-manager
image: haih/k8sm:latest
imagePullPolicy: IfNotPresent
command:
- /opt/kubernetes/km
- controller-manager
- --master=http://${NGINX_IP}:${NGINX_APISERVER_PORT}
- --cloud-provider=mesos
- --cloud-config=/etc/kubernetes/mesos-cloud.conf
volumeMounts:
- mountPath: /etc/kubernetes
name: kubernetes-config
readOnly: true
volumes:
- hostPath:
path: /etc/kubernetes
name: kubernetes-config
EOF
```
The controller manager also needs a Mesos cloud configuration file as one of
its parameters; this file is written to `/etc/kubernetes/mesos-cloud.conf`.
```shell
$ cat <<EOF >/etc/kubernetes/mesos-cloud.conf
[mesos-cloud]
mesos-master = ${MESOS_IP}:${MESOS_PORT}
EOF
```
### Configure Apiserver
The apiserver runs on every master node, so its pod specification file is put
into `/etc/kubernetes/manifests/`.
```shell
cat <<EOF > /etc/kubernetes/manifests/apiserver.yaml
apiVersion: v1
kind: Pod
metadata:
name: kube-apiserver
namespace: kube-system
spec:
hostNetwork: true
containers:
- name: kube-apiserver
image: haih/k8sm:latest
imagePullPolicy: IfNotPresent
command:
- /opt/kubernetes/km
- apiserver
- --insecure-bind-address=0.0.0.0
- --etcd-servers=http://${ETCD_IP}:${ETCD_PORT}
- --allow-privileged=true
- --service-cluster-ip-range=10.10.10.0/24
- --insecure-port=${K8S_APISERVER_PORT}
- --cloud-provider=mesos
- --cloud-config=/etc/kubernetes/mesos-cloud.conf
- --advertise-address=${MY_IP}
ports:
- containerPort: ${K8S_APISERVER_PORT}
hostPort: ${K8S_APISERVER_PORT}
name: local
volumeMounts:
- mountPath: /etc/kubernetes
name: kubernetes-config
readOnly: true
volumes:
- hostPath:
path: /etc/kubernetes
name: kubernetes-config
EOF
```
Again, one must set `$MY_IP` to either `$K8S_1_IP` or `$K8S_2_IP`, depending
on which master node is currently being configured.
To summarize the setup so far: the apiserver and podmaster pod specification
files live in `/etc/kubernetes/manifests/`, so those pods run on every master
node. The scheduler and controller manager pod specification files live in
`/srv/kubernetes/manifests/` and are copied into `/etc/kubernetes/manifests/`
by the podmaster on whichever node is elected leader.
### Configure Nginx
Nginx needs to be configured to load balance both the apiservers and the schedulers.
For testing purposes, one can start Nginx in a Docker container.
```shell
cat <<EOF >nginx.conf
events {
worker_connections 4096; ## Default: 1024
}
http {
upstream apiservers {
server ${K8S_1_IP}:${K8S_APISERVER_PORT};
server ${K8S_2_IP}:${K8S_APISERVER_PORT};
}
upstream schedulers {
server ${K8S_1_IP}:${K8S_SCHEDULER_PORT};
server ${K8S_2_IP}:${K8S_SCHEDULER_PORT};
}
server {
listen ${NGINX_APISERVER_PORT};
location / {
proxy_pass http://apiservers;
proxy_next_upstream error timeout invalid_header http_500;
proxy_connect_timeout 2;
proxy_buffering off;
proxy_read_timeout 12h;
proxy_send_timeout 12h;
}
}
server {
listen ${NGINX_SCHEDULER_PORT};
location / {
proxy_pass http://schedulers;
proxy_next_upstream error timeout invalid_header http_500;
proxy_connect_timeout 2;
proxy_buffering off;
proxy_read_timeout 12h;
proxy_send_timeout 12h;
}
}
}
EOF
$ docker run \
-p $NGINX_APISERVER_PORT:$NGINX_APISERVER_PORT \
-p $NGINX_SCHEDULER_PORT:$NGINX_SCHEDULER_PORT \
--name nginx \
-v `pwd`/nginx.conf:/etc/nginx/nginx.conf:ro \
-d nginx:latest
```
For the sake of simplicity, configuring Nginx to support HTTP over TLS/SPDY is
outside the scope of this guide. However, keep in mind that without TLS/SPDY
properly configured, some `kubectl` commands might not work properly. This
problem is documented [here][4].
### Start Kubelet
To start everything up, we need to start the kubelet on both K8s master nodes
so that it can launch the apiserver and podmaster pods. On the leader node, the
podmaster will subsequently start the scheduler and controller manager.
```shell
$ mkdir -p /var/log/kubernetes
$ kubelet \
--api_servers=http://127.0.0.1:${K8S_APISERVER_PORT} \
--register-node=false \
--allow-privileged=true \
--config=/etc/kubernetes/manifests \
1>/var/log/kubernetes/kubelet.log 2>&1 &
```
### Verification
On each of the K8s master nodes, one can run `docker ps` to verify that an
apiserver pod and a podmaster pod are running; on exactly one of the master
nodes, a controller manager pod and a scheduler pod should also be running.
One should also verify that user pods can be created in the K8sm cluster:
```shell
$ export KUBERNETES_MASTER=http://${NGINX_IP}:${NGINX_APISERVER_PORT}
$ kubectl create -f <userpod yaml file>
$ kubectl get pods
```
The pod should reach the `Running` state after a short amount of time.
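For a quick smoke test, a minimal pod specification such as the following can stand in for `<userpod yaml file>`; the pod name and image are arbitrary examples:
```shell
$ cat <<EOF >userpod.yaml
apiVersion: v1
kind: Pod
metadata:
  name: test-pod
spec:
  containers:
  - name: nginx
    image: nginx
EOF
$ kubectl create -f userpod.yaml
```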
### Tuning
In cold-standby mode, a fail-over takes some time before a new scheduler can
be started to take over the work from the failed one. However, various
parameters can be tuned to shorten this time.
The podmaster has `--sleep` and `--ttl-secs` parameters that can be lowered
for faster failure detection; however, setting `--ttl-secs` too low increases
the risk of false positives.
The kubelet has a `--file-check-frequency` parameter that controls how
frequently it checks the manifests directory; it defaults to 20 seconds.
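As a concrete illustration only (the values below are examples, not recommendations, and flag formats may differ between versions), shortening the detection intervals might look like this:
```shell
# podmaster arguments inside podmaster.yaml:
#   --sleep=5s
#   --ttl-secs=30
# kubelet polling the manifests directory more often than the 20s default:
$ kubelet ... --file-check-frequency=5s ...
```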
[1]: http://kubernetes.io/v1.0/docs/admin/high-availability.html
[2]: https://github.com/mesosphere/kubernetes-mesos/issues/457
[3]: https://github.com/mesosphere/kubernetes
[4]: https://github.com/kubernetes/kubernetes/blob/master/contrib/mesos/docs/issues.md#kubectl
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/contrib/mesos/docs/ha.md?pixel)]()

View File

@ -1,232 +0,0 @@
## Known Issues
This page identifies significant known issues with the Kubernetes-Mesos distribution.
* [General Known Issues](#general-known-issues)
* [DCOS Package Known Issues](#dcos-package-known-issues), in addendum to the above.
## General Known Issues
These known issues apply to all builds of Kubernetes-Mesos.
### Upgrades
Upgrading your Kubernetes-Mesos cluster is currently unsupported.
One known problem exists with respect to expressing executor (kubelet and kube-proxy) process configuration via command line flags.
It is **strongly** recommended that all of the Kubernetes-Mesos executors are destroyed before upgrading the Kubernetes-Mesos scheduler component:
- destroy all daemon controllers running in the cluster, across all namespaces
- destroy all replication controllers running in the cluster, across all namespaces
- destroy all pods running in the cluster, across all namespaces
- invoke the "kamikaze" debug endpoint on the scheduler (e.g. `curl http://10.2.0.5:10251/debug/actions/kamikaze`) to terminate all executors
Not following the above steps prior to upgrading the scheduler can result in a cluster wherein pods will never again be scheduled upon one or more nodes.
This issue is being tracked here: https://github.com/mesosphere/kubernetes-mesos/issues/572.
### Netfilter Connection Tracking
The scheduler offers flags to tweak connection tracking for kube-proxy instances that are launched on slave nodes:
- conntrack-max (do **NOT** set this to a non-zero value if the Mesos slave process is running in a non-root network namespace)
- conntrack-tcp-timeout-established
By default both of these are set to 0 when running Kubernetes-Mesos.
Setting either of these flags to non-zero values may impact connection tracking for the entire slave.
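If the Mesos slave does run in the root network namespace and you want kube-proxy to manage connection tracking, the settings can be passed through the scheduler. The values below are purely illustrative; check `km scheduler --help` for the exact flag types in your build:
```shell
$ km scheduler ... \
    --conntrack-max=131072 \
    --conntrack-tcp-timeout-established=86400
```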
### Port Specifications
In order for pods (replicated, or otherwise) to be scheduled on the cluster, it is strongly recommended that:
* `pod.spec.containers[x].ports[y].hostPort` be left unspecified (or zero), or else;
* `pod.spec.containers[x].ports[y].hostPort` exists in the range of `ports` resources declared on Mesos slaves
- double-check the resource declarations for your Mesos slaves, the default for `ports` is typically `[31000-32000]`
Mesos slave host `ports` are resources that are managed by the Mesos resource/offers ecosystem; slave host ports are consumed by launched tasks.
Kubernetes pod container specifications identify two types of ports, "container ports" and "host ports":
- container ports are allocated from the network namespace of the pod, which is independent from that of the host, whereas;
- host ports are allocated from the network namespace of the host.
**Notable on Kubernetes-Mesos**
- Mesos slaves must be configured to offer host `ports` resources in order for pods to use them. Most Mesos package distributions, by default, configure a `ports` resource range for each slave.
- The scheduler recognizes the declared *host ports* of each container in a pod/task and for each such host port, attempts to allocate it from the offered port resources listed in Mesos offers.
- If no host port is declared for a given port spec, then the scheduler may map that port spec's container port to any host port from the offered ports ranges.
- Any *host ports* explicitly declared in the pod container specification must fall within that range of `ports` offered by slaves in the cluster.
Ports declared outside that range (other than zero) will never match resource offers received by the scheduler, and so pod specifications that declare such ports will never be executed as tasks on the cluster.
- A missing pod container host port declaration or a host port set to zero will, by default, result in the allocation of a host port from a resource offer.
- If a pod is the target of a Kubernetes service selector then the related target container ports must be declared in the pod spec.
- In vanilla Kubernetes, host ports with the value zero are ignored.
To obtain the same behavior with the Kubernetes-Mesos scheduler, pods must be assigned a label of `k8s.mesosphere.io/portMapping` with the value `fixed`
(see [#527](https://github.com/mesosphere/kubernetes-mesos/issues/527)).
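For example, a pod that pins a host port inside the default slave `ports` range might look like the following sketch (the name, image, and port values are placeholders; adjust the host port to your slaves' actual `ports` resource range):
```yaml
apiVersion: v1
kind: Pod
metadata:
  name: hostport-example
spec:
  containers:
  - name: web
    image: nginx
    ports:
    - containerPort: 80
      hostPort: 31080   # must fall within the slaves' offered ports range, e.g. [31000-32000]
```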
### Pods
#### Pod Updates
Once a task has been launched for a given pod, Kubernetes-Mesos is blind to any updates applied to the pod state (other than forced or graceful deletion).
#### Pod Placement
The initial plan was to implement pod placement (aka scheduling "constraints") using rules similar to those found in Marathon.
Upon further consideration it has been decided that a greater alignment between the stock Kubernetes scheduler and Kubernetes-Mesos scheduler would benefit both projects, as well as end-users.
Currently there is limited support for pod placement using the Kubernetes-Mesos [scheduler](scheduler.md).
This issue is being tracked here: https://github.com/mesosphere/kubernetes-mesos/issues/338
**Note:** An upcoming changeset will update the scheduler with initial support for multiple Mesos roles
(see [#482](https://github.com/mesosphere/kubernetes-mesos/issues/482)).
#### Static Pods
Static pods are supported by the scheduler.
The path to a directory containing pod definitions can be set via the `--static-pods-config` flag.
Static pods are subject to the following restrictions:
- Static pods are *read only once* by the scheduler, at startup.
Only newly started executors will get the latest static pod specs from the configured static pod directory.
#### Orphan Pods
The default `executor_shutdown_grace_period` of a Mesos slave is 3 seconds.
When the executor is shut down it forcefully terminates the Docker containers that it manages.
However, if terminating the Docker containers takes longer than the `executor_shutdown_grace_period` then some containers may not get a termination signal at all.
A consequence of this is that some pod containers, previously managed by the framework's executor, will remain running on the slave indefinitely.
There are two work-arounds to this problem:
* Restart the framework and it should terminate the orphaned tasks.
* Adjust the value of `executor_shutdown_grace_period` to something greater than 3 seconds.
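For the second work-around, the grace period is a Mesos slave flag; an illustrative (not prescriptive) setting could be:
```shell
$ mesos-slave ... --executor_shutdown_grace_period=30secs
```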
### Services
#### Port Specifications
In order for Endpoints (therefore, Services) to be fully operational, it is strongly recommended that:
- service ports explicitly define a `name`
- service ports explicitly define a `targetPort`
For example:
```yaml
apiVersion: v1
kind: Service
metadata:
name: redis-master
labels:
app: redis
role: master
tier: backend
spec:
ports:
# the port that this service should serve on
- port: 6379
targetPort: 6379
name: k8sm-works-best-with-a-name-here
selector:
app: redis
role: master
tier: backend
```
#### Endpoints
At the time of this writing both Kubernetes and Mesos are using IPv4 addressing, albeit under different assumptions.
Mesos clusters configured with Docker typically use default Docker networking, which is host-private.
Kubernetes clusters assume a custom Docker networking configuration that assigns a cluster-routable IPv4 address to each pod, meaning that a process running anywhere on a Kubernetes cluster can reach a pod running on the same cluster by using the pod's Docker-assigned IPv4 address.
Kubernetes service endpoints terminate, by default, at a backing pod's IPv4 address using the container-port selected for in the service specification (PodIP:ContainerPort).
This is problematic when default Docker networking has been configured, such as in the case of typical Mesos clusters, because a pod's host-private IPv4 address is not intended to be reachable outside of its host.
The Kubernetes-Mesos project has implemented a work-around:
service endpoints are terminated at HostIP:HostPort, where the HostIP is the IP address of the Mesos slave and the HostPort is the host port declared in the pod container port specification.
Host ports that are not defined, or else defined as zero, will automatically be assigned a (host) port resource from a resource offer.
To disable the work-around and revert to vanilla Kubernetes service endpoint termination:
- execute the k8sm scheduler with `-host-port-endpoints=false`
- execute the k8sm controller-manager with `-host-port-endpoints=false`
Then the usual Kubernetes network assumptions must be fulfilled for Kubernetes to work with Mesos, i.e. each container must get a cluster-wide routable IP (compare [Kubernetes Networking documentation](../../../docs/design/networking.md#container-to-container)).
This workaround may be mitigated down the road by:
- Future support for IPv6 addressing in Docker and Kubernetes
- Native IP-per-container support via Mesos with a custom Kubernetes network plugin
### Scheduling
Statements in this section regarding the "scheduler" pertain specifically to the Kubernetes-Mesos scheduler, unless otherwise noted.
Some factors that influence when pods are scheduled by k8s-mesos:
- availability of a resource offer that "fits" the pod (mesos master/slave);
- scheduler *backoff* (to avoid busy-looping) during pod scheduling (k8s-mesos scheduler)
The scheduler attempts to mitigate the second item by cancelling the backoff period if an offer arrives that fits a pod-in-waiting.
However, there is nothing that the scheduler can do if there are no resources available in the cluster.
That said, the current scheduling algorithm is naive: it makes **no attempts to pack multiple pods into a single offer**.
This means that each pod launch requires an independent offer.
In a small cluster resource offers do not arrive very frequently.
In a large cluster with a "decent" amount of free resources the arrival rate of offers is expected to be much higher.
The slave on each host announces offers to Mesos periodically.
In a single node cluster only a single slave process is advertising resources to the master.
The master will pass those along to the scheduler, at some interval and level of 'fairness' determined by mesos.
That scheduler will pair each resource offer with a pod that needs to be placed in the cluster.
Once paired, a task is launched to instantiate the pod.
The used resources will be marked as consumed, the remaining resources are "returned" to the cluster and the scheduler will wait for the next resource offer from the master... and the cycle repeats itself.
This likely limits the scheduling throughput observable in a single-node cluster.
The team plans to conduct benchmarks on the scheduling algorithm to establish some baselines, and is thinking about ways to increase scheduling throughput, including scheduling multiple pods per offer.
#### Runtime Configuration
- mesos: `--offer_timeout` : Duration of time before an offer is rescinded from a framework.
This helps fairness when running frameworks that hold on to offers, or frameworks that accidentally drop offers.
([via](http://mesos.apache.org/documentation/latest/configuration/))
- k8s-mesos `--scheduler-config` : An ini-style configuration file with low-level scheduler settings.
See `offer-ttl`, `initial-pod-backoff`, and `max-pod-backoff`.
([via](https://github.com/kubernetes/kubernetes/blob/master/contrib/mesos/pkg/scheduler/config/config.go))
What is not configurable, but perhaps should be, are the mesos "filters" that the scheduler includes when declining offers that are not matched to pods within the configured `offer-ttl` (see https://github.com/apache/mesos/blob/0.25.0/include/mesos/mesos.proto#L1165): the current `refuse_seconds` value is hard-coded to 5s.
That parameter should probably be exposed via the scheduler fine tuning mechanism.
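As an illustration, a minimal tuning file touching only the settings named above could be passed to the scheduler via `--scheduler-config` like this (values are examples, not recommendations):
```shell
$ cat <<EOF >scheduler.ini
[scheduler]
offer-ttl = 10s
initial-pod-backoff = 1s
max-pod-backoff = 30s
EOF
$ km scheduler ... --scheduler-config=scheduler.ini
```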
#### Backoff
If no matching resource offer can be found for a pod then that pod is put into a backoff queue.
Once the backoff period expires the pod is re-added to the scheduling queue.
The backoff period may be truncated by the arrival of an offer with matching resources.
This is an event-based design and there is no polling.
#### Debugging
Good insight may be achieved when all of the relevant logs are collected into a single tool (Splunk, or an ELK stack) in a manner such that it is trivial to search for something along the lines of a task-id or pod-id during cluster debugging sessions.
The scheduler also offers `/debug` API endpoints that may be useful:
- on-demand explicit reconciliation: /debug/actions/requestExplicit
- on-demand implicit reconciliation: /debug/actions/requestImplicit
- kamikaze (terminate all "empty" executors that aren't running pods): /debug/actions/kamikaze
- pods to be scheduled: /debug/scheduler/podqueue
- pod registry changes waiting to be processed: /debug/scheduler/podstore
- schedulers internal task registry state: /debug/registry/tasks
- scheduler metrics are available at /metrics
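For example, assuming the scheduler is reachable on its default port 10251, the pending pod queue and the metrics can be inspected with plain HTTP GETs (replace the address with your scheduler's):
```shell
$ curl http://10.2.0.5:10251/debug/scheduler/podqueue
$ curl http://10.2.0.5:10251/metrics
```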
## DCOS Package Known Issues
All of the issues in the above section also apply to the Kubernetes-Mesos DCOS package builds.
The issues listed in this section apply specifically to the Kubernetes-Mesos DCOS package available from https://github.com/mesosphere/multiverse.
### Etcd
The default configuration of the DCOS Kubernetes package launches an internal etcd process **which only persists the cluster state in the sandbox of the current container instance**. While this is simpler for the first steps with Kubernetes-Mesos, it means that any cluster state is lost when the Kubernetes-Mesos Docker container is restarted.
Hence, for any kind of production-like deployment it is highly recommended to install the etcd DCOS package alongside Kubernetes-Mesos and
to configure the latter to use the etcd cluster. Further instructions
can be found at https://docs.mesosphere.com/services/kubernetes/#install.
This situation will eventually go away as soon as DCOS supports package dependencies and/or interactive package configuration.
### Kubectl
The following `kubectl` and `dcos kubectl` commands are not yet supported:
- exec (see [#356](https://github.com/mesosphere/kubernetes-mesos/issues/356))
- logs (see [#587](https://github.com/mesosphere/kubernetes-mesos/issues/587))
- port-forward
- proxy
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/contrib/mesos/docs/issues.md?pixel)]()

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.1 KiB

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 46 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 97 KiB

View File

@ -1,178 +0,0 @@
# Kubernetes-Mesos Scheduler
Kubernetes on Mesos does not use the upstream scheduler binary, but replaces it
with its own Mesos framework scheduler. The following gives an overview of
the differences.
## Labels and Mesos Agent Attributes
The scheduler of Kubernetes-Mesos takes [labels][1] into account: it matches
the labels specified in pod specs against the labels defined on nodes.
In addition to user defined labels, [attributes of Mesos agents][2] are converted
into node labels by the scheduler, following the pattern
```yaml
k8s.mesosphere.io/attribute-<name>: value
```
As an example, a Mesos agent attribute of `generation:2015` will result in the node label
```yaml
k8s.mesosphere.io/attribute-generation: 2015
```
and can be used to schedule pods onto nodes which are of generation 2015.
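For illustration, a pod could then target such nodes with a standard `nodeSelector`; the pod name and image below are placeholders:
```yaml
apiVersion: v1
kind: Pod
metadata:
  name: gen-2015-pod
spec:
  nodeSelector:
    k8s.mesosphere.io/attribute-generation: "2015"
  containers:
  - name: app
    image: nginx
```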
**Note:** Node labels prefixed by `k8s.mesosphere.io` are managed by
Kubernetes-Mesos and should not be modified manually by the user or admin. For
example, the Kubernetes-Mesos executor manages `k8s.mesosphere.io/attribute`
labels and will auto-detect and update modified attributes when the mesos-slave
is restarted.
## Resource Roles
A Mesos cluster can be statically partitioned using [resource roles][2]. Each
resource is assigned such a role (`*` is the default role, if none is explicitly
assigned in the mesos-slave command line). The Mesos master will send offers to
frameworks for `*` resources and optionally one additional role that a
framework is assigned to. Right now only one such additional role for a framework is
supported.
### Configuring Roles for the Scheduler
Every Mesos framework scheduler can choose among offered `*` resources and
optionally one additional role. The Kubernetes-Mesos scheduler supports this by setting
the framework roles in the scheduler command line, e.g.
```bash
$ km scheduler ... --mesos-framework-roles="*,role1" ...
```
This permits the Kubernetes-Mesos scheduler to accept offered resources for the `*` and `role1` roles.
By default pods may be assigned any combination of resources for the roles accepted by the scheduler.
This default role assignment behavior may be overridden using the `--mesos-default-pod-roles` flag or
else by annotating the pod (as described later).
One can configure default pod roles, e.g.
```bash
$ km scheduler ... --mesos-default-pod-roles="role1" ...
```
This will tell the Kubernetes-Mesos scheduler to default to `role1` resource offers.
The configured default pod roles must be a subset of the configured framework roles.
The order of configured default pod roles is relevant,
`--mesos-default-pod-roles=role1,*` will first try to consume `role1` resources
from an offer and, once depleted, fall back to `*` resources.
The configuration `--mesos-default-pod-roles=*,role1` has the reverse behavior.
It first tries to consume `*` resources from an offer and, once depleted, falls
back to `role1` resources.
Due to restrictions of Mesos, currently only one additional role next to `*` can be configured
for both framework and default pod roles.
### Specifying Roles for Pods
By default a pod is scheduled using resources as specified using the
`--mesos-default-pod-roles` configuration.
A pod can override this default behaviour using a `k8s.mesosphere.io/roles`
annotation:
```yaml
k8s.mesosphere.io/roles: "*,role1"
```
The format is a comma separated list of allowed resource roles. The scheduler
will try to schedule the pod with `*` resources first, using `role1`
resources if the former are not available or are depleted.
**Note:** An empty list means that no resource roles are allowed, which renders
the pod unschedulable.
For example:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: backend
annotations:
k8s.mesosphere.io/roles: "*,public"
namespace: prod
spec:
...
```
This `*/public` pod will be scheduled using resources from both roles,
preferably using `*` resources, followed by `public`. If none
of those roles provides enough resources, the scheduling fails.
**Note:** The scheduler also allows mixing different roles in the following
sense: if a node provides `cpu` resources for the `*` role, but `mem` resources
only for the `public` role, the above pod will be scheduled using `cpu(*)` and
`mem(public)` resources.
**Note:** The scheduler might also mix roles within one resource type, i.e. it
will use as many `cpu`s of the `*` role as possible. If a pod requires even
more `cpu` resources (defined via the `pod.spec.resources.limits` property) for
successful scheduling, the scheduler will add resources from the `public` role
until the pod's resource requirements are satisfied. For example, a pod might
be scheduled with 0.5 `cpu(*)` and 1.5 `cpu(public)`, plus 2 GB of
`mem(public)`.
## Tuning
The scheduler configuration can be fine-tuned using an ini-style configuration file.
The filename is passed via `--scheduler-config` to the `km scheduler` command.
Be warned, though, that some of them are pretty low-level, and one has to know
the inner workings of k8sm to find sensible values. Moreover, these settings
may change or even disappear from one version to the next without notice.
The following settings are the defaults:
```
[scheduler]
; duration an offer is viable, prior to being expired
offer-ttl = 5s
; duration an expired offer lingers in history
offer-linger-ttl = 2m
; duration between offer listener notifications
listener-delay = 1s
; size of the pod updates channel
updates-backlog = 2048
; interval we update the frameworkId stored in etcd
framework-id-refresh-interval = 30s
; wait this amount of time after initial registration before attempting
; implicit reconciliation
initial-implicit-reconciliation-delay = 15s
; interval in between internal task status checks/updates
explicit-reconciliation-max-backoff = 2m
; waiting period after attempting to cancel an ongoing reconciliation
explicit-reconciliation-abort-timeout = 30s
initial-pod-backoff = 1s
max-pod-backoff = 60s
http-handler-timeout = 10s
http-bind-interval = 5s
```
## Low-Level Scheduler Architecture
![Scheduler Structure](scheduler.png)
[1]: ../../../docs/user-guide/labels.md
[2]: http://mesos.apache.org/documentation/attributes-resources/
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/contrib/mesos/docs/scheduler.md?pixel)]()

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 217 KiB

View File

@ -1,43 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package assert
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// EventuallyTrue asserts that the given predicate becomes true within the given timeout. It
// checks the predicate regularly each 100ms.
func EventuallyTrue(t *testing.T, timeout time.Duration, fn func() bool, msgAndArgs ...interface{}) bool {
start := time.Now()
for {
if fn() {
return true
}
if time.Now().Sub(start) > timeout {
if len(msgAndArgs) > 0 {
return assert.Fail(t, msgAndArgs[0].(string), msgAndArgs[1:]...)
} else {
return assert.Fail(t, "predicate fn has not been true after %v", timeout.String())
}
}
time.Sleep(100 * time.Millisecond)
}
}

View File

@ -1,19 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package assert is a utility package containing reusable testing functionality
// extending github.com/stretchr/testify/assert
package assert // import "k8s.io/kubernetes/contrib/mesos/pkg/assert"

View File

@ -1,96 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package backoff
import (
"math/rand"
"sync"
"time"
log "github.com/golang/glog"
)
type clock interface {
Now() time.Time
}
type realClock struct{}
func (realClock) Now() time.Time {
return time.Now()
}
type backoffEntry struct {
backoff time.Duration
lastUpdate time.Time
}
type Backoff struct {
perItemBackoff map[string]*backoffEntry
lock sync.Mutex
clock clock
defaultDuration time.Duration
maxDuration time.Duration
}
func New(initial, max time.Duration) *Backoff {
return &Backoff{
perItemBackoff: map[string]*backoffEntry{},
clock: realClock{},
defaultDuration: initial,
maxDuration: max,
}
}
func (p *Backoff) getEntry(id string) *backoffEntry {
p.lock.Lock()
defer p.lock.Unlock()
entry, ok := p.perItemBackoff[id]
if !ok {
entry = &backoffEntry{backoff: p.defaultDuration}
p.perItemBackoff[id] = entry
}
entry.lastUpdate = p.clock.Now()
return entry
}
func (p *Backoff) Get(id string) time.Duration {
entry := p.getEntry(id)
duration := entry.backoff
entry.backoff *= 2
if entry.backoff > p.maxDuration {
entry.backoff = p.maxDuration
}
//TODO(jdef) parameterize use of jitter?
// add jitter, get better backoff distribution
duration = time.Duration(rand.Int63n(int64(duration)))
log.V(3).Infof("Backing off %v for pod %s", duration, id)
return duration
}
// Garbage collect records that have aged past maxDuration. Backoff users are expected
// to invoke this periodically.
func (p *Backoff) GC() {
p.lock.Lock()
defer p.lock.Unlock()
now := p.clock.Now()
for id, entry := range p.perItemBackoff {
if now.Sub(entry.lastUpdate) > p.maxDuration {
delete(p.perItemBackoff, id)
}
}
}

View File

@ -1,19 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package backoff provides backoff functionality with a simple API.
// Originally copied from Kubernetes: plugin/pkg/scheduler/factory/factory.go
package backoff // import "k8s.io/kubernetes/contrib/mesos/pkg/backoff"

View File

@ -1,371 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package controllermanager
import (
"fmt"
"io/ioutil"
"math/rand"
"net"
"net/http"
"strconv"
"time"
kubecontrollermanager "k8s.io/kubernetes/cmd/kube-controller-manager/app"
"k8s.io/kubernetes/cmd/kube-controller-manager/app/options"
"k8s.io/kubernetes/contrib/mesos/pkg/node"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/unversioned"
"k8s.io/kubernetes/pkg/apimachinery/registered"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
"k8s.io/kubernetes/pkg/client/restclient"
"k8s.io/kubernetes/pkg/client/typed/dynamic"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/client/unversioned/clientcmd"
clientcmdapi "k8s.io/kubernetes/pkg/client/unversioned/clientcmd/api"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/cloudprovider/providers/mesos"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/daemon"
"k8s.io/kubernetes/pkg/controller/deployment"
endpointcontroller "k8s.io/kubernetes/pkg/controller/endpoint"
"k8s.io/kubernetes/pkg/controller/informers"
"k8s.io/kubernetes/pkg/controller/job"
namespacecontroller "k8s.io/kubernetes/pkg/controller/namespace"
nodecontroller "k8s.io/kubernetes/pkg/controller/node"
"k8s.io/kubernetes/pkg/controller/podautoscaler"
"k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
"k8s.io/kubernetes/pkg/controller/podgc"
replicaset "k8s.io/kubernetes/pkg/controller/replicaset"
replicationcontroller "k8s.io/kubernetes/pkg/controller/replication"
resourcequotacontroller "k8s.io/kubernetes/pkg/controller/resourcequota"
routecontroller "k8s.io/kubernetes/pkg/controller/route"
servicecontroller "k8s.io/kubernetes/pkg/controller/service"
serviceaccountcontroller "k8s.io/kubernetes/pkg/controller/serviceaccount"
persistentvolumecontroller "k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
"k8s.io/kubernetes/pkg/healthz"
quotainstall "k8s.io/kubernetes/pkg/quota/install"
"k8s.io/kubernetes/pkg/serviceaccount"
certutil "k8s.io/kubernetes/pkg/util/cert"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/contrib/mesos/pkg/profile"
kmendpoint "k8s.io/kubernetes/contrib/mesos/pkg/service"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
"github.com/spf13/pflag"
)
const (
// Jitter used when starting controller managers
ControllerStartJitter = 1.0
)
// CMServer is the main context object for the controller manager.
type CMServer struct {
*options.CMServer
UseHostPortEndpoints bool
}
// NewCMServer creates a new CMServer with a default config.
func NewCMServer() *CMServer {
s := &CMServer{
CMServer: options.NewCMServer(),
}
s.CloudProvider = mesos.ProviderName
s.UseHostPortEndpoints = true
return s
}
// AddFlags adds flags for a specific CMServer to the specified FlagSet
func (s *CMServer) AddFlags(fs *pflag.FlagSet) {
s.CMServer.AddFlags(fs)
fs.BoolVar(&s.UseHostPortEndpoints, "host-port-endpoints", s.UseHostPortEndpoints, "Map service endpoints to hostIP:hostPort instead of podIP:containerPort. Default true.")
}
func (s *CMServer) resyncPeriod() time.Duration {
factor := rand.Float64() + 1
return time.Duration(float64(time.Hour) * 12.0 * factor)
}
func (s *CMServer) Run(_ []string) error {
if s.Kubeconfig == "" && s.Master == "" {
glog.Warningf("Neither --kubeconfig nor --master was specified. Using default API client. This might not work.")
}
// This creates a client, first loading any specified kubeconfig
// file, and then overriding the Master flag, if non-empty.
kubeconfig, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
&clientcmd.ClientConfigLoadingRules{ExplicitPath: s.Kubeconfig},
&clientcmd.ConfigOverrides{ClusterInfo: clientcmdapi.Cluster{Server: s.Master}}).ClientConfig()
if err != nil {
return err
}
kubeconfig.QPS = 20.0
kubeconfig.Burst = 30
kubeClient, err := client.New(kubeconfig)
if err != nil {
glog.Fatalf("Invalid API configuration: %v", err)
}
go func() {
mux := http.NewServeMux()
healthz.InstallHandler(mux)
if s.EnableProfiling {
profile.InstallHandler(mux)
}
mux.Handle("/metrics", prometheus.Handler())
server := &http.Server{
Addr: net.JoinHostPort(s.Address, strconv.Itoa(int(s.Port))),
Handler: mux,
}
glog.Fatal(server.ListenAndServe())
}()
endpoints := s.createEndpointController(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "endpoint-controller")))
go endpoints.Run(int(s.ConcurrentEndpointSyncs), wait.NeverStop)
go replicationcontroller.NewReplicationManagerFromClient(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "replication-controller")), s.resyncPeriod, replicationcontroller.BurstReplicas, int(s.LookupCacheSizeForRC)).
Run(int(s.ConcurrentRCSyncs), wait.NeverStop)
go podgc.NewFromClient(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "pod-garbage-collector")), int(s.TerminatedPodGCThreshold)).
Run(wait.NeverStop)
//TODO(jdef) should eventually support more cloud providers here
if s.CloudProvider != mesos.ProviderName {
glog.Fatalf("Only provider %v is supported, you specified %v", mesos.ProviderName, s.CloudProvider)
}
cloud, err := cloudprovider.InitCloudProvider(s.CloudProvider, s.CloudConfigFile)
if err != nil {
glog.Fatalf("Cloud provider could not be initialized: %v", err)
}
_, clusterCIDR, _ := net.ParseCIDR(s.ClusterCIDR)
_, serviceCIDR, _ := net.ParseCIDR(s.ServiceCIDR)
nodeController, err := nodecontroller.NewNodeControllerFromClient(cloud, clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "node-controller")),
s.PodEvictionTimeout.Duration, s.NodeEvictionRate, s.SecondaryNodeEvictionRate, s.LargeClusterSizeThreshold, s.UnhealthyZoneThreshold,
s.NodeMonitorGracePeriod.Duration, s.NodeStartupGracePeriod.Duration, s.NodeMonitorPeriod.Duration, clusterCIDR, serviceCIDR, int(s.NodeCIDRMaskSize), s.AllocateNodeCIDRs)
if err != nil {
glog.Fatalf("Failed to initialize nodecontroller: %v", err)
}
nodeController.Run()
nodeStatusUpdaterController := node.NewStatusUpdater(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "node-status-controller")), s.NodeMonitorPeriod.Duration, time.Now)
if err := nodeStatusUpdaterController.Run(wait.NeverStop); err != nil {
glog.Fatalf("Failed to start node status update controller: %v", err)
}
serviceController, err := servicecontroller.New(cloud, clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "service-controller")), s.ClusterName)
if err != nil {
glog.Errorf("Failed to start service controller: %v", err)
} else {
serviceController.Run(int(s.ConcurrentServiceSyncs))
}
if s.AllocateNodeCIDRs && s.ConfigureCloudRoutes {
if cloud == nil {
glog.Warning("configure-cloud-routes is set, but no cloud provider specified. Will not configure cloud provider routes.")
} else if routes, ok := cloud.Routes(); !ok {
glog.Warning("configure-cloud-routes is set, but cloud provider does not support routes. Will not configure cloud provider routes.")
} else {
routeController := routecontroller.New(routes, clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "route-controller")), s.ClusterName, clusterCIDR)
routeController.Run(s.RouteReconciliationPeriod.Duration)
time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
}
} else {
glog.Infof("Will not configure cloud provider routes for allocate-node-cidrs: %v, configure-cloud-routes: %v.", s.AllocateNodeCIDRs, s.ConfigureCloudRoutes)
}
resourceQuotaControllerClient := clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "resource-quota-controller"))
resourceQuotaRegistry := quotainstall.NewRegistry(resourceQuotaControllerClient)
groupKindsToReplenish := []unversioned.GroupKind{
api.Kind("Pod"),
api.Kind("Service"),
api.Kind("ReplicationController"),
api.Kind("PersistentVolumeClaim"),
api.Kind("Secret"),
}
resourceQuotaControllerOptions := &resourcequotacontroller.ResourceQuotaControllerOptions{
KubeClient: resourceQuotaControllerClient,
ResyncPeriod: controller.StaticResyncPeriodFunc(s.ResourceQuotaSyncPeriod.Duration),
Registry: resourceQuotaRegistry,
GroupKindsToReplenish: groupKindsToReplenish,
ReplenishmentResyncPeriod: s.resyncPeriod,
ControllerFactory: resourcequotacontroller.NewReplenishmentControllerFactoryFromClient(resourceQuotaControllerClient),
}
go resourcequotacontroller.NewResourceQuotaController(resourceQuotaControllerOptions).Run(int(s.ConcurrentResourceQuotaSyncs), wait.NeverStop)
// If apiserver is not running we should wait for some time and fail only then. This is particularly
// important when we start apiserver and controller manager at the same time.
var versionStrings []string
err = wait.PollImmediate(time.Second, 10*time.Second, func() (bool, error) {
if versionStrings, err = restclient.ServerAPIVersions(kubeconfig); err == nil {
return true, nil
}
glog.Errorf("Failed to get api versions from server: %v", err)
return false, nil
})
if err != nil {
glog.Fatalf("Failed to get api versions from server: %v", err)
}
versions := &unversioned.APIVersions{Versions: versionStrings}
resourceMap, err := kubeClient.Discovery().ServerResources()
if err != nil {
glog.Fatalf("Failed to get supported resources from server: %v", err)
}
// Find the list of namespaced resources via discovery that the namespace controller must manage
namespaceKubeClient := clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "namespace-controller"))
namespaceClientPool := dynamic.NewClientPool(restclient.AddUserAgent(kubeconfig, "namespace-controller"), registered.RESTMapper(), dynamic.LegacyAPIPathResolverFunc)
groupVersionResources, err := namespaceKubeClient.Discovery().ServerPreferredNamespacedResources()
if err != nil {
glog.Fatalf("Failed to get supported resources from server: %v", err)
}
namespaceController := namespacecontroller.NewNamespaceController(namespaceKubeClient, namespaceClientPool, groupVersionResources, s.NamespaceSyncPeriod.Duration, api.FinalizerKubernetes)
go namespaceController.Run(int(s.ConcurrentNamespaceSyncs), wait.NeverStop)
groupVersion := "extensions/v1beta1"
resources, found := resourceMap[groupVersion]
// TODO(k8s): this needs to be dynamic so users don't have to restart their controller manager if they change the apiserver
if containsVersion(versions, groupVersion) && found {
glog.Infof("Starting %s apis", groupVersion)
if containsResource(resources, "horizontalpodautoscalers") {
glog.Infof("Starting horizontal pod controller.")
hpaClient := clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "horizontal-pod-autoscaler"))
metricsClient := metrics.NewHeapsterMetricsClient(
hpaClient,
metrics.DefaultHeapsterNamespace,
metrics.DefaultHeapsterScheme,
metrics.DefaultHeapsterService,
metrics.DefaultHeapsterPort,
)
go podautoscaler.NewHorizontalController(hpaClient.Core(), hpaClient.Extensions(), hpaClient, metricsClient, s.HorizontalPodAutoscalerSyncPeriod.Duration).
Run(wait.NeverStop)
}
if containsResource(resources, "daemonsets") {
glog.Infof("Starting daemon set controller")
informerFactory := informers.NewSharedInformerFactory(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "daemon-set-controller")), s.resyncPeriod())
go daemon.NewDaemonSetsController(informerFactory.DaemonSets(), informerFactory.Pods(), informerFactory.Nodes(), clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "daemon-set-controller")), int(s.LookupCacheSizeForDaemonSet)).
Run(int(s.ConcurrentDaemonSetSyncs), wait.NeverStop)
informerFactory.Start(wait.NeverStop)
}
if containsResource(resources, "jobs") {
glog.Infof("Starting job controller")
go job.NewJobControllerFromClient(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "job-controller")), s.resyncPeriod).
Run(int(s.ConcurrentJobSyncs), wait.NeverStop)
}
if containsResource(resources, "deployments") {
glog.Infof("Starting deployment controller")
go deployment.NewDeploymentController(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "deployment-controller")), s.resyncPeriod).
Run(int(s.ConcurrentDeploymentSyncs), wait.NeverStop)
}
if containsResource(resources, "replicasets") {
glog.Infof("Starting ReplicaSet controller")
go replicaset.NewReplicaSetControllerFromClient(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "replicaset-controller")), s.resyncPeriod, replicaset.BurstReplicas, int(s.LookupCacheSizeForRS)).
Run(int(s.ConcurrentRSSyncs), wait.NeverStop)
}
}
alphaProvisioner, err := kubecontrollermanager.NewAlphaVolumeProvisioner(cloud, s.VolumeConfiguration)
if err != nil {
glog.Fatalf("An backward-compatible provisioner could not be created: %v, but one was expected. Provisioning will not work. This functionality is considered an early Alpha version.", err)
}
params := persistentvolumecontroller.ControllerParameters{
KubeClient: clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "persistent-volume-binder")),
SyncPeriod: s.PVClaimBinderSyncPeriod.Duration,
AlphaProvisioner: alphaProvisioner,
VolumePlugins: kubecontrollermanager.ProbeControllerVolumePlugins(cloud, s.VolumeConfiguration),
Cloud: cloud,
ClusterName: s.ClusterName,
EnableDynamicProvisioning: s.VolumeConfiguration.EnableDynamicProvisioning,
}
volumeController := persistentvolumecontroller.NewController(params)
volumeController.Run(wait.NeverStop)
var rootCA []byte
if s.RootCAFile != "" {
rootCA, err = ioutil.ReadFile(s.RootCAFile)
if err != nil {
return fmt.Errorf("error reading root-ca-file at %s: %v", s.RootCAFile, err)
}
if _, err := certutil.ParseCertsPEM(rootCA); err != nil {
return fmt.Errorf("error parsing root-ca-file at %s: %v", s.RootCAFile, err)
}
} else {
rootCA = kubeconfig.CAData
}
if len(s.ServiceAccountKeyFile) > 0 {
privateKey, err := serviceaccount.ReadPrivateKey(s.ServiceAccountKeyFile)
if err != nil {
glog.Errorf("Error reading key for service account token controller: %v", err)
} else {
go serviceaccountcontroller.NewTokensController(
clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "tokens-controller")),
serviceaccountcontroller.TokensControllerOptions{
TokenGenerator: serviceaccount.JWTTokenGenerator(privateKey),
RootCA: rootCA,
},
).Run(int(s.ConcurrentSATokenSyncs), wait.NeverStop)
}
}
serviceaccountcontroller.NewServiceAccountsController(
clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "service-account-controller")),
serviceaccountcontroller.DefaultServiceAccountsControllerOptions(),
).Run()
select {}
}
func (s *CMServer) createEndpointController(client *clientset.Clientset) kmendpoint.EndpointController {
if s.UseHostPortEndpoints {
glog.V(2).Infof("Creating hostIP:hostPort endpoint controller")
return kmendpoint.NewEndpointController(client)
}
glog.V(2).Infof("Creating podIP:containerPort endpoint controller")
stockEndpointController := endpointcontroller.NewEndpointControllerFromClient(client, s.resyncPeriod)
return stockEndpointController
}
func containsVersion(versions *unversioned.APIVersions, version string) bool {
for ix := range versions.Versions {
if versions.Versions[ix] == version {
return true
}
}
return false
}
func containsResource(resources *unversioned.APIResourceList, resourceName string) bool {
for ix := range resources.APIResources {
resource := resources.APIResources[ix]
if resource.Name == resourceName {
return true
}
}
return false
}

View File

@ -1,20 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package controllermanager is largely a clone of the upstream implementation,
// with additional functionality to select between stock or a customized
// endpoints controller.
package controllermanager // import "k8s.io/kubernetes/contrib/mesos/pkg/controllermanager"

View File

@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package election provides interfaces used for master election.
package election // import "k8s.io/kubernetes/contrib/mesos/pkg/election"

View File

@ -1,198 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"fmt"
"time"
etcd "github.com/coreos/etcd/client"
"github.com/golang/glog"
"golang.org/x/net/context"
"k8s.io/kubernetes/pkg/api/unversioned"
etcdutil "k8s.io/kubernetes/pkg/storage/etcd/util"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/pkg/watch"
)
// Master is used to announce the current elected master.
type Master string
// IsAnAPIObject is used solely so we can work with the watch package.
// TODO(k8s): Either fix watch so this isn't necessary, or make this a real API Object.
// TODO(k8s): when it becomes clear how this package will be used, move these declarations to
// to the proper place.
func (obj Master) GetObjectKind() unversioned.ObjectKind { return unversioned.EmptyObjectKind }
// NewEtcdMasterElector returns an implementation of election.MasterElector backed by etcd.
func NewEtcdMasterElector(h etcd.Client) MasterElector {
return &etcdMasterElector{etcd: etcd.NewKeysAPI(h)}
}
type empty struct{}
// internal implementation struct
type etcdMasterElector struct {
etcd etcd.KeysAPI
done chan empty
events chan watch.Event
}
// Elect implements the election.MasterElector interface.
func (e *etcdMasterElector) Elect(path, id string) watch.Interface {
e.done = make(chan empty)
e.events = make(chan watch.Event)
go wait.Until(func() { e.run(path, id) }, time.Second*5, wait.NeverStop)
return e
}
func (e *etcdMasterElector) run(path, id string) {
masters := make(chan string)
errors := make(chan error)
go e.master(path, id, 30, masters, errors, e.done) // TODO(jdef) extract constant
for {
select {
case m := <-masters:
e.events <- watch.Event{
Type: watch.Modified,
Object: Master(m),
}
case e := <-errors:
glog.Errorf("Error in election: %v", e)
}
}
}
// ResultChan implements the watch.Interface interface.
func (e *etcdMasterElector) ResultChan() <-chan watch.Event {
return e.events
}
// extendMaster attempts to extend ownership of a master lock for TTL seconds.
// returns "", nil if extension failed
// returns id, nil if extension succeeded
// returns "", err if an error occurred
func (e *etcdMasterElector) extendMaster(path, id string, ttl uint64, res *etcd.Response) (string, error) {
// If it matches the passed in id, extend the lease by writing a new entry.
// Uses compare and swap, so that if we TTL out in the meantime, the write will fail.
// We don't handle the TTL delete w/o a write case here, it's handled in the next loop
// iteration.
opts := etcd.SetOptions{
TTL: time.Duration(ttl) * time.Second,
PrevValue: "",
PrevIndex: res.Node.ModifiedIndex,
}
_, err := e.etcd.Set(context.TODO(), path, id, &opts)
if err != nil && !etcdutil.IsEtcdTestFailed(err) {
return "", err
}
if err != nil && etcdutil.IsEtcdTestFailed(err) {
return "", nil
}
return id, nil
}
// becomeMaster attempts to become the master for this lock.
// returns "", nil if the attempt failed
// returns id, nil if the attempt succeeded
// returns "", err if an error occurred
func (e *etcdMasterElector) becomeMaster(path, id string, ttl uint64) (string, error) {
opts := etcd.SetOptions{
TTL: time.Duration(ttl) * time.Second,
PrevExist: etcd.PrevNoExist,
}
_, err := e.etcd.Set(context.TODO(), path, id, &opts)
if err != nil && !etcdutil.IsEtcdNodeExist(err) {
// unexpected error
return "", err
}
if err != nil && etcdutil.IsEtcdNodeExist(err) {
return "", nil
}
return id, nil
}
// handleMaster performs one loop of master locking.
// on success it returns <master>, nil
// on error it returns "", err
// in situations where you should try again due to concurrent state changes (e.g. another actor simultaneously acquiring the lock)
// it returns "", nil
func (e *etcdMasterElector) handleMaster(path, id string, ttl uint64) (string, error) {
res, err := e.etcd.Get(context.TODO(), path, nil)
// Unexpected error, bail out
if err != nil && !etcdutil.IsEtcdNotFound(err) {
return "", err
}
// There is no master, try to become the master.
if err != nil && etcdutil.IsEtcdNotFound(err) {
return e.becomeMaster(path, id, ttl)
}
// This should never happen.
if res.Node == nil {
return "", fmt.Errorf("unexpected response: %#v", res)
}
// We're not the master, just return the current value
if res.Node.Value != id {
return res.Node.Value, nil
}
// We are the master, try to extend our lease
return e.extendMaster(path, id, ttl, res)
}
// master provides a distributed master-election lock; it maintains the lock until failure or until something is sent on the done channel.
// The basic algorithm is:
// while !done
// Get the current master
// If there is no current master
// Try to become the master
// Otherwise
// If we are the master, extend the lease
// If the master is different than the last time through the loop, report the master
// Sleep 80% of TTL
func (e *etcdMasterElector) master(path, id string, ttl uint64, masters chan<- string, errors chan<- error, done <-chan empty) {
lastMaster := ""
for {
master, err := e.handleMaster(path, id, ttl)
if err != nil {
errors <- err
} else if len(master) == 0 {
continue
} else if master != lastMaster {
lastMaster = master
masters <- master
}
// TODO(k8s): Add Watch here, skip the polling for faster reactions
// If done is closed, break out.
select {
case <-done:
return
case <-time.After(time.Duration((ttl*8)/10) * time.Second):
}
}
}
// Stop implements the watch.Interface interface.
func (e *etcdMasterElector) Stop() {
close(e.done)
}
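
To make the elector's contract concrete, here is a minimal usage sketch (not part of the original tree): it assumes a reachable etcd endpoint at 127.0.0.1:2379 and simply logs every master announced on the watch channel.

package main

import (
    "fmt"

    etcd "github.com/coreos/etcd/client"
    "k8s.io/kubernetes/contrib/mesos/pkg/election"
    "k8s.io/kubernetes/pkg/watch"
)

func main() {
    // assumption: a local etcd listening on 127.0.0.1:2379
    client, err := etcd.New(etcd.Config{Endpoints: []string{"http://127.0.0.1:2379"}})
    if err != nil {
        panic(err)
    }
    elector := election.NewEtcdMasterElector(client)
    w := elector.Elect("/k8sm/master", "scheduler-1")
    defer w.Stop()
    for event := range w.ResultChan() {
        if event.Type == watch.Modified {
            fmt.Println("current master:", event.Object.(election.Master))
        }
    }
}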

View File

@ -1,78 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"testing"
etcd "github.com/coreos/etcd/client"
"golang.org/x/net/context"
etcdtesting "k8s.io/kubernetes/pkg/storage/etcd/testing"
"k8s.io/kubernetes/pkg/watch"
)
func TestEtcdMasterOther(t *testing.T) {
server := etcdtesting.NewEtcdTestClientServer(t)
defer server.Terminate(t)
path := "foo"
keysAPI := etcd.NewKeysAPI(server.Client)
if _, err := keysAPI.Set(context.TODO(), path, "baz", nil); err != nil {
t.Errorf("unexpected error: %v", err)
}
master := NewEtcdMasterElector(server.Client)
w := master.Elect(path, "bar")
result := <-w.ResultChan()
if result.Type != watch.Modified || result.Object.(Master) != "baz" {
t.Errorf("unexpected event: %#v", result)
}
w.Stop()
}
func TestEtcdMasterNoOther(t *testing.T) {
server := etcdtesting.NewEtcdTestClientServer(t)
defer server.Terminate(t)
path := "foo"
master := NewEtcdMasterElector(server.Client)
w := master.Elect(path, "bar")
result := <-w.ResultChan()
if result.Type != watch.Modified || result.Object.(Master) != "bar" {
t.Errorf("unexpected event: %#v", result)
}
w.Stop()
}
func TestEtcdMasterNoOtherThenConflict(t *testing.T) {
server := etcdtesting.NewEtcdTestClientServer(t)
defer server.Terminate(t)
path := "foo"
master := NewEtcdMasterElector(server.Client)
leader := NewEtcdMasterElector(server.Client)
w_ldr := leader.Elect(path, "baz")
result := <-w_ldr.ResultChan()
w := master.Elect(path, "bar")
result = <-w.ResultChan()
if result.Type != watch.Modified || result.Object.(Master) != "baz" {
t.Errorf("unexpected event: %#v", result)
}
w.Stop()
w_ldr.Stop()
}

View File

@ -1,53 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"sync"
"k8s.io/kubernetes/pkg/watch"
)
// Fake allows for testing of anything consuming a MasterElector.
type Fake struct {
mux *watch.Broadcaster
currentMaster Master
lock sync.Mutex // Protect access of currentMaster
}
// NewFake makes a new fake MasterElector.
func NewFake() *Fake {
// 0 means block for clients.
return &Fake{mux: watch.NewBroadcaster(0, watch.WaitIfChannelFull)}
}
func (f *Fake) ChangeMaster(newMaster Master) {
f.lock.Lock()
defer f.lock.Unlock()
f.mux.Action(watch.Modified, newMaster)
f.currentMaster = newMaster
}
func (f *Fake) Elect(path, id string) watch.Interface {
f.lock.Lock()
defer f.lock.Unlock()
w := f.mux.Watch()
if f.currentMaster != "" {
f.mux.Action(watch.Modified, f.currentMaster)
}
return w
}
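
As a small illustration of the replay behavior above (a hypothetical snippet, not from the original tree): because Elect re-broadcasts the current master, a watcher created after ChangeMaster still observes the latest value.

// Hypothetical testable example in package election (assumes "fmt" is imported).
func ExampleFake() {
    f := NewFake()
    f.ChangeMaster(Master("node-a"))
    w := f.Elect("/fake/master", "node-b")
    defer w.Stop()
    event := <-w.ResultChan() // Elect replays the current master to new watchers
    fmt.Println(event.Object.(Master))
    // Output: node-a
}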

View File

@ -1,121 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"k8s.io/kubernetes/contrib/mesos/pkg/runtime"
"k8s.io/kubernetes/pkg/watch"
"github.com/golang/glog"
)
// MasterElector is an interface for services that can elect masters.
// Important Note: MasterElectors are not interoperable; all participants in the election need to
// use the same underlying implementation of this interface for correct behavior.
type MasterElector interface {
// Elect makes the caller represented by 'id' enter into a master election for the
// distributed lock defined by 'path'
// The returned watch.Interface provides a stream of Master objects which
// contain the current master.
// Calling Stop on the returned interface relinquishes ownership (if currently possessed)
// and removes the caller from the election.
Elect(path, id string) watch.Interface
}
// Service represents anything that can start and stop on demand.
type Service interface {
Validate(desired, current Master)
Start()
Stop()
}
type notifier struct {
masters chan Master // elected masters arrive here, should be buffered to better deal with rapidly flapping masters
// for comparison, to see if we are master.
id Master
service Service
}
// Notify runs Elect() on m, and calls Start()/Stop() on s when the
// elected master starts/stops matching 'id'. It blocks until abort is closed.
func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
n := &notifier{id: Master(id), service: s, masters: make(chan Master, 1)}
finished := runtime.After(func() {
runtime.Until(func() {
for {
w := m.Elect(path, id)
for {
select {
case <-abort:
return
case event, open := <-w.ResultChan():
if !open {
break
}
if event.Type != watch.Modified {
continue
}
electedMaster, ok := event.Object.(Master)
if !ok {
glog.Errorf("Unexpected object from election channel: %v", event.Object)
break
}
sendElected:
for {
select {
case <-abort:
return
case n.masters <- electedMaster:
break sendElected
default: // ring full, discard old value and add the new
select {
case <-abort:
return
case <-n.masters:
default: // ring was cleared for us?!
}
}
}
}
}
}
}, 0, abort)
})
runtime.Until(func() { n.serviceLoop(finished) }, 0, abort)
}
// serviceLoop waits for changes, and calls Start()/Stop() as needed.
func (n *notifier) serviceLoop(abort <-chan struct{}) {
var current Master
for {
select {
case <-abort:
return
case desired := <-n.masters:
if current != n.id && desired == n.id {
n.service.Validate(desired, current)
n.service.Start()
} else if current == n.id && desired != n.id {
n.service.Stop()
}
current = desired
}
}
}
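
For orientation, here is a hedged wiring sketch (not from the original tree) showing a trivial Service hooked up to Notify via the in-memory Fake elector; the exampleService name is illustrative only.

// exampleService is a hypothetical Service implementation (assumes "fmt" is imported).
type exampleService struct{}

func (s *exampleService) Validate(desired, current Master) {}
func (s *exampleService) Start()                           { fmt.Println("elected: starting service") }
func (s *exampleService) Stop()                            { fmt.Println("deposed: stopping service") }

func runNotifyExample() {
    abort := make(chan struct{})
    defer close(abort)

    elector := NewFake()
    go Notify(elector, "/fake/master", "me", &exampleService{}, abort)

    elector.ChangeMaster(Master("me"))    // serviceLoop calls Start()
    elector.ChangeMaster(Master("other")) // serviceLoop calls Stop()
}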

View File

@ -1,106 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package election
import (
"testing"
"time"
"k8s.io/kubernetes/contrib/mesos/pkg/runtime"
)
type slowService struct {
t *testing.T
on bool
// We explicitly have no lock to prove that
// Start and Stop are not called concurrently.
changes chan<- bool
done <-chan struct{}
}
func (s *slowService) Validate(d, c Master) {
// noop
}
func (s *slowService) Start() {
select {
case <-s.done:
return // avoid writing to closed changes chan
default:
}
if s.on {
s.t.Errorf("started already on service")
}
time.Sleep(2 * time.Millisecond)
s.on = true
s.changes <- true
}
func (s *slowService) Stop() {
select {
case <-s.done:
return // avoid writing to closed changes chan
default:
}
if !s.on {
s.t.Errorf("stopped already off service")
}
time.Sleep(2 * time.Millisecond)
s.on = false
s.changes <- false
}
func Test(t *testing.T) {
m := NewFake()
changes := make(chan bool, 1500)
done := make(chan struct{})
s := &slowService{t: t, changes: changes, done: done}
// set an initial master so that the first m.Elect call inside Notify
// will trigger an observable event. We wait for it to make sure the
// Notify loop sees the master changes triggered by the goroutine below.
m.ChangeMaster(Master("me"))
temporaryWatch := m.mux.Watch()
ch := temporaryWatch.ResultChan()
notifyDone := runtime.After(func() { Notify(m, "", "me", s, done) })
// wait for the event triggered by the initial m.Elect of Notify. Then drain
// the channel to not block anything.
<-ch
temporaryWatch.Stop()
for i := 0; i < len(ch); i += 1 { // go 1.3 and 1.4 compatible loop
<-ch
}
go func() {
defer close(done)
for i := 0; i < 500; i++ {
for _, key := range []string{"me", "notme", "alsonotme"} {
m.ChangeMaster(Master(key))
}
}
}()
<-notifyDone
close(changes)
changesNum := len(changes)
if changesNum > 1000 || changesNum == 0 {
t.Errorf("unexpected number of changes: %v", changesNum)
}
}

View File

@ -1,45 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"k8s.io/kubernetes/contrib/mesos/pkg/node"
"k8s.io/kubernetes/pkg/api"
unversionedcore "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/unversioned"
)
type kubeAPI interface {
killPod(ns, name string) error
}
type nodeAPI interface {
createOrUpdate(hostname string, slaveAttrLabels, annotations map[string]string) (*api.Node, error)
}
// clientAPIWrapper implements kubeAPI and nodeAPI, which serve to isolate external dependencies
// so that they're easier to mock in unit tests.
type clientAPIWrapper struct {
client unversionedcore.CoreInterface
}
func (cw *clientAPIWrapper) killPod(ns, name string) error {
return cw.client.Pods(ns).Delete(name, api.NewDeleteOptions(0))
}
func (cw *clientAPIWrapper) createOrUpdate(hostname string, slaveAttrLabels, annotations map[string]string) (*api.Node, error) {
return node.CreateOrUpdate(cw.client, hostname, slaveAttrLabels, annotations)
}
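
These narrow interfaces exist so unit tests can swap in lightweight doubles; a hypothetical stub (not from the original tree) is sketched below.

// recordingKubeAPI is a hypothetical test double for the kubeAPI interface;
// it records the pods it was asked to kill instead of calling the apiserver.
type recordingKubeAPI struct {
    killed []string
}

func (r *recordingKubeAPI) killPod(ns, name string) error {
    r.killed = append(r.killed, ns+"/"+name)
    return nil
}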

View File

@ -1,29 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"time"
)
// default values to use when constructing mesos ExecutorInfo messages
const (
DefaultInfoID = "k8sm-executor"
DefaultInfoSource = "kubernetes"
DefaultSuicideTimeout = 20 * time.Minute
DefaultLaunchGracePeriod = 5 * time.Minute
)

View File

@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package config contains executor configuration constants.
package config // import "k8s.io/kubernetes/contrib/mesos/pkg/executor/config"

View File

@ -1,21 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Package executor includes a Mesos executor, which embeds
a kubelet to manage containers.
*/
package executor // import "k8s.io/kubernetes/contrib/mesos/pkg/executor"

View File

@ -1,755 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"bytes"
"encoding/json"
"fmt"
"math"
"strings"
"sync"
"sync/atomic"
"time"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
dockertypes "github.com/docker/engine-api/types"
"github.com/gogo/protobuf/proto"
log "github.com/golang/glog"
bindings "github.com/mesos/mesos-go/executor"
mesos "github.com/mesos/mesos-go/mesosproto"
mutil "github.com/mesos/mesos-go/mesosutil"
"k8s.io/kubernetes/contrib/mesos/pkg/executor/messages"
"k8s.io/kubernetes/contrib/mesos/pkg/node"
"k8s.io/kubernetes/contrib/mesos/pkg/podutil"
"k8s.io/kubernetes/contrib/mesos/pkg/runtime"
"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/executorinfo"
"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/meta"
"k8s.io/kubernetes/pkg/api"
apierrors "k8s.io/kubernetes/pkg/api/errors"
"k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/dockertools"
kruntime "k8s.io/kubernetes/pkg/runtime"
utilruntime "k8s.io/kubernetes/pkg/util/runtime"
)
type stateType int32
const (
disconnectedState stateType = iota
connectedState
suicidalState
terminalState
)
func (s *stateType) get() stateType {
return stateType(atomic.LoadInt32((*int32)(s)))
}
func (s *stateType) transition(from, to stateType) bool {
return atomic.CompareAndSwapInt32((*int32)(s), int32(from), int32(to))
}
func (s *stateType) transitionTo(to stateType, unless ...stateType) bool {
if len(unless) == 0 {
atomic.StoreInt32((*int32)(s), int32(to))
return true
}
for {
state := s.get()
for _, x := range unless {
if state == x {
return false
}
}
if s.transition(state, to) {
return true
}
}
}
// Executor is a Mesos executor that runs pods
// on a minion machine.
type Executor struct {
state stateType
lock sync.Mutex
terminate chan struct{} // signals that the executor is shutting down
outgoing chan func() (mesos.Status, error) // outgoing queue to the mesos driver
dockerClient dockertools.DockerInterface
suicideWatch suicideWatcher
suicideTimeout time.Duration
shutdownAlert func() // invoked just prior to executor shutdown
kubeletFinished <-chan struct{} // signals that kubelet Run() died
exitFunc func(int)
staticPodsConfigPath string
staticPodsFilters podutil.Filters
launchGracePeriod time.Duration
nodeInfos chan<- NodeInfo
initCompleted chan struct{} // closes upon completion of Init()
registry Registry
watcher *watcher
kubeAPI kubeAPI
nodeAPI nodeAPI
}
type Config struct {
APIClient *clientset.Clientset
Docker dockertools.DockerInterface
ShutdownAlert func()
SuicideTimeout time.Duration
KubeletFinished <-chan struct{} // signals that kubelet Run() died
ExitFunc func(int)
LaunchGracePeriod time.Duration
NodeInfos chan<- NodeInfo
Registry Registry
Options []Option // functional options
}
// Option is a functional option type for Executor
type Option func(*Executor)
func (k *Executor) isConnected() bool {
return connectedState == (&k.state).get()
}
// New creates a new kubernetes executor.
func New(config Config) *Executor {
launchGracePeriod := config.LaunchGracePeriod
if launchGracePeriod == 0 {
// this is the equivalent of saying "the timer never expires" and simplifies nil
// timer checks elsewhere in the code. it's a little hacky, but less code to
// maintain than alternative approaches.
launchGracePeriod = time.Duration(math.MaxInt64)
}
k := &Executor{
state: disconnectedState,
terminate: make(chan struct{}),
outgoing: make(chan func() (mesos.Status, error), 1024),
dockerClient: config.Docker,
suicideTimeout: config.SuicideTimeout,
kubeletFinished: config.KubeletFinished,
suicideWatch: &suicideTimer{},
shutdownAlert: config.ShutdownAlert,
exitFunc: config.ExitFunc,
launchGracePeriod: launchGracePeriod,
nodeInfos: config.NodeInfos,
initCompleted: make(chan struct{}),
registry: config.Registry,
}
if config.APIClient != nil {
k.kubeAPI = &clientAPIWrapper{config.APIClient.Core()}
k.nodeAPI = &clientAPIWrapper{config.APIClient.Core()}
}
// apply functional options
for _, opt := range config.Options {
opt(k)
}
runtime.On(k.initCompleted, k.runSendLoop)
k.watcher = newWatcher(k.registry.watch())
runtime.On(k.initCompleted, k.watcher.run)
return k
}
// StaticPods creates a static pods Option for an Executor
func StaticPods(configPath string, f podutil.Filters) Option {
return func(k *Executor) {
k.staticPodsFilters = f
k.staticPodsConfigPath = configPath
}
}
// Done returns a chan that closes when the executor is shutting down
func (k *Executor) Done() <-chan struct{} {
return k.terminate
}
func (k *Executor) Init(driver bindings.ExecutorDriver) {
defer close(k.initCompleted)
k.killKubeletContainers()
k.resetSuicideWatch(driver)
k.watcher.addFilter(func(podEvent *PodEvent) bool {
switch podEvent.eventType {
case PodEventIncompatibleUpdate:
log.Warningf("killing %s because of an incompatible update", podEvent.FormatShort())
k.killPodTask(driver, podEvent.taskID)
// halt processing of this event; when the pod is deleted we'll receive another
// event for that.
return false
case PodEventDeleted:
// an active pod-task was deleted, alert mesos:
// send back a TASK_KILLED status, we completed the pod-task lifecycle normally.
k.resetSuicideWatch(driver)
k.sendStatus(driver, newStatus(mutil.NewTaskID(podEvent.taskID), mesos.TaskState_TASK_KILLED, "pod-deleted"))
}
return true
})
//TODO(jdef) monitor kubeletFinished and shutdown if it happens
}
func (k *Executor) isDone() bool {
select {
case <-k.terminate:
return true
default:
return false
}
}
// Registered is called when the executor is successfully registered with the slave.
func (k *Executor) Registered(
driver bindings.ExecutorDriver,
executorInfo *mesos.ExecutorInfo,
frameworkInfo *mesos.FrameworkInfo,
slaveInfo *mesos.SlaveInfo,
) {
if k.isDone() {
return
}
log.Infof(
"Executor %v of framework %v registered with slave %v\n",
executorInfo, frameworkInfo, slaveInfo,
)
if !(&k.state).transition(disconnectedState, connectedState) {
log.Errorf("failed to register/transition to a connected state")
}
k.initializeStaticPodsSource(executorInfo)
annotations, err := annotationsFor(executorInfo)
if err != nil {
log.Errorf(
"cannot get node annotations from executor info %v error %v",
executorInfo, err,
)
}
if slaveInfo != nil {
_, err := k.nodeAPI.createOrUpdate(
slaveInfo.GetHostname(),
node.SlaveAttributesToLabels(slaveInfo.Attributes),
annotations,
)
if err != nil {
log.Errorf("cannot update node labels: %v", err)
}
}
k.lock.Lock()
defer k.lock.Unlock()
if slaveInfo != nil && k.nodeInfos != nil {
k.nodeInfos <- nodeInfo(slaveInfo, executorInfo) // leave it behind the upper lock to avoid panics
}
}
// Reregistered is called when the executor is successfully re-registered with the slave.
// This can happen when the slave fails over.
func (k *Executor) Reregistered(driver bindings.ExecutorDriver, slaveInfo *mesos.SlaveInfo) {
if k.isDone() {
return
}
log.Infof("Reregistered with slave %v\n", slaveInfo)
if !(&k.state).transition(disconnectedState, connectedState) {
log.Errorf("failed to reregister/transition to a connected state")
}
if slaveInfo != nil {
_, err := k.nodeAPI.createOrUpdate(
slaveInfo.GetHostname(),
node.SlaveAttributesToLabels(slaveInfo.Attributes),
nil, // don't change annotations
)
if err != nil {
log.Errorf("cannot update node labels: %v", err)
}
}
if slaveInfo != nil && k.nodeInfos != nil {
// make sure nodeInfos is not nil and send new NodeInfo
k.lock.Lock()
defer k.lock.Unlock()
if k.isDone() {
return
}
k.nodeInfos <- nodeInfo(slaveInfo, nil)
}
}
// initializeStaticPodsSource extracts the gzipped data slice into the static-pods directory
func (k *Executor) initializeStaticPodsSource(executorInfo *mesos.ExecutorInfo) {
if data := executorInfo.GetData(); len(data) > 0 && k.staticPodsConfigPath != "" {
log.V(2).Infof("extracting static pods config to %s", k.staticPodsConfigPath)
err := podutil.WriteToDir(
k.staticPodsFilters.Do(podutil.Gunzip(executorInfo.Data)),
k.staticPodsConfigPath,
)
if err != nil {
log.Errorf("failed to initialize static pod configuration: %v", err)
}
}
}
// Disconnected is called when the executor is disconnected from the slave.
func (k *Executor) Disconnected(driver bindings.ExecutorDriver) {
if k.isDone() {
return
}
log.Infof("Slave is disconnected\n")
if !(&k.state).transition(connectedState, disconnectedState) {
log.Errorf("failed to disconnect/transition to a disconnected state")
}
}
// LaunchTask is called when the executor receives a request to launch a task.
// This happens when the k8sm scheduler has decided to schedule the pod
// (which corresponds to a Mesos Task) onto the node where this executor
// is running, but the binding is not recorded in the Kubernetes store yet.
// This function is invoked to tell the executor to record the binding in the
// Kubernetes store and start the pod via the Kubelet.
func (k *Executor) LaunchTask(driver bindings.ExecutorDriver, taskInfo *mesos.TaskInfo) {
if k.isDone() {
return
}
log.Infof("Launch task %v\n", taskInfo)
taskID := taskInfo.GetTaskId().GetValue()
if p := k.registry.pod(taskID); p != nil {
log.Warningf("task %v already launched", taskID)
// Don't send back TASK_RUNNING or TASK_FAILED here because they
// may be duplicate messages.
return
}
if !k.isConnected() {
log.Errorf("Ignore launch task because the executor is disconnected\n")
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
messages.ExecutorUnregistered))
return
}
obj, err := kruntime.Decode(api.Codecs.UniversalDecoder(), taskInfo.GetData())
if err != nil {
log.Errorf("failed to extract yaml data from the taskInfo.data %v", err)
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
messages.UnmarshalTaskDataFailure))
return
}
pod, ok := obj.(*api.Pod)
if !ok {
log.Errorf("expected *api.Pod instead of %T: %+v", pod, pod)
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
messages.UnmarshalTaskDataFailure))
return
}
k.resetSuicideWatch(driver)
// run the next step async because it calls out to the apiserver and we don't want to block here
go k.bindAndWatchTask(driver, taskInfo, time.NewTimer(k.launchGracePeriod), pod)
}
// determine whether we need to start a suicide countdown. if so, then start
// a timer that, upon expiration, causes this executor to commit suicide.
// this implementation runs asynchronously. callers that wish to wait for the
// reset to complete may wait for the returned signal chan to close.
func (k *Executor) resetSuicideWatch(driver bindings.ExecutorDriver) <-chan struct{} {
ch := make(chan struct{})
go func() {
defer close(ch)
k.lock.Lock()
defer k.lock.Unlock()
if k.suicideTimeout < 1 {
return
}
if k.suicideWatch != nil {
if !k.registry.empty() {
k.suicideWatch.Stop()
return
}
if k.suicideWatch.Reset(k.suicideTimeout) {
// valid timer, reset was successful
return
}
}
//TODO(jdef) reduce verbosity here once we're convinced that suicide watch is working properly
log.Infof("resetting suicide watch timer for %v", k.suicideTimeout)
k.suicideWatch = k.suicideWatch.Next(k.suicideTimeout, driver, jumper(k.attemptSuicide))
}()
return ch
}
func (k *Executor) attemptSuicide(driver bindings.ExecutorDriver, abort <-chan struct{}) {
k.lock.Lock()
defer k.lock.Unlock()
// this attempt may have been queued and since been aborted
select {
case <-abort:
//TODO(jdef) reduce verbosity once suicide watch is working properly
log.Infof("aborting suicide attempt since watch was cancelled")
return
default: // continue
}
// fail-safe, will abort kamikaze attempts if there are tasks
if !k.registry.empty() {
log.Errorf("suicide attempt failed, there are still running tasks")
return
}
log.Infoln("Attempting suicide")
if (&k.state).transitionTo(suicidalState, suicidalState, terminalState) {
//TODO(jdef) let the scheduler know?
//TODO(jdef) is suicide more graceful than slave-demanded shutdown?
k.doShutdown(driver)
}
}
func podStatusData(pod *api.Pod, status api.PodStatus) ([]byte, string, error) {
podFullName := container.GetPodFullName(pod)
data, err := json.Marshal(api.PodStatusResult{
ObjectMeta: api.ObjectMeta{
Name: podFullName,
SelfLink: "/podstatusresult",
},
Status: status,
})
return data, podFullName, err
}
// async continuation of LaunchTask
func (k *Executor) bindAndWatchTask(driver bindings.ExecutorDriver, task *mesos.TaskInfo, launchTimer *time.Timer, pod *api.Pod) {
success := false
defer func() {
if !success {
k.killPodTask(driver, task.TaskId.GetValue())
k.resetSuicideWatch(driver)
}
}()
// allow a recently failed-over scheduler the chance to recover the task/pod binding:
// it may have failed and recovered before the apiserver is able to report the updated
// binding information. replays of this status event will signal to the scheduler that
// the apiserver should be up-to-date.
startingData, _, err := podStatusData(pod, api.PodStatus{})
if err != nil {
log.Errorf("failed to generate pod-task starting data for task %v pod %v/%v: %v",
task.TaskId.GetValue(), pod.Namespace, pod.Name, err)
k.sendStatus(driver, newStatus(task.TaskId, mesos.TaskState_TASK_FAILED, err.Error()))
return
}
err = k.registry.bind(task.TaskId.GetValue(), pod)
if err != nil {
log.Errorf("failed to bind task %v pod %v/%v: %v",
task.TaskId.GetValue(), pod.Namespace, pod.Name, err)
k.sendStatus(driver, newStatus(task.TaskId, mesos.TaskState_TASK_FAILED, err.Error()))
return
}
// send TASK_STARTING
k.sendStatus(driver, &mesos.TaskStatus{
TaskId: task.TaskId,
State: mesos.TaskState_TASK_STARTING.Enum(),
Message: proto.String(messages.CreateBindingSuccess),
Data: startingData,
})
// within the launch timeout window we should see a pod-task update via the registry.
// if we see a Running update then we need to generate a TASK_RUNNING status update for mesos.
handlerFinished := false
handler := &watchHandler{
expiration: watchExpiration{
timeout: launchTimer.C,
onEvent: func(taskID string) {
if !handlerFinished {
// launch timeout expired
k.killPodTask(driver, task.TaskId.GetValue())
}
},
},
onEvent: func(podEvent *PodEvent) (bool, error) {
switch podEvent.eventType {
case PodEventUpdated:
log.V(2).Infof("Found status: '%v' for %s", podEvent.pod.Status, podEvent.FormatShort())
if podEvent.pod.Status.Phase != api.PodRunning {
// still waiting for pod to transition to a running state, so
// we're not done monitoring yet; check back later..
break
}
data, podFullName, err := podStatusData(podEvent.pod, podEvent.pod.Status)
if err != nil {
return false, fmt.Errorf("failed to marshal pod status result: %v", err)
}
defer k.sendStatus(driver, &mesos.TaskStatus{
TaskId: task.TaskId,
State: mesos.TaskState_TASK_RUNNING.Enum(),
Message: proto.String("pod-running:" + podFullName),
Data: data,
})
fallthrough
case PodEventDeleted:
// we're done monitoring because pod has been deleted
handlerFinished = true
launchTimer.Stop()
}
return handlerFinished, nil
},
}
k.watcher.forTask(task.TaskId.GetValue(), handler)
success = true
}
// KillTask is called when the executor receives a request to kill a task.
func (k *Executor) KillTask(driver bindings.ExecutorDriver, taskId *mesos.TaskID) {
k.killPodTask(driver, taskId.GetValue())
}
// deletes the pod and task associated with the task identified by taskID and sends a task
// status update to mesos. also attempts to reset the suicide watch.
func (k *Executor) killPodTask(driver bindings.ExecutorDriver, taskID string) {
pod := k.registry.pod(taskID)
if pod == nil {
log.V(1).Infof("Failed to remove task, unknown task %v\n", taskID)
k.sendStatus(driver, newStatus(&mesos.TaskID{Value: &taskID}, mesos.TaskState_TASK_LOST, "kill-pod-task"))
return
}
// force-delete the pod from the API server
// TODO(jdef) possibly re-use eviction code from stock k8s once it lands?
err := k.kubeAPI.killPod(pod.Namespace, pod.Name)
if err != nil {
log.V(1).Infof("failed to delete task %v pod %v/%v from apiserver: %+v", taskID, pod.Namespace, pod.Name, err)
if apierrors.IsNotFound(err) {
k.sendStatus(driver, newStatus(&mesos.TaskID{Value: &taskID}, mesos.TaskState_TASK_LOST, "kill-pod-task"))
}
}
}
// FrameworkMessage is called when the framework sends some message to the executor
func (k *Executor) FrameworkMessage(driver bindings.ExecutorDriver, message string) {
if k.isDone() {
return
}
if !k.isConnected() {
log.Warningf("Ignore framework message because the executor is disconnected\n")
return
}
log.Infof("Receives message from framework %v\n", message)
//TODO(jdef) master reported a lost task, reconcile this! @see framework.go:handleTaskLost
if strings.HasPrefix(message, messages.TaskLost+":") {
taskId := message[len(messages.TaskLost)+1:]
if taskId != "" {
// TODO(jdef) would it make more sense to check the status of the task and
// just replay the last non-terminal message that we sent if the task is
// still active?
// clean up pod state
k.sendStatus(driver, newStatus(&mesos.TaskID{Value: &taskId}, mesos.TaskState_TASK_LOST, messages.TaskLostAck))
k.killPodTask(driver, taskId)
}
return
}
switch message {
case messages.Kamikaze:
k.attemptSuicide(driver, nil)
}
}
// Shutdown is called when the executor receives a shutdown request.
func (k *Executor) Shutdown(driver bindings.ExecutorDriver) {
k.lock.Lock()
defer k.lock.Unlock()
k.doShutdown(driver)
}
// assumes that caller has obtained state lock
func (k *Executor) doShutdown(driver bindings.ExecutorDriver) {
defer func() {
log.Errorf("exiting with unclean shutdown: %v", recover())
if k.exitFunc != nil {
k.exitFunc(1)
}
}()
(&k.state).transitionTo(terminalState)
// signal to all listeners that this KubeletExecutor is done!
close(k.terminate)
close(k.nodeInfos)
if k.shutdownAlert != nil {
func() {
defer utilruntime.HandleCrash()
k.shutdownAlert()
}()
}
log.Infoln("Stopping executor driver")
_, err := driver.Stop()
if err != nil {
log.Warningf("failed to stop executor driver: %v", err)
}
log.Infoln("Shutdown the executor")
// according to docs, mesos will generate TASK_LOST updates for us
// if needed, so don't take extra time to do that here.
k.registry.shutdown()
select {
// the main Run() func may still be running... wait for it to finish: it will
// clear the pod configuration cleanly, telling k8s "there are no pods" and
// clean up resources (pods, volumes, etc).
case <-k.kubeletFinished:
//TODO(jdef) attempt to wait for events to propagate to API server?
// TODO(jdef) extract constant, should be smaller than whatever the
// slave graceful shutdown timeout period is.
case <-time.After(15 * time.Second):
log.Errorf("timed out waiting for kubelet Run() to die")
}
log.Infoln("exiting")
if k.exitFunc != nil {
k.exitFunc(0)
}
}
// Destroy existing k8s containers
func (k *Executor) killKubeletContainers() {
if containers, err := dockertools.GetKubeletDockerContainers(k.dockerClient, true); err == nil {
opts := dockertypes.ContainerRemoveOptions{
RemoveVolumes: true,
Force: true,
}
for _, container := range containers {
log.V(2).Infof("Removing container: %v", container.ID)
if err := k.dockerClient.RemoveContainer(container.ID, opts); err != nil {
log.Warning(err)
}
}
} else {
log.Warningf("Failed to list kubelet docker containers: %v", err)
}
}
// Error is called when some error happens.
func (k *Executor) Error(driver bindings.ExecutorDriver, message string) {
log.Errorln(message)
}
func newStatus(taskId *mesos.TaskID, state mesos.TaskState, message string) *mesos.TaskStatus {
return &mesos.TaskStatus{
TaskId: taskId,
State: &state,
Message: proto.String(message),
}
}
func (k *Executor) sendStatus(driver bindings.ExecutorDriver, status *mesos.TaskStatus) {
select {
case <-k.terminate:
default:
k.outgoing <- func() (mesos.Status, error) { return driver.SendStatusUpdate(status) }
}
}
func (k *Executor) sendFrameworkMessage(driver bindings.ExecutorDriver, msg string) {
select {
case <-k.terminate:
default:
k.outgoing <- func() (mesos.Status, error) { return driver.SendFrameworkMessage(msg) }
}
}
func (k *Executor) runSendLoop() {
defer log.V(1).Info("sender loop exiting")
for {
select {
case <-k.terminate:
return
default:
if !k.isConnected() {
select {
case <-k.terminate:
case <-time.After(1 * time.Second):
}
continue
}
sender, ok := <-k.outgoing
if !ok {
// programming error
panic("someone closed the outgoing channel")
}
if status, err := sender(); err == nil {
continue
} else {
log.Error(err)
if status == mesos.Status_DRIVER_ABORTED {
return
}
}
// attempt to re-queue the sender
select {
case <-k.terminate:
case k.outgoing <- sender:
}
}
}
}
func annotationsFor(ei *mesos.ExecutorInfo) (annotations map[string]string, err error) {
annotations = map[string]string{}
if ei == nil {
return
}
var buf bytes.Buffer
if err = executorinfo.EncodeResources(&buf, ei.GetResources()); err != nil {
return
}
annotations[meta.ExecutorIdKey] = ei.GetExecutorId().GetValue()
annotations[meta.ExecutorResourcesKey] = buf.String()
return
}
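
To see how the pieces of Config fit together, here is a hedged construction sketch (not from the original tree); the registry argument, kubeletDone channel, and static-pods path are illustrative assumptions.

// buildExecutor is a hypothetical helper in package executor showing how Config
// and the functional StaticPods option are combined; it is not part of the original source.
func buildExecutor(registry Registry, kubeletDone chan struct{}) *Executor {
    var filters podutil.Filters // zero value: no static-pod filtering
    return New(Config{
        Docker:          dockertools.ConnectToDockerOrDie("unix:///var/run/docker.sock", 0),
        SuicideTimeout:  20 * time.Minute, // mirrors config.DefaultSuicideTimeout
        KubeletFinished: kubeletDone,
        NodeInfos:       make(chan NodeInfo, 1),
        Registry:        registry,
        Options: []Option{
            StaticPods("/tmp/static-pods", filters), // illustrative path
        },
    })
}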

View File

@ -1,636 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"fmt"
"io/ioutil"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"reflect"
"sync"
"sync/atomic"
"testing"
"time"
assertext "k8s.io/kubernetes/contrib/mesos/pkg/assert"
"k8s.io/kubernetes/contrib/mesos/pkg/executor/messages"
"k8s.io/kubernetes/contrib/mesos/pkg/podutil"
kmruntime "k8s.io/kubernetes/contrib/mesos/pkg/runtime"
"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/podtask"
"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/podtask/hostport"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/testapi"
"k8s.io/kubernetes/pkg/api/unversioned"
"k8s.io/kubernetes/pkg/client/cache"
"k8s.io/kubernetes/pkg/kubelet/dockertools"
"k8s.io/kubernetes/pkg/runtime"
utiltesting "k8s.io/kubernetes/pkg/util/testing"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/pkg/watch"
"github.com/mesos/mesos-go/mesosproto"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
)
// TestExecutorRegister ensures that the executor thinks it is connected
// after Register is called.
func TestExecutorRegister(t *testing.T) {
mockDriver := &MockExecutorDriver{}
executor := NewTestKubernetesExecutor()
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
assert.Equal(t, true, executor.isConnected(), "executor should be connected")
mockDriver.AssertExpectations(t)
}
// TestExecutorDisconnect ensures that the executor thinks that it is not
// connected after a call to Disconnected has occurred.
func TestExecutorDisconnect(t *testing.T) {
mockDriver := &MockExecutorDriver{}
executor := NewTestKubernetesExecutor()
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
executor.Disconnected(mockDriver)
assert.Equal(t, false, executor.isConnected(),
"executor should not be connected after Disconnected")
mockDriver.AssertExpectations(t)
}
// TestExecutorReregister ensures that the executor thinks it is connected
// after a connection problem happens, followed by a call to Reregistered.
func TestExecutorReregister(t *testing.T) {
mockDriver := &MockExecutorDriver{}
executor := NewTestKubernetesExecutor()
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
executor.Disconnected(mockDriver)
executor.Reregistered(mockDriver, nil)
assert.Equal(t, true, executor.isConnected(), "executor should be connected")
mockDriver.AssertExpectations(t)
}
type fakeRegistry struct {
sync.Mutex
boundTasks map[string]*api.Pod
updates chan *PodEvent
}
func newFakeRegistry() *fakeRegistry {
return &fakeRegistry{boundTasks: map[string]*api.Pod{}, updates: make(chan *PodEvent, 100)}
}
func (r *fakeRegistry) empty() bool {
r.Lock()
defer r.Unlock()
return len(r.boundTasks) == 0
}
func (r *fakeRegistry) pod(taskID string) *api.Pod {
r.Lock()
defer r.Unlock()
return r.boundTasks[taskID]
}
func (r *fakeRegistry) watch() <-chan *PodEvent { return r.updates }
func (r *fakeRegistry) shutdown() {
r.Lock()
defer r.Unlock()
r.boundTasks = map[string]*api.Pod{}
}
func (r *fakeRegistry) bind(taskID string, pod *api.Pod) error {
r.Lock()
defer r.Unlock()
pod.Annotations = map[string]string{
"k8s.mesosphere.io/taskId": taskID,
}
r.boundTasks[taskID] = pod
// the normal registry sends a bind..
r.updates <- &PodEvent{pod: pod, taskID: taskID, eventType: PodEventBound}
return nil
}
func (r *fakeRegistry) Update(pod *api.Pod) (*PodEvent, error) {
r.Lock()
defer r.Unlock()
taskID, err := taskIDFor(pod)
if err != nil {
return nil, err
}
if _, ok := r.boundTasks[taskID]; !ok {
return nil, errUnknownTask
}
rp := &PodEvent{pod: pod, taskID: taskID, eventType: PodEventUpdated}
r.updates <- rp
return rp, nil
}
func (r *fakeRegistry) Remove(taskID string) error {
r.Lock()
defer r.Unlock()
pod, ok := r.boundTasks[taskID]
if !ok {
return errUnknownTask
}
delete(r.boundTasks, taskID)
r.updates <- &PodEvent{pod: pod, taskID: taskID, eventType: PodEventDeleted}
return nil
}
// phaseChange simulates a pod source update; normally this update is generated from a watch
func (r *fakeRegistry) phaseChange(pod *api.Pod, phase api.PodPhase) error {
clone, err := api.Scheme.DeepCopy(pod)
if err != nil {
return err
}
phasedPod := clone.(*api.Pod)
phasedPod.Status.Phase = phase
_, err = r.Update(phasedPod)
return err
}
// TestExecutorLaunchAndKillTask ensures that the executor is able to launch tasks and generates
// appropriate status messages for mesos. It then kills the task and validates that appropriate
// actions are taken by the executor.
func TestExecutorLaunchAndKillTask(t *testing.T) {
var (
mockDriver = &MockExecutorDriver{}
registry = newFakeRegistry()
executor = New(Config{
Docker: dockertools.ConnectToDockerOrDie("fake://", 0),
NodeInfos: make(chan NodeInfo, 1),
Registry: registry,
})
mockKubeAPI = &mockKubeAPI{}
pod = NewTestPod(1)
executorinfo = &mesosproto.ExecutorInfo{}
)
executor.kubeAPI = mockKubeAPI
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
podTask, err := podtask.New(
api.NewDefaultContext(),
podtask.Config{
Prototype: executorinfo,
HostPortStrategy: hostport.StrategyWildcard,
},
pod,
)
assert.Equal(t, nil, err, "must be able to create a task from a pod")
pod.Annotations = map[string]string{
"k8s.mesosphere.io/taskId": podTask.ID,
}
podTask.Spec = &podtask.Spec{Executor: executorinfo}
taskInfo, err := podTask.BuildTaskInfo()
assert.Equal(t, nil, err, "must be able to build task info")
data, err := runtime.Encode(testapi.Default.Codec(), pod)
assert.Equal(t, nil, err, "must be able to encode a pod's spec data")
taskInfo.Data = data
var statusUpdateCalls sync.WaitGroup
statusUpdateCalls.Add(1)
statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_STARTING,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()
statusUpdateCalls.Add(1)
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_RUNNING,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()
executor.LaunchTask(mockDriver, taskInfo)
assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
executor.lock.Lock()
defer executor.lock.Unlock()
return !registry.empty()
}, "executor must be able to create a task and a pod")
// simulate a pod source update; normally this update is generated when binding a pod
err = registry.phaseChange(pod, api.PodPending)
assert.NoError(t, err)
// simulate a pod source update; normally this update is generated by the kubelet once the pod is healthy
err = registry.phaseChange(pod, api.PodRunning)
assert.NoError(t, err)
// Allow some time for asynchronous requests to the driver.
finished := kmruntime.After(statusUpdateCalls.Wait)
select {
case <-finished:
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timed out waiting for status update calls to finish")
}
statusUpdateCalls.Add(1)
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_KILLED,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()
// simulate what happens when the apiserver is told to delete a pod
mockKubeAPI.On("killPod", pod.Namespace, pod.Name).Return(nil).Run(func(_ mock.Arguments) {
registry.Remove(podTask.ID)
})
executor.KillTask(mockDriver, taskInfo.TaskId)
assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
executor.lock.Lock()
defer executor.lock.Unlock()
return registry.empty()
}, "executor must be able to kill a created task and pod")
// Allow some time for asynchronous requests to the driver.
finished = kmruntime.After(statusUpdateCalls.Wait)
select {
case <-finished:
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timed out waiting for status update calls to finish")
}
mockDriver.AssertExpectations(t)
mockKubeAPI.AssertExpectations(t)
}
// TestExecutorInitializeStaticPodsSource tests that ExecutorInfo.Data is parsed
// as a gzip archive with pod definitions.
func TestExecutorInitializeStaticPodsSource(t *testing.T) {
// create some zip with static pod definition
givenPodsDir, err := utiltesting.MkTmpdir("executor-givenpods")
assert.NoError(t, err)
defer os.RemoveAll(givenPodsDir)
var wg sync.WaitGroup
reportErrors := func(errCh <-chan error) {
wg.Add(1)
go func() {
defer wg.Done()
for err := range errCh {
t.Error(err)
}
}()
}
createStaticPodFile := func(fileName, name string) {
spod := `{
"apiVersion": "v1",
"kind": "Pod",
"metadata": {
"name": "%v",
"namespace": "staticpods",
"labels": { "name": "foo", "cluster": "bar" }
},
"spec": {
"containers": [{
"name": "%v",
"image": "library/nginx",
"ports": [{ "containerPort": 80, "name": "http" }]
}]
}
}`
destfile := filepath.Join(givenPodsDir, fileName)
err = os.MkdirAll(filepath.Dir(destfile), 0770)
assert.NoError(t, err)
err = ioutil.WriteFile(destfile, []byte(fmt.Sprintf(spod, name, name)), 0660)
assert.NoError(t, err)
}
createStaticPodFile("spod.json", "spod-01")
createStaticPodFile("spod2.json", "spod-02")
createStaticPodFile("dir/spod.json", "spod-03") // same file name as first one to check for overwriting
staticpods, errs := podutil.ReadFromDir(givenPodsDir)
reportErrors(errs)
gzipped, err := podutil.Gzip(staticpods)
assert.NoError(t, err)
expectedStaticPodsNum := 2 // subdirectories are ignored by FileSource, hence only 2
// temporary directory which is normally located in the executor sandbox
staticPodsConfigPath, err := utiltesting.MkTmpdir("executor-k8sm-archive")
assert.NoError(t, err)
defer os.RemoveAll(staticPodsConfigPath)
executor := &Executor{
staticPodsConfigPath: staticPodsConfigPath,
}
// extract the pods into staticPodsConfigPath
executor.initializeStaticPodsSource(&mesosproto.ExecutorInfo{Data: gzipped})
actualpods, errs := podutil.ReadFromDir(staticPodsConfigPath)
reportErrors(errs)
list := podutil.List(actualpods)
assert.NotNil(t, list)
assert.Equal(t, expectedStaticPodsNum, len(list.Items))
var (
expectedNames = map[string]struct{}{
"spod-01": {},
"spod-02": {},
}
actualNames = map[string]struct{}{}
)
for _, pod := range list.Items {
actualNames[pod.Name] = struct{}{}
}
assert.True(t, reflect.DeepEqual(expectedNames, actualNames), "expected %v instead of %v", expectedNames, actualNames)
wg.Wait()
}
// TestExecutorFrameworkMessage ensures that the executor is able to
// handle messages from the framework, specifically about lost tasks
// and Kamikaze. When a task is lost, the executor needs to clean up
// its state. When a Kamikaze message is received, the executor should
// attempt suicide.
func TestExecutorFrameworkMessage(t *testing.T) {
// TODO(jdef): Fix the unexpected call in the mocking system.
t.Skip("This test started failing when panic catching was disabled.")
var (
mockDriver = &MockExecutorDriver{}
kubeletFinished = make(chan struct{})
registry = newFakeRegistry()
executor = New(Config{
Docker: dockertools.ConnectToDockerOrDie("fake://", 0),
NodeInfos: make(chan NodeInfo, 1),
ShutdownAlert: func() {
close(kubeletFinished)
},
KubeletFinished: kubeletFinished,
Registry: registry,
})
pod = NewTestPod(1)
mockKubeAPI = &mockKubeAPI{}
)
executor.kubeAPI = mockKubeAPI
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
executor.FrameworkMessage(mockDriver, "test framework message")
// set up a pod to then lose
executorinfo := &mesosproto.ExecutorInfo{}
podTask, _ := podtask.New(
api.NewDefaultContext(),
podtask.Config{
ID: "foo",
Prototype: executorinfo,
HostPortStrategy: hostport.StrategyWildcard,
},
pod,
)
pod.Annotations = map[string]string{
"k8s.mesosphere.io/taskId": podTask.ID,
}
podTask.Spec = &podtask.Spec{
Executor: executorinfo,
}
taskInfo, err := podTask.BuildTaskInfo()
assert.Equal(t, nil, err, "must be able to build task info")
data, _ := runtime.Encode(testapi.Default.Codec(), pod)
taskInfo.Data = data
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_STARTING,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Once()
called := make(chan struct{})
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_RUNNING,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()
executor.LaunchTask(mockDriver, taskInfo)
// must wait for this otherwise phase changes may not apply
assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
executor.lock.Lock()
defer executor.lock.Unlock()
return !registry.empty()
}, "executor must be able to create a task and a pod")
err = registry.phaseChange(pod, api.PodPending)
assert.NoError(t, err)
err = registry.phaseChange(pod, api.PodRunning)
assert.NoError(t, err)
// waiting until the pod is really running b/c otherwise a TASK_FAILED could be
// triggered by the asynchronously running executor methods when removing the task
// from k.tasks through the "task-lost:foo" message below.
select {
case <-called:
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timed out waiting for SendStatusUpdate for the running task")
}
// send task-lost message for it
called = make(chan struct{})
mockDriver.On(
"SendStatusUpdate",
mesosproto.TaskState_TASK_LOST,
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()
// simulate what happens when the apiserver is told to delete a pod
mockKubeAPI.On("killPod", pod.Namespace, pod.Name).Return(nil).Run(func(_ mock.Arguments) {
registry.Remove(podTask.ID)
})
executor.FrameworkMessage(mockDriver, "task-lost:foo")
assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
executor.lock.Lock()
defer executor.lock.Unlock()
return registry.empty()
}, "executor must be able to kill a created task and pod")
select {
case <-called:
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timed out waiting for SendStatusUpdate")
}
mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()
executor.FrameworkMessage(mockDriver, messages.Kamikaze)
assert.Equal(t, true, executor.isDone(),
"executor should have shut down after receiving a Kamikaze message")
mockDriver.AssertExpectations(t)
mockKubeAPI.AssertExpectations(t)
}
// Create a pod with a given index, requiring one port
func NewTestPod(i int) *api.Pod {
name := fmt.Sprintf("pod%d", i)
return &api.Pod{
TypeMeta: unversioned.TypeMeta{APIVersion: testapi.Default.GroupVersion().String()},
ObjectMeta: api.ObjectMeta{
Name: name,
Namespace: api.NamespaceDefault,
SelfLink: testapi.Default.SelfLink("pods", fmt.Sprintf("%d", i)),
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "foo",
Ports: []api.ContainerPort{
{
ContainerPort: int32(8000 + i),
Protocol: api.ProtocolTCP,
},
},
},
},
},
Status: api.PodStatus{
Conditions: []api.PodCondition{
{
Type: api.PodReady,
Status: api.ConditionTrue,
},
},
},
}
}
// Create mock of pods ListWatch, usually listening on the apiserver pods watch endpoint
type MockPodsListWatch struct {
ListWatch cache.ListWatch
fakeWatcher *watch.FakeWatcher
list api.PodList
}
// An apiserver mock which partially mocks the pods API
type TestServer struct {
server *httptest.Server
Stats map[string]uint
lock sync.Mutex
}
func NewTestServer(t *testing.T, namespace string) *TestServer {
ts := TestServer{
Stats: map[string]uint{},
}
mux := http.NewServeMux()
mux.HandleFunc(testapi.Default.ResourcePath("bindings", namespace, ""), func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
})
ts.server = httptest.NewServer(mux)
return &ts
}
func NewMockPodsListWatch(initialPodList api.PodList) *MockPodsListWatch {
lw := MockPodsListWatch{
fakeWatcher: watch.NewFake(),
list: initialPodList,
}
lw.ListWatch = cache.ListWatch{
WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
return lw.fakeWatcher, nil
},
ListFunc: func(options api.ListOptions) (runtime.Object, error) {
return &lw.list, nil
},
}
return &lw
}
// TestExecutorShutdown ensures that the executor properly shuts down
// when Shutdown is called.
func TestExecutorShutdown(t *testing.T) {
var (
mockDriver = &MockExecutorDriver{}
kubeletFinished = make(chan struct{})
exitCalled = int32(0)
executor = New(Config{
Docker: dockertools.ConnectToDockerOrDie("fake://", 0),
NodeInfos: make(chan NodeInfo, 1),
ShutdownAlert: func() {
close(kubeletFinished)
},
KubeletFinished: kubeletFinished,
ExitFunc: func(_ int) {
atomic.AddInt32(&exitCalled, 1)
},
Registry: newFakeRegistry(),
})
)
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()
executor.Shutdown(mockDriver)
assert.Equal(t, false, executor.isConnected(),
"executor should not be connected after Shutdown")
assert.Equal(t, true, executor.isDone(),
"executor should be in Done state after Shutdown")
assert.Equal(t, true, atomic.LoadInt32(&exitCalled) > 0,
"the executor should call its ExitFunc when it is ready to close down")
mockDriver.AssertExpectations(t)
}
func TestExecutorsendFrameworkMessage(t *testing.T) {
mockDriver := &MockExecutorDriver{}
executor := NewTestKubernetesExecutor()
executor.Init(mockDriver)
executor.Registered(mockDriver, nil, nil, nil)
called := make(chan struct{})
mockDriver.On(
"SendFrameworkMessage",
"foo bar baz",
).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()
executor.sendFrameworkMessage(mockDriver, "foo bar baz")
// guard against data race in mock driver between AssertExpectations and Called
select {
case <-called: // expected
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("expected call to SendFrameworkMessage")
}
mockDriver.AssertExpectations(t)
}

View File

@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package messages exposes executor event/message names as constants.
package messages // import "k8s.io/kubernetes/contrib/mesos/pkg/executor/messages"

View File

@ -1,36 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package messages
// messages that ship with TaskStatus objects
const (
ContainersDisappeared = "containers-disappeared"
CreateBindingFailure = "create-binding-failure"
CreateBindingSuccess = "create-binding-success"
ExecutorUnregistered = "executor-unregistered"
ExecutorShutdown = "executor-shutdown"
LaunchTaskFailed = "launch-task-failed"
KubeletPodLaunchFailed = "kubelet-pod-launch-failed"
TaskKilled = "task-killed"
TaskLost = "task-lost"
UnmarshalTaskDataFailure = "unmarshal-task-data-failure"
TaskLostAck = "task-lost-ack" // executor acknowledgment of forwarded TASK_LOST framework message
Kamikaze = "kamikaze"
WrongSlaveFailure = "pod-for-wrong-slave-failure"
AnnotationUpdateFailure = "annotation-update-failure"
)

View File

@ -1,90 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"testing"
"github.com/mesos/mesos-go/mesosproto"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"k8s.io/kubernetes/pkg/kubelet/dockertools"
)
type mockKubeAPI struct {
mock.Mock
}
func (m *mockKubeAPI) killPod(ns, name string) error {
args := m.Called(ns, name)
return args.Error(0)
}
type MockExecutorDriver struct {
mock.Mock
}
func (m *MockExecutorDriver) Start() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) Stop() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) Abort() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) Join() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) Run() (mesosproto.Status, error) {
args := m.Called()
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) SendStatusUpdate(taskStatus *mesosproto.TaskStatus) (mesosproto.Status, error) {
args := m.Called(*taskStatus.State)
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func (m *MockExecutorDriver) SendFrameworkMessage(msg string) (mesosproto.Status, error) {
args := m.Called(msg)
return args.Get(0).(mesosproto.Status), args.Error(1)
}
func NewTestKubernetesExecutor() *Executor {
return New(Config{
Docker: dockertools.ConnectToDockerOrDie("fake://", 0),
Registry: newFakeRegistry(),
})
}
func TestExecutorNew(t *testing.T) {
mockDriver := &MockExecutorDriver{}
executor := NewTestKubernetesExecutor()
executor.Init(mockDriver)
assert.Equal(t, executor.isDone(), false, "executor should not be in Done state on initialization")
assert.Equal(t, executor.isConnected(), false, "executor should not be connected on initialization")
}

View File

@ -1,67 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import mesos "github.com/mesos/mesos-go/mesosproto"
type NodeInfo struct {
Cores int
Mem uint64 // in bytes
}
func nodeInfo(si *mesos.SlaveInfo, ei *mesos.ExecutorInfo) NodeInfo {
var executorCPU, executorMem float64
// get executor resources
if ei != nil {
for _, r := range ei.GetResources() {
if r == nil || r.GetType() != mesos.Value_SCALAR {
continue
}
switch r.GetName() {
case "cpus":
executorCPU += r.GetScalar().GetValue()
case "mem":
executorMem += r.GetScalar().GetValue()
}
}
}
// get resource capacity of the node
ni := NodeInfo{}
for _, r := range si.GetResources() {
if r == nil || r.GetType() != mesos.Value_SCALAR {
continue
}
switch r.GetName() {
case "cpus":
// We intentionally take the floor of executorCPU because cores are integers
// and we would lose a complete cpu here if the value is <1.
// TODO(sttts): switch to float64 when "Machine Allocables" are implemented
ni.Cores += int(r.GetScalar().GetValue())
case "mem":
ni.Mem += uint64(r.GetScalar().GetValue()) * 1024 * 1024
}
}
// TODO(sttts): subtract executorCPU/Mem from static pod resources before subtracting them from the capacity
ni.Cores -= int(executorCPU)
ni.Mem -= uint64(executorMem) * 1024 * 1024
return ni
}
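// Worked example (illustrative): a slave advertising cpus=4 and mem=2048 (MB)
// whose executor reserves cpus=0.25 and mem=128 yields
// NodeInfo{Cores: 4 - int(0.25) = 4, Mem: (2048-128)*1024*1024}; the fractional
// executor cpu is floored away while executor memory is subtracted exactly.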

View File

@ -1,340 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"encoding/json"
"errors"
"sync"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
"k8s.io/kubernetes/contrib/mesos/pkg/executor/messages"
"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/meta"
"k8s.io/kubernetes/pkg/api"
log "github.com/golang/glog"
)
type (
podEventType int
PodEvent struct {
pod *api.Pod
taskID string
eventType podEventType
}
// Registry is a state store for pod task metadata. Clients are expected to watch() the
// event stream to observe changes over time.
Registry interface {
// Update modifies the registry's internal representation of the pod; it may also
// modify the pod argument itself. An update may fail because the pod isn't
// labeled with a task ID, the task ID is unknown, or the nature of the update is
// incompatible with what's supported in kubernetes-mesos.
Update(pod *api.Pod) (*PodEvent, error)
// Remove the task from this registry, returns an error if the taskID is unknown.
Remove(taskID string) error
// bind associates a taskID with a pod, triggers the binding API on the k8s apiserver
// and stores the resulting pod-task metadata.
bind(taskID string, pod *api.Pod) error
// watch returns the event stream of the registry. clients are expected to read this
// stream, otherwise the event buffer will fill up and registry ops will block.
watch() <-chan *PodEvent
// return true if there are no tasks registered
empty() bool
// return the api.Pod registered to the given taskID or else nil
pod(taskID string) *api.Pod
// shutdown any related async processing and clear the internal state of the registry
shutdown()
}
registryImpl struct {
client *clientset.Clientset
updates chan *PodEvent
m sync.RWMutex
boundTasks map[string]*api.Pod
}
)
var (
errCreateBindingFailed = errors.New(messages.CreateBindingFailure)
errAnnotationUpdateFailure = errors.New(messages.AnnotationUpdateFailure)
errUnknownTask = errors.New("unknown task ID")
errUnsupportedUpdate = errors.New("pod update allowed by k8s is incompatible with this version of k8s-mesos")
)
const (
PodEventBound podEventType = iota
PodEventUpdated
PodEventDeleted
PodEventIncompatibleUpdate
updatesBacklogSize = 200
)
func IsUnsupportedUpdate(err error) bool {
return err == errUnsupportedUpdate
}
func (rp *PodEvent) Task() string {
return rp.taskID
}
func (rp *PodEvent) Pod() *api.Pod {
return rp.pod
}
func (rp *PodEvent) FormatShort() string {
return "task '" + rp.taskID + "' pod '" + rp.pod.Namespace + "/" + rp.pod.Name + "'"
}
func NewRegistry(client *clientset.Clientset) Registry {
r := &registryImpl{
client: client,
updates: make(chan *PodEvent, updatesBacklogSize),
boundTasks: make(map[string]*api.Pod),
}
return r
}
func (r *registryImpl) watch() <-chan *PodEvent {
return r.updates
}
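// Illustrative sketch (hypothetical package-internal helper): draining the
// registry's event stream. Consumers must keep reading or registry operations
// will eventually block once the buffered updates channel fills up.
func consumeEvents(r Registry, done <-chan struct{}) {
	for {
		select {
		case <-done:
			return
		case ev := <-r.watch():
			if ev == nil {
				return
			}
			switch ev.eventType {
			case PodEventDeleted:
				log.V(1).Infof("pod deleted: %s", ev.FormatShort())
			default:
				log.V(2).Infof("observed %s", ev.FormatShort())
			}
		}
	}
}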
func taskIDFor(pod *api.Pod) (taskID string, err error) {
taskID = pod.Annotations[meta.TaskIdKey]
if taskID == "" {
err = errUnknownTask
}
return
}
func (r *registryImpl) shutdown() {
//TODO(jdef) flesh this out
r.m.Lock()
defer r.m.Unlock()
r.boundTasks = map[string]*api.Pod{}
}
func (r *registryImpl) empty() bool {
r.m.RLock()
defer r.m.RUnlock()
return len(r.boundTasks) == 0
}
func (r *registryImpl) pod(taskID string) *api.Pod {
r.m.RLock()
defer r.m.RUnlock()
return r.boundTasks[taskID]
}
func (r *registryImpl) Remove(taskID string) error {
r.m.Lock()
defer r.m.Unlock()
pod, ok := r.boundTasks[taskID]
if !ok {
return errUnknownTask
}
delete(r.boundTasks, taskID)
r.updates <- &PodEvent{
pod: pod,
taskID: taskID,
eventType: PodEventDeleted,
}
log.V(1).Infof("unbound task %v from pod %v/%v", taskID, pod.Namespace, pod.Name)
return nil
}
func (r *registryImpl) Update(pod *api.Pod) (*PodEvent, error) {
// Don't do anything for pods without a task annotation, which means:
// - "pre-scheduled" pods which have a NodeName set to this node without being scheduled already.
// - static/mirror pods: they'll never have a TaskID annotation, and we don't expect them to ever change.
// - all other pods that haven't passed through the launch-task-binding phase, which would set annotations.
taskID, err := taskIDFor(pod)
if err != nil {
// There also could be a race between the overall launch-task process and this update, but here we
// will never be able to process such a stale update because the "update pod" that we're receiving
// in this func won't yet have a task ID annotation. It follows that we can safely drop such a stale
// update on the floor because we'll get another update later that, in addition to the changes that
// we're dropping now, will also include the changes from the binding process.
log.V(5).Infof("ignoring pod update for %s/%s because %s annotation is missing", pod.Namespace, pod.Name, meta.TaskIdKey)
return nil, err
}
// be a good citizen: copy the arg before making any changes to it
clone, err := api.Scheme.DeepCopy(pod)
if err != nil {
return nil, err
}
pod = clone.(*api.Pod)
r.m.Lock()
defer r.m.Unlock()
oldPod, ok := r.boundTasks[taskID]
if !ok {
return nil, errUnknownTask
}
registeredPod := &PodEvent{
pod: pod,
taskID: taskID,
eventType: PodEventUpdated,
}
// TODO(jdef) would be nice to only execute this logic based on the presence of
// some particular annotation:
// - preserve the original container port spec since the k8sm scheduler
// has likely changed it.
if !copyPorts(pod, oldPod) {
// TODO(jdef) the state of "pod" is possibly inconsistent at this point.
// we don't care for the moment - we might later.
registeredPod.eventType = PodEventIncompatibleUpdate
r.updates <- registeredPod
log.Warningf("pod containers changed in an incompatible way; aborting update")
return registeredPod, errUnsupportedUpdate
}
// update our internal copy and broadcast the change
r.boundTasks[taskID] = pod
r.updates <- registeredPod
log.V(1).Infof("updated task %v pod %v/%v", taskID, pod.Namespace, pod.Name)
return registeredPod, nil
}
// copyPorts copies the container port specs from src to dest and returns
// true if all containers (in both dest and src) are accounted for, otherwise
// false. If it returns false then it's possible that only a partial copy
// has been performed.
func copyPorts(dest, src *api.Pod) bool {
containers := src.Spec.Containers
ctPorts := make(map[string][]api.ContainerPort, len(containers))
for i := range containers {
ctPorts[containers[i].Name] = containers[i].Ports
}
containers = dest.Spec.Containers
for i := range containers {
name := containers[i].Name
if ports, found := ctPorts[name]; found {
containers[i].Ports = ports
delete(ctPorts, name)
} else {
// old pod spec is missing this container?!
return false
}
}
if len(ctPorts) > 0 {
// new pod spec has containers that aren't in the old pod spec
return false
}
return true
}
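// Illustrative sketch (hypothetical helper): copyPorts carries the
// scheduler-assigned port specs from the previously bound pod (src) over to an
// incoming update (dest) whose containers match by name.
func examplePreservePorts() bool {
	src := &api.Pod{Spec: api.PodSpec{Containers: []api.Container{
		{Name: "web", Ports: []api.ContainerPort{{ContainerPort: 8080, HostPort: 31000}}},
	}}}
	dest := &api.Pod{Spec: api.PodSpec{Containers: []api.Container{
		{Name: "web"}, // ports left empty; they are restored from src
	}}}
	return copyPorts(dest, src) // true: every container in dest and src is accounted for
}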
func (r *registryImpl) bind(taskID string, pod *api.Pod) error {
// validate taskID matches that of the annotation
annotatedTaskID, err := taskIDFor(pod)
if err != nil {
log.Warning("failed to bind: missing task ID annotation for pod ", pod.Namespace+"/"+pod.Name)
return errCreateBindingFailed
}
if annotatedTaskID != taskID {
log.Warningf("failed to bind: expected task-id %v instead of %v for pod %v/%v", taskID, annotatedTaskID, pod.Namespace, pod.Name)
return errCreateBindingFailed
}
// record this as a bound task for now so that we can avoid racing with the mesos pod source, which is
// watching the apiserver for pod updates and will verify pod-task validity with us upon receiving them
boundSuccessfully := false
defer func() {
if !boundSuccessfully {
r.m.Lock()
defer r.m.Unlock()
delete(r.boundTasks, taskID)
}
}()
func() {
r.m.Lock()
defer r.m.Unlock()
r.boundTasks[taskID] = pod
}()
if pod.Spec.NodeName == "" {
//HACK(jdef): cloned binding construction from k8s plugin/pkg/scheduler/framework.go
binding := &api.Binding{
ObjectMeta: api.ObjectMeta{
Namespace: pod.Namespace,
Name: pod.Name,
Annotations: make(map[string]string),
},
Target: api.ObjectReference{
Kind: "Node",
Name: pod.Annotations[meta.BindingHostKey],
},
}
// forward the annotations that the scheduler wants to apply
for k, v := range pod.Annotations {
binding.Annotations[k] = v
}
// create binding on apiserver
log.Infof("Binding task %v pod '%v/%v' to '%v' with annotations %+v...",
taskID, pod.Namespace, pod.Name, binding.Target.Name, binding.Annotations)
ctx := api.WithNamespace(api.NewContext(), binding.Namespace)
err := r.client.CoreClient.Post().Namespace(api.NamespaceValue(ctx)).Resource("bindings").Body(binding).Do().Error()
if err != nil {
log.Warningf("failed to bind task %v pod %v/%v: %v", taskID, pod.Namespace, pod.Name, err)
return errCreateBindingFailed
}
} else {
// post annotations update to apiserver
patch := struct {
Metadata struct {
Annotations map[string]string `json:"annotations"`
} `json:"metadata"`
}{}
patch.Metadata.Annotations = pod.Annotations
patchJson, _ := json.Marshal(patch)
log.V(4).Infof("Patching annotations %v of task %v pod %v/%v: %v", pod.Annotations, taskID, pod.Namespace, pod.Name, string(patchJson))
err := r.client.CoreClient.Patch(api.MergePatchType).RequestURI(pod.SelfLink).Body(patchJson).Do().Error()
if err != nil {
log.Errorf("Error updating annotations of ready-to-launch task %v pod %v/%v: %v", taskID, pod.Namespace, pod.Name, err)
return errAnnotationUpdateFailure
}
}
boundSuccessfully = true
r.updates <- &PodEvent{
pod: pod,
taskID: taskID,
eventType: PodEventBound,
}
log.V(1).Infof("bound task %v to pod %v/%v", taskID, pod.Namespace, pod.Name)
return nil
}

View File

@ -1,51 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
cadvisorapi "github.com/google/cadvisor/info/v1"
)
type MesosCadvisor struct {
cadvisor.Interface
cores int
mem uint64
}
func NewMesosCadvisor(cores int, mem uint64, port uint, runtime string) (*MesosCadvisor, error) {
c, err := cadvisor.New(port, runtime)
if err != nil {
return nil, err
}
return &MesosCadvisor{c, cores, mem}, nil
}
func (mc *MesosCadvisor) MachineInfo() (*cadvisorapi.MachineInfo, error) {
mi, err := mc.Interface.MachineInfo()
if err != nil {
return nil, err
}
// set Mesos provided values
mesosMi := *mi
mesosMi.NumCores = mc.cores
mesosMi.MemoryCapacity = mc.mem
return &mesosMi, nil
}
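// Illustrative sketch (hypothetical helper): wrap the stock cAdvisor with the
// core/memory values offered by Mesos so that the kubelet reports Mesos-scoped
// capacity instead of the physical machine's.
func exampleMesosMachineInfo(cores int, memBytes uint64, port uint, runtime string) (*cadvisorapi.MachineInfo, error) {
	mc, err := NewMesosCadvisor(cores, memBytes, port, runtime)
	if err != nil {
		return nil, err
	}
	return mc.MachineInfo()
}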

View File

@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package service contains the cmd/k8sm-executor glue code.
package service // import "k8s.io/kubernetes/contrib/mesos/pkg/executor/service"

View File

@ -1,84 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
log "github.com/golang/glog"
"k8s.io/kubernetes/pkg/kubelet"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/util/runtime"
"k8s.io/kubernetes/pkg/util/wait"
)
// executorKubelet decorates the kubelet with a Run function that notifies the
// executor by closing kubeletDone before entering blocking state.
type executorKubelet struct {
*kubelet.Kubelet
kubeletDone chan<- struct{} // closed once kubelet.Run() returns
executorDone <-chan struct{} // closed when executor terminates
}
// Run runs the main kubelet loop, closing the kubeletFinished chan when the
// loop exits. Like the upstream Run, it will never return.
func (kl *executorKubelet) Run(mergedUpdates <-chan kubetypes.PodUpdate) {
defer func() {
// When the wrapped kubelet run loop terminates, we close kubeletDone here.
// Otherwise, KubeletExecutorServer.runKubelet will.
close(kl.kubeletDone)
runtime.HandleCrash()
log.Infoln("kubelet run terminated") //TODO(jdef) turn down verbosity
// important: never return! this is in our contract
select {}
}()
// push merged updates into another, closable update channel which is closed
// when the executor shuts down.
closableUpdates := make(chan kubetypes.PodUpdate)
go func() {
// closing closableUpdates will cause our patched kubelet's syncLoop() to exit
defer close(closableUpdates)
pipeLoop:
for {
select {
case <-kl.executorDone:
break pipeLoop
default:
select {
case u := <-mergedUpdates:
select {
case closableUpdates <- u: // noop
case <-kl.executorDone:
break pipeLoop
}
case <-kl.executorDone:
break pipeLoop
}
}
}
}()
// we expect that Run() will complete after closableUpdates is closed and the
// kubelet's syncLoop() has finished processing its backlog, which hopefully
// will not take very long. Peeking into the future (current k8s master) it
// seems that the backlog has grown from 1 to 50 -- this may negatively impact
// us going forward, time will tell.
wait.Until(func() { kl.Kubelet.Run(closableUpdates) }, 0, kl.executorDone)
//TODO(jdef) revisit this if/when executor failover lands
// Force kubelet to delete all pods.
kl.HandlePodRemoves(kl.GetPods())
}

View File

@ -1,200 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podsource
import (
"k8s.io/kubernetes/contrib/mesos/pkg/executor"
"k8s.io/kubernetes/contrib/mesos/pkg/podutil"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/client/cache"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
log "github.com/golang/glog"
)
type (
filterType int
podName struct {
namespace, name string
}
// Filter is invoked for each snapshot of pod state that passes through this source
Filter interface {
// Before is invoked before any pods are evaluated
Before(podCount int)
// Accept returns true if this pod should be accepted by the source; a value
// of false results in the pod appearing to have been removed from apiserver.
// If true, the caller should use the output pod value for the remainder of
// the processing task. If false then the output pod value may be nil.
Accept(*api.Pod) (*api.Pod, bool)
// After is invoked after all pods have been evaluated
After()
}
// FilterFunc is a simplified Filter implementation that only implements Filter.Accept, its
// Before and After implementations are noop.
FilterFunc func(*api.Pod) (*api.Pod, bool)
Source struct {
stop <-chan struct{}
out chan<- interface{} // never close this because pkg/util/config.mux doesn't handle that very well
filters []Filter // additional filters to apply to pod objects
}
Option func(*Source)
)
const (
// if we don't use this source name then the kubelet will do funny mirror-pod things. we alias
// it here for convenience. see the docs for Source for additional explanation.
// @see ConfigSourceAnnotationKey
MesosSource = kubetypes.ApiserverSource
)
func (f FilterFunc) Before(_ int) {}
func (f FilterFunc) After() {}
func (f FilterFunc) Accept(pod *api.Pod) (*api.Pod, bool) { return f(pod) }
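// Illustrative sketch (hypothetical helper): a FilterFunc that accepts pods
// carrying a given annotation key and abstains (returns false) otherwise,
// leaving the decision to the remaining filters in the chain.
func filterAnnotated(key string) FilterFunc {
	return func(pod *api.Pod) (*api.Pod, bool) {
		_, ok := pod.Annotations[key]
		return pod, ok
	}
}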
// Mesos spawns a new pod source that watches API server for changes and collaborates with
// executor.Registry to generate api.Pod objects in a fashion that's very Mesos-aware.
func Mesos(
stop <-chan struct{},
out chan<- interface{},
podWatch *cache.ListWatch,
registry executor.Registry,
options ...Option,
) {
source := &Source{
stop: stop,
out: out,
filters: []Filter{
FilterFunc(filterMirrorPod),
&registeredPodFilter{registry: registry},
},
}
// note: any filters added by options should be applied after the defaults
for _, opt := range options {
opt(source)
}
// reflect changes from the watch into a chan, filtered to include only mirror pods
// (have an ConfigMirrorAnnotationKey attr)
cache.NewReflector(
podWatch,
&api.Pod{},
cache.NewUndeltaStore(source.send, cache.MetaNamespaceKeyFunc),
0,
).RunUntil(stop)
}
func filterMirrorPod(p *api.Pod) (*api.Pod, bool) {
_, ok := (*p).Annotations[kubetypes.ConfigMirrorAnnotationKey]
return p, ok
}
type registeredPodFilter struct {
priorPodNames, podNames map[podName]string // maps a podName to a taskID
registry executor.Registry
}
func (rpf *registeredPodFilter) Before(podCount int) {
rpf.priorPodNames = rpf.podNames
rpf.podNames = make(map[podName]string, podCount)
}
func (rpf *registeredPodFilter) After() {
// detect when pods are deleted and notify the registry
for k, taskID := range rpf.priorPodNames {
if _, found := rpf.podNames[k]; !found {
rpf.registry.Remove(taskID)
}
}
}
func (rpf *registeredPodFilter) Accept(p *api.Pod) (*api.Pod, bool) {
rpod, err := rpf.registry.Update(p)
if err == nil {
// pod is bound to a task, and the update is compatible
// so we'll allow it through
p = rpod.Pod() // use the (possibly) updated pod spec!
rpf.podNames[podName{p.Namespace, p.Name}] = rpod.Task()
return p, true
}
if rpod != nil {
// we were able to ID the pod but the update still failed...
log.Warningf("failed to update registry for task %v pod %v/%v: %v",
rpod.Task(), p.Namespace, p.Name, err)
}
return nil, false
}
// send is an update callback invoked by NewUndeltaStore; it applies all of source.filters
// to the incoming pod snapshot and forwards a PodUpdate that contains a snapshot of all
// the pods that were accepted by the filters.
func (source *Source) send(objs []interface{}) {
var (
podCount = len(objs)
pods = make([]*api.Pod, 0, podCount)
)
for _, f := range source.filters {
f.Before(podCount)
}
foreachPod:
for _, o := range objs {
p := o.(*api.Pod)
for _, f := range source.filters {
if p, ok := f.Accept(p); ok {
pods = append(pods, p)
continue foreachPod
}
}
// unrecognized pod
log.V(2).Infof("skipping pod %v/%v", p.Namespace, p.Name)
}
// TODO(jdef) should these be applied in reverse order instead?
for _, f := range source.filters {
f.After()
}
u := kubetypes.PodUpdate{
Op: kubetypes.SET,
Pods: pods,
Source: MesosSource,
}
select {
case <-source.stop:
case source.out <- u:
log.V(2).Infof("sent %d pod updates", len(pods))
}
}
func ContainerEnvOverlay(env []api.EnvVar) Option {
return func(s *Source) {
// prepend this filter so that it impacts *all* pods running on the slave
s.filters = append([]Filter{filterContainerEnvOverlay(env)}, s.filters...)
}
}
func filterContainerEnvOverlay(env []api.EnvVar) FilterFunc {
f := podutil.Environment(env)
return func(pod *api.Pod) (*api.Pod, bool) {
f(pod)
// we shouldn't vote; let someone else decide whether the pod gets accepted
return pod, false
}
}

View File

@ -1,321 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"fmt"
"net"
"os"
"path/filepath"
"time"
log "github.com/golang/glog"
bindings "github.com/mesos/mesos-go/executor"
"github.com/spf13/pflag"
kubeletapp "k8s.io/kubernetes/cmd/kubelet/app"
"k8s.io/kubernetes/cmd/kubelet/app/options"
"k8s.io/kubernetes/contrib/mesos/pkg/executor"
"k8s.io/kubernetes/contrib/mesos/pkg/executor/config"
"k8s.io/kubernetes/contrib/mesos/pkg/executor/service/podsource"
"k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
"k8s.io/kubernetes/contrib/mesos/pkg/podutil"
"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/meta"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/apis/componentconfig"
"k8s.io/kubernetes/pkg/client/cache"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
"k8s.io/kubernetes/pkg/fields"
"k8s.io/kubernetes/pkg/kubelet"
"k8s.io/kubernetes/pkg/kubelet/cm"
kconfig "k8s.io/kubernetes/pkg/kubelet/config"
"k8s.io/kubernetes/pkg/kubelet/dockertools"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/types"
)
// TODO(jdef): passing the value of envContainerID to all docker containers instantiated
// through the kubelet is part of a strategy to enable orphan container GC; this can all
// be ripped out once we have a kubelet runtime that leverages Mesos native containerization.
// envContainerID is the name of the environment variable that contains the
// Mesos-assigned container ID of the Executor.
const envContainerID = "MESOS_EXECUTOR_CONTAINER_UUID"
type KubeletExecutorServer struct {
*options.KubeletServer
SuicideTimeout time.Duration
LaunchGracePeriod time.Duration
containerID string
}
func NewKubeletExecutorServer() *KubeletExecutorServer {
k := &KubeletExecutorServer{
KubeletServer: options.NewKubeletServer(),
SuicideTimeout: config.DefaultSuicideTimeout,
LaunchGracePeriod: config.DefaultLaunchGracePeriod,
}
if pwd, err := os.Getwd(); err != nil {
log.Warningf("failed to determine current directory: %v", err)
} else {
k.RootDirectory = pwd // mesos sandbox dir
}
k.Address = defaultBindingAddress()
return k
}
func (s *KubeletExecutorServer) AddFlags(fs *pflag.FlagSet) {
s.KubeletServer.AddFlags(fs)
fs.DurationVar(&s.SuicideTimeout, "suicide-timeout", s.SuicideTimeout, "Self-terminate after this period of inactivity. Zero disables suicide watch.")
fs.DurationVar(&s.LaunchGracePeriod, "mesos-launch-grace-period", s.LaunchGracePeriod, "Launch grace period after which launching tasks will be cancelled. Zero disables launch cancellation.")
}
func (s *KubeletExecutorServer) runExecutor(
nodeInfos chan<- executor.NodeInfo,
kubeletFinished <-chan struct{},
staticPodsConfigPath string,
apiclient *clientset.Clientset,
registry executor.Registry,
) (<-chan struct{}, error) {
staticPodFilters := podutil.Filters{
// annotate the pod with BindingHostKey so that the scheduler will ignore the pod
// once it appears in the pod registry. the stock kubelet sets the pod host in order
// to accomplish the same; we do this because the k8sm scheduler works differently.
podutil.Annotator(map[string]string{
meta.BindingHostKey: s.HostnameOverride,
}),
}
if s.containerID != "" {
// tag all pod containers with the containerID so that they can be properly GC'd by Mesos
staticPodFilters = append(staticPodFilters, podutil.Environment([]api.EnvVar{
{Name: envContainerID, Value: s.containerID},
}))
}
exec := executor.New(executor.Config{
Registry: registry,
APIClient: apiclient,
Docker: dockertools.ConnectToDockerOrDie(s.DockerEndpoint, 0),
SuicideTimeout: s.SuicideTimeout,
KubeletFinished: kubeletFinished,
ExitFunc: os.Exit,
NodeInfos: nodeInfos,
Options: []executor.Option{
executor.StaticPods(staticPodsConfigPath, staticPodFilters),
},
})
// initialize driver and initialize the executor with it
dconfig := bindings.DriverConfig{
Executor: exec,
HostnameOverride: s.HostnameOverride,
BindingAddress: net.ParseIP(s.Address),
}
driver, err := bindings.NewMesosExecutorDriver(dconfig)
if err != nil {
return nil, fmt.Errorf("failed to create executor driver: %v", err)
}
log.V(2).Infof("Initialize executor driver...")
exec.Init(driver)
// start the driver
go func() {
if _, err := driver.Run(); err != nil {
log.Fatalf("executor driver failed: %v", err)
}
log.Info("executor Run completed")
}()
return exec.Done(), nil
}
func (s *KubeletExecutorServer) runKubelet(
nodeInfos <-chan executor.NodeInfo,
kubeletDone chan<- struct{},
staticPodsConfigPath string,
apiclient *clientset.Clientset,
podLW *cache.ListWatch,
registry executor.Registry,
executorDone <-chan struct{},
) (err error) {
defer func() {
if err != nil {
// close the channel here. When Run returns without error, the executorKubelet is
// responsible for doing this. If it returns with an error, we are responsible here.
close(kubeletDone)
}
}()
kubeDeps, err := kubeletapp.UnsecuredKubeletDeps(s.KubeletServer)
if err != nil {
return err
}
// apply Mesos specific settings
kubeDeps.Builder = func(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *kubelet.KubeletDeps, standaloneMode bool) (kubelet.KubeletBootstrap, error) {
k, err := kubeletapp.CreateAndInitKubelet(kubeCfg, kubeDeps, standaloneMode)
if err != nil {
return k, err
}
// decorate kubelet such that it shuts down when the executor is
decorated := &executorKubelet{
Kubelet: k.(*kubelet.Kubelet),
kubeletDone: kubeletDone,
executorDone: executorDone,
}
return decorated, nil
}
s.RuntimeCgroups = "" // don't move the docker daemon into a cgroup
kubeDeps.KubeClient = apiclient
// taken from KubeletServer#Run(*KubeletConfig)
eventClientConfig, err := kubeletapp.CreateAPIServerClientConfig(s.KubeletServer)
if err != nil {
return err
}
// make a separate client for events
eventClientConfig.QPS = float32(s.EventRecordQPS)
eventClientConfig.Burst = int(s.EventBurst)
kubeDeps.EventClient, err = clientset.NewForConfig(eventClientConfig)
if err != nil {
return err
}
kubeDeps.PodConfig = kconfig.NewPodConfig(kconfig.PodConfigNotificationIncremental, kubeDeps.Recorder) // override the default pod source
s.SystemCgroups = "" // don't take control over other system processes.
if kubeDeps.Cloud != nil {
// fail early and hard because having the cloud provider loaded would go unnoticed,
// but would break bigger clusters because accessing the state.json from every slave kills the master.
panic("cloud provider must not be set")
}
// create custom cAdvisor interface which return the resource values that Mesos reports
ni := <-nodeInfos
cAdvisorInterface, err := NewMesosCadvisor(ni.Cores, ni.Mem, uint(s.CAdvisorPort), s.ContainerRuntime)
if err != nil {
return err
}
kubeDeps.CAdvisorInterface = cAdvisorInterface
kubeDeps.ContainerManager, err = cm.NewContainerManager(kubeDeps.Mounter, cAdvisorInterface, cm.NodeConfig{
RuntimeCgroupsName: s.RuntimeCgroups,
SystemCgroupsName: s.SystemCgroups,
KubeletCgroupsName: s.KubeletCgroups,
ContainerRuntime: s.ContainerRuntime,
})
if err != nil {
return err
}
go func() {
for ni := range nodeInfos {
// TODO(sttts): implement with MachineAllocable mechanism when https://github.com/kubernetes/kubernetes/issues/13984 is finished
log.V(3).Infof("ignoring updated node resources: %v", ni)
}
}()
// create main pod source, it will stop generating events once executorDone is closed
var containerOptions []podsource.Option
if s.containerID != "" {
// tag all pod containers with the containerID so that they can be properly GC'd by Mesos
containerOptions = append(containerOptions, podsource.ContainerEnvOverlay([]api.EnvVar{
{Name: envContainerID, Value: s.containerID},
}))
kubeDeps.ContainerRuntimeOptions = append(kubeDeps.ContainerRuntimeOptions,
dockertools.PodInfraContainerEnv(map[string]string{
envContainerID: s.containerID,
}))
}
podsource.Mesos(executorDone, kubeDeps.PodConfig.Channel(podsource.MesosSource), podLW, registry, containerOptions...)
// create static-pods directory file source
log.V(2).Infof("initializing static pods source factory, configured at path %q", staticPodsConfigPath)
fileSourceUpdates := kubeDeps.PodConfig.Channel(kubetypes.FileSource)
kconfig.NewSourceFile(staticPodsConfigPath, s.HostnameOverride, s.FileCheckFrequency.Duration, fileSourceUpdates)
// run the kubelet
// NOTE: because kubeDeps != nil holds, the upstream Run function will not
// initialize the cloud provider. We explicitly do not want
// that because then every kubelet instance would query the master's
// state.json, which does not scale.
s.KubeletServer.LockFilePath = "" // disable lock file
err = kubeletapp.Run(s.KubeletServer, kubeDeps)
return
}
// Run runs the specified KubeletExecutorServer.
func (s *KubeletExecutorServer) Run(hks hyperkube.Interface, _ []string) error {
// create shared channels
kubeletFinished := make(chan struct{})
nodeInfos := make(chan executor.NodeInfo, 1)
// create static pods directory
staticPodsConfigPath := filepath.Join(s.RootDirectory, "static-pods")
err := os.Mkdir(staticPodsConfigPath, 0750)
if err != nil {
return err
}
// we're expecting that either Mesos or the minion process will set this for us
s.containerID = os.Getenv(envContainerID)
if s.containerID == "" {
log.Warningf("missing expected environment variable %q", envContainerID)
}
// create apiserver client
var apiclient *clientset.Clientset
clientConfig, err := kubeletapp.CreateAPIServerClientConfig(s.KubeletServer)
if err == nil {
apiclient, err = clientset.NewForConfig(clientConfig)
}
if err != nil {
// required for k8sm since we need to send api.Binding information back to the apiserver
return fmt.Errorf("cannot create API client: %v", err)
}
var (
pw = cache.NewListWatchFromClient(apiclient.CoreClient, "pods", api.NamespaceAll,
fields.OneTermEqualSelector(api.PodHostField, s.HostnameOverride),
)
reg = executor.NewRegistry(apiclient)
)
// start executor
var executorDone <-chan struct{}
executorDone, err = s.runExecutor(nodeInfos, kubeletFinished, staticPodsConfigPath, apiclient, reg)
if err != nil {
return err
}
// start kubelet, blocking
return s.runKubelet(nodeInfos, kubeletFinished, staticPodsConfigPath, apiclient, pw, reg, executorDone)
}
func defaultBindingAddress() string {
libProcessIP := os.Getenv("LIBPROCESS_IP")
if libProcessIP == "" {
return "0.0.0.0"
} else {
return libProcessIP
}
}

View File

@ -1,65 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"time"
log "github.com/golang/glog"
bindings "github.com/mesos/mesos-go/executor"
)
// func that attempts suicide
type jumper func(bindings.ExecutorDriver, <-chan struct{})
type suicideWatcher interface {
Next(time.Duration, bindings.ExecutorDriver, jumper) suicideWatcher
Reset(time.Duration) bool
Stop() bool
}
// TODO(jdef) add metrics for this?
type suicideTimer struct {
timer *time.Timer
}
func (w *suicideTimer) Next(d time.Duration, driver bindings.ExecutorDriver, f jumper) suicideWatcher {
return &suicideTimer{
timer: time.AfterFunc(d, func() {
log.Warningf("Suicide timeout (%v) expired", d)
f(driver, nil)
}),
}
}
func (w *suicideTimer) Stop() (result bool) {
if w != nil && w.timer != nil {
log.Infoln("stopping suicide watch") //TODO(jdef) debug
result = w.timer.Stop()
}
return
}
// return true if the timer was successfully reset
func (w *suicideTimer) Reset(d time.Duration) bool {
if w != nil && w.timer != nil {
log.Infoln("resetting suicide watch") //TODO(jdef) debug
w.timer.Reset(d)
return true
}
return false
}
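// Illustrative sketch (hypothetical helper): arming, disarming and re-arming a
// suicide watch. Next arms the timer with a jumper callback (the real executor
// aborts its driver there); Stop disarms it while tasks remain registered.
func exampleSuicideWatch(driver bindings.ExecutorDriver, timeout time.Duration) suicideWatcher {
	var w suicideWatcher = &suicideTimer{}
	w = w.Next(timeout, driver, func(_ bindings.ExecutorDriver, _ <-chan struct{}) {
		log.Warningln("suicide timeout elapsed")
	})
	w.Stop()         // disarm while at least one task is bound
	w.Reset(timeout) // re-arm once the executor is idle again
	return w
}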

View File

@ -1,197 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"sync/atomic"
"testing"
"time"
"github.com/golang/glog"
bindings "github.com/mesos/mesos-go/executor"
"k8s.io/kubernetes/pkg/api"
)
type suicideTracker struct {
suicideWatcher
stops uint32
resets uint32
timers uint32
jumps *uint32
}
func (t *suicideTracker) Reset(d time.Duration) bool {
defer func() { t.resets++ }()
return t.suicideWatcher.Reset(d)
}
func (t *suicideTracker) Stop() bool {
defer func() { t.stops++ }()
return t.suicideWatcher.Stop()
}
func (t *suicideTracker) Next(d time.Duration, driver bindings.ExecutorDriver, f jumper) suicideWatcher {
tracker := &suicideTracker{
stops: t.stops,
resets: t.resets,
jumps: t.jumps,
timers: t.timers + 1,
}
jumper := tracker.makeJumper(f)
tracker.suicideWatcher = t.suicideWatcher.Next(d, driver, jumper)
return tracker
}
func (t *suicideTracker) makeJumper(_ jumper) jumper {
return jumper(func(driver bindings.ExecutorDriver, cancel <-chan struct{}) {
glog.Warningln("Jumping?!")
if t.jumps != nil {
atomic.AddUint32(t.jumps, 1)
}
})
}
func TestSuicide_zeroTimeout(t *testing.T) {
defer glog.Flush()
k := NewTestKubernetesExecutor()
tracker := &suicideTracker{suicideWatcher: k.suicideWatch}
k.suicideWatch = tracker
ch := k.resetSuicideWatch(nil)
select {
case <-ch:
case <-time.After(2 * time.Second):
t.Fatalf("timeout waiting for reset of suicide watch")
}
if tracker.stops != 0 {
t.Fatalf("expected no stops since suicideWatchTimeout was never set")
}
if tracker.resets != 0 {
t.Fatalf("expected no resets since suicideWatchTimeout was never set")
}
if tracker.timers != 0 {
t.Fatalf("expected no timers since suicideWatchTimeout was never set")
}
}
func TestSuicide_WithTasks(t *testing.T) {
defer glog.Flush()
k := NewTestKubernetesExecutor()
k.suicideTimeout = 50 * time.Millisecond
jumps := uint32(0)
tracker := &suicideTracker{suicideWatcher: k.suicideWatch, jumps: &jumps}
k.suicideWatch = tracker
k.registry.bind("foo", &api.Pod{}) // prevent suicide attempts from succeeding
// call reset with a nil timer
glog.Infoln("Resetting suicide watch with 1 task")
select {
case <-k.resetSuicideWatch(nil):
tracker = k.suicideWatch.(*suicideTracker)
if tracker.stops != 1 {
t.Fatalf("expected suicide attempt to Stop() since there are registered tasks")
}
if tracker.resets != 0 {
t.Fatalf("expected no resets since")
}
if tracker.timers != 0 {
t.Fatalf("expected no timers since")
}
case <-time.After(1 * time.Second):
t.Fatalf("initial suicide watch setup failed")
}
k.registry.Remove("foo") // zero remaining tasks
k.suicideTimeout = 1500 * time.Millisecond
suicideStart := time.Now()
// reset the suicide watch, which should actually start a timer now
glog.Infoln("Resetting suicide watch with 0 tasks")
select {
case <-k.resetSuicideWatch(nil):
tracker = k.suicideWatch.(*suicideTracker)
if tracker.stops != 1 {
t.Fatalf("did not expect suicide attempt to Stop() since there are no registered tasks")
}
if tracker.resets != 1 {
t.Fatalf("expected 1 resets instead of %d", tracker.resets)
}
if tracker.timers != 1 {
t.Fatalf("expected 1 timers instead of %d", tracker.timers)
}
case <-time.After(1 * time.Second):
t.Fatalf("2nd suicide watch setup failed")
}
k.lock.Lock()
k.registry.bind("foo", &api.Pod{}) // prevent suicide attempts from succeeding
k.lock.Unlock()
// reset the suicide watch, which should stop the existing timer
glog.Infoln("Resetting suicide watch with 1 task")
select {
case <-k.resetSuicideWatch(nil):
tracker = k.suicideWatch.(*suicideTracker)
if tracker.stops != 2 {
t.Fatalf("expected 2 stops instead of %d since there are registered tasks", tracker.stops)
}
if tracker.resets != 1 {
t.Fatalf("expected 1 resets instead of %d", tracker.resets)
}
if tracker.timers != 1 {
t.Fatalf("expected 1 timers instead of %d", tracker.timers)
}
case <-time.After(1 * time.Second):
t.Fatalf("3rd suicide watch setup failed")
}
k.lock.Lock()
k.registry.Remove("foo") // allow suicide attempts to schedule
k.lock.Unlock()
// reset the suicide watch, which should reset a stopped timer
glog.Infoln("Resetting suicide watch with 0 tasks")
select {
case <-k.resetSuicideWatch(nil):
tracker = k.suicideWatch.(*suicideTracker)
if tracker.stops != 2 {
t.Fatalf("expected 2 stops instead of %d since there are no registered tasks", tracker.stops)
}
if tracker.resets != 2 {
t.Fatalf("expected 2 resets instead of %d", tracker.resets)
}
if tracker.timers != 1 {
t.Fatalf("expected 1 timers instead of %d", tracker.timers)
}
case <-time.After(1 * time.Second):
t.Fatalf("4th suicide watch setup failed")
}
sinceWatch := time.Since(suicideStart)
time.Sleep(3*time.Second - sinceWatch) // give the first timer a chance to misfire (it shouldn't, since Stop() was called)
if j := atomic.LoadUint32(&jumps); j != 1 {
t.Fatalf("expected 1 jumps instead of %d since stop was called", j)
} else {
glog.Infoln("Jumps verified") // glog so we get a timestamp
}
}

View File

@ -1,150 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package executor
import (
"sync"
"time"
log "github.com/golang/glog"
)
type (
// filter registration events, return false to abort further processing of the event
watchFilter func(pod *PodEvent) (accept bool)
watchExpiration struct {
// timeout fires when the handler has expired; it delivers at most one Time.
timeout <-chan time.Time
// onEvent is an optional callback that is invoked if/when the handler
// expires
onEvent func(taskID string)
}
watchHandler struct {
// prevent callbacks from being invoked simultaneously
sync.Mutex
// handle registration events, return true to indicate the handler should be
// de-registered upon completion. If pod is nil then the associated handler
// has expired.
onEvent func(pod *PodEvent) (done bool, err error)
// expiration is an optional configuration that indicates when a handler should
// be considered to have expired, and what action to take upon such
expiration watchExpiration
}
// watcher observes PodEvent events and conditionally executes handlers that
// have been associated with the taskID of the PodEvent.
watcher struct {
updates <-chan *PodEvent
rw sync.RWMutex
handlers map[string]*watchHandler
filters []watchFilter
runOnce chan struct{}
}
)
func newWatcher(updates <-chan *PodEvent) *watcher {
return &watcher{
updates: updates,
handlers: make(map[string]*watchHandler),
runOnce: make(chan struct{}),
}
}
func (pw *watcher) run() {
select {
case <-pw.runOnce:
log.Error("run() has already been invoked for this pod-watcher")
return
default:
close(pw.runOnce)
}
updateLoop:
for u := range pw.updates {
log.V(2).Info("filtering " + u.FormatShort())
for _, f := range pw.filters {
if !f(u) {
continue updateLoop
}
}
log.V(1).Info("handling " + u.FormatShort())
h, ok := func() (h *watchHandler, ok bool) {
pw.rw.RLock()
defer pw.rw.RUnlock()
h, ok = pw.handlers[u.taskID]
return
}()
if ok {
log.V(1).Info("executing action for " + u.FormatShort())
done, err := func() (bool, error) {
h.Lock()
defer h.Unlock()
return h.onEvent(u)
}()
if err != nil {
log.Error(err)
}
if done {
// de-register handler upon successful completion of action
log.V(1).Info("de-registering handler for " + u.FormatShort())
func() {
pw.rw.Lock()
delete(pw.handlers, u.taskID)
pw.rw.Unlock()
}()
}
}
}
}
func (pw *watcher) addFilter(f watchFilter) {
select {
case <-pw.runOnce:
log.Errorf("failed to add filter because pod-watcher is already running")
default:
pw.filters = append(pw.filters, f)
}
}
// forTask associates a handler `h` with the given taskID.
func (pw *watcher) forTask(taskID string, h *watchHandler) {
pw.rw.Lock()
pw.handlers[taskID] = h
pw.rw.Unlock()
if exp := h.expiration; exp.timeout != nil {
go func() {
<-exp.timeout
log.V(1).Infof("expiring handler for task %v", taskID)
// de-register handler upon expiration
pw.rw.Lock()
delete(pw.handlers, taskID)
pw.rw.Unlock()
if exp.onEvent != nil {
h.Lock()
defer h.Unlock()
exp.onEvent(taskID)
}
}()
}
}
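// Illustrative sketch (hypothetical helper): registering a one-shot handler
// for a task's next registry event, with a grace period after which the
// handler silently expires.
func exampleAwaitTaskEvent(pw *watcher, taskID string, grace time.Duration) {
	pw.forTask(taskID, &watchHandler{
		onEvent: func(pod *PodEvent) (bool, error) {
			log.V(1).Info("observed " + pod.FormatShort())
			return true, nil // done: de-register after the first matching event
		},
		expiration: watchExpiration{
			timeout: time.After(grace),
			onEvent: func(id string) { log.V(1).Infof("gave up waiting for task %v", id) },
		},
	})
}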

View File

@ -1,47 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package flagutil
import (
"flag"
// kubelet attempts to customize default values for some cadvisor flags, so
// make sure that we pick these up.
_ "k8s.io/kubernetes/pkg/kubelet/cadvisor"
)
// FlagFunc retrieves a specific flag instance; returns nil if the flag is not configured.
type FlagFunc func() *flag.Flag
// NameValue returns the name and value of a flag, if it exists, otherwise empty strings.
func (ff FlagFunc) NameValue() (name, value string) {
if f := ff(); f != nil {
name, value = f.Name, f.Value.String()
}
return
}
func flagFunc(name string) FlagFunc { return func() *flag.Flag { return flag.Lookup(name) } }
// Cadvisor fields return the configured values of cadvisor global flags
var Cadvisor = struct {
HousekeepingInterval FlagFunc
GlobalHousekeepingInterval FlagFunc
}{
flagFunc("housekeeping_interval"),
flagFunc("global_housekeeping_interval"),
}
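// Illustrative sketch (hypothetical helper): reading the cadvisor housekeeping
// interval configured on the global flag set; both strings come back empty if
// the flag is not registered.
func exampleHousekeepingInterval() (name, value string) {
	return Cadvisor.HousekeepingInterval.NameValue()
}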

View File

@ -1,24 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package flagutil
import (
// TODO(jdef) kill this once cadvisor flags are no longer configured by
// global variables. Importing it this way guarantees that the global flag
// variables are initialized.
_ "github.com/google/cadvisor/manager"
)

View File

@ -1,21 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package hyperkube facilitates the combination of multiple
// kubernetes-mesos components into a single binary form, providing a
// simple mechanism for intra-component discovery as per the original
// Kubernetes hyperkube package.
package hyperkube // import "k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"

View File

@ -1,26 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package hyperkube
const (
CommandApiserver = "apiserver"
CommandControllerManager = "controller-manager"
CommandExecutor = "executor"
CommandMinion = "minion"
CommandProxy = "proxy"
CommandScheduler = "scheduler"
)

View File

@ -1,54 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package hyperkube
import (
"github.com/spf13/pflag"
)
var (
nilKube = &nilKubeType{}
)
type Interface interface {
// FindServer will find a specific server named name.
FindServer(name string) bool
// The executable name, used for help and soft-link invocation
Name() string
// Flags returns a flagset for "global" flags.
Flags() *pflag.FlagSet
}
type nilKubeType struct{}
func (n *nilKubeType) FindServer(_ string) bool {
return false
}
func (n *nilKubeType) Name() string {
return ""
}
func (n *nilKubeType) Flags() *pflag.FlagSet {
return nil
}
func Nil() Interface {
return nilKube
}
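// Illustrative sketch (hypothetical helper): a component asking the enclosing
// hyperkube binary whether a sibling server was compiled in, falling back to
// the no-op implementation when running standalone.
func exampleHasScheduler(hks Interface) bool {
	if hks == nil {
		hks = Nil()
	}
	return hks.FindServer(CommandScheduler)
}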

View File

@ -1,33 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"k8s.io/kubernetes/pkg/api/resource"
)
const (
DefaultLogMaxBackups = 5 // how many backups to keep
DefaultLogMaxAgeInDays = 7 // after how many days to rotate at most
DefaultCgroupPrefix = "mesos"
)
// DefaultLogMaxSize returns the maximal log file size before rotation
func DefaultLogMaxSize() resource.Quantity {
return *resource.NewQuantity(10*1024*1024, resource.BinarySI)
}
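// Illustrative sketch (hypothetical helper): the rotation defaults as the
// minion consumes them; DefaultLogMaxSize yields 10Mi, i.e. 10*1024*1024 bytes.
func exampleRotationDefaults() (maxSizeBytes int64, backups, ageInDays int) {
	q := DefaultLogMaxSize()
	return q.Value(), DefaultLogMaxBackups, DefaultLogMaxAgeInDays
}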

View File

@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package config contains minion configuration constants.
package config // import "k8s.io/kubernetes/contrib/mesos/pkg/minion/config"

View File

@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package minion contains the executor and proxy bootstrap code for a Mesos slave
package minion // import "k8s.io/kubernetes/contrib/mesos/pkg/minion"

View File

@ -1,25 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package minion
import (
log "github.com/golang/glog"
)
func enterPrivateMountNamespace() {
log.Info("Skipping mount namespace, only available on Linux")
}

View File

@ -1,55 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package minion
import (
"syscall"
log "github.com/golang/glog"
)
// enterPrivateMountNamespace does just that: the current mount ns is unshared (isolated)
// and then made a slave to the root mount / of the parent mount ns (mount events from /
// or its children that happen in the parent NS propagate to us).
//
// this is not yet compatible with volume plugins as implemented by the kubelet, which
// depends on using host-volume args to 'docker run' to attach plugin volumes to containers
// at runtime. as such, docker needs to be able to see the volumes mounted by k8s plugins,
// which is impossible if k8s volume plugins are running in an isolated mount ns.
//
// an alternative approach would be to always run the kubelet in the host's mount-ns and
// rely upon mesos to forcibly umount bindings in the task sandbox before rmdir'ing it:
// https://issues.apache.org/jira/browse/MESOS-349.
//
// use at your own risk.
func enterPrivateMountNamespace() {
log.Warningln("EXPERIMENTAL FEATURE: entering private mount ns")
// enter a new mount NS, useful for isolating changes to the mount table
// that are made by the kubelet for storage volumes.
err := syscall.Unshare(syscall.CLONE_NEWNS)
if err != nil {
log.Fatalf("failed to enter private mount NS: %v", err)
}
// make the rootfs / rslave to the parent mount NS so that we
// pick up on any changes made there
err = syscall.Mount("", "/", "dontcare", syscall.MS_REC|syscall.MS_SLAVE, "")
if err != nil {
log.Fatalf("failed to mark / rslave: %v", err)
}
}
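To make the propagation flags above easier to follow, here is a minimal standalone sketch (illustrative only, not part of the deleted file; assumes Linux and sufficient privileges) of the same unshare-then-mark-rslave sequence:
package main
import (
	"log"
	"syscall"
)
func main() {
	// Give this process its own copy of the mount table; mounts made after
	// this point are invisible to the parent namespace.
	if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil {
		log.Fatalf("unshare(CLONE_NEWNS): %v", err)
	}
	// MS_SLAVE: mount events still propagate from the parent namespace into
	// this one, but not the other way around. MS_REC applies the change
	// recursively to every mount under "/", not just the root mount itself.
	if err := syscall.Mount("", "/", "", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil {
		log.Fatalf("remount / as rslave: %v", err)
	}
	log.Println("running in a private (slave) mount namespace")
}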

View File

@ -1,381 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package minion
import (
"fmt"
"io"
"io/ioutil"
"os"
"os/signal"
"path"
"strconv"
"strings"
"syscall"
log "github.com/golang/glog"
"github.com/kardianos/osext"
"github.com/spf13/pflag"
"gopkg.in/natefinch/lumberjack.v2"
kubeletapp "k8s.io/kubernetes/cmd/kubelet/app"
exservice "k8s.io/kubernetes/contrib/mesos/pkg/executor/service"
"k8s.io/kubernetes/contrib/mesos/pkg/flagutil"
"k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
"k8s.io/kubernetes/contrib/mesos/pkg/minion/config"
"k8s.io/kubernetes/contrib/mesos/pkg/minion/tasks"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/client/restclient"
)
const (
proxyLogFilename = "proxy.log"
executorLogFilename = "executor.log"
)
type MinionServer struct {
// embed the executor server to be able to use its flags
// TODO(sttts): get rid of this mixing of the minion and the executor server with a multiflags implementation for km
KubeletExecutorServer *exservice.KubeletExecutorServer
privateMountNS bool
hks hyperkube.Interface
clientConfig *restclient.Config
kmBinary string
tasks []*tasks.Task
pathOverride string // the PATH environment for the sub-processes
cgroupPrefix string // e.g. mesos
cgroupRoot string // the cgroupRoot that we pass to the kubelet-executor, depends on containPodResources
mesosCgroup string // discovered mesos cgroup root, e.g. /mesos/{container-id}
containPodResources bool
logMaxSize resource.Quantity
logMaxBackups int
logMaxAgeInDays int
logVerbosity int32 // see glog.Level
runProxy bool
proxyKubeconfig string
proxyLogV int
proxyBindall bool
proxyMode string
conntrackMax int
conntrackTCPTimeoutEstablished int
}
// NewMinionServer creates the MinionServer struct with default values to be used by hyperkube
func NewMinionServer() *MinionServer {
s := &MinionServer{
KubeletExecutorServer: exservice.NewKubeletExecutorServer(),
privateMountNS: false, // disabled until Docker supports customization of the parent mount namespace
cgroupPrefix: config.DefaultCgroupPrefix,
containPodResources: true,
logMaxSize: config.DefaultLogMaxSize(),
logMaxBackups: config.DefaultLogMaxBackups,
logMaxAgeInDays: config.DefaultLogMaxAgeInDays,
runProxy: true,
proxyMode: "userspace", // upstream default is "iptables" post-v1.1
}
// cache this for later use
binary, err := osext.Executable()
if err != nil {
log.Fatalf("failed to determine currently running executable: %v", err)
}
s.kmBinary = binary
return s
}
// filterArgsByFlagSet returns a list of args which are parsed by the given flag set
// and another list with those which do not match
func filterArgsByFlagSet(args []string, flags *pflag.FlagSet) ([]string, []string) {
matched := []string{}
notMatched := []string{}
for _, arg := range args {
err := flags.Parse([]string{arg})
if err != nil {
notMatched = append(notMatched, arg)
} else {
matched = append(matched, arg)
}
}
return matched, notMatched
}
func findMesosCgroup(prefix string) (cgroupPath string, containerID string) {
// derive our cgroup from MESOS_DIRECTORY environment
mesosDir := os.Getenv("MESOS_DIRECTORY")
if mesosDir == "" {
log.V(2).Infof("cannot derive executor's cgroup because MESOS_DIRECTORY is empty")
return
}
containerID = path.Base(mesosDir)
if containerID == "" {
log.V(2).Infof("cannot derive executor's cgroup from MESOS_DIRECTORY=%q", mesosDir)
return
}
cgroupPath = path.Join("/", prefix, containerID)
return
}
func (ms *MinionServer) launchProxyServer() {
bindAddress := "0.0.0.0"
if !ms.proxyBindall {
bindAddress = ms.KubeletExecutorServer.Address
}
args := []string{
fmt.Sprintf("--bind-address=%s", bindAddress),
fmt.Sprintf("--v=%d", ms.proxyLogV),
"--logtostderr=true",
// TODO(jdef) resource-container is going away completely at some point, but
// we need to override it here to disable the current default behavior
"--resource-container=", // disable this; mesos slave doesn't like sub-containers yet
"--proxy-mode=" + ms.proxyMode,
"--conntrack-max=" + strconv.Itoa(ms.conntrackMax),
"--conntrack-tcp-timeout-established=" + strconv.Itoa(ms.conntrackTCPTimeoutEstablished),
}
if ms.proxyKubeconfig != "" {
args = append(args, fmt.Sprintf("--kubeconfig=%s", ms.proxyKubeconfig))
}
if ms.clientConfig.Host != "" {
args = append(args, fmt.Sprintf("--master=%s", ms.clientConfig.Host))
}
if ms.KubeletExecutorServer.HostnameOverride != "" {
args = append(args, fmt.Sprintf("--hostname-override=%s", ms.KubeletExecutorServer.HostnameOverride))
}
ms.launchHyperkubeServer(hyperkube.CommandProxy, args, proxyLogFilename)
}
// launchExecutorServer returns a chan that closes upon kubelet-executor death. since the kubelet-
// executor doesn't support failover right now, the right thing to do is to fail completely since all
// pods will be lost upon restart and we want mesos to recover the resources from them.
func (ms *MinionServer) launchExecutorServer(containerID string) <-chan struct{} {
allArgs := os.Args[2:]
// filter out minion flags, leaving those for the executor
executorFlags := pflag.NewFlagSet("executor", pflag.ContinueOnError)
executorFlags.SetOutput(ioutil.Discard)
ms.AddExecutorFlags(executorFlags)
executorArgs, _ := filterArgsByFlagSet(allArgs, executorFlags)
// disable resource-container; mesos slave doesn't like sub-containers yet
executorArgs = append(executorArgs, "--kubelet-cgroups=")
appendOptional := func(name, value string) {
if value != "" {
executorArgs = append(executorArgs, "--"+name+"="+value)
}
}
appendOptional("cgroup-root", ms.cgroupRoot)
// forward global cadvisor flag values to the executor
// TODO(jdef) remove this code once cadvisor global flags have been cleaned up
appendOptional(flagutil.Cadvisor.HousekeepingInterval.NameValue())
appendOptional(flagutil.Cadvisor.GlobalHousekeepingInterval.NameValue())
// forward containerID so that the executor may pass it along to containers that it launches
var ctidOpt tasks.Option
ctidOpt = func(t *tasks.Task) tasks.Option {
oldenv := t.Env[:]
t.Env = append(t.Env, "MESOS_EXECUTOR_CONTAINER_UUID="+containerID)
return func(t2 *tasks.Task) tasks.Option {
t2.Env = oldenv
return ctidOpt
}
}
// run executor and quit minion server when this exits cleanly
execDied := make(chan struct{})
ms.launchHyperkubeServer(hyperkube.CommandExecutor, executorArgs, executorLogFilename, tasks.NoRespawn(execDied), ctidOpt)
return execDied
}
func (ms *MinionServer) launchHyperkubeServer(server string, args []string, logFileName string, options ...tasks.Option) {
log.V(2).Infof("Spawning hyperkube %v with args '%+v'", server, args)
kmArgs := append([]string{server}, args...)
maxSize := ms.logMaxSize.Value()
if maxSize > 0 {
// convert to MB
maxSize = maxSize / 1024 / 1024
if maxSize == 0 {
log.Warning("maximal log file size is rounded to 1 MB")
maxSize = 1
}
}
writerFunc := func() io.WriteCloser {
return &lumberjack.Logger{
Filename: logFileName,
MaxSize: int(maxSize),
MaxBackups: ms.logMaxBackups,
MaxAge: ms.logMaxAgeInDays,
}
}
// prepend env, allow later options to customize further
options = append([]tasks.Option{tasks.Environment(os.Environ()), ms.applyPathOverride()}, options...)
t := tasks.New(server, ms.kmBinary, kmArgs, writerFunc, options...)
go t.Start()
ms.tasks = append(ms.tasks, t)
}
// applyPathOverride overrides PATH and also adds $SANDBOX/bin (needed for locating bundled binary deps
// as well as external deps like iptables)
func (ms *MinionServer) applyPathOverride() tasks.Option {
return func(t *tasks.Task) tasks.Option {
kmEnv := make([]string, 0, len(t.Env))
for _, e := range t.Env {
if !strings.HasPrefix(e, "PATH=") {
kmEnv = append(kmEnv, e)
} else {
if ms.pathOverride != "" {
e = "PATH=" + ms.pathOverride
}
pwd, err := os.Getwd()
if err != nil {
panic(fmt.Errorf("Cannot get current directory: %v", err))
}
kmEnv = append(kmEnv, fmt.Sprintf("%s:%s", e, path.Join(pwd, "bin")))
}
}
oldenv := t.Env
t.Env = kmEnv
return tasks.Environment(oldenv)
}
}
// Run launches the kube-proxy and kubelet-executor as child tasks and blocks
// until all of them have terminated.
func (ms *MinionServer) Run(hks hyperkube.Interface, _ []string) error {
if ms.privateMountNS {
// only the Linux version will do anything
enterPrivateMountNamespace()
}
// create apiserver client
clientConfig, err := kubeletapp.CreateAPIServerClientConfig(ms.KubeletExecutorServer.KubeletServer)
if err != nil {
// required for k8sm since we need to send api.Binding information
// back to the apiserver
log.Fatalf("No API client: %v", err)
}
ms.clientConfig = clientConfig
// derive the executor cgroup and use it as:
// - pod container cgroup root (e.g. docker cgroup-parent, optionally; see comments below)
// - parent of kubelet container
// - parent of kube-proxy container
containerID := ""
ms.mesosCgroup, containerID = findMesosCgroup(ms.cgroupPrefix)
log.Infof("discovered mesos cgroup at %q", ms.mesosCgroup)
// hack alert, this helps to work around systemd+docker+mesos integration problems
// when docker's cgroup-parent flag is used (!containPodResources = don't use the docker flag)
if ms.containPodResources {
ms.cgroupRoot = ms.mesosCgroup
}
cgroupLogger := log.Infof
if ms.cgroupRoot == "" {
cgroupLogger = log.Warningf
}
cgroupLogger("using cgroup-root %q", ms.cgroupRoot)
// run subprocesses until this function returns
if ms.runProxy {
ms.launchProxyServer()
}
// abort closes when the kubelet-executor dies
abort := ms.launchExecutorServer(containerID)
shouldQuit := termSignalListener(abort)
te := tasks.MergeOutput(ms.tasks, shouldQuit)
// TODO(jdef) do something fun here, such as reporting task completion to the apiserver
<-te.Close().Done() // we don't listen for any specific events yet; wait for all tasks to finish
return nil
}
// termSignalListener returns a signal chan that closes when either (a) the process receives a termination
// signal: SIGTERM, SIGINT, or SIGHUP; or (b) the abort chan closes.
func termSignalListener(abort <-chan struct{}) <-chan struct{} {
shouldQuit := make(chan struct{})
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh)
go func() {
defer close(shouldQuit)
for {
select {
case <-abort:
log.Infof("executor died, aborting")
return
case s, ok := <-sigCh:
if !ok {
return
}
switch s {
case os.Interrupt, os.Signal(syscall.SIGTERM), os.Signal(syscall.SIGINT), os.Signal(syscall.SIGHUP):
log.Infof("received signal %q, aborting", s)
return
case os.Signal(syscall.SIGCHLD): // who cares?
default:
log.Errorf("unexpected signal: %T %#v", s, s)
}
}
}
}()
return shouldQuit
}
func (ms *MinionServer) AddExecutorFlags(fs *pflag.FlagSet) {
ms.KubeletExecutorServer.AddFlags(fs)
// hack to forward log verbosity flag to the executor
fs.Int32Var(&ms.logVerbosity, "v", ms.logVerbosity, "log level for V logs")
}
func (ms *MinionServer) AddMinionFlags(fs *pflag.FlagSet) {
// general minion flags
fs.StringVar(&ms.cgroupPrefix, "mesos-cgroup-prefix", ms.cgroupPrefix, "The cgroup prefix concatenated with MESOS_DIRECTORY must give the executor cgroup set by Mesos")
fs.BoolVar(&ms.privateMountNS, "private-mountns", ms.privateMountNS, "Enter a private mount NS before spawning procs (linux only). Experimental, not yet compatible with k8s volumes.")
fs.StringVar(&ms.pathOverride, "path-override", ms.pathOverride, "Override the PATH in the environment of the sub-processes.")
fs.BoolVar(&ms.containPodResources, "contain-pod-resources", ms.containPodResources, "Allocate pod CPU and memory resources from offers and reparent pod containers into mesos cgroups; disable if you're having strange mesos/docker/systemd interactions.")
// log file flags
fs.Var(resource.NewQuantityFlagValue(&ms.logMaxSize), "max-log-size", "Maximum log file size for the executor and proxy before rotation")
fs.IntVar(&ms.logMaxAgeInDays, "max-log-age", ms.logMaxAgeInDays, "Maximum log file age of the executor and proxy in days")
fs.IntVar(&ms.logMaxBackups, "max-log-backups", ms.logMaxBackups, "Maximum log file backups of the executor and proxy to keep after rotation")
// proxy flags
fs.BoolVar(&ms.runProxy, "run-proxy", ms.runProxy, "Maintain a running kube-proxy instance as a child proc of this kubelet-executor.")
fs.StringVar(&ms.proxyKubeconfig, "proxy-kubeconfig", ms.proxyKubeconfig, "Path to kubeconfig file used by the child kube-proxy.")
fs.IntVar(&ms.proxyLogV, "proxy-logv", ms.proxyLogV, "Log verbosity of the child kube-proxy.")
fs.BoolVar(&ms.proxyBindall, "proxy-bindall", ms.proxyBindall, "When true will cause kube-proxy to bind to 0.0.0.0.")
fs.StringVar(&ms.proxyMode, "proxy-mode", ms.proxyMode, "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
fs.IntVar(&ms.conntrackMax, "conntrack-max", ms.conntrackMax, "Maximum number of NAT connections to track on agent nodes (0 to leave as-is)")
fs.IntVar(&ms.conntrackTCPTimeoutEstablished, "conntrack-tcp-timeout-established", ms.conntrackTCPTimeoutEstablished, "Idle timeout for established TCP connections on agent nodes (0 to leave as-is)")
}
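As a usage note for filterArgsByFlagSet above, a hedged standalone sketch (the flag names and argument values are invented for illustration) of how launchExecutorServer splits km's argument list between the executor and the minion:
package main
import (
	"fmt"
	"io/ioutil"
	"github.com/spf13/pflag"
)
// filterArgsByFlagSet mirrors the helper above: args the flag set can parse
// are returned in the first slice, everything else in the second.
func filterArgsByFlagSet(args []string, flags *pflag.FlagSet) ([]string, []string) {
	matched, notMatched := []string{}, []string{}
	for _, arg := range args {
		if err := flags.Parse([]string{arg}); err != nil {
			notMatched = append(notMatched, arg)
		} else {
			matched = append(matched, arg)
		}
	}
	return matched, notMatched
}
func main() {
	executorFlags := pflag.NewFlagSet("executor", pflag.ContinueOnError)
	executorFlags.SetOutput(ioutil.Discard)           // silence parse errors for unknown flags
	executorFlags.String("hostname-override", "", "") // example executor flag
	executorFlags.Int32("v", 0, "")                   // example shared verbosity flag
	args := []string{"--hostname-override=node-1", "--run-proxy=false", "--v=2"}
	forExecutor, forMinion := filterArgsByFlagSet(args, executorFlags)
	fmt.Println("executor args:", forExecutor) // [--hostname-override=node-1 --v=2]
	fmt.Println("minion args:  ", forMinion)   // [--run-proxy=false]
}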

View File

@ -1,20 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package tasks provides an API for supervising system processes as Tasks.
// It provides stronger guarantees with respect to process lifecycle than a
// standalone kubelet running static pods.
package tasks // import "k8s.io/kubernetes/contrib/mesos/pkg/minion/tasks"

View File

@ -1,98 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package tasks
type Events interface {
// Close stops delivery of events on the completion channel; callers must invoke Close when they no longer intend to read from Completion()
Close() Events
// Completion reports Completion events as they happen
Completion() <-chan *Completion
// Done returns a signal chan that closes when all tasks have completed and there are no more events to deliver
Done() <-chan struct{}
}
type eventsImpl struct {
tc chan *Completion
stopForwarding chan struct{}
done <-chan struct{}
}
func newEventsImpl(tcin <-chan *Completion, done <-chan struct{}) *eventsImpl {
ei := &eventsImpl{
tc: make(chan *Completion),
stopForwarding: make(chan struct{}),
done: done,
}
go func() {
defer close(ei.tc)
forwardCompletionUntil(tcin, ei.tc, ei.stopForwarding, done, nil)
}()
return ei
}
func (e *eventsImpl) Close() Events { close(e.stopForwarding); return e }
func (e *eventsImpl) Completion() <-chan *Completion { return e.tc }
func (e *eventsImpl) Done() <-chan struct{} { return e.done }
// forwardCompletionUntil is a generic pipe that forwards objects between channels.
// if discard is closed, objects are silently dropped.
// if tap != nil then it's invoked for each object as it's read from tin, but before it's written to tch.
// returns when either reading from tin completes (tin is closed and no more objects remain), or else
// abort is closed, whichever happens first.
func forwardCompletionUntil(tin <-chan *Completion, tch chan<- *Completion, discard <-chan struct{}, abort <-chan struct{}, tap func(*Completion, bool)) {
var tc *Completion
var ok bool
forwardLoop:
for {
select {
case tc, ok = <-tin:
if !ok {
return
}
if tap != nil {
tap(tc, false)
}
select {
case <-abort:
break forwardLoop
case <-discard:
case tch <- tc:
}
case <-abort:
// best effort
select {
case tc, ok = <-tin:
if ok {
if tap != nil {
tap(tc, true)
}
break forwardLoop
}
default:
}
return
}
}
// best effort
select {
case tch <- tc:
case <-discard:
default:
}
}
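A consumer-side sketch of the Events contract above, written as if it were another file in package tasks (it is not part of the original package): read completions until the channel closes, then wait for Done.
package tasks
import (
	log "github.com/golang/glog"
)
// drainEvents is a usage sketch only. The minion server's own pattern is the
// terser `<-te.Close().Done()`, which discards completions instead of logging them.
func drainEvents(ev Events) {
	for tc := range ev.Completion() { // closed once forwarding stops
		log.Infof("task %q exited with code %d (err: %v)", tc.name, tc.code, tc.err)
	}
	<-ev.Done() // all tasks have finished and no more events will be delivered
}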

View File

@ -1,431 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package tasks
import (
"fmt"
"io"
"io/ioutil"
"os/exec"
"sync"
"sync/atomic"
"syscall"
"time"
log "github.com/golang/glog"
"k8s.io/kubernetes/contrib/mesos/pkg/runtime"
)
const (
defaultTaskRestartDelay = 5 * time.Second
// TODO(jdef) there's no easy way for us to discover the grace period that we actually
// have, from mesos: it's simply a missing core feature. there's a MESOS-xyz ticket for
// this somewhere. if it was discoverable then we could come up with a better strategy.
// there are some comments in the executor regarding this as well (because there we're
// concerned about cleaning up pods within the grace period). we could pick some
// higher (arbitrary) value but without knowing when the slave will forcibly kill us
// it seems a somewhat futile exercise.
defaultKillGracePeriod = 5 * time.Second
)
// Completion represents the termination of a Task process. Each process execution should
// yield (barring drops because of an abort signal) exactly one Completion.
type Completion struct {
name string // name of the task
code int // exit code that the task process completed with
err error // process management errors are reported here
}
// systemProcess is a useful abstraction for testing
type systemProcess interface {
// Wait works like exec.Cmd.Wait()
Wait() error
// Kill returns the pid of the process that was killed
Kill(force bool) (int, error)
}
type cmdProcess struct {
delegate *exec.Cmd
}
func (cp *cmdProcess) Wait() error {
return cp.delegate.Wait()
}
func (cp *cmdProcess) Kill(force bool) (int, error) {
// kill the entire process group, not just the one process
pid := cp.delegate.Process.Pid
processGroup := 0 - pid
// we send a SIGTERM here for a graceful stop. users of this package should
// wait for tasks to complete normally. as a fallback/safeguard, child procs
// are spawned in notStartedTask to receive a SIGKILL when this process dies.
sig := syscall.SIGTERM
if force {
sig = syscall.SIGKILL
}
rc := syscall.Kill(processGroup, sig)
return pid, rc
}
// Task is a specification for running a system process; it provides hooks for customizing
// logging and restart handling, as well as event channels for communicating process
// termination and errors related to process management.
type Task struct {
Env []string // optional: process environment override
Finished func(restarting bool) bool // callback invoked when a task process has completed; when restarting is true, the task is restarted only if the callback returns true
RestartDelay time.Duration // interval between repeated task restarts
name string // required: unique name for this task
bin string // required: path to executable
args []string // optional: process arguments
createLogger func() io.WriteCloser // factory func that builds a log writer
cmd systemProcess // process that we started
completedCh chan *Completion // reports exit codes encountered when task processes exit, or errors during process management
shouldQuit chan struct{} // shouldQuit is closed to indicate that the task should stop its running process, if any
done chan struct{} // done closes when all processes related to the task have terminated
initialState taskStateFn // prepare and start a new live process, defaults to notStartedTask; should be set by run()
runLatch int32 // guard against multiple Task.run calls
killFunc func(bool) (int, error)
}
// New builds a newly initialized task object but does not start any process for it; callers
// are expected to invoke Start() (or run(...) directly, as the tests do) themselves.
func New(name, bin string, args []string, cl func() io.WriteCloser, options ...Option) *Task {
t := &Task{
name: name,
bin: bin,
args: args,
createLogger: cl,
completedCh: make(chan *Completion),
shouldQuit: make(chan struct{}),
done: make(chan struct{}),
RestartDelay: defaultTaskRestartDelay,
Finished: func(restarting bool) bool { return restarting },
}
t.killFunc = func(force bool) (int, error) { return t.cmd.Kill(force) }
for _, opt := range options {
opt(t)
}
return t
}
// Start spawns a goroutine to execute the Task. Panics if invoked more than once.
func (t *Task) Start() {
go t.run(notStartedTask)
}
// run executes the state machine responsible for starting, monitoring, and possibly restarting
// a system process for the task. The initialState func is the entry point of the state machine.
// Upon returning the done and completedCh chans are all closed.
func (t *Task) run(initialState taskStateFn) {
if !atomic.CompareAndSwapInt32(&t.runLatch, 0, 1) {
panic("Task.run() may only be invoked once")
}
t.initialState = initialState
defer close(t.done)
defer close(t.completedCh)
state := initialState
for state != nil {
next := state(t)
state = next
}
}
func (t *Task) tryComplete(tc *Completion) {
select {
case <-t.shouldQuit:
// best effort
select {
case t.completedCh <- tc:
default:
}
case t.completedCh <- tc:
}
}
// tryError is a convenience func that invokes tryComplete with a completion error
func (t *Task) tryError(err error) {
t.tryComplete(&Completion{err: err})
}
type taskStateFn func(*Task) taskStateFn
func taskShouldRestart(t *Task) taskStateFn {
// make our best effort to stop here if signalled (shouldQuit). not doing so here
// could add cost later (a process might be launched).
// sleep for a bit; then return t.initialState
tm := time.NewTimer(t.RestartDelay)
defer tm.Stop()
select {
case <-tm.C:
select {
case <-t.shouldQuit:
default:
if t.Finished(true) {
select {
case <-t.shouldQuit:
// the world has changed, die
return nil
default:
}
return t.initialState
}
// finish call decided not to respawn, so die
return nil
}
case <-t.shouldQuit:
}
// we're quitting, tell the Finished callback and then die
t.Finished(false)
return nil
}
func (t *Task) initLogging(r io.Reader) {
writer := t.createLogger()
go func() {
defer writer.Close()
_, err := io.Copy(writer, r)
if err != nil && err != io.EOF {
// using tryComplete is racy because the state machine closes completedCh and
// so we don't want to attempt to write to a closed/closing chan. so
// just log this for now.
log.Errorf("logger for task %q crashed: %v", t.bin, err)
}
}()
}
// notStartedTask spawns the given task's process and transitions to the taskRunning state
func notStartedTask(t *Task) taskStateFn {
log.Infof("starting task process %q with args '%+v'", t.bin, t.args)
// create command
cmd := exec.Command(t.bin, t.args...)
stdout, err := cmd.StdoutPipe()
if err != nil {
t.tryError(fmt.Errorf("error getting stdout of %v: %v", t.name, err))
return taskShouldRestart
}
go func() {
defer stdout.Close()
io.Copy(ioutil.Discard, stdout) // TODO(jdef) we might want to save this at some point
}()
stderrLogs, err := cmd.StderrPipe()
if err != nil {
t.tryError(fmt.Errorf("error getting stderr of %v: %v", t.name, err))
return taskShouldRestart
}
t.initLogging(stderrLogs)
if len(t.Env) > 0 {
cmd.Env = t.Env
}
cmd.SysProcAttr = sysProcAttr()
// last min check for shouldQuit here
select {
case <-t.shouldQuit:
t.tryError(fmt.Errorf("task execution canceled, aborting process launch"))
return taskShouldRestart
default:
}
if err := cmd.Start(); err != nil {
t.tryError(fmt.Errorf("failed to start task process %q: %v", t.bin, err))
return taskShouldRestart
}
log.Infoln("task started", t.name)
t.cmd = &cmdProcess{delegate: cmd}
return taskRunning
}
type exitError interface {
error
// see os.ProcessState.Sys: returned value can be converted to something like syscall.WaitStatus
Sys() interface{}
}
func taskRunning(t *Task) taskStateFn {
// listen for normal process completion in a goroutine; don't block because we need to listen for shouldQuit
waitCh := make(chan *Completion, 1)
go func() {
wr := &Completion{name: t.name}
defer func() {
waitCh <- wr
close(waitCh)
}()
if err := t.cmd.Wait(); err != nil {
if exitError, ok := err.(exitError); ok {
if waitStatus, ok := exitError.Sys().(syscall.WaitStatus); ok {
wr.code = waitStatus.ExitStatus()
return
}
}
wr.err = fmt.Errorf("task wait ended strangely for %q: %v", t.bin, err)
}
}()
select {
case <-t.shouldQuit:
t.tryComplete(t.awaitDeath(&realTimer{}, defaultKillGracePeriod, waitCh))
case wr := <-waitCh:
t.tryComplete(wr)
}
return taskShouldRestart
}
// awaitDeath waits for the process to complete, or else for a "quit" signal on the task,
// at which point we attempt to kill it manually.
func (t *Task) awaitDeath(timer timer, gracePeriod time.Duration, waitCh <-chan *Completion) *Completion {
defer timer.discard()
select {
case wr := <-waitCh:
// got a signal to quit, but we're already finished
return wr
default:
}
forceKill := false
wr := &Completion{name: t.name, err: fmt.Errorf("failed to kill process: %q", t.bin)}
// the loop is here in case we receive a shouldQuit signal; we need to kill the task.
// in this case, first send a SIGTERM (force=false) to the task and then wait for it
// to die (within the gracePeriod). if it doesn't die, we loop around, only this
// time we send a SIGKILL (force=true) and wait for a reduced gracePeriod. There
// does exist a slim chance that the underlying wait4() syscall won't complete before
// this process dies, in which case a zombie will rise. Starting the mesos slave with
// pid namespace isolation should mitigate this.
waitLoop:
for i := 0; i < 2; i++ {
log.Infof("killing %s (force=%t) : %s", t.name, forceKill, t.bin)
pid, err := t.killFunc(forceKill)
if err != nil {
log.Warningf("failed to kill process: %q pid %d: %v", t.bin, pid, err)
break waitLoop
}
// Wait for the kill to be processed, and child proc resources cleaned up; try to avoid zombies!
timer.set(gracePeriod)
select {
case wr = <-waitCh:
break waitLoop
case <-timer.await():
// want a timeout, but a shorter one than we used initially.
// using /= 2 is deterministic and yields the desirable effect.
gracePeriod /= 2
forceKill = true
continue waitLoop
}
}
return wr
}
// forwardUntil forwards task process completion status and errors to the given output
// chan until either the task terminates or abort is closed.
func (t *Task) forwardUntil(tch chan<- *Completion, abort <-chan struct{}) {
// merge task completion and error until we're told to die, then
// tell the task to stop
defer close(t.shouldQuit)
forwardCompletionUntil(t.completedCh, tch, nil, abort, nil)
}
// MergeOutput fans in completion events from the given tasks and returns them as Events.
// it logs each time a task process completes or generates an error. when shouldQuit closes,
// tasks are canceled; the returned Events' Done chan closes once all ongoing event handlers
// have finished running.
func MergeOutput(tasks []*Task, shouldQuit <-chan struct{}) Events {
tc := make(chan *Completion)
var waitForTasks sync.WaitGroup
waitForTasks.Add(len(tasks))
for _, t := range tasks {
t := t
// translate task dead signal into Done
go func() {
<-t.done
waitForTasks.Done()
}()
// fan-in task completion and error events to tc
go t.forwardUntil(tc, shouldQuit)
}
tclistener := make(chan *Completion)
done := runtime.After(func() {
completionFinished := runtime.After(func() {
defer close(tclistener)
forwardCompletionUntil(tc, tclistener, nil, shouldQuit, func(tt *Completion, shutdown bool) {
prefix := ""
if shutdown {
prefix = "(shutdown) "
}
log.Infof(prefix+"task %q exited with status %d", tt.name, tt.code)
})
})
waitForTasks.Wait()
close(tc)
<-completionFinished
})
ei := newEventsImpl(tclistener, done)
return ei
}
// Option is a functional option type for a Task that returns an "undo" Option upon modifying the Task
type Option func(*Task) Option
// NoRespawn configures the Task lifecycle such that it will not respawn upon termination
func NoRespawn(listener chan<- struct{}) Option {
return func(t *Task) Option {
finished, restartDelay := t.Finished, t.RestartDelay
t.Finished = func(_ bool) bool {
// this func implements the task.finished spec, so when the task exits
// we return false to indicate that it should not be restarted. we also
// close the listener chan (execDied in the minion server) to signal interested listeners.
if listener != nil {
close(listener)
listener = nil
}
return false
}
// since we only expect to die once and there is no restart, don't delay any longer than needed
t.RestartDelay = 0
return func(t2 *Task) Option {
t2.Finished, t2.RestartDelay = finished, restartDelay
return NoRespawn(listener)
}
}
}
// Environment customizes the process runtime environment for a Task
func Environment(env []string) Option {
return func(t *Task) Option {
oldenv := t.Env
t.Env = env[:]
return Environment(oldenv)
}
}
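Putting the pieces above together, a hedged end-to-end sketch (the binary, arguments, and environment are invented; only the tasks API calls come from this package):
package main
import (
	"io"
	"os"
	"k8s.io/kubernetes/contrib/mesos/pkg/minion/tasks"
)
// consoleLog satisfies io.WriteCloser but never closes stderr, since the Task
// closes its log writer when the process exits.
type consoleLog struct{ io.Writer }
func (consoleLog) Close() error { return nil }
func main() {
	logTo := func() io.WriteCloser { return consoleLog{os.Stderr} }
	died := make(chan struct{})
	t := tasks.New("sleeper", "/bin/sleep", []string{"1"}, logTo,
		tasks.NoRespawn(died),                  // run the process once, do not respawn
		tasks.Environment([]string{"FOO=bar"})) // replace the child's environment
	t.Start()
	quit := make(chan struct{})
	ev := tasks.MergeOutput([]*tasks.Task{t}, quit)
	<-died      // NoRespawn closes this once the single run has finished
	close(quit) // stop the merge loop
	<-ev.Close().Done()
}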

View File

@ -1,28 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package tasks
import (
"syscall"
)
func sysProcAttr() *syscall.SysProcAttr {
return &syscall.SysProcAttr{
Setpgid: true,
Pdeathsig: syscall.SIGKILL, // see cmdProcess.Kill
}
}
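A hedged illustration (Linux only; the shell command is invented) of why Setpgid matters for cmdProcess.Kill above: with its own process group, the child and any grandchildren can be signalled together via a negative pid, while Pdeathsig is the kernel-level failsafe if the supervisor itself dies:
package main
import (
	"log"
	"os/exec"
	"syscall"
)
func main() {
	cmd := exec.Command("/bin/sh", "-c", "sleep 30 & sleep 30")
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setpgid:   true,            // the child leads its own process group
		Pdeathsig: syscall.SIGKILL, // kernel kills the child if this process dies first
	}
	if err := cmd.Start(); err != nil {
		log.Fatal(err)
	}
	// A negative pid signals the whole process group, which also reaches the
	// backgrounded grandchild spawned by the shell.
	if err := syscall.Kill(-cmd.Process.Pid, syscall.SIGTERM); err != nil {
		log.Fatalf("kill process group: %v", err)
	}
	_ = cmd.Wait()
}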

View File

@ -1,38 +0,0 @@
// +build !linux
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package tasks
import (
"syscall"
)
func sysProcAttr() *syscall.SysProcAttr {
// TODO(jdef)
// Consequence of not having Pdeathsig is that on non-Linux systems,
// if SIGTERM doesn't stop child procs then they may "leak" and be
// reparented 'up the chain' somewhere when the minion process
// terminates. For example, such child procs end up living indefinitely
// as children of the mesos slave process (I think the slave could handle
// this case, but currently doesn't do it very well). Pdeathsig on Linux
// was a fallback/failsafe mechanism implemented to guard against this. I
// don't know if OS X has any syscalls that do something similar.
return &syscall.SysProcAttr{
Setpgid: true,
}
}

View File

@ -1,311 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package tasks
import (
"bytes"
"errors"
"fmt"
"io"
"sync"
"syscall"
"testing"
"time"
log "github.com/golang/glog"
"github.com/stretchr/testify/assert"
)
type badWriteCloser struct {
err error
}
func (b *badWriteCloser) Write(_ []byte) (int, error) { return 0, b.err }
func (b *badWriteCloser) Close() error { return b.err }
type discardCloser int
func (d discardCloser) Write(b []byte) (int, error) { return len(b), nil }
func (d discardCloser) Close() error { return nil }
var devNull = func() io.WriteCloser { return discardCloser(0) }
type fakeExitError uint32
func (f fakeExitError) Sys() interface{} { return syscall.WaitStatus(f << 8) }
func (f fakeExitError) Error() string { return fmt.Sprintf("fake-exit-error: %d", f) }
type fakeProcess struct {
done chan struct{}
pid int
err error
}
func (f *fakeProcess) Wait() error {
<-f.done
return f.err
}
func (f *fakeProcess) Kill(_ bool) (int, error) {
close(f.done)
return f.pid, f.err
}
func (f *fakeProcess) exit(code int) {
f.err = fakeExitError(code)
close(f.done)
}
func newFakeProcess() *fakeProcess {
return &fakeProcess{
done: make(chan struct{}),
}
}
func TestBadLogger(t *testing.T) {
err := errors.New("qux")
fp := newFakeProcess()
tt := New("foo", "bar", nil, func() io.WriteCloser {
defer func() {
fp.pid = 123 // sanity check
fp.Kill(false) // this causes Wait() to return
}()
return &badWriteCloser{err}
})
tt.RestartDelay = 0 // don't slow the test down for no good reason
finishCalled := make(chan struct{})
tt.Finished = func(ok bool) bool {
log.Infof("tt.Finished: ok %t", ok)
if ok {
close(finishCalled)
}
return false // never respawn, this causes t.done to close
}
// abuse eventsImpl: we're not going to listen on the task completion or event chans,
// and we don't want to block the state machine, so discard all task events as they happen
ei := newEventsImpl(tt.completedCh, tt.done)
ei.Close()
go tt.run(func(_ *Task) taskStateFn {
log.Infof("tt initialized")
tt.initLogging(bytes.NewBuffer(([]byte)("unlogged bytes")))
tt.cmd = fp
return taskRunning
})
// if the logger fails the task will be killed
// badWriteCloser generates an error immediately and results in a task kill
<-finishCalled
<-tt.done
// this should never data race since the state machine is dead at this point
if fp.pid != 123 {
t.Fatalf("incorrect pid, expected 123 not %d", fp.pid)
}
// TODO(jdef) would be nice to check for a specific error that indicates the logger died
}
func TestMergeOutput(t *testing.T) {
var tasksStarted, tasksDone sync.WaitGroup
tasksDone.Add(2)
tasksStarted.Add(2)
t1 := New("foo", "", nil, devNull)
t1exited := make(chan struct{})
t1.RestartDelay = 0 // don't slow the test down for no good reason
t1.Finished = func(ok bool) bool {
// we expect each of these cases to happen exactly once
if !ok {
tasksDone.Done()
} else {
close(t1exited)
}
return ok
}
go t1.run(func(t *Task) taskStateFn {
defer tasksStarted.Done()
t.initLogging(bytes.NewBuffer([]byte{}))
t.cmd = newFakeProcess()
return taskRunning
})
t2 := New("bar", "", nil, devNull)
t2exited := make(chan struct{})
t2.RestartDelay = 0 // don't slow the test down for no good reason
t2.Finished = func(ok bool) bool {
// we expect each of these cases to happen exactly once
if !ok {
tasksDone.Done()
} else {
close(t2exited)
}
return ok
}
go t2.run(func(t *Task) taskStateFn {
defer tasksStarted.Done()
t.initLogging(bytes.NewBuffer([]byte{}))
t.cmd = newFakeProcess()
return taskRunning
})
shouldQuit := make(chan struct{})
te := MergeOutput([]*Task{t1, t2}, shouldQuit)
tasksStarted.Wait()
tasksStarted.Add(2) // recycle the barrier
// kill each task once, let it restart; make sure that we get the completion status?
t1.cmd.(*fakeProcess).exit(1)
t2.cmd.(*fakeProcess).exit(2)
codes := map[int]struct{}{}
for i := 0; i < 2; i++ {
switch tc := <-te.Completion(); tc.code {
case 1, 2:
codes[tc.code] = struct{}{}
default:
if tc.err != nil {
t.Errorf("unexpected task completion error: %v", tc.err)
} else {
t.Errorf("unexpected task completion code: %d", tc.code)
}
}
}
te.Close() // we're not going to read any other completion or error events
if len(codes) != 2 {
t.Fatalf("expected each task process to exit once")
}
// each task invokes Finished() once
<-t1exited
<-t2exited
log.Infoln("each task process has completed one round")
tasksStarted.Wait() // tasks will auto-restart their exited procs
// assert that the tasks are not dead; TODO(jdef) not sure that these checks are useful
select {
case <-t1.done:
t.Fatalf("t1 is unexpectedly dead")
default:
}
select {
case <-t2.done:
t.Fatalf("t2 is unexpectedly dead")
default:
}
log.Infoln("firing quit signal")
close(shouldQuit) // fire shouldQuit, and everything should terminate gracefully
log.Infoln("waiting for tasks to die")
tasksDone.Wait() // our tasks should die
log.Infoln("waiting for merge to complete")
<-te.Done() // wait for the merge to complete
}
type fakeTimer struct {
ch chan time.Time
}
func (t *fakeTimer) set(d time.Duration) {}
func (t *fakeTimer) discard() {}
func (t *fakeTimer) await() <-chan time.Time { return t.ch }
func (t *fakeTimer) expire() { t.ch = make(chan time.Time); close(t.ch) }
func (t *fakeTimer) reset() { t.ch = nil }
func TestAfterDeath(t *testing.T) {
// test kill escalation since that's not covered by other unit tests
t1 := New("foo", "", nil, devNull)
kills := 0
waitCh := make(chan *Completion, 1)
timer := &fakeTimer{}
timer.expire()
t1.killFunc = func(force bool) (int, error) {
// > 0 is intentional, multiple calls to close() should panic
if kills > 0 {
assert.True(t, force)
timer.reset() // don't want to race w/ waitCh
waitCh <- &Completion{name: t1.name, code: 123}
close(waitCh)
} else {
assert.False(t, force)
}
kills++
return 0, nil
}
wr := t1.awaitDeath(timer, 0, waitCh)
assert.Equal(t, "foo", wr.name)
assert.Equal(t, 123, wr.code)
assert.NoError(t, wr.err)
// test tie between shouldQuit and waitCh
waitCh = make(chan *Completion, 1)
waitCh <- &Completion{name: t1.name, code: 456}
close(waitCh)
t1.killFunc = func(force bool) (int, error) {
t.Fatalf("should not attempt to kill a task that has already reported completion")
return 0, nil
}
timer.reset() // don't race w/ waitCh
wr = t1.awaitDeath(timer, 0, waitCh)
assert.Equal(t, 456, wr.code)
assert.NoError(t, wr.err)
// test delayed killFunc failure
kills = 0
killFailed := errors.New("for some reason kill failed")
t1.killFunc = func(force bool) (int, error) {
// > 0 is intentional, multiple calls to close() should panic
if kills > 0 {
assert.True(t, force)
return -1, killFailed
} else {
assert.False(t, force)
}
kills++
return 0, nil
}
timer.expire()
wr = t1.awaitDeath(timer, 0, nil)
assert.Equal(t, "foo", wr.name)
assert.Error(t, wr.err)
// test initial killFunc failure
kills = 0
t1.killFunc = func(force bool) (int, error) {
// > 0 is intentional, multiple calls to close() should panic
if kills > 0 {
assert.True(t, force)
t.Fatalf("killFunc should only be invoked once, not again after is has already failed")
} else {
assert.False(t, force)
}
kills++
return 0, killFailed
}
timer.expire()
wr = t1.awaitDeath(timer, 0, nil)
assert.Equal(t, "foo", wr.name)
assert.Error(t, wr.err)
}

View File

@ -1,52 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package tasks
import (
"time"
)
type timer interface {
set(time.Duration)
discard()
await() <-chan time.Time
}
type realTimer struct {
*time.Timer
}
func (t *realTimer) set(d time.Duration) {
if t.Timer == nil {
t.Timer = time.NewTimer(d)
} else {
t.Reset(d)
}
}
func (t *realTimer) await() <-chan time.Time {
if t.Timer == nil {
return nil
}
return t.C
}
func (t *realTimer) discard() {
if t.Timer != nil {
t.Stop()
}
}
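A small usage sketch of this timer abstraction, written as if it were part of package tasks (it is not from the original file); it mirrors how awaitDeath drives set/await/discard, and a fakeTimer can be substituted in tests:
package tasks
import (
	"time"
)
// waitOrTimeout is a usage sketch only: wait for done to close, but give up
// after the supplied grace period.
func waitOrTimeout(done <-chan struct{}, grace time.Duration) bool {
	var tm timer = &realTimer{} // tests would inject a *fakeTimer instead
	defer tm.discard()          // stop the underlying time.Timer, if one was created
	tm.set(grace)
	select {
	case <-done:
		return true
	case <-tm.await():
		return false
	}
}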

View File

@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package node provides utilities to create and update nodes
package node // import "k8s.io/kubernetes/contrib/mesos/pkg/node"

View File

@ -1,226 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"fmt"
"reflect"
"strconv"
"strings"
"time"
unversionedcore "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/unversioned"
log "github.com/golang/glog"
mesos "github.com/mesos/mesos-go/mesosproto"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/errors"
"k8s.io/kubernetes/pkg/api/unversioned"
"k8s.io/kubernetes/pkg/util/validation"
)
const (
labelPrefix = "k8s.mesosphere.io/attribute-"
clientRetryCount = 5
clientRetryInterval = time.Second
)
// Create creates a new node api object with the given hostname,
// slave attribute labels and annotations
func Create(
client unversionedcore.NodesGetter,
hostName string,
slaveAttrLabels,
annotations map[string]string,
) (*api.Node, error) {
n := api.Node{
ObjectMeta: api.ObjectMeta{
Name: hostName,
},
Spec: api.NodeSpec{
ExternalID: hostName,
},
Status: api.NodeStatus{
Phase: api.NodePending,
// WORKAROUND(sttts): make sure that the Ready condition is the
// first one. The kube-ui v3 depends on this assumption.
// TODO(sttts): remove this workaround when kube-ui v4 is used or we
// merge this with the statusupdate in the controller manager.
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: slaveReadyReason,
Message: slaveReadyMessage,
LastHeartbeatTime: unversioned.Now(),
},
},
},
}
n.Labels = mergeMaps(
map[string]string{"kubernetes.io/hostname": hostName},
slaveAttrLabels,
)
n.Annotations = annotations
// try to create
return client.Nodes().Create(&n)
}
// Update updates an existing node api object
// by looking up the given hostname.
// The updated node merges the given slave attribute labels
// and annotations with the found api object.
func Update(
client unversionedcore.NodesGetter,
hostname string,
slaveAttrLabels,
annotations map[string]string,
) (n *api.Node, err error) {
for i := 0; i < clientRetryCount; i++ {
n, err = client.Nodes().Get(hostname)
if err != nil {
return nil, fmt.Errorf("error getting node %q: %v", hostname, err)
}
if n == nil {
return nil, fmt.Errorf("no node instance returned for %q", hostname)
}
// update labels derived from Mesos slave attributes, keep all other labels
n.Labels = mergeMaps(
filterMap(n.Labels, IsNotSlaveAttributeLabel),
slaveAttrLabels,
)
n.Annotations = mergeMaps(n.Annotations, annotations)
n, err = client.Nodes().Update(n)
if err == nil && !errors.IsConflict(err) {
return n, nil
}
log.Infof("retry %d/%d: error updating node %v err %v", i, clientRetryCount, n, err)
time.Sleep(time.Duration(i) * clientRetryInterval)
}
return nil, err
}
// CreateOrUpdate creates a node api object or updates an existing one
func CreateOrUpdate(
client unversionedcore.NodesGetter,
hostname string,
slaveAttrLabels,
annotations map[string]string,
) (*api.Node, error) {
n, err := Create(client, hostname, slaveAttrLabels, annotations)
if err == nil {
return n, nil
}
if !errors.IsAlreadyExists(err) {
return nil, fmt.Errorf("unable to register %q with the apiserver: %v", hostname, err)
}
// fall back to update an old node with new labels
return Update(client, hostname, slaveAttrLabels, annotations)
}
// IsNotSlaveAttributeLabel returns true iff the given label is not derived from a slave attribute
func IsNotSlaveAttributeLabel(key, value string) bool {
return !IsSlaveAttributeLabel(key, value)
}
// IsSlaveAttributeLabel returns true iff the given label is derived from a slave attribute
func IsSlaveAttributeLabel(key, value string) bool {
return strings.HasPrefix(key, labelPrefix)
}
// IsUpToDate returns true iff the node's slave labels match the given attribute labels
func IsUpToDate(n *api.Node, labels map[string]string) bool {
slaveLabels := map[string]string{}
for k, v := range n.Labels {
if IsSlaveAttributeLabel(k, "") {
slaveLabels[k] = v
}
}
return reflect.DeepEqual(slaveLabels, labels)
}
// SlaveAttributesToLabels converts slave attributes into string key/value labels
func SlaveAttributesToLabels(attrs []*mesos.Attribute) map[string]string {
l := map[string]string{}
for _, a := range attrs {
if a == nil {
continue
}
var v string
k := labelPrefix + a.GetName()
switch a.GetType() {
case mesos.Value_TEXT:
v = a.GetText().GetValue()
case mesos.Value_SCALAR:
v = strconv.FormatFloat(a.GetScalar().GetValue(), 'G', -1, 64)
}
if errs := validation.IsQualifiedName(k); len(errs) != 0 {
log.V(3).Infof("ignoring invalid node label %q: %v", k, errs)
continue
}
if errs := validation.IsValidLabelValue(v); len(errs) != 0 {
log.V(3).Infof("ignoring invalid node %s=%q: %v", k, v, errs)
continue
}
l[k] = v
}
return l
}
// filterMap filters the given map and returns a new map
// containing all original elements matching the given key-value predicate.
func filterMap(m map[string]string, predicate func(string, string) bool) map[string]string {
result := make(map[string]string, len(m))
for k, v := range m {
if predicate(k, v) {
result[k] = v
}
}
return result
}
// mergeMaps merges all given maps into a single map.
// There is no advanced key conflict resolution.
// The last key from the given maps wins.
func mergeMaps(ms ...map[string]string) map[string]string {
var l int
for _, m := range ms {
l += len(m)
}
result := make(map[string]string, l)
for _, m := range ms {
for k, v := range m {
result[k] = v
}
}
return result
}
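An illustrative sketch of the attribute-to-label mapping above (the attribute names and values are invented; field names follow the generated mesosproto bindings already imported by this file):
package main
import (
	"fmt"
	mesos "github.com/mesos/mesos-go/mesosproto"
	"k8s.io/kubernetes/contrib/mesos/pkg/node"
)
func main() {
	text, scalar := mesos.Value_TEXT, mesos.Value_SCALAR
	rack, gen, r2, v := "rack", "gen", "r2", float64(2015)
	attrs := []*mesos.Attribute{
		{Name: &rack, Type: &text, Text: &mesos.Value_Text{Value: &r2}},
		{Name: &gen, Type: &scalar, Scalar: &mesos.Value_Scalar{Value: &v}},
	}
	// Expected labels (map order may vary), given labelPrefix above:
	//   k8s.mesosphere.io/attribute-rack=r2
	//   k8s.mesosphere.io/attribute-gen=2015
	fmt.Println(node.SlaveAttributesToLabels(attrs))
}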

View File

@ -1,151 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"fmt"
"time"
unversionedcore "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/unversioned"
log "github.com/golang/glog"
"k8s.io/kubernetes/contrib/mesos/pkg/queue"
"k8s.io/kubernetes/contrib/mesos/pkg/runtime"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/errors"
)
type Registrator interface {
// Register checks whether the node is registered with the given labels. If it
// is not, it is created or updated on the apiserver. If the node was already up-to-date,
// false is returned.
Register(hostName string, labels map[string]string) (bool, error)
// Run starts the registration loop and returns immediately.
Run(terminate <-chan struct{}) error
}
type registration struct {
hostName string
labels map[string]string
}
func (r *registration) Copy() queue.Copyable {
return &registration{
hostName: r.hostName,
labels: r.labels, // labels are never changed, no need to clone
}
}
func (r *registration) GetUID() string {
return r.hostName
}
func (r *registration) Value() queue.UniqueCopyable {
return r
}
type LookupFunc func(hostName string) *api.Node
type clientRegistrator struct {
lookupNode LookupFunc
client unversionedcore.NodesGetter
queue *queue.HistoricalFIFO
}
func NewRegistrator(client unversionedcore.NodesGetter, lookupNode LookupFunc) *clientRegistrator {
return &clientRegistrator{
lookupNode: lookupNode,
client: client,
queue: queue.NewHistorical(nil),
}
}
func (r *clientRegistrator) Run(terminate <-chan struct{}) error {
loop := func() {
RegistrationLoop:
for {
obj := r.queue.Pop(terminate)
log.V(3).Infof("registration event observed")
if obj == nil {
break RegistrationLoop
}
select {
case <-terminate:
break RegistrationLoop
default:
}
rg := obj.(*registration)
n, needsUpdate := r.updateNecessary(rg.hostName, rg.labels)
if !needsUpdate {
log.V(2).Infof("no update needed, skipping for %s: %v", rg.hostName, rg.labels)
continue
}
if n == nil {
log.V(2).Infof("creating node %s with labels %v", rg.hostName, rg.labels)
_, err := CreateOrUpdate(r.client, rg.hostName, rg.labels, nil)
if err != nil {
log.Errorf("error creating the node %s: %v", rg.hostName, rg.labels)
}
} else {
log.V(2).Infof("updating node %s with labels %v", rg.hostName, rg.labels)
_, err := Update(r.client, rg.hostName, rg.labels, nil)
if err != nil && errors.IsNotFound(err) {
// last chance when our store was out of date
_, err = Create(r.client, rg.hostName, rg.labels, nil)
}
if err != nil {
log.Errorf("error updating the node %s: %v", rg.hostName, rg.labels)
}
}
}
}
go runtime.Until(loop, time.Second, terminate)
return nil
}
func (r *clientRegistrator) Register(hostName string, labels map[string]string) (bool, error) {
_, needsUpdate := r.updateNecessary(hostName, labels)
if needsUpdate {
log.V(5).Infof("queuing registration for node %s with labels %v", hostName, labels)
err := r.queue.Update(&registration{
hostName: hostName,
labels: labels,
})
if err != nil {
return false, fmt.Errorf("cannot register node %s: %v", hostName, err)
}
return true, nil
}
return false, nil
}
// updateNecessary retrieves the node with the given hostname and checks whether the given
// labels would mean any update to the node. The unmodified node is returned, plus
// true iff an update is necessary.
func (r *clientRegistrator) updateNecessary(hostName string, labels map[string]string) (*api.Node, bool) {
if r.lookupNode == nil {
return nil, true
}
n := r.lookupNode(hostName)
return n, n == nil || !IsUpToDate(n, labels)
}
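A hedged wiring sketch, written as if inside package node (not from the original file): how a caller with a node cache, such as the reflector-fed store used by the status updater below, could supply the LookupFunc expected by NewRegistrator:
package node
import (
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/client/cache"
	unversionedcore "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/unversioned"
)
// newCachedRegistrator is a usage sketch only.
func newCachedRegistrator(client unversionedcore.NodesGetter, nodeStore cache.Store) *clientRegistrator {
	lookup := func(hostName string) *api.Node {
		// Nodes are cluster-scoped, so the MetaNamespaceKeyFunc key is just the name.
		if obj, exists, err := nodeStore.GetByKey(hostName); err == nil && exists {
			return obj.(*api.Node)
		}
		return nil // unknown to the cache; Register will create the node
	}
	return NewRegistrator(client, lookup)
}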

View File

@ -1,160 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"testing"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/errors"
"k8s.io/kubernetes/pkg/api/unversioned"
unversionedcore "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/unversioned"
"k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/unversioned/fake"
"k8s.io/kubernetes/pkg/client/testing/core"
"k8s.io/kubernetes/pkg/runtime"
)
type fakeNodes struct {
*fake.FakeNodes
}
func (f *fakeNodes) Nodes() unversionedcore.NodeInterface {
return f
}
func calledOnce(h bool, ret runtime.Object, err error) (<-chan struct{}, func(core.Action) (bool, runtime.Object, error)) {
ch := make(chan struct{})
return ch, func(_ core.Action) (bool, runtime.Object, error) {
select {
case <-ch:
panic("called more than once")
default:
close(ch)
}
return h, ret, err
}
}
func TestRegister_withUnknownNode(t *testing.T) {
fc := &core.Fake{}
nodes := &fakeNodes{&fake.FakeNodes{Fake: &fake.FakeCore{Fake: fc}}}
createCalled, createOnce := calledOnce(true, nil, nil)
fc.AddReactor("create", "nodes", createOnce)
lookup := func(hostName string) *api.Node {
select {
case <-createCalled:
return &api.Node{ObjectMeta: api.ObjectMeta{Name: "foo"}}
default:
return nil
}
}
r := NewRegistrator(nodes, lookup)
ch := make(chan struct{})
defer close(ch)
r.Run(ch)
t.Logf("registering node foo")
ok, err := r.Register("foo", nil)
if !ok {
t.Fatalf("registration failed without error")
} else if err != nil {
t.Fatalf("registration failed with error %v", err)
}
// wait for node creation
t.Logf("awaiting node creation")
<-createCalled
}
func TestRegister_withKnownNode(t *testing.T) {
fc := &core.Fake{}
nodes := &fakeNodes{&fake.FakeNodes{Fake: &fake.FakeCore{Fake: fc}}}
updateCalled, updateOnce := calledOnce(true, nil, nil)
fc.AddReactor("update", "nodes", updateOnce)
lookup := func(hostName string) *api.Node {
select {
case <-updateCalled:
return &api.Node{ObjectMeta: api.ObjectMeta{Name: "foo"}}
default:
// this node needs an update because it has labels: the updated version doesn't
return &api.Node{ObjectMeta: api.ObjectMeta{Name: "foo", Labels: map[string]string{"a": "b"}}}
}
}
r := NewRegistrator(nodes, lookup)
ch := make(chan struct{})
defer close(ch)
r.Run(ch)
t.Logf("registering node foo")
ok, err := r.Register("foo", nil)
if !ok {
t.Fatalf("registration failed without error")
} else if err != nil {
t.Fatalf("registration failed with error %v", err)
}
// wait for node update
t.Logf("awaiting node update")
<-updateCalled
}
func TestRegister_withSemiKnownNode(t *testing.T) {
// semi-known because the lookup func doesn't see a very newly created node
// but our apiserver "create" call returns an already-exists error. in this case
// CreateOrUpdate should proceed to attempt an update.
fc := &core.Fake{}
nodes := &fakeNodes{&fake.FakeNodes{Fake: &fake.FakeCore{Fake: fc}}}
createCalled, createOnce := calledOnce(true, nil, errors.NewAlreadyExists(unversioned.GroupResource{Group: "", Resource: ""}, "nodes"))
fc.AddReactor("create", "nodes", createOnce)
updateCalled, updateOnce := calledOnce(true, nil, nil)
fc.AddReactor("update", "nodes", updateOnce)
lookup := func(hostName string) *api.Node {
select {
case <-updateCalled:
return &api.Node{ObjectMeta: api.ObjectMeta{Name: "foo"}}
default:
// this makes the node semi-known: apiserver knows it but the store/cache doesn't
return nil
}
}
r := NewRegistrator(nodes, lookup)
ch := make(chan struct{})
defer close(ch)
r.Run(ch)
t.Logf("registering node foo")
ok, err := r.Register("foo", nil)
if !ok {
t.Fatalf("registration failed without error")
} else if err != nil {
t.Fatalf("registration failed with error %v", err)
}
// wait for node update
t.Logf("awaiting node update")
<-createCalled
<-updateCalled
}

View File

@ -1,190 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node

import (
	"fmt"
	"time"

	clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"

	log "github.com/golang/glog"

	"k8s.io/kubernetes/cmd/kubelet/app/options"
	"k8s.io/kubernetes/contrib/mesos/pkg/runtime"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/errors"
	"k8s.io/kubernetes/pkg/api/unversioned"
	"k8s.io/kubernetes/pkg/client/cache"
	"k8s.io/kubernetes/pkg/cloudprovider/providers/mesos"
	"k8s.io/kubernetes/pkg/fields"
	"k8s.io/kubernetes/pkg/util/sets"
)

const (
	nodeStatusUpdateRetry = 5
	slaveReadyReason      = "SlaveReady"
	slaveReadyMessage     = "mesos reports ready status"
)

// StatusUpdater periodically refreshes the NodeReady condition of Mesos slaves
// that run without a kubelet, so that the node controller does not consider
// them lost.
type StatusUpdater struct {
	client          *clientset.Clientset
	relistPeriod    time.Duration
	heartBeatPeriod time.Duration
	nowFunc         func() time.Time
}

func NewStatusUpdater(client *clientset.Clientset, relistPeriod time.Duration, nowFunc func() time.Time) *StatusUpdater {
	kubecfg := options.NewKubeletServer() // created only to read the default config; this has no side effects
	return &StatusUpdater{
		client:          client,
		relistPeriod:    relistPeriod,
		heartBeatPeriod: kubecfg.NodeStatusUpdateFrequency.Duration,
		nowFunc:         nowFunc,
	}
}

// Run starts a node reflector and a background monitor loop that refreshes the
// status of slaves without a running kubelet. It returns immediately; the loop
// stops when the terminate channel is closed.
func (u *StatusUpdater) Run(terminate <-chan struct{}) error {
	nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	nodeLW := cache.NewListWatchFromClient(u.client.CoreClient, "nodes", api.NamespaceAll, fields.Everything())
	cache.NewReflector(nodeLW, &api.Node{}, nodeStore, u.relistPeriod).Run()

	monitor := func() {
		// build up a set of listed slave nodes without a kubelet
		slaves, err := mesos.CloudProvider.ListWithoutKubelet()
		if err != nil {
			log.Errorf("Error listing slaves without kubelet: %v", err)
			return
		}
		slavesWithoutKubelet := sets.NewString(slaves...)

		// Update the status of nodes which do not have a kubelet running and
		// which still exist as slaves. This status update must be done before
		// the node controller counts down the NodeMonitorGracePeriod.
		nodes := nodeStore.List()
		for _, n := range nodes {
			node := n.(*api.Node)
			if !slavesWithoutKubelet.Has(node.Spec.ExternalID) {
				// let the kubelet do its job updating the status, or the
				// node controller will remove this node if the node does not
				// even exist anymore
				continue
			}

			err := u.updateStatus(node)
			if err != nil {
				log.Errorf("Error updating node status: %v", err)
			}
		}
	}

	go runtime.Until(monitor, u.heartBeatPeriod, terminate)
	return nil
}

func (u *StatusUpdater) updateStatus(n *api.Node) error {
	for i := 0; i < nodeStatusUpdateRetry; i++ {
		if err := u.tryUpdateStatus(n); err != nil && !errors.IsConflict(err) {
			log.Errorf("Error updating node status, will retry: %v", err)
		} else {
			return nil
		}
	}
	return fmt.Errorf("Update node status exceeds retry count")
}

// nodeWithUpdatedStatus clones the given node and updates the NodeReady condition.
// The updated node is returned, along with a boolean indicating whether the node
// was changed at all.
func (u *StatusUpdater) nodeWithUpdatedStatus(n *api.Node) (*api.Node, bool, error) {
	readyCondition := getCondition(&n.Status, api.NodeReady)
	currentTime := unversioned.NewTime(u.nowFunc())

	// Avoid flapping by waiting at least twice the kubelet update frequency, i.e.
	// give the kubelet the chance to update the heartbeat twice. This is necessary
	// because we only poll the Mesos master state.json once in a while and we
	// know that the information from there can easily be outdated.
	gracePeriod := u.heartBeatPeriod * 2
	if readyCondition != nil && !currentTime.After(readyCondition.LastHeartbeatTime.Add(gracePeriod)) {
		return n, false, nil
	}

	clone, err := api.Scheme.DeepCopy(n)
	if err != nil {
		return nil, false, err
	}
	n = clone.(*api.Node)

	newNodeReadyCondition := api.NodeCondition{
		Type:              api.NodeReady,
		Status:            api.ConditionTrue,
		Reason:            slaveReadyReason,
		Message:           slaveReadyMessage,
		LastHeartbeatTime: currentTime,
	}

	found := false
	for i := range n.Status.Conditions {
		c := &n.Status.Conditions[i]
		if c.Type == api.NodeReady {
			if c.Status == newNodeReadyCondition.Status {
				newNodeReadyCondition.LastTransitionTime = c.LastTransitionTime
			} else {
				newNodeReadyCondition.LastTransitionTime = currentTime
			}
			n.Status.Conditions[i] = newNodeReadyCondition
			found = true
			break
		}
	}

	if !found {
		newNodeReadyCondition.LastTransitionTime = currentTime
		n.Status.Conditions = append(n.Status.Conditions, newNodeReadyCondition)
	}

	return n, true, nil
}

// tryUpdateStatus updates the status of the given node and tries to persist that
// on the apiserver.
func (u *StatusUpdater) tryUpdateStatus(n *api.Node) error {
	n, updated, err := u.nodeWithUpdatedStatus(n)
	if err != nil {
		return err
	}
	if !updated {
		return nil
	}

	_, err = u.client.Nodes().UpdateStatus(n)
	return err
}

// getCondition returns a condition object for the specific condition
// type, nil if the condition is not set.
func getCondition(status *api.NodeStatus, conditionType api.NodeConditionType) *api.NodeCondition {
	if status == nil {
		return nil
	}
	for i := range status.Conditions {
		if status.Conditions[i].Type == conditionType {
			return &status.Conditions[i]
		}
	}
	return nil
}
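
For orientation, here is a minimal sketch of how a caller might wire the updater up, assuming an already-constructed *clientset.Clientset; the helper name runStatusUpdater and the relist period are placeholders, not part of the original code, and the snippet relies on the imports shown above.

// Hypothetical wiring sketch; client construction is elided.
func runStatusUpdater(client *clientset.Clientset, terminate <-chan struct{}) error {
	updater := NewStatusUpdater(client, 5*time.Minute, time.Now)
	// Run starts the node reflector and the background monitor loop (via
	// runtime.Until) and returns immediately; the loop stops when the
	// terminate channel is closed.
	return updater.Run(terminate)
}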

Some files were not shown because too many files have changed in this diff.