promote contrib/mesos to incubator
@@ -566,10 +566,6 @@ function kube::build::run_build_command() {
     "${DOCKER_MOUNT_ARGS[@]}"
   )
 
-  if [ -n "${KUBERNETES_CONTRIB:-}" ]; then
-    docker_run_opts+=(-e "KUBERNETES_CONTRIB=${KUBERNETES_CONTRIB}")
-  fi
-
   docker_run_opts+=(
     --env "KUBE_FASTBUILD=${KUBE_FASTBUILD:-false}"
     --env "KUBE_BUILDER_OS=${OSTYPE:-notdetected}"
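The removed block above is how `KUBERNETES_CONTRIB` reached the build container. A hedged sketch of how it was exercised before this change (the `build/run.sh make` invocation is an assumption based on the standard build flow; the CI scripts later in this diff use an equivalent wrapper):

```bash
# Selecting the contrib/mesos build before this PR: the variable was
# forwarded into the build container, where the build picked it up.
KUBERNETES_CONTRIB=mesos build/run.sh make
```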
@@ -168,7 +168,6 @@ function prepare-e2e {
 # Execute prior to running tests to build a release if required for env
 function test-build-release {
   # Make a release
-  export KUBERNETES_CONTRIB=mesos
   export KUBE_RELEASE_RUN_TESTS=N
   "${KUBE_ROOT}/build/release.sh"
 }
@@ -1,2 +0,0 @@
assignees:
- k82cn
@@ -1,35 +0,0 @@
# Kubernetes-Mesos

Kubernetes-Mesos modifies Kubernetes to act as an [Apache Mesos](http://mesos.apache.org/) framework.

## Features On Mesos

Kubernetes gains the following benefits when installed on Mesos:

- **Node-Level Auto-Scaling** - Kubernetes minion nodes are created automatically, up to the size of the provisioned Mesos cluster.
- **Resource Sharing** - Co-location of Kubernetes with other popular next-generation services on the same cluster (e.g. [Hadoop](https://github.com/mesos/hadoop), [Spark](http://spark.apache.org/), [Chronos](https://mesos.github.io/chronos/), [Cassandra](http://mesosphere.github.io/cassandra-mesos/), etc.). Resources are allocated to the frameworks based on fairness and can be claimed or passed on depending on framework load.
- **Independence from special Network Infrastructure** - Mesos can (but of course doesn't have to) run on networks which cannot assign a routable IP to every container. The Kubernetes on Mesos endpoint controller is specially modified to allow pods to communicate with services in such an environment.

For more information about how Kubernetes-Mesos is different from Kubernetes, see [Architecture](./docs/architecture.md).

## Release Status

Kubernetes-Mesos is alpha quality, still under active development, and not yet recommended for production systems.

For more information about development progress, see the [known issues](./docs/issues.md) or the [kubernetes-mesos repository](https://github.com/mesosphere/kubernetes-mesos), where backlog issues are tracked.

## Usage

This project combines concepts and technologies from two already-complex projects: Mesos and Kubernetes. It may help to familiarize yourself with the basics of each project before reading on:

* [Mesos Documentation](http://mesos.apache.org/documentation/latest)
* [Kubernetes Documentation](../../README.md)

To get up and running with Kubernetes-Mesos, follow:

- the [Getting started guide](../../docs/getting-started-guides/mesos.md) to launch a Kubernetes-Mesos cluster,
- the [Kubernetes-Mesos Scheduler Guide](./docs/scheduler.md) for topics concerning the custom scheduler used in this distribution.
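Taken together with the CI scripts below, the quickest way to exercise the code this PR removes was the dockerized dev cluster. A minimal sketch, assuming the `mesos/docker` provider described in the getting-started guide:

```bash
# Bring up the dockerized Kubernetes-Mesos cluster, then tear it down.
# Same provider and entry points the CI scripts in this diff use.
export KUBERNETES_PROVIDER=mesos/docker
./cluster/kube-up.sh
./cluster/kube-down.sh
```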
@@ -1,38 +0,0 @@
#!/bin/bash

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Cleans output files/images and builds a full release from scratch
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/build-release.sh

set -o errexit
set -o nounset
set -o pipefail
set -o errtrace

KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)

"${KUBE_ROOT}/contrib/mesos/ci/run.sh" make clean

export KUBERNETES_CONTRIB=mesos
export KUBE_RELEASE_RUN_TESTS="${KUBE_RELEASE_RUN_TESTS:-N}"
export KUBE_SKIP_CONFIRMATIONS=Y

"${KUBE_ROOT}/build/release.sh"
@@ -1,36 +0,0 @@
#!/bin/bash

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Cleans output files/images and builds linux binaries from scratch
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/build.sh

set -o errexit
set -o nounset
set -o pipefail
set -o errtrace

TEST_ARGS="$@"

KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)

export KUBERNETES_CONTRIB=mesos

"${KUBE_ROOT}/contrib/mesos/ci/run.sh" make clean all ${TEST_ARGS}
@@ -1,87 +0,0 @@
#!/bin/bash

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Deploys a test cluster, runs the specified command, and destroys the test cluster.
# Runs all commands inside the mesosphere/kubernetes-mesos-test docker image (built on demand).
# Uses the mesos/docker cluster provider.
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/run-with-cluster.sh ./cluster/test-smoke.sh -v=2

set -o errexit
set -o nounset
set -o pipefail
set -o errtrace

RUN_CMD="$@"
[ -z "${RUN_CMD:-}" ] && echo "No command supplied" && exit 1

KUBERNETES_PROVIDER="mesos/docker"

MESOS_DOCKER_WORK_DIR="${MESOS_DOCKER_WORK_DIR:-${HOME}/tmp/kubernetes}"

KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)

# Clean (test artifacts)
echo "Cleaning work dir"
echo "${MESOS_DOCKER_WORK_DIR}"
rm -rf "${MESOS_DOCKER_WORK_DIR}"
mkdir -p "${MESOS_DOCKER_WORK_DIR}"

echo "Detecting docker client"
# Mount docker client binary to avoid client/compose/daemon version conflicts
if [ -n "${DOCKER_MACHINE_NAME:-}" ] && which docker-machine; then
  # On a Mac with docker-machine, use the binary in the VM, not the host binary
  DOCKER_BIN_PATH="$(docker-machine ssh "${DOCKER_MACHINE_NAME}" which docker)"
else
  DOCKER_BIN_PATH="$(which docker)"
fi
echo "${DOCKER_BIN_PATH}"

# Clean (k8s output & images), Build, Kube-Up, Test, Kube-Down
cd "${KUBE_ROOT}"
docker run \
  --rm \
  -v "${KUBE_ROOT}:/go/src/github.com/GoogleCloudPlatform/kubernetes" \
  -v "/var/run/docker.sock:/var/run/docker.sock" \
  -v "${DOCKER_BIN_PATH}:/usr/bin/docker" \
  -v "${MESOS_DOCKER_WORK_DIR}/auth:${MESOS_DOCKER_WORK_DIR}/auth" \
  -v "${MESOS_DOCKER_WORK_DIR}/log:${MESOS_DOCKER_WORK_DIR}/log" \
  -v "${MESOS_DOCKER_WORK_DIR}/mesosslave1/mesos:${MESOS_DOCKER_WORK_DIR}/mesosslave1/mesos" \
  -v "${MESOS_DOCKER_WORK_DIR}/mesosslave2/mesos:${MESOS_DOCKER_WORK_DIR}/mesosslave2/mesos" \
  -v "${MESOS_DOCKER_WORK_DIR}/overlay:${MESOS_DOCKER_WORK_DIR}/overlay" \
  -v "${MESOS_DOCKER_WORK_DIR}/reports:${MESOS_DOCKER_WORK_DIR}/reports" \
  $(test -d /teamcity/system/git && echo "-v /teamcity/system/git:/teamcity/system/git" || true) \
  -e "MESOS_DOCKER_WORK_DIR=${MESOS_DOCKER_WORK_DIR}" \
  -e "MESOS_DOCKER_IMAGE_DIR=/var/tmp/kubernetes" \
  -e "MESOS_DOCKER_OVERLAY_DIR=${MESOS_DOCKER_WORK_DIR}/overlay" \
  -e "KUBERNETES_CONTRIB=mesos" \
  -e "KUBERNETES_PROVIDER=mesos/docker" \
  -e "USER=root" \
  -e "E2E_REPORT_DIR=${MESOS_DOCKER_WORK_DIR}/reports" \
  -t $(tty &>/dev/null && echo "-i") \
  mesosphere/kubernetes-mesos-test \
  -ceux "\
    make clean all && \
    trap 'timeout 5m ./cluster/kube-down.sh' EXIT && \
    ./cluster/kube-down.sh && \
    ./cluster/kube-up.sh && \
    trap \"test \\\$? != 0 && export MESOS_DOCKER_DUMP_LOGS=true; cd \${PWD} && timeout 5m ./cluster/kube-down.sh\" EXIT && \
    ${RUN_CMD}
  "
@@ -1,56 +0,0 @@
#!/bin/bash

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Runs the specified command in the test container (mesosphere/kubernetes-mesos-test).
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/run.sh make test

set -o errexit
set -o nounset
set -o pipefail
set -o errtrace

RUN_CMD="$@"
[ -z "${RUN_CMD:-}" ] && echo "No command supplied" && exit 1

KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)

echo "Detecting docker client"
# Mount docker client binary to avoid client/compose/daemon version conflicts
if [ -n "${DOCKER_MACHINE_NAME:-}" ] && which docker-machine; then
  # On a Mac with docker-machine, use the binary in the VM, not the host binary
  DOCKER_BIN_PATH="$(docker-machine ssh "${DOCKER_MACHINE_NAME}" which docker)"
else
  DOCKER_BIN_PATH="$(which docker)"
fi
echo "${DOCKER_BIN_PATH}"

# Clean (k8s output & images) & Build
cd "${KUBE_ROOT}"
exec docker run \
  --rm \
  -v "${KUBE_ROOT}:/go/src/github.com/GoogleCloudPlatform/kubernetes" \
  -v "/var/run/docker.sock:/var/run/docker.sock" \
  -v "${DOCKER_BIN_PATH}:/usr/bin/docker" \
  -e "KUBERNETES_CONTRIB=mesos" \
  -e "USER=root" \
  -t $(tty &>/dev/null && echo "-i") \
  mesosphere/kubernetes-mesos-test \
  -ceux "${RUN_CMD}"
@@ -1,44 +0,0 @@
#!/bin/bash

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Deploys a test cluster, runs the conformance tests, and destroys the test cluster.
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/test-conformance.sh -v=2

set -o errexit
set -o nounset
set -o pipefail
set -o errtrace

TEST_ARGS="$@"

KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)

TEST_CMD="KUBERNETES_CONFORMANCE_TEST=y KUBECONFIG=~/.kube/config go run hack/e2e.go --test --test_args=\"--ginkgo.focus=\\[Conformance\\]\""
if [ -n "${CONFORMANCE_BRANCH:-}" ]; then
  # create a CONFORMANCE_BRANCH clone in a subdirectory
  TEST_CMD="
    git fetch https://github.com/kubernetes/kubernetes --tags -q ${CONFORMANCE_BRANCH} &&
    git branch -f ${CONFORMANCE_BRANCH} FETCH_HEAD &&
    git clone -s -b ${CONFORMANCE_BRANCH} . conformance &&
    cd conformance && make all && ${TEST_CMD}"
fi

"${KUBE_ROOT}/contrib/mesos/ci/run-with-cluster.sh" ${TEST_CMD} ${TEST_ARGS}
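A hedged usage sketch for the branch-pinning path above (`release-1.1` is illustrative; any fetchable kubernetes/kubernetes ref works):

```bash
# Run the conformance suite from a pinned branch against the test cluster.
CONFORMANCE_BRANCH=release-1.1 ./contrib/mesos/ci/test-conformance.sh -v=2
```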
@@ -1,34 +0,0 @@
#!/bin/bash

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Deploys a test cluster, runs the e2e tests, and destroys the test cluster.
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/test-e2e.sh -v=2

set -o errexit
set -o nounset
set -o pipefail
set -o errtrace

TEST_ARGS="$@"

KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)

"${KUBE_ROOT}/contrib/mesos/ci/run-with-cluster.sh" ./cluster/test-e2e.sh ${TEST_ARGS}
@@ -1,34 +0,0 @@
#!/bin/bash

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Cleans & runs the integration tests in the test container (mesosphere/kubernetes-mesos-test).
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/test-integration.sh

set -o errexit
set -o nounset
set -o pipefail
set -o errtrace

TEST_ARGS="$@"

KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)

"${KUBE_ROOT}/contrib/mesos/ci/run.sh" make clean test-integration ${TEST_ARGS}
@@ -1,34 +0,0 @@
#!/bin/bash

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Deploys a test cluster, runs the smoke tests, and destroys the test cluster.
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/test-smoke.sh -v=2

set -o errexit
set -o nounset
set -o pipefail
set -o errtrace

TEST_ARGS="$@"

KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)

"${KUBE_ROOT}/contrib/mesos/ci/run-with-cluster.sh" ./cluster/test-smoke.sh ${TEST_ARGS}
@@ -1,34 +0,0 @@
#!/bin/bash

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Cleans & runs the unit tests in the test container (mesosphere/kubernetes-mesos-test).
#
# Prerequisite:
# ./cluster/mesos/docker/test/build.sh
#
# Example Usage:
# ./contrib/mesos/ci/test-unit.sh

set -o errexit
set -o nounset
set -o pipefail
set -o errtrace

TEST_ARGS="$@"

KUBE_ROOT=$(cd "$(dirname "${BASH_SOURCE}")/../../.." && pwd)

"${KUBE_ROOT}/contrib/mesos/ci/run.sh" make clean test ${TEST_ARGS}
@@ -1,23 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// This package main implements the executable Kubernetes Mesos controller manager.
//
// It is mainly a clone of the upstream cmd/hyperkube module right now because
// the upstream hyperkube module is not reusable.
//
// TODO(jdef,sttts): refactor upstream cmd/kube-controller-manager to be reusable with the necessary mesos changes
package main
@@ -1,52 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
    "fmt"
    "os"

    "k8s.io/kubernetes/pkg/healthz"
    "k8s.io/kubernetes/pkg/util/flag"
    "k8s.io/kubernetes/pkg/util/logs"
    "k8s.io/kubernetes/pkg/version/verflag"

    "k8s.io/kubernetes/contrib/mesos/pkg/controllermanager"

    "github.com/spf13/pflag"
)

func init() {
    healthz.DefaultHealthz()
}

func main() {
    s := controllermanager.NewCMServer()
    s.AddFlags(pflag.CommandLine)

    flag.InitFlags()
    logs.InitLogs()
    defer logs.FlushLogs()

    verflag.PrintAndExitIfRequested()

    if err := s.Run(pflag.CommandLine.Args()); err != nil {
        fmt.Fprint(os.Stderr, err.Error())
        os.Exit(1)
    }
}
@@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// This package main implements the executable Kubernetes Mesos executor.
package main
@@ -1,46 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
    "fmt"
    "os"

    "github.com/spf13/pflag"
    "k8s.io/kubernetes/contrib/mesos/pkg/executor/service"
    "k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
    "k8s.io/kubernetes/pkg/util/flag"
    "k8s.io/kubernetes/pkg/util/logs"
    "k8s.io/kubernetes/pkg/version/verflag"
)

func main() {
    s := service.NewKubeletExecutorServer()
    s.AddFlags(pflag.CommandLine)

    flag.InitFlags()
    logs.InitLogs()
    defer logs.FlushLogs()

    verflag.PrintAndExitIfRequested()

    if err := s.Run(hyperkube.Nil(), pflag.CommandLine.Args()); err != nil {
        fmt.Fprint(os.Stderr, err.Error())
        os.Exit(1)
    }
}
@@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// This package main implements the executable Kubernetes Mesos scheduler.
package main
@@ -1,45 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
    "fmt"
    "os"

    "github.com/spf13/pflag"
    "k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
    "k8s.io/kubernetes/contrib/mesos/pkg/scheduler/service"
    "k8s.io/kubernetes/pkg/util/flag"
    "k8s.io/kubernetes/pkg/util/logs"
    "k8s.io/kubernetes/pkg/version/verflag"
)

func main() {
    s := service.NewSchedulerServer()
    s.AddStandaloneFlags(pflag.CommandLine)

    flag.InitFlags()
    logs.InitLogs()
    defer logs.FlushLogs()

    verflag.PrintAndExitIfRequested()

    if err := s.Run(hyperkube.Nil(), pflag.CommandLine.Args()); err != nil {
        fmt.Fprint(os.Stderr, err.Error())
        os.Exit(1)
    }
}
@@ -1,24 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// This package main morphs all binaries under cmd/ and several other stock
// Kubernetes binaries into a single executable.
//
// It is mainly a clone of the upstream cmd/hyperkube module right now because
// the upstream hyperkube module is not reusable.
//
// TODO(jdef,sttts): refactor upstream cmd/hyperkube to be reusable with the necessary mesos changes
package main // import "k8s.io/kubernetes/contrib/mesos/cmd/km"
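Because the hyperkube clone below keys off `path.Base(args[0])`, `km` supported both direct and soft-link invocation; a hedged sketch (the server name `scheduler` comes from the registrations in `main.go` further down):

```bash
# Direct invocation: the first argument names the server to morph into.
km scheduler --help

# Soft-link invocation: the executable's own name selects the server.
ln -s km scheduler
./scheduler --help
```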
@@ -1,202 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// clone of the upstream cmd/hyperkube/hyperkube.go
package main

import (
    "errors"
    "flag"
    "fmt"
    "io"
    "io/ioutil"
    "os"
    "path"

    "k8s.io/kubernetes/pkg/util"
    "k8s.io/kubernetes/pkg/util/logs"
    "k8s.io/kubernetes/pkg/version/verflag"

    "github.com/spf13/pflag"
)

// HyperKube represents a single binary that can morph/manage into multiple
// servers.
type HyperKube struct {
    Name string // The executable name, used for help and soft-link invocation
    Long string // A long description of the binary. It will be word wrapped before output.

    servers     []Server
    baseFlags   *pflag.FlagSet
    out         io.Writer
    helpFlagVal bool
}

// AddServer adds a server to the HyperKube object.
func (hk *HyperKube) AddServer(s *Server) {
    hk.servers = append(hk.servers, *s)
    hk.servers[len(hk.servers)-1].hk = hk
}

// FindServer will find a specific server named name.
func (hk *HyperKube) FindServer(name string) (*Server, error) {
    for _, s := range hk.servers {
        if s.Name() == name {
            return &s, nil
        }
    }
    return nil, fmt.Errorf("Server not found: %s", name)
}

// Servers returns a list of all of the registered servers
func (hk *HyperKube) Servers() []Server {
    return hk.servers
}

// Flags returns a flagset for "global" flags.
func (hk *HyperKube) Flags() *pflag.FlagSet {
    if hk.baseFlags == nil {
        hk.baseFlags = pflag.NewFlagSet(hk.Name, pflag.ContinueOnError)
        hk.baseFlags.SetOutput(ioutil.Discard)
        hk.baseFlags.BoolVarP(&hk.helpFlagVal, "help", "h", false, "help for "+hk.Name)

        // These will add all of the "global" flags (defined with both the
        // flag and pflag packages) to the new flag set we have.
        hk.baseFlags.AddGoFlagSet(flag.CommandLine)
        hk.baseFlags.AddFlagSet(pflag.CommandLine)
    }
    return hk.baseFlags
}

// Out returns the io.Writer that is used for all usage/error information
func (hk *HyperKube) Out() io.Writer {
    if hk.out == nil {
        hk.out = os.Stderr
    }
    return hk.out
}

// SetOut sets the output writer for all usage/error information
func (hk *HyperKube) SetOut(w io.Writer) {
    hk.out = w
}

// Print is a convenience method to Print to the defined output
func (hk *HyperKube) Print(i ...interface{}) {
    fmt.Fprint(hk.Out(), i...)
}

// Println is a convenience method to Println to the defined output
func (hk *HyperKube) Println(i ...interface{}) {
    fmt.Fprintln(hk.Out(), i...)
}

// Printf is a convenience method to Printf to the defined output
func (hk *HyperKube) Printf(format string, i ...interface{}) {
    fmt.Fprintf(hk.Out(), format, i...)
}

// Run the server. This will pick the appropriate server and run it.
func (hk *HyperKube) Run(args []string) error {
    // If we are called directly, parse all flags up to the first real
    // argument. That should be the server to run.
    baseCommand := path.Base(args[0])
    serverName := baseCommand
    if serverName == hk.Name {
        args = args[1:]

        baseFlags := hk.Flags()
        baseFlags.SetInterspersed(false) // Only parse flags up to the next real command
        err := baseFlags.Parse(args)
        if err != nil || hk.helpFlagVal {
            if err != nil {
                hk.Println("Error:", err)
            }
            hk.Usage()
            return err
        }

        verflag.PrintAndExitIfRequested()

        args = baseFlags.Args()
        if len(args) > 0 && len(args[0]) > 0 {
            serverName = args[0]
            baseCommand = baseCommand + " " + serverName
            args = args[1:]
        } else {
            err = errors.New("no server specified")
            hk.Printf("Error: %v\n\n", err)
            hk.Usage()
            return err
        }
    }

    s, err := hk.FindServer(serverName)
    if err != nil {
        hk.Printf("Error: %v\n\n", err)
        hk.Usage()
        return err
    }

    s.Flags().AddFlagSet(hk.Flags())
    err = s.Flags().Parse(args)
    if err != nil || hk.helpFlagVal {
        if err != nil {
            hk.Printf("Error: %v\n\n", err)
        }
        s.Usage()
        return err
    }

    verflag.PrintAndExitIfRequested()

    logs.InitLogs()
    defer logs.FlushLogs()

    err = s.Run(s, s.Flags().Args())
    if err != nil {
        hk.Println("Error:", err)
    }

    return err
}

// RunToExit will run the hyperkube and then call os.Exit with an appropriate exit code.
func (hk *HyperKube) RunToExit(args []string) {
    err := hk.Run(args)
    if err != nil {
        fmt.Fprint(os.Stderr, err.Error())
        os.Exit(1)
    }
    os.Exit(0)
}

// Usage will write out a summary for all servers that this binary supports.
func (hk *HyperKube) Usage() {
    tt := `{{if .Long}}{{.Long | trim | wrap ""}}
{{end}}Usage

  {{.Name}} <server> [flags]

Servers
{{range .Servers}}
  {{.Name}}
{{.Long | trim | wrap " "}}{{end}}
Call '{{.Name}} <server> --help' for help on a specific server.
`
    util.ExecuteTemplate(hk.Out(), tt, hk)
}
@@ -1,144 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// clone of the upstream cmd/hyperkube/hyperkube_test.go
package main

import (
    "bytes"
    "errors"
    "fmt"
    "strings"
    "testing"

    "github.com/stretchr/testify/assert"
)

type result struct {
    err    error
    output string
}

func testServer(n string) *Server {
    return &Server{
        SimpleUsage: n,
        Long:        fmt.Sprintf("A simple server named %s", n),
        Run: func(s *Server, args []string) error {
            s.hk.Printf("%s Run\n", s.Name())
            return nil
        },
    }
}

func testServerError(n string) *Server {
    return &Server{
        SimpleUsage: n,
        Long:        fmt.Sprintf("A simple server named %s that returns an error", n),
        Run: func(s *Server, args []string) error {
            s.hk.Printf("%s Run\n", s.Name())
            return errors.New("server returning error")
        },
    }
}

func runFull(t *testing.T, args string) *result {
    buf := new(bytes.Buffer)
    hk := HyperKube{
        Name: "hyperkube",
        Long: "hyperkube is an all-in-one server binary.",
    }
    hk.SetOut(buf)

    hk.AddServer(testServer("test1"))
    hk.AddServer(testServer("test2"))
    hk.AddServer(testServer("test3"))
    hk.AddServer(testServerError("test-error"))

    a := strings.Split(args, " ")
    t.Logf("Running full with args: %q", a)
    err := hk.Run(a)

    r := &result{err, buf.String()}
    t.Logf("Result err: %v, output: %q", r.err, r.output)

    return r
}

func TestRun(t *testing.T) {
    x := runFull(t, "hyperkube test1")
    assert.Contains(t, x.output, "test1 Run")
    assert.NoError(t, x.err)
}

func TestLinkRun(t *testing.T) {
    x := runFull(t, "test1")
    assert.Contains(t, x.output, "test1 Run")
    assert.NoError(t, x.err)
}

func TestTopNoArgs(t *testing.T) {
    x := runFull(t, "hyperkube")
    assert.EqualError(t, x.err, "no server specified")
}

func TestBadServer(t *testing.T) {
    x := runFull(t, "hyperkube bad-server")
    assert.EqualError(t, x.err, "Server not found: bad-server")
    assert.Contains(t, x.output, "Usage")
}

func TestTopHelp(t *testing.T) {
    x := runFull(t, "hyperkube --help")
    assert.NoError(t, x.err)
    assert.Contains(t, x.output, "all-in-one")
    assert.Contains(t, x.output, "A simple server named test1")
}

func TestTopFlags(t *testing.T) {
    x := runFull(t, "hyperkube --help test1")
    assert.NoError(t, x.err)
    assert.Contains(t, x.output, "all-in-one")
    assert.Contains(t, x.output, "A simple server named test1")
    assert.NotContains(t, x.output, "test1 Run")
}

func TestTopFlagsBad(t *testing.T) {
    x := runFull(t, "hyperkube --bad-flag")
    assert.EqualError(t, x.err, "unknown flag: --bad-flag")
    assert.Contains(t, x.output, "all-in-one")
    assert.Contains(t, x.output, "A simple server named test1")
}

func TestServerHelp(t *testing.T) {
    x := runFull(t, "hyperkube test1 --help")
    assert.NoError(t, x.err)
    assert.Contains(t, x.output, "A simple server named test1")
    assert.Contains(t, x.output, "-h, --help help for hyperkube")
    assert.NotContains(t, x.output, "test1 Run")
}

func TestServerFlagsBad(t *testing.T) {
    x := runFull(t, "hyperkube test1 --bad-flag")
    assert.EqualError(t, x.err, "unknown flag: --bad-flag")
    assert.Contains(t, x.output, "A simple server named test1")
    assert.Contains(t, x.output, "-h, --help help for hyperkube")
    assert.NotContains(t, x.output, "test1 Run")
}

func TestServerError(t *testing.T) {
    x := runFull(t, "hyperkube test-error")
    assert.Contains(t, x.output, "test-error Run")
    assert.EqualError(t, x.err, "server returning error")
}
@@ -1,39 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// clone of the upstream cmd/hyperkube/kube-controllermanager.go
package main

import (
    "k8s.io/kubernetes/contrib/mesos/pkg/controllermanager"
    "k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
)

// NewControllerManager creates a new hyperkube Server object that includes the
// description and flags.
func NewControllerManager() *Server {
    s := controllermanager.NewCMServer()

    hks := Server{
        SimpleUsage: hyperkube.CommandControllerManager,
        Long:        "A server that runs a set of active components. This includes replication controllers, service endpoints and nodes.",
        Run: func(_ *Server, args []string) error {
            return s.Run(args)
        },
    }
    s.AddFlags(hks.Flags())
    return &hks
}
@@ -1,41 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
    "k8s.io/kubernetes/contrib/mesos/pkg/executor/service"
    "k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
)

// NewKubeletExecutor creates a new hyperkube Server object that includes the
// description and flags.
func NewKubeletExecutor() *Server {
    s := service.NewKubeletExecutorServer()
    hks := Server{
        SimpleUsage: hyperkube.CommandExecutor,
        Long: `The kubelet-executor binary is responsible for maintaining a set of containers
on a particular node. It syncs data from a specialized Mesos source that tracks
task launches and kills. It then queries Docker to see what is currently
running. It synchronizes the configuration data with the running set of
containers by starting or stopping Docker containers.`,
        Run: func(hks *Server, args []string) error {
            return s.Run(hks, args)
        },
    }
    s.AddFlags(hks.Flags())
    return &hks
}
@@ -1,39 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
    "k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
    "k8s.io/kubernetes/contrib/mesos/pkg/minion"
)

// NewMinion creates a new hyperkube Server object that includes the
// description and flags.
func NewMinion() *Server {
    s := minion.NewMinionServer()
    hks := Server{
        SimpleUsage: hyperkube.CommandMinion,
        Long:        `Implements a Kubernetes minion. This will launch the proxy and executor.`,
        Run: func(hks *Server, args []string) error {
            return s.Run(hks, args)
        },
    }
    s.AddMinionFlags(hks.Flags())
    s.AddExecutorFlags(hks.Flags())

    return &hks
}
@@ -1,40 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// clone of the upstream cmd/hyperkube/k8sm-scheduler.go
package main

import (
    "k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
    "k8s.io/kubernetes/contrib/mesos/pkg/scheduler/service"
)

// NewScheduler creates a new hyperkube Server object that includes the
// description and flags.
func NewScheduler() *Server {
    s := service.NewSchedulerServer()

    hks := Server{
        SimpleUsage: hyperkube.CommandScheduler,
        Long: `Implements the Kubernetes-Mesos scheduler. This will launch Mesos tasks which
result in pods assigned to kubelets based on capacity and constraints.`,
        Run: func(hks *Server, args []string) error {
            return s.Run(hks, args)
        },
    }
    s.AddHyperkubeFlags(hks.Flags())
    return &hks
}
@@ -1,41 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// clone of the upstream cmd/hyperkube/main.go
package main

import (
    "os"

    _ "k8s.io/kubernetes/pkg/client/metrics/prometheus" // for client metric registration
    _ "k8s.io/kubernetes/pkg/version/prometheus"        // for version metric registration
)

func main() {
    hk := HyperKube{
        Name: "km",
        Long: "This is an all-in-one binary that can run any of the various Kubernetes-Mesos servers.",
    }

    hk.AddServer(NewKubeAPIServer())
    hk.AddServer(NewControllerManager())
    hk.AddServer(NewScheduler())
    hk.AddServer(NewKubeletExecutor())
    hk.AddServer(NewKubeProxy())
    hk.AddServer(NewMinion())

    hk.RunToExit(os.Args)
}
@@ -1,40 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// clone of the upstream cmd/hyperkube/kube-apiserver.go
package main

import (
    "k8s.io/kubernetes/cmd/kube-apiserver/app"
    "k8s.io/kubernetes/cmd/kube-apiserver/app/options"
    "k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
)

// NewKubeAPIServer creates a new hyperkube Server object that includes the
// description and flags.
func NewKubeAPIServer() *Server {
    s := options.NewAPIServer()

    hks := Server{
        SimpleUsage: hyperkube.CommandApiserver,
        Long:        "The main API entrypoint and interface to the storage system. The API server is also the focal point for all authorization decisions.",
        Run: func(_ *Server, _ []string) error {
            return app.Run(s)
        },
    }
    s.AddFlags(hks.Flags())
    return &hks
}
@@ -1,52 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// clone of the upstream cmd/hyperkube/kube-proxy.go
package main

import (
    "k8s.io/kubernetes/cmd/kube-proxy/app"
    "k8s.io/kubernetes/cmd/kube-proxy/app/options"
    "k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
)

// NewKubeProxy creates a new hyperkube Server object that includes the
// description and flags.
func NewKubeProxy() *Server {
    config := options.NewProxyConfig()

    hks := Server{
        SimpleUsage: hyperkube.CommandProxy,
        Long: `The Kubernetes proxy server is responsible for taking traffic directed at
services and forwarding it to the appropriate pods. It generally runs on
nodes next to the Kubelet and proxies traffic from local pods to remote pods.
It is also used when handling incoming external traffic.`,
    }

    config.AddFlags(hks.Flags())

    hks.Run = func(_ *Server, _ []string) error {
        s, err := app.NewProxyServerDefault(config)
        if err != nil {
            return err
        }

        return s.Run()
    }

    return &hks
}
@@ -1,82 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// clone of the upstream cmd/hyperkube/server.go
package main

import (
    "io/ioutil"
    "strings"

    "k8s.io/kubernetes/pkg/util"

    "github.com/spf13/pflag"
)

type serverRunFunc func(s *Server, args []string) error

// Server describes a server that this binary can morph into.
type Server struct {
    SimpleUsage string        // One line description of the server.
    Long        string        // Longer free form description of the server
    Run         serverRunFunc // Run the server. This is not expected to return.

    flags *pflag.FlagSet // Flags for the command (and all dependents)
    name  string
    hk    *HyperKube
}

// Usage returns the full usage string including all of the flags.
func (s *Server) Usage() error {
    tt := `{{if .Long}}{{.Long | trim | wrap ""}}
{{end}}Usage:
  {{.SimpleUsage}} [flags]

Available Flags:
{{.Flags.FlagUsages}}`

    return util.ExecuteTemplate(s.hk.Out(), tt, s)
}

// Name returns the name of the command as derived from the usage line.
func (s *Server) Name() string {
    if s.name != "" {
        return s.name
    }
    name := s.SimpleUsage
    i := strings.Index(name, " ")
    if i >= 0 {
        name = name[:i]
    }
    return name
}

// Flags returns a flagset for this server
func (s *Server) Flags() *pflag.FlagSet {
    if s.flags == nil {
        s.flags = pflag.NewFlagSet(s.Name(), pflag.ContinueOnError)
        s.flags.SetOutput(ioutil.Discard)
    }
    return s.flags
}

func (s *Server) FindServer(name string) bool {
    if s == nil {
        return false
    }
    _, err := s.hk.FindServer(name)
    return err == nil
}
@@ -1,67 +0,0 @@
# Kubernetes-Mesos Architecture

An [Apache Mesos][1] cluster consists of one or more masters, and one or more slaves.
Kubernetes-Mesos (k8sm) operates as a Mesos framework that runs on the cluster.
As a framework, k8sm provides scheduler and executor components, both of which are hybrids of Kubernetes and Mesos:
the scheduler component integrates the Kubernetes scheduling API and the Mesos scheduler runtime, while
the executor component integrates Kubernetes kubelet services and the Mesos executor runtime.

Multiple Mesos masters are typically configured to coordinate leadership election via Zookeeper.
Future releases of Mesos may implement leader election protocols [differently][2].
Kubernetes maintains its internal registry (pods, replication controllers, bindings, nodes, services) in etcd.
Users typically interact with Kubernetes using the `kubectl` command to manage Kubernetes primitives.

When a pod is created in Kubernetes, the k8sm scheduler creates an associated Mesos task and queues it for scheduling.
Upon pairing the pod/task with an acceptable resource offer, the scheduler binds the pod/task to the offer's slave.
As a result of binding, the pod/task is launched and delivered to an executor (an executor is created by the Mesos slave if one is not already running).
The executor launches the pod/task, which registers the bound pod with the kubelet engine, and the kubelet begins to manage the lifecycle of the pod instance.

*(figure: Kubernetes-Mesos architecture diagram)*

## Scheduling

The scheduling of a pod on Kubernetes on Mesos is essentially a two-phase process:

1. A new pod is noticed by the k8sm-scheduler and possibly matched with a
   Mesos offer. Then:

   - The offer is *accepted*,
   - the pod is *annotated* with a number of annotations, especially `k8s.mesosphere.io/bindingHost`,
   - the pod is *launched* on a Mesos slave.

   The existence of the `bindingHost` annotation tells the k8sm-scheduler that this pod has been launched. If it is not set, the pod is considered *new* (see the inspection sketch after this list).

2. The Mesos slave receives the task launch event and starts (if not running yet) the k8sm-executor (possibly via the km hyperkube binary). Then:

   - The k8sm-executor *binds* the task to the node via the apiserver, which means that the `NodeName` field is set by the apiserver.
   - The k8sm-executor sends the pod to the kubelet, which is part of the k8sm-executor process.
   - The kubelet launches the containers using Docker.
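A hedged sketch of inspecting phase 1's observable effect (the pod name is hypothetical; the annotation key comes from the text above):

```bash
# A non-empty result means the scheduler already launched this pod/task
# on a Mesos slave; an empty result means the pod is still "new".
kubectl get pod frontend-abc12 \
  -o 'jsonpath={.metadata.annotations.k8s\.mesosphere\.io/bindingHost}'
```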
## Networking

Kubernetes-Mesos uses "normal" Docker IPv4, host-private networking, rather than Kubernetes' SDN-based networking that assigns an IP per pod. This is mostly transparent to the user, especially when using the service abstraction to access pods. For details on some issues it creates, see [issues][3].

*(figure: Kubernetes-Mesos networking diagram)*

## Resource Accounting

Mesos is designed to handle resource accounting and enforcement across the cluster. Part of that enforcement involves "growing" and "shrinking" the pool of resources allocated for executor containers.

The implementation of the k8sm-executor launches pods as Docker containers (just like the upstream kubelet). The containers are resource limited (cpu and memory) via `docker run` by the kubelet code. Moreover, all containers launched by the kubelet code are children of the k8sm-executor cgroup. This parent cgroup is assigned to the k8sm-executor by the Mesos slave.

To actually enforce the defined resource limits for the k8sm-executor and its pods, enable the cpu and memory isolators in your Mesos slaves.

The described resource allocation also applies to static pods, which are run on every Mesos slave that runs a k8sm-executor.

Kubernetes allows pods to be defined without resource limits for cpu and/or memory. The upstream kubelet will then run the containers without resource bounds. Because Mesos enforces resource accounting, it assigns default container cpu and memory limits for those pods. By default these are 0.25 cpu shares and 64 MB of memory. These values can be customized via the `--default-container-cpu-limit` and `--default-container-mem-limit` flags of the k8sm-scheduler, as sketched below.
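A minimal sketch of overriding those defaults when launching the scheduler; the values are illustrative, and only the two flag names are taken from the text above:

```bash
# Give limit-less containers 0.5 cpu shares and 128 MB of memory instead
# of the 0.25 / 64 defaults (all other scheduler flags omitted here).
km scheduler --default-container-cpu-limit=0.5 --default-container-mem-limit=128
```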
Note that currently static pods without cpu and memory limits are not allowed and will make the k8sm-scheduler refuse to start (compare the [k8sm issues](issues.md)).

[1]: http://mesos.apache.org/
[2]: https://issues.apache.org/jira/browse/MESOS-1806
[3]: issues.md#service-endpoints
(Two deleted binary images: 68 KiB and 147 KiB — presumably the architecture and networking diagrams referenced above.)
@ -1,91 +0,0 @@
# Discovery

## DNS

### kube-dns

[**kube-dns**](https://github.com/kubernetes/kubernetes/blob/release-1.1/docs/admin/dns.md) is a Kubernetes add-on that works out of the box with Kubernetes-Mesos.
For details on usage see the implementation in the `cluster/mesos/docker` source tree.
kube-dns provides records for both services and pods.

### mesos-dns

**NOTE:** There is still no support for publishing Kubernetes *services* in mesos-dns.

**mesos-dns** communicates with the leading Mesos master to build a DNS record set that reflects the tasks running in a Mesos cluster, as documented here: http://mesosphere.github.io/mesos-dns/docs/naming.html.
As of Kubernetes-Mesos [release v0.7.2](https://github.com/mesosphere/kubernetes/releases/tag/v0.7.2-v1.1.5) there is experimental support in the scheduler to populate a task's *discovery-info* field in order to generate alternative, friendlier record names in mesos-dns, for *pods* only.

To enable this feature, set `--mesos-generate-task-discovery=true` when launching the scheduler.
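For example, a sketch of the scheduler invocation with this flag enabled (all other required flags elided):

```shell
$ km scheduler ... --mesos-generate-task-discovery=true
```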
The following discovery-info fields may be set using labels (without a namespace prefix) or else `k8s.mesosphere.io/discovery-XXX` annotations:

* `visibility`: may be `framework`, `external`, or `cluster` (defaults to `cluster`)
* `environment`
* `location`
* `name` (this alters record set generation in *mesos-dns*)
* `version`

When both a label and an annotation are supplied, the value of the annotation takes precedence.
The value of the `name` label (and `discovery-name` annotation) is interpreted as a special case: the generated Mesos `discovery-info.name` value will be `${name}.${pod-namespace}.pod`; all other discovery-info values are passed through without modification.

#### Example 1: Use a `name` label on a pod template

```yaml
apiVersion: v1
kind: ReplicationController
metadata:
  name: frontend
spec:
  replicas: 3
  template:
    metadata:
      labels:
        app: guestbook
        tier: frontend
        name: custom-name
    spec:
      containers:
      - name: php-redis
        image: gcr.io/google_samples/gb-frontend:v3
        resources:
          requests:
            cpu: 100m
            memory: 100Mi
        env:
        - name: GET_HOSTS_FROM
          value: dns
        ports:
        - containerPort: 80
```
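With this template, and assuming the controller is created in the `default` namespace, the rule above would yield a generated `discovery-info.name` of `custom-name.default.pod` for each pod.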
#### Example 2: Use a `discovery-name` annotation on a pod template

```yaml
apiVersion: v1
kind: ReplicationController
metadata:
  name: frontend
spec:
  replicas: 3
  template:
    metadata:
      labels:
        app: guestbook
        tier: frontend
      annotations:
        k8s.mesosphere.io/discovery-name: custom-name
    spec:
      containers:
      - name: php-redis
        image: gcr.io/google_samples/gb-frontend:v3
        resources:
          requests:
            cpu: 100m
            memory: 100Mi
        env:
        - name: GET_HOSTS_FROM
          value: dns
        ports:
        - containerPort: 80
```
@ -1,526 +0,0 @@
# High Availability

Kubernetes on Mesos will eventually support two HA modes:

* [Hot-standby](#hot-standby) (*work-in-progress*)
* [Cold-standby](#cold-standby)

Hot-standby mode is currently still work-in-progress, as the controller manager is not yet HA-aware (the work is being tracked [here][2]). Nevertheless, we will describe how hot-standby mode is intended to work. Until this work is done, it is recommended to use cold-standby mode for HA. In hot-standby mode all master components (apiserver, controller manager, and scheduler) actively run on every master node. Additional logic is added to the controller manager and scheduler to coordinate their access to the etcd backend, to deal with concurrency issues when modifying cluster state. As the apiserver does not modify cluster state, multiple instances can run concurrently without coordination. When the leader (i.e., the node whose scheduler is active) crashes, the other master nodes will detect the failure after some time and then elect a new leader.

In cold-standby mode, similar to hot-standby mode, the apiserver actively runs on every master node. However, only one scheduler and controller manager run at any instant in time. This is coordinated by a small external program called `podmaster` that uses etcd to perform leader election; only on the leader node will the `podmaster` start the scheduler and controller manager. Cold-standby mode is how Kubernetes supports HA, and more information can be found [here][1].

## Hot-standby

### Scheduler

The implementation of the scheduler HA feature includes:

- Checkpointing by default (`--checkpoint`)
- Large failover-timeout by default (`--failover-timeout`)
- Hot-failover w/ multiple scheduler instances (`--ha`)
- Best effort task reconciliation on failover

#### Multiple Instances

Multiple scheduler instances may be run to support a warm-standby scenario in which one scheduler fails and another takes over immediately.
But at any moment in time only one scheduler is actually registered with the leading Mesos master.
Scheduler leader election is implemented using etcd, so it is important to have an HA etcd configuration established for reliable scheduler HA.

It is currently recommended that no more than 2 scheduler instances be running at the same time.
Running more than 2 schedulers at once may work but has not been extensively tested.
YMMV.

#### Failover

Scheduler failover may be triggered by either of the following events:

- loss of leadership when running in HA mode (`--ha`);
- the leading scheduler process receives a USR1 signal.

It is currently possible to signal failover to a single, non-HA scheduler process.
In this case, if there are problems launching a replacement scheduler process then the cluster may be without a scheduler until another is manually started.
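For example, a failover of the leading scheduler may be triggered manually; a minimal sketch, assuming the scheduler was started as `km scheduler` and `pgrep` is available:

```shell
$ kill -USR1 "$(pgrep -f 'km scheduler')"
```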
#### How To

##### Command Line Arguments

- `--ha` is required to enable scheduler HA and multi-scheduler leader election.
- `--km-path` or else (`--executor-path` and `--proxy-path`) should reference non-local-file URIs and must be identical across schedulers.

If you have HDFS installed on your slaves then you can specify HDFS URI locations for the binaries:

```shell
$ hdfs dfs -put -f bin/km hdfs:///km
$ ./bin/km scheduler ... --mesos-master=zk://zk1:2181,zk2:2181/mesos --ha --km-path=hdfs:///km
```

**IMPORTANT:** some command line parameters specified for the scheduler process are passed to the Kubelet-executor and so are subject to compatibility tests:

- a Mesos master will not recognize differently configured executors as being compatible, and so...
- a scheduler will refuse to accept any offer for slave resources if there are incompatible executors running on the slave.

Within the scheduler, compatibility is largely determined by comparing executor configuration hashes:
a hash is calculated from a subset of the executor-related command line parameters provided to the scheduler process.
The command line parameters that affect the hash calculation are listed below.

- `--allow-privileged`
- `--api-servers`
- `--auth-path`
- `--cluster-*`
- `--executor-*`
- `--kubelet-*`
- `--km-path`
- `--mesos-cgroup-prefix`
- `--mesos-launch-grace-period`
- `--minion-*`
- `--profiling`
- `--proxy-*`
- `--static-pods-config`
## Cold-standby

Setting up Kubernetes on Mesos in cold-standby mode is similar to Kubernetes in standalone mode, described in [Kubernetes HA][1]. However, special attention is needed when setting up the K8sm scheduler, so that when the currently active scheduler crashes or dies, a new one can be instantiated and take over the work. More precisely, the new scheduler needs to be compatible with the executors that were started previously by the dead scheduler.

### Environment Variables

We will set up a K8sm master on 2 nodes in HA mode. The same steps can be extended to set up more master nodes to deal with more concurrent failures. We will first define a few environment variables to describe the testbed environment.

```
MESOS_IP=192.168.0.1
MESOS_PORT=5050

ETCD_IP=192.168.0.2
ETCD_PORT=4001

K8S_1_IP=192.168.0.3
K8S_2_IP=192.168.0.4
K8S_APISERVER_PORT=8080
K8S_SCHEDULER_PORT=10251

NGINX_IP=192.168.0.5
NGINX_APISERVER_PORT=80
NGINX_SCHEDULER_PORT=81
```

Other than the 2 K8sm master nodes (`192.168.0.3` and `192.168.0.4`), we also define a Mesos master at `192.168.0.1`, an etcd server at `192.168.0.2`, and an Nginx server that load balances between the 2 K8sm master nodes.

### K8sm Container Image

We use podmaster to coordinate leader election amongst the K8sm masters. However, podmaster needs to run in a container (preferably in a pod), and on the leader node its podmaster will instantiate the scheduler and controller manager, also in their own separate pods. The podmaster image is pre-built and can be obtained from `gcr.io/google_containers/podmaster`. An official image that contains the `km` binary to start the apiserver, scheduler, and controller manager is not yet available, but it can be built fairly easily.

```shell
$ cat <<EOF >Dockerfile
FROM ubuntu
MAINTAINER Hai Huang <haih@us.ibm.com>
RUN mkdir -p /opt/kubernetes
COPY kubernetes/_output/dockerized/bin/linux/amd64/ /opt/kubernetes
ENTRYPOINT ["/opt/kubernetes/km"]
EOF
$ cat <<'EOF' >build.sh
#!/bin/bash
K8SM_IMAGE_NAME=haih/k8sm
git clone https://github.com/mesosphere/kubernetes
cd kubernetes
git checkout release-v0.7-v1.1
KUBERNETES_CONTRIB=mesos build/run.sh make
cd ..
sudo docker build -t $K8SM_IMAGE_NAME --no-cache .
EOF
$ chmod 755 build.sh
$ ./build.sh
```

Make sure the Docker engine is running locally, as we will compile Kubernetes using a Docker image. One can also change the image name and which Kubernetes release to compile by modifying the script. After the script has finished running, there should be a local Docker image called `haih/k8sm` (use `docker images` to check).

Optionally, we can also push the image to Docker Hub (i.e., `docker push $K8SM_IMAGE_NAME`) so we do not have to compile the image on every K8sm master node.

**IMPORTANT:** The Mesosphere team is currently maintaining the stable K8sm release in a separate [fork][3]. At the time of this writing, the latest stable release is `release-v0.7-v1.1`.
### Configure ETCD

We assume there's an etcd server on `$ETCD_IP`. Ideally this should be a cluster of etcd servers running in HA mode, backed by redundant persistent storage. For testing purposes, one can spin up an etcd instance in a Docker container on the etcd server.

```shell
$ docker run -d --hostname $(uname -n) --name etcd \
    -p ${ETCD_PORT}:${ETCD_PORT} \
    quay.io/coreos/etcd:v2.0.12 \
    --listen-client-urls http://0.0.0.0:${ETCD_PORT} \
    --advertise-client-urls http://${ETCD_IP}:${ETCD_PORT}
```

### Configure Podmaster

Since we plan to run all K8sm components and podmaster in pods, we can use `kubelet` to bootstrap these pods by specifying a manifests directory.

```shell
$ mkdir -p /etc/kubernetes/manifests/
$ mkdir -p /srv/kubernetes/manifests/
```

Once the kubelet has started, it will check the manifests directory periodically to see if it needs to start or stop pods. Pods can be started by putting their specification yaml files into the manifests directory, and subsequently they can be stopped by removing these yaml files.

```shell
$ cat <<EOF > /etc/kubernetes/manifests/podmaster.yaml
apiVersion: v1
kind: Pod
metadata:
  name: kube-podmaster
  namespace: kube-system
spec:
  hostNetwork: true
  containers:
  - name: scheduler-elector
    image: gcr.io/google_containers/podmaster:1.1
    command:
    - /podmaster
    - --etcd-servers=http://${ETCD_IP}:${ETCD_PORT}
    - --key=scheduler
    - --whoami=${MY_IP}
    - --source-file=/src/manifests/scheduler.yaml
    - --dest-file=/dst/manifests/scheduler.yaml
    volumeMounts:
    - mountPath: /src/manifests
      name: manifest-src
      readOnly: true
    - mountPath: /dst/manifests
      name: manifest-dst
  - name: controller-manager-elector
    image: gcr.io/google_containers/podmaster:1.1
    command:
    - /podmaster
    - --etcd-servers=http://${ETCD_IP}:${ETCD_PORT}
    - --key=controller
    - --whoami=${MY_IP}
    - --source-file=/src/manifests/controller-mgr.yaml
    - --dest-file=/dst/manifests/controller-mgr.yaml
    terminationMessagePath: /dev/termination-log
    volumeMounts:
    - mountPath: /src/manifests
      name: manifest-src
      readOnly: true
    - mountPath: /dst/manifests
      name: manifest-dst
  volumes:
  - hostPath:
      path: /srv/kubernetes/manifests
    name: manifest-src
  - hostPath:
      path: /etc/kubernetes/manifests
    name: manifest-dst
EOF
```

One must change `$MY_IP` to either `$K8S_1_IP` or `$K8S_2_IP` depending on which master node the podmaster is currently being set up on (see the sketch below). Podmasters will compete with each other for leadership, and the winner will copy the scheduler and controller manager's pod specification yaml files from `/srv/kubernetes/manifests/` to `/etc/kubernetes/manifests/`. When the kubelet detects these new yaml files, it will start the corresponding pods.
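A minimal sketch of that substitution, run on each node before writing the manifests (using the variables from the table above):

```shell
# on the first master node:
MY_IP=${K8S_1_IP}
# on the second master node:
MY_IP=${K8S_2_IP}
```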
### Configure Scheduler

The scheduler pod specification will be put into `/srv/kubernetes/manifests/`.

```shell
$ cat <<EOF > /srv/kubernetes/manifests/scheduler.yaml
apiVersion: v1
kind: Pod
metadata:
  name: kube-scheduler
  namespace: kube-system
spec:
  hostNetwork: true
  containers:
  - name: kube-scheduler
    image: haih/k8sm:latest
    imagePullPolicy: IfNotPresent
    command:
    - /opt/kubernetes/km
    - scheduler
    - --address=${MY_IP}
    - --advertised-address=${NGINX_IP}:${NGINX_SCHEDULER_PORT}
    - --mesos-master=${MESOS_IP}:${MESOS_PORT}
    - --etcd-servers=http://${ETCD_IP}:${ETCD_PORT}
    - --api-servers=${NGINX_IP}:${NGINX_APISERVER_PORT}
    - --v=10
EOF
```

Again, one must change `$MY_IP` to either `$K8S_1_IP` or `$K8S_2_IP` depending on which master node is currently being worked on. Even though we have not set up Nginx yet, we can still specify `--api-servers` and `--advertised-address` using Nginx's address and ports (just make sure Nginx is already running before turning on the scheduler). Having `--api-servers` point to Nginx allows executors to maintain connectivity to one of the apiservers even when one or more apiservers is down, as Nginx can automatically re-route requests to a working apiserver.

It is critically important to point `--advertised-address` to Nginx so that all the schedulers are assigned the same executor ID. Otherwise, if we assigned `--advertised-address=${K8S_1_IP}` on the first K8s master and `--advertised-address=${K8S_2_IP}` on the second K8s master, they would generate different executor IDs. During a fail-over, the new scheduler would then not be able to use the executor started by the failed scheduler. If so, one could see this error message in the scheduler log:

> Declining incompatible offer...

### Configure Controller Manager

The controller manager pod specification will also be put into `/srv/kubernetes/manifests/`.

```shell
$ cat <<EOF > /srv/kubernetes/manifests/controller-mgr.yaml
apiVersion: v1
kind: Pod
metadata:
  name: kube-controller-manager
  namespace: kube-system
spec:
  hostNetwork: true
  containers:
  - name: kube-controller-manager
    image: haih/k8sm:latest
    imagePullPolicy: IfNotPresent
    command:
    - /opt/kubernetes/km
    - controller-manager
    - --master=http://${NGINX_IP}:${NGINX_APISERVER_PORT}
    - --cloud-provider=mesos
    - --cloud-config=/etc/kubernetes/mesos-cloud.conf
    volumeMounts:
    - mountPath: /etc/kubernetes
      name: kubernetes-config
      readOnly: true
  volumes:
  - hostPath:
      path: /etc/kubernetes
    name: kubernetes-config
EOF
```

The controller manager also needs a Mesos configuration file as one of its parameters; this configuration file is written to `/etc/kubernetes/mesos-cloud.conf`.

```shell
$ cat <<EOF >/etc/kubernetes/mesos-cloud.conf
[mesos-cloud]
mesos-master = ${MESOS_IP}:${MESOS_PORT}
EOF
```
### Configure Apiserver

The apiserver runs on every master node, so its specification file is put into `/etc/kubernetes/manifests/`.

```shell
$ cat <<EOF > /etc/kubernetes/manifests/apiserver.yaml
apiVersion: v1
kind: Pod
metadata:
  name: kube-apiserver
  namespace: kube-system
spec:
  hostNetwork: true
  containers:
  - name: kube-apiserver
    image: haih/k8sm:latest
    imagePullPolicy: IfNotPresent
    command:
    - /opt/kubernetes/km
    - apiserver
    - --insecure-bind-address=0.0.0.0
    - --etcd-servers=http://${ETCD_IP}:${ETCD_PORT}
    - --allow-privileged=true
    - --service-cluster-ip-range=10.10.10.0/24
    - --insecure-port=${K8S_APISERVER_PORT}
    - --cloud-provider=mesos
    - --cloud-config=/etc/kubernetes/mesos-cloud.conf
    - --advertise-address=${MY_IP}
    ports:
    - containerPort: ${K8S_APISERVER_PORT}
      hostPort: ${K8S_APISERVER_PORT}
      name: local
    volumeMounts:
    - mountPath: /etc/kubernetes
      name: kubernetes-config
      readOnly: true
  volumes:
  - hostPath:
      path: /etc/kubernetes
    name: kubernetes-config
EOF
```

Again, one must change `$MY_IP` to either `$K8S_1_IP` or `$K8S_2_IP` depending on which master node is currently being worked on.

To summarize our current setup: the apiserver and podmaster's pod specification files are put into `/etc/kubernetes/manifests/`, so they run on every master node. The scheduler and controller manager's pod specification files are put into `/srv/kubernetes/manifests/`, and they will be copied into `/etc/kubernetes/manifests/` by their podmaster if and only if that podmaster was elected the leader.
### Configure Nginx

Nginx needs to be configured to load balance for both the apiserver and the scheduler. For testing purposes, one can start Nginx in a Docker container.

```shell
$ cat <<EOF >nginx.conf
events {
  worker_connections 4096;  ## Default: 1024
}

http {
  upstream apiservers {
    server ${K8S_1_IP}:${K8S_APISERVER_PORT};
    server ${K8S_2_IP}:${K8S_APISERVER_PORT};
  }

  upstream schedulers {
    server ${K8S_1_IP}:${K8S_SCHEDULER_PORT};
    server ${K8S_2_IP}:${K8S_SCHEDULER_PORT};
  }

  server {
    listen ${NGINX_APISERVER_PORT};
    location / {
      proxy_pass http://apiservers;
      proxy_next_upstream error timeout invalid_header http_500;
      proxy_connect_timeout 2;
      proxy_buffering off;
      proxy_read_timeout 12h;
      proxy_send_timeout 12h;
    }
  }

  server {
    listen ${NGINX_SCHEDULER_PORT};
    location / {
      proxy_pass http://schedulers;
      proxy_next_upstream error timeout invalid_header http_500;
      proxy_connect_timeout 2;
      proxy_buffering off;
      proxy_read_timeout 12h;
      proxy_send_timeout 12h;
    }
  }
}
EOF
$ docker run \
    -p $NGINX_APISERVER_PORT:$NGINX_APISERVER_PORT \
    -p $NGINX_SCHEDULER_PORT:$NGINX_SCHEDULER_PORT \
    --name nginx \
    -v `pwd`/nginx.conf:/etc/nginx/nginx.conf:ro \
    -d nginx:latest
```

For the sake of clarity, configuring Nginx to support HTTP over TLS/SPDY is outside of our scope. However, one should keep in mind that without TLS/SPDY properly configured, some `kubectl` commands might not work properly. This problem is documented [here][4].
### Start Kubelet

To start everything up, we need to start the kubelet on the K8s master nodes so they can start the apiserver and podmaster. On the leader node, the podmaster will subsequently start the scheduler and controller manager.

```shell
$ mkdir -p /var/log/kubernetes
$ kubelet \
    --api_servers=http://127.0.0.1:${K8S_APISERVER_PORT} \
    --register-node=false \
    --allow-privileged=true \
    --config=/etc/kubernetes/manifests \
    1>/var/log/kubernetes/kubelet.log 2>&1 &
```

### Verification

On each of the K8s master nodes, one can run `docker ps` to verify that there is an apiserver pod and a podmaster pod running, and that on one of the K8s master nodes there is a controller manager and a scheduler pod running.

One should also verify that user pods can be created in the K8sm cluster:

```shell
$ export KUBERNETES_MASTER=http://${NGINX_IP}:${NGINX_APISERVER_PORT}
$ kubectl create -f <userpod yaml file>
$ kubectl get pods
```

The pod should be shown in a `Running` state after a short amount of time.

### Tuning

During a fail-over, cold-standby mode takes some time before a new scheduler can be started to take over the work from the failed one. However, one can tune various parameters to shorten this time.

Podmaster has `--sleep` and `--ttl-secs` parameters that can be tuned, and both allow for faster failure detection. However, it is probably not a good idea to set `--ttl-secs` too small, to minimize false positives.

Kubelet has a `--file-check-frequency` parameter that controls how frequently it checks the manifests directory. It is set to 20 seconds by default.
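For example, manifest changes can be picked up faster by lowering the check interval; a sketch of the kubelet invocation above with this flag added (the 5-second value is an assumption, not a recommendation):

```shell
$ kubelet \
    --api_servers=http://127.0.0.1:${K8S_APISERVER_PORT} \
    --register-node=false \
    --allow-privileged=true \
    --config=/etc/kubernetes/manifests \
    --file-check-frequency=5s \
    1>/var/log/kubernetes/kubelet.log 2>&1 &
```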
[1]: http://kubernetes.io/v1.0/docs/admin/high-availability.html
[2]: https://github.com/mesosphere/kubernetes-mesos/issues/457
[3]: https://github.com/mesosphere/kubernetes
[4]: https://github.com/kubernetes/kubernetes/blob/master/contrib/mesos/docs/issues.md#kubectl
@ -1,232 +0,0 @@
## Known Issues

This page identifies significant known issues with the Kubernetes-Mesos distribution.

* [General Known Issues](#general-known-issues)
* [DCOS Package Known Issues](#dcos-package-known-issues), in addition to the above.

## General Known Issues

These known issues apply to all builds of Kubernetes-Mesos.

### Upgrades

Upgrading your Kubernetes-Mesos cluster is currently unsupported.
One known problem exists with respect to expressing executor (kubelet and kube-proxy) process configuration via command line flags.
It is **strongly** recommended that all of the Kubernetes-Mesos executors are destroyed before upgrading the Kubernetes-Mesos scheduler component:
- destroy all daemon controllers running in the cluster, across all namespaces
- destroy all replication controllers running in the cluster, across all namespaces
- destroy all pods running in the cluster, across all namespaces
- invoke the "kamikaze" debug endpoint on the scheduler (e.g. `curl http://10.2.0.5:10251/debug/actions/kamikaze`) to terminate all executors

Not following the above steps prior to upgrading the scheduler can result in a cluster wherein pods will never again be scheduled upon one or more nodes.
This issue is being tracked here: https://github.com/mesosphere/kubernetes-mesos/issues/572.

### Netfilter Connection Tracking

The scheduler offers flags to tweak connection tracking for kube-proxy instances that are launched on slave nodes:

- conntrack-max (do **NOT** set this to a non-zero value if the Mesos slave process is running in a non-root network namespace)
- conntrack-tcp-timeout-established

By default both of these are set to 0 when running Kubernetes-Mesos.
Setting either of these flags to non-zero values may impact connection tracking for the entire slave.

### Port Specifications

In order for pods (replicated, or otherwise) to be scheduled on the cluster, it is strongly recommended that:
* `pod.spec.containers[x].ports[y].hostPort` be left unspecified (or zero), or else;
* `pod.spec.containers[x].ports[y].hostPort` exists in the range of `ports` resources declared on Mesos slaves
  - double-check the resource declarations for your Mesos slaves; the default for `ports` is typically `[31000-32000]`

Mesos slave host `ports` are resources that are managed by the Mesos resource/offers ecosystem; slave host ports are consumed by launched tasks.
Kubernetes pod container specifications identify two types of ports, "container ports" and "host ports":
- container ports are allocated from the network namespace of the pod, which is independent from that of the host, whereas;
- host ports are allocated from the network namespace of the host.

**Notable on Kubernetes-Mesos**
- Mesos slaves must be configured to offer host `ports` resources in order for pods to use them. Most Mesos package distributions, by default, configure a `ports` resource range for each slave (an example slave invocation follows this list).
- The scheduler recognizes the declared *host ports* of each container in a pod/task and, for each such host port, attempts to allocate it from the offered port resources listed in Mesos offers.
- If no host port is declared for a given port spec, then the scheduler may map that port spec's container port to any host port from the offered ports ranges.
- Any *host ports* explicitly declared in the pod container specification must fall within the range of `ports` offered by slaves in the cluster.
  Ports declared outside that range (other than zero) will never match resource offers received by the scheduler, and so pod specifications that declare such ports will never be executed as tasks on the cluster.
- A missing pod container host port declaration or a host port set to zero will, by default, result in the allocation of a host port from a resource offer.
- If a pod is the target of a Kubernetes service selector then the related target container ports must be declared in the pod spec.
- In vanilla Kubernetes, host ports with the value zero are ignored.
  To obtain the same behavior with the Kubernetes-Mesos scheduler, pods must be assigned a label of `k8s.mesosphere.io/portMapping` with the value `fixed`
  (see [#527](https://github.com/mesosphere/kubernetes-mesos/issues/527)).
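A minimal sketch of a slave started with an explicit `ports` resource range (other slave flags elided; treat the exact range as an assumption to be adapted to your cluster):

```shell
$ mesos-slave ... --resources='ports:[31000-32000]'
```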
### Pods

#### Pod Updates

Once a task has been launched for a given pod, Kubernetes-Mesos is blind to any updates applied to the pod state (other than forced or graceful deletion).

#### Pod Placement

The initial plan was to implement pod placement (aka scheduling "constraints") using rules similar to those found in Marathon.
Upon further consideration it was decided that a greater alignment between the stock Kubernetes scheduler and the Kubernetes-Mesos scheduler would benefit both projects, as well as end-users.
Currently there is limited support for pod placement using the Kubernetes-Mesos [scheduler](scheduler.md).
This issue is being tracked here: https://github.com/mesosphere/kubernetes-mesos/issues/338

**Note:** An upcoming changeset will update the scheduler with initial support for multiple Mesos roles
(see [#482](https://github.com/mesosphere/kubernetes-mesos/issues/482)).

#### Static Pods

Static pods are supported by the scheduler.
The path to a directory containing pod definitions can be set via the `--static-pods-config` flag.
Static pods are subject to the following restrictions:

- Static pods are *read only once* by the scheduler on startup.
  Only newly started executors will get the latest static pod specs from the defined static pod directory.

#### Orphan Pods

The default `executor_shutdown_grace_period` of a Mesos slave is 3 seconds.
When the executor is shut down it forcefully terminates the Docker containers that it manages.
However, if terminating the Docker containers takes longer than the `executor_shutdown_grace_period`, then some containers may not get a termination signal at all.
A consequence of this is that some pod containers, previously managed by the framework's executor, will remain running on the slave indefinitely.

There are two work-arounds to this problem:
* Restart the framework and it should terminate the orphaned tasks.
* Adjust the value of `executor_shutdown_grace_period` to something greater than 3 seconds, as sketched below.
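A minimal sketch of the second work-around, raising the grace period on a slave (other flags elided; the 60-second value is an assumption, not a recommendation):

```shell
$ mesos-slave ... --executor_shutdown_grace_period=60secs
```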
### Services

#### Port Specifications

In order for Endpoints (and therefore Services) to be fully operational, it is strongly recommended that:
- service ports explicitly define a `name`
- service ports explicitly define a `targetPort`

For example:
```yaml
apiVersion: v1
kind: Service
metadata:
  name: redis-master
  labels:
    app: redis
    role: master
    tier: backend
spec:
  ports:
    # the port that this service should serve on
  - port: 6379
    targetPort: 6379
    name: k8sm-works-best-with-a-name-here
  selector:
    app: redis
    role: master
    tier: backend
```

#### Endpoints

At the time of this writing both Kubernetes and Mesos are using IPv4 addressing, albeit under different assumptions.
Mesos clusters configured with Docker typically use default Docker networking, which is host-private.
Kubernetes clusters assume a custom Docker networking configuration that assigns a cluster-routable IPv4 address to each pod, meaning that a process running anywhere on a Kubernetes cluster can reach a pod running on the same cluster by using the pod's Docker-assigned IPv4 address.

Kubernetes service endpoints terminate, by default, at a backing pod's IPv4 address, using the container port selected for in the service specification (PodIP:ContainerPort).
This is problematic when default Docker networking has been configured, such as in the case of typical Mesos clusters, because a pod's host-private IPv4 address is not intended to be reachable outside of its host.

The Kubernetes-Mesos project has implemented a work-around:
service endpoints are terminated at HostIP:HostPort, where the HostIP is the IP address of the Mesos slave and the HostPort is the host port declared in the pod container port specification.
Host ports that are not defined, or else defined as zero, will automatically be assigned a (host) port resource from a resource offer.

To disable the work-around and revert to vanilla Kubernetes service endpoint termination:

- execute the k8sm scheduler with `-host-port-endpoints=false`
- execute the k8sm controller-manager with `-host-port-endpoints=false`

Then the usual Kubernetes network assumptions must be fulfilled for Kubernetes to work with Mesos, i.e. each container must get a cluster-wide routable IP (compare the [Kubernetes Networking documentation](../../../docs/design/networking.md#container-to-container)).

This work-around may be mitigated down the road by:
- future support for IPv6 addressing in Docker and Kubernetes
- native IP-per-container support via Mesos with a custom Kubernetes network plugin

### Scheduling

Statements in this section regarding the "scheduler" pertain specifically to the Kubernetes-Mesos scheduler, unless otherwise noted.

Some factors that influence when pods are scheduled by k8s-mesos:
- availability of a resource offer that "fits" the pod (mesos master/slave);
- scheduler *backoff* (to avoid busy-looping) during pod scheduling (k8s-mesos scheduler).

The scheduler attempts to mitigate the second item by cancelling the backoff period if an offer arrives that fits a pod-in-waiting.
However, there is nothing that the scheduler can do if there are no resources available in the cluster.

That said, the current scheduling algorithm is naive: it makes **no attempt to pack multiple pods into a single offer**.
This means that each pod launch requires an independent offer.
In a small cluster resource offers do not arrive very frequently.
In a large cluster with a "decent" amount of free resources the arrival rate of offers is expected to be much higher.

The slave on each host announces offers to Mesos periodically.
In a single-node cluster only a single slave process is advertising resources to the master.
The master will pass those along to the scheduler, at some interval and level of 'fairness' determined by Mesos.
The scheduler will pair each resource offer with a pod that needs to be placed in the cluster.
Once paired, a task is launched to instantiate the pod.
The used resources will be marked as consumed, the remaining resources are "returned" to the cluster, and the scheduler will wait for the next resource offer from the master... and the cycle repeats itself.
This likely limits the scheduling throughput observable in a single-node cluster.

The team plans to conduct benchmarks on the scheduling algorithm to establish some baselines, and is definitely thinking about ways to increase scheduling throughput, including scheduling multiple pods per offer.

#### Runtime Configuration

- mesos: `--offer_timeout` : Duration of time before an offer is rescinded from a framework.
  This helps fairness when running frameworks that hold on to offers, or frameworks that accidentally drop offers.
  ([via](http://mesos.apache.org/documentation/latest/configuration/))
- k8s-mesos: `--scheduler-config` : An ini-style configuration file with low-level scheduler settings.
  See `offer-ttl`, `initial-pod-backoff`, and `max-pod-backoff`.
  ([via](https://github.com/kubernetes/kubernetes/blob/master/contrib/mesos/pkg/scheduler/config/config.go))

What is not configurable, but perhaps should be, are the Mesos "filters" that the scheduler includes when declining offers that are not matched to pods within the configured `offer-ttl` (see https://github.com/apache/mesos/blob/0.25.0/include/mesos/mesos.proto#L1165): the current `refuse_seconds` value is hard-coded to 5s.
That parameter should probably be exposed via the scheduler fine-tuning mechanism.

#### Backoff

If no matching resource offer can be found for a pod, then that pod is put into a backoff queue.
Once the backoff period expires the pod is re-added to the scheduling queue.
The backoff period may be truncated by the arrival of an offer with matching resources.
This is an event-based design and there is no polling.

#### Debugging

Good insight may be achieved when all of the relevant logs are collected into a single tool (Splunk, or an ELK stack) in a manner such that it is trivial to search for something along the lines of a task-id or pod-id during cluster debugging sessions.

The scheduler also offers `/debug` API endpoints that may be useful (an example invocation follows this list):
- on-demand explicit reconciliation: /debug/actions/requestExplicit
- on-demand implicit reconciliation: /debug/actions/requestImplicit
- kamikaze (terminate all "empty" executors that aren't running pods): /debug/actions/kamikaze
- pods to be scheduled: /debug/scheduler/podqueue
- pod registry changes waiting to be processed: /debug/scheduler/podstore
- the scheduler's internal task registry state: /debug/registry/tasks
- scheduler metrics are available at /metrics
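For example, to dump the queue of pods awaiting scheduling; a sketch that assumes the scheduler is reachable at `10.2.0.5:10251`, as in the kamikaze example above:

```shell
$ curl http://10.2.0.5:10251/debug/scheduler/podqueue
```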
## DCOS Package Known Issues

All of the issues in the above section also apply to the Kubernetes-Mesos DCOS package builds.
The issues listed in this section apply specifically to the Kubernetes-Mesos DCOS package available from https://github.com/mesosphere/multiverse.

### Etcd

The default configuration of the DCOS Kubernetes package launches an internal etcd process **which only persists the cluster state in the sandbox of the current container instance**. While this is simpler for the first steps with Kubernetes-Mesos, it means that any cluster state is lost when the Kubernetes-Mesos Docker container is restarted.

Hence, for any kind of production-like deployment it is highly recommended to install the etcd DCOS package alongside Kubernetes-Mesos and configure the latter to use the etcd cluster. Further instructions can be found at https://docs.mesosphere.com/services/kubernetes/#install.

This situation will eventually go away as soon as DCOS supports package dependencies and/or interactive package configuration.

### Kubectl

The following `kubectl` and `dcos kubectl` commands are not yet supported:

- exec (see [#356](https://github.com/mesosphere/kubernetes-mesos/issues/356))
- logs (see [#587](https://github.com/mesosphere/kubernetes-mesos/issues/587))
- port-forward
- proxy
@ -1,178 +0,0 @@
# Kubernetes-Mesos Scheduler

Kubernetes on Mesos does not use the upstream scheduler binary, but replaces it with its own Mesos framework scheduler. The following gives an overview of the differences.

## Labels and Mesos Agent Attributes

The scheduler of Kubernetes-Mesos takes [labels][1] into account: it matches specified labels in pod specs with defined labels of nodes.

In addition to user-defined labels, [attributes of Mesos agents][2] are converted into node labels by the scheduler, following the pattern

```yaml
k8s.mesosphere.io/attribute-<name>: value
```

As an example, a Mesos agent attribute of `generation:2015` will result in the node label

```yaml
k8s.mesosphere.io/attribute-generation: 2015
```

and can be used to schedule pods onto nodes which are of generation 2015.
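An illustrative sketch (the `--attributes` flag is standard mesos-slave configuration; the `nodeSelector` stanza in the comment is a hypothetical pod-spec snippet):

```shell
# start the slave with the attribute...
$ mesos-slave ... --attributes='generation:2015'

# ...then target it from a pod spec via nodeSelector, e.g.:
#   nodeSelector:
#     k8s.mesosphere.io/attribute-generation: "2015"
```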
**Note:** Node labels prefixed with `k8s.mesosphere.io` are managed by Kubernetes-Mesos and should not be modified manually by the user or admin. For example, the Kubernetes-Mesos executor manages `k8s.mesosphere.io/attribute` labels and will auto-detect and update modified attributes when the mesos-slave is restarted.

## Resource Roles

A Mesos cluster can be statically partitioned using [resource roles][2]. Each resource is assigned such a role (`*` is the default role, if none is explicitly assigned in the mesos-slave command line). The Mesos master will send offers to frameworks for `*` resources and – optionally – one additional role that a framework is assigned to. Right now only one such additional role per framework is supported.

### Configuring Roles for the Scheduler

Every Mesos framework scheduler can choose among offered `*` resources and optionally one additional role. The Kubernetes-Mesos scheduler supports this by setting the framework roles in the scheduler command line, e.g.

```bash
$ km scheduler ... --mesos-framework-roles="*,role1" ...
```

This permits the Kubernetes-Mesos scheduler to accept offered resources for the `*` and `role1` roles.
By default pods may be assigned any combination of resources for the roles accepted by the scheduler.
This default role assignment behavior may be overridden using the `--mesos-default-pod-roles` flag or else by annotating the pod (as described later).

One can configure default pod roles, e.g.

```bash
$ km scheduler ... --mesos-default-pod-roles="role1" ...
```

This will tell the Kubernetes-Mesos scheduler to default to `role1` resource offers.
The configured default pod roles must be a subset of the configured framework roles.

The order of the configured default pod roles is relevant:
`--mesos-default-pod-roles=role1,*` will first try to consume `role1` resources from an offer and, once depleted, fall back to `*` resources.

The configuration `--mesos-default-pod-roles=*,role1` has the reverse behavior.
It first tries to consume `*` resources from an offer and, once depleted, falls back to `role1` resources.

Due to restrictions of Mesos, currently only one additional role next to `*` can be configured for both framework and default pod roles.

### Specifying Roles for Pods

By default a pod is scheduled using resources as specified by the `--mesos-default-pod-roles` configuration.

A pod can override this default behaviour using a `k8s.mesosphere.io/roles` annotation:

```yaml
k8s.mesosphere.io/roles: "*,role1"
```

The format is a comma-separated list of allowed resource roles. The scheduler will try to schedule the pod with `*` resources first, using `role1` resources if the former are not available or are depleted.

**Note:** An empty list means that no resource roles are allowed, which is equivalent to a pod which is unschedulable.

For example:

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: backend
  annotations:
    k8s.mesosphere.io/roles: "*,public"
  namespace: prod
spec:
  ...
```

This `*/public` pod will be scheduled using resources from both roles, preferably using `*` resources, followed by `public`. If neither of those roles provides enough resources, the scheduling fails.

**Note:** The scheduler will also allow mixing of different roles, in the following sense: if a node provides `cpu` resources for the `*` role, but `mem` resources only for the `public` role, the above pod will be scheduled using `cpu(*)` and `mem(public)` resources.

**Note:** The scheduler might also mix within one resource type, i.e. it will use as many `cpu`s of the `*` role as possible. If a pod requires even more `cpu` resources (defined using the `pod.spec.resources.limits` property) for successful scheduling, the scheduler will add resources from the `public` role until the pod resource requirements are satisfied. E.g. a pod might be scheduled with 0.5 `cpu(*)` and 1.5 `cpu(public)` resources, plus e.g. 2 GB of `mem(public)` resources.

## Tuning

The scheduler configuration can be fine-tuned using an ini-style configuration file.
The filename is passed via `--scheduler-config` to the `km scheduler` command.

Be warned, though, that some of these settings are pretty low-level, and one has to know the inner workings of k8sm to find sensible values. Moreover, these settings may change or even disappear from version to version without further notice.

The following settings are the defaults:

```
[scheduler]
; duration an offer is viable, prior to being expired
offer-ttl = 5s

; duration an expired offer lingers in history
offer-linger-ttl = 2m

; duration between offer listener notifications
listener-delay = 1s

; size of the pod updates channel
updates-backlog = 2048

; interval we update the frameworkId stored in etcd
framework-id-refresh-interval = 30s

; wait this amount of time after initial registration before attempting
; implicit reconciliation
initial-implicit-reconciliation-delay = 15s

; interval in between internal task status checks/updates
explicit-reconciliation-max-backoff = 2m

; waiting period after attempting to cancel an ongoing reconciliation
explicit-reconciliation-abort-timeout = 30s

initial-pod-backoff = 1s
max-pod-backoff = 60s
http-handler-timeout = 10s
http-bind-interval = 5s
```

## Low-Level Scheduler Architecture

*(low-level scheduler architecture diagram)*

[1]: ../../../docs/user-guide/labels.md
[2]: http://mesos.apache.org/documentation/attributes-resources/
@ -1,43 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package assert

import (
	"fmt"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
)

// EventuallyTrue asserts that the given predicate becomes true within the given timeout. It
// checks the predicate regularly, every 100ms.
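//
// A hypothetical usage sketch (the predicate is illustrative, not from this repo):
//
//	EventuallyTrue(t, 5*time.Second, func() bool { return serverReady() })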
func EventuallyTrue(t *testing.T, timeout time.Duration, fn func() bool, msgAndArgs ...interface{}) bool {
	start := time.Now()
	for {
		if fn() {
			return true
		}
		if time.Since(start) > timeout {
			if len(msgAndArgs) > 0 {
				return assert.Fail(t, msgAndArgs[0].(string), msgAndArgs[1:]...)
			}
			// format the timeout into the failure message; testify's Fail does
			// not apply printf-style verbs to its failureMessage parameter
			return assert.Fail(t, fmt.Sprintf("predicate fn has not been true after %v", timeout))
		}
		time.Sleep(100 * time.Millisecond)
	}
}
@ -1,19 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package assert is a utility package containing reusable testing functionality
// extending github.com/stretchr/testify/assert
package assert // import "k8s.io/kubernetes/contrib/mesos/pkg/assert"
@ -1,96 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package backoff

import (
	"math/rand"
	"sync"
	"time"

	log "github.com/golang/glog"
)

type clock interface {
	Now() time.Time
}

type realClock struct{}

func (realClock) Now() time.Time {
	return time.Now()
}

type backoffEntry struct {
	backoff    time.Duration
	lastUpdate time.Time
}

type Backoff struct {
	perItemBackoff  map[string]*backoffEntry
	lock            sync.Mutex
	clock           clock
	defaultDuration time.Duration
	maxDuration     time.Duration
}

func New(initial, max time.Duration) *Backoff {
	return &Backoff{
		perItemBackoff:  map[string]*backoffEntry{},
		clock:           realClock{},
		defaultDuration: initial,
		maxDuration:     max,
	}
}

func (p *Backoff) getEntry(id string) *backoffEntry {
	p.lock.Lock()
	defer p.lock.Unlock()
	entry, ok := p.perItemBackoff[id]
	if !ok {
		entry = &backoffEntry{backoff: p.defaultDuration}
		p.perItemBackoff[id] = entry
	}
	entry.lastUpdate = p.clock.Now()
	return entry
}
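// Get returns the current backoff duration for id and doubles the stored value
// for the next call, capping it at maxDuration. Jitter is applied, so the
// returned duration is a uniformly random value in [0, stored backoff).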
func (p *Backoff) Get(id string) time.Duration {
	entry := p.getEntry(id)
	duration := entry.backoff
	entry.backoff *= 2
	if entry.backoff > p.maxDuration {
		entry.backoff = p.maxDuration
	}
	//TODO(jdef) parameterize use of jitter?
	// add jitter, get better backoff distribution
	duration = time.Duration(rand.Int63n(int64(duration)))
	log.V(3).Infof("Backing off %v for pod %s", duration, id)
	return duration
}

// GC garbage collects records that have aged past maxDuration. Backoff users are expected
// to invoke this periodically.
func (p *Backoff) GC() {
	p.lock.Lock()
	defer p.lock.Unlock()
	now := p.clock.Now()
	for id, entry := range p.perItemBackoff {
		if now.Sub(entry.lastUpdate) > p.maxDuration {
			delete(p.perItemBackoff, id)
		}
	}
}
@ -1,19 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package backoff provides backoff functionality with a simple API.
// Originally copied from Kubernetes: plugin/pkg/scheduler/factory/factory.go
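//
// A hypothetical usage sketch (identifiers are illustrative only):
//
//	b := backoff.New(1*time.Second, 60*time.Second)
//	delay := b.Get("pod-xyz") // jittered; the stored backoff roughly doubles per call
//	time.Sleep(delay)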
package backoff // import "k8s.io/kubernetes/contrib/mesos/pkg/backoff"
@ -1,371 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllermanager

import (
	"fmt"
	"io/ioutil"
	"math/rand"
	"net"
	"net/http"
	"strconv"
	"time"

	kubecontrollermanager "k8s.io/kubernetes/cmd/kube-controller-manager/app"
	"k8s.io/kubernetes/cmd/kube-controller-manager/app/options"
	"k8s.io/kubernetes/contrib/mesos/pkg/node"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/unversioned"
	"k8s.io/kubernetes/pkg/apimachinery/registered"
	clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
	"k8s.io/kubernetes/pkg/client/restclient"
	"k8s.io/kubernetes/pkg/client/typed/dynamic"
	client "k8s.io/kubernetes/pkg/client/unversioned"
	"k8s.io/kubernetes/pkg/client/unversioned/clientcmd"
	clientcmdapi "k8s.io/kubernetes/pkg/client/unversioned/clientcmd/api"
	"k8s.io/kubernetes/pkg/cloudprovider"
	"k8s.io/kubernetes/pkg/cloudprovider/providers/mesos"
	"k8s.io/kubernetes/pkg/controller"
	"k8s.io/kubernetes/pkg/controller/daemon"
	"k8s.io/kubernetes/pkg/controller/deployment"
	endpointcontroller "k8s.io/kubernetes/pkg/controller/endpoint"
	"k8s.io/kubernetes/pkg/controller/informers"
	"k8s.io/kubernetes/pkg/controller/job"
	namespacecontroller "k8s.io/kubernetes/pkg/controller/namespace"
	nodecontroller "k8s.io/kubernetes/pkg/controller/node"
	"k8s.io/kubernetes/pkg/controller/podautoscaler"
	"k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
	"k8s.io/kubernetes/pkg/controller/podgc"
	replicaset "k8s.io/kubernetes/pkg/controller/replicaset"
	replicationcontroller "k8s.io/kubernetes/pkg/controller/replication"
	resourcequotacontroller "k8s.io/kubernetes/pkg/controller/resourcequota"
	routecontroller "k8s.io/kubernetes/pkg/controller/route"
	servicecontroller "k8s.io/kubernetes/pkg/controller/service"
	serviceaccountcontroller "k8s.io/kubernetes/pkg/controller/serviceaccount"
	persistentvolumecontroller "k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
	"k8s.io/kubernetes/pkg/healthz"
	quotainstall "k8s.io/kubernetes/pkg/quota/install"
	"k8s.io/kubernetes/pkg/serviceaccount"
	certutil "k8s.io/kubernetes/pkg/util/cert"
	"k8s.io/kubernetes/pkg/util/wait"

	"k8s.io/kubernetes/contrib/mesos/pkg/profile"
	kmendpoint "k8s.io/kubernetes/contrib/mesos/pkg/service"

	"github.com/golang/glog"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/spf13/pflag"
)

const (
	// Jitter used when starting controller managers
	ControllerStartJitter = 1.0
)
// CMServer is the main context object for the controller manager.
|
||||
type CMServer struct {
|
||||
*options.CMServer
|
||||
UseHostPortEndpoints bool
|
||||
}
|
||||
|
||||
// NewCMServer creates a new CMServer with a default config.
|
||||
func NewCMServer() *CMServer {
|
||||
s := &CMServer{
|
||||
CMServer: options.NewCMServer(),
|
||||
}
|
||||
s.CloudProvider = mesos.ProviderName
|
||||
s.UseHostPortEndpoints = true
|
||||
return s
|
||||
}
|
||||
|
||||
// AddFlags adds flags for a specific CMServer to the specified FlagSet
|
||||
func (s *CMServer) AddFlags(fs *pflag.FlagSet) {
|
||||
s.CMServer.AddFlags(fs)
|
||||
fs.BoolVar(&s.UseHostPortEndpoints, "host-port-endpoints", s.UseHostPortEndpoints, "Map service endpoints to hostIP:hostPort instead of podIP:containerPort. Default true.")
|
||||
}
|
||||
|
||||
func (s *CMServer) resyncPeriod() time.Duration {
|
||||
factor := rand.Float64() + 1
|
||||
return time.Duration(float64(time.Hour) * 12.0 * factor)
|
||||
}
|
||||
|
||||
func (s *CMServer) Run(_ []string) error {
|
||||
if s.Kubeconfig == "" && s.Master == "" {
|
||||
glog.Warningf("Neither --kubeconfig nor --master was specified. Using default API client. This might not work.")
|
||||
}
|
||||
|
||||
// This creates a client, first loading any specified kubeconfig
|
||||
// file, and then overriding the Master flag, if non-empty.
|
||||
kubeconfig, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
|
||||
&clientcmd.ClientConfigLoadingRules{ExplicitPath: s.Kubeconfig},
|
||||
&clientcmd.ConfigOverrides{ClusterInfo: clientcmdapi.Cluster{Server: s.Master}}).ClientConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
kubeconfig.QPS = 20.0
|
||||
kubeconfig.Burst = 30
|
||||
|
||||
kubeClient, err := client.New(kubeconfig)
|
||||
if err != nil {
|
||||
glog.Fatalf("Invalid API configuration: %v", err)
|
||||
}
|
||||
|
||||
go func() {
|
||||
mux := http.NewServeMux()
|
||||
healthz.InstallHandler(mux)
|
||||
if s.EnableProfiling {
|
||||
profile.InstallHandler(mux)
|
||||
}
|
||||
mux.Handle("/metrics", prometheus.Handler())
|
||||
server := &http.Server{
|
||||
Addr: net.JoinHostPort(s.Address, strconv.Itoa(int(s.Port))),
|
||||
Handler: mux,
|
||||
}
|
||||
glog.Fatal(server.ListenAndServe())
|
||||
}()
|
||||
|
||||
endpoints := s.createEndpointController(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "endpoint-controller")))
|
||||
go endpoints.Run(int(s.ConcurrentEndpointSyncs), wait.NeverStop)
|
||||
|
||||
go replicationcontroller.NewReplicationManagerFromClient(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "replication-controller")), s.resyncPeriod, replicationcontroller.BurstReplicas, int(s.LookupCacheSizeForRC)).
|
||||
Run(int(s.ConcurrentRCSyncs), wait.NeverStop)
|
||||
|
||||
go podgc.NewFromClient(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "pod-garbage-collector")), int(s.TerminatedPodGCThreshold)).
|
||||
Run(wait.NeverStop)
|
||||
|
||||
//TODO(jdef) should eventually support more cloud providers here
|
||||
if s.CloudProvider != mesos.ProviderName {
|
||||
glog.Fatalf("Only provider %v is supported, you specified %v", mesos.ProviderName, s.CloudProvider)
|
||||
}
|
||||
cloud, err := cloudprovider.InitCloudProvider(s.CloudProvider, s.CloudConfigFile)
|
||||
if err != nil {
|
||||
glog.Fatalf("Cloud provider could not be initialized: %v", err)
|
||||
}
|
||||
_, clusterCIDR, _ := net.ParseCIDR(s.ClusterCIDR)
|
||||
_, serviceCIDR, _ := net.ParseCIDR(s.ServiceCIDR)
|
||||
nodeController, err := nodecontroller.NewNodeControllerFromClient(cloud, clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "node-controller")),
|
||||
s.PodEvictionTimeout.Duration, s.NodeEvictionRate, s.SecondaryNodeEvictionRate, s.LargeClusterSizeThreshold, s.UnhealthyZoneThreshold,
|
||||
s.NodeMonitorGracePeriod.Duration, s.NodeStartupGracePeriod.Duration, s.NodeMonitorPeriod.Duration, clusterCIDR, serviceCIDR, int(s.NodeCIDRMaskSize), s.AllocateNodeCIDRs)
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to initialize nodecontroller: %v", err)
|
||||
}
|
||||
nodeController.Run()
|
||||
|
||||
nodeStatusUpdaterController := node.NewStatusUpdater(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "node-status-controller")), s.NodeMonitorPeriod.Duration, time.Now)
|
||||
if err := nodeStatusUpdaterController.Run(wait.NeverStop); err != nil {
|
||||
glog.Fatalf("Failed to start node status update controller: %v", err)
|
||||
}
|
||||
|
||||
serviceController, err := servicecontroller.New(cloud, clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "service-controller")), s.ClusterName)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to start service controller: %v", err)
|
||||
} else {
|
||||
serviceController.Run(int(s.ConcurrentServiceSyncs))
|
||||
}
|
||||
|
||||
if s.AllocateNodeCIDRs && s.ConfigureCloudRoutes {
|
||||
if cloud == nil {
|
||||
glog.Warning("configure-cloud-routes is set, but no cloud provider specified. Will not configure cloud provider routes.")
|
||||
} else if routes, ok := cloud.Routes(); !ok {
|
||||
glog.Warning("configure-cloud-routes is set, but cloud provider does not support routes. Will not configure cloud provider routes.")
|
||||
} else {
|
||||
routeController := routecontroller.New(routes, clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "route-controller")), s.ClusterName, clusterCIDR)
|
||||
routeController.Run(s.RouteReconciliationPeriod.Duration)
|
||||
time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
|
||||
}
|
||||
} else {
|
||||
glog.Infof("Will not configure cloud provider routes for allocate-node-cidrs: %v, configure-cloud-routes: %v.", s.AllocateNodeCIDRs, s.ConfigureCloudRoutes)
|
||||
}
|
||||
|
||||
resourceQuotaControllerClient := clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "resource-quota-controller"))
|
||||
resourceQuotaRegistry := quotainstall.NewRegistry(resourceQuotaControllerClient)
|
||||
groupKindsToReplenish := []unversioned.GroupKind{
|
||||
api.Kind("Pod"),
|
||||
api.Kind("Service"),
|
||||
api.Kind("ReplicationController"),
|
||||
api.Kind("PersistentVolumeClaim"),
|
||||
api.Kind("Secret"),
|
||||
}
|
||||
resourceQuotaControllerOptions := &resourcequotacontroller.ResourceQuotaControllerOptions{
|
||||
KubeClient: resourceQuotaControllerClient,
|
||||
ResyncPeriod: controller.StaticResyncPeriodFunc(s.ResourceQuotaSyncPeriod.Duration),
|
||||
Registry: resourceQuotaRegistry,
|
||||
GroupKindsToReplenish: groupKindsToReplenish,
|
||||
ReplenishmentResyncPeriod: s.resyncPeriod,
|
||||
ControllerFactory: resourcequotacontroller.NewReplenishmentControllerFactoryFromClient(resourceQuotaControllerClient),
|
||||
}
|
||||
go resourcequotacontroller.NewResourceQuotaController(resourceQuotaControllerOptions).Run(int(s.ConcurrentResourceQuotaSyncs), wait.NeverStop)
|
||||
|
||||
// If apiserver is not running we should wait for some time and fail only then. This is particularly
|
||||
// important when we start apiserver and controller manager at the same time.
|
||||
var versionStrings []string
|
||||
err = wait.PollImmediate(time.Second, 10*time.Second, func() (bool, error) {
|
||||
if versionStrings, err = restclient.ServerAPIVersions(kubeconfig); err == nil {
|
||||
return true, nil
|
||||
}
|
||||
glog.Errorf("Failed to get api versions from server: %v", err)
|
||||
return false, nil
|
||||
})
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to get api versions from server: %v", err)
|
||||
}
|
||||
versions := &unversioned.APIVersions{Versions: versionStrings}
|
||||
|
||||
resourceMap, err := kubeClient.Discovery().ServerResources()
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to get supported resources from server: %v", err)
|
||||
}
|
||||
|
||||
// Find the list of namespaced resources via discovery that the namespace controller must manage
|
||||
namespaceKubeClient := clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "namespace-controller"))
|
||||
namespaceClientPool := dynamic.NewClientPool(restclient.AddUserAgent(kubeconfig, "namespace-controller"), registered.RESTMapper(), dynamic.LegacyAPIPathResolverFunc)
|
||||
groupVersionResources, err := namespaceKubeClient.Discovery().ServerPreferredNamespacedResources()
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to get supported resources from server: %v", err)
|
||||
}
|
||||
namespaceController := namespacecontroller.NewNamespaceController(namespaceKubeClient, namespaceClientPool, groupVersionResources, s.NamespaceSyncPeriod.Duration, api.FinalizerKubernetes)
|
||||
go namespaceController.Run(int(s.ConcurrentNamespaceSyncs), wait.NeverStop)
|
||||
|
||||
groupVersion := "extensions/v1beta1"
|
||||
resources, found := resourceMap[groupVersion]
|
||||
// TODO(k8s): this needs to be dynamic so users don't have to restart their controller manager if they change the apiserver
|
||||
if containsVersion(versions, groupVersion) && found {
|
||||
glog.Infof("Starting %s apis", groupVersion)
|
||||
if containsResource(resources, "horizontalpodautoscalers") {
|
||||
glog.Infof("Starting horizontal pod controller.")
|
||||
hpaClient := clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "horizontal-pod-autoscaler"))
|
||||
metricsClient := metrics.NewHeapsterMetricsClient(
|
||||
hpaClient,
|
||||
metrics.DefaultHeapsterNamespace,
|
||||
metrics.DefaultHeapsterScheme,
|
||||
metrics.DefaultHeapsterService,
|
||||
metrics.DefaultHeapsterPort,
|
||||
)
|
||||
go podautoscaler.NewHorizontalController(hpaClient.Core(), hpaClient.Extensions(), hpaClient, metricsClient, s.HorizontalPodAutoscalerSyncPeriod.Duration).
|
||||
Run(wait.NeverStop)
|
||||
}
|
||||
|
||||
if containsResource(resources, "daemonsets") {
|
||||
glog.Infof("Starting daemon set controller")
|
||||
informerFactory := informers.NewSharedInformerFactory(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "daemon-set-controller")), s.resyncPeriod())
|
||||
|
||||
go daemon.NewDaemonSetsController(informerFactory.DaemonSets(), informerFactory.Pods(), informerFactory.Nodes(), clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "daemon-set-controller")), int(s.LookupCacheSizeForDaemonSet)).
|
||||
Run(int(s.ConcurrentDaemonSetSyncs), wait.NeverStop)
|
||||
informerFactory.Start(wait.NeverStop)
|
||||
}
|
||||
|
||||
if containsResource(resources, "jobs") {
|
||||
glog.Infof("Starting job controller")
|
||||
go job.NewJobControllerFromClient(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "job-controller")), s.resyncPeriod).
|
||||
Run(int(s.ConcurrentJobSyncs), wait.NeverStop)
|
||||
}
|
||||
|
||||
if containsResource(resources, "deployments") {
|
||||
glog.Infof("Starting deployment controller")
|
||||
go deployment.NewDeploymentController(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "deployment-controller")), s.resyncPeriod).
|
||||
Run(int(s.ConcurrentDeploymentSyncs), wait.NeverStop)
|
||||
}
|
||||
|
||||
if containsResource(resources, "replicasets") {
|
||||
glog.Infof("Starting ReplicaSet controller")
|
||||
go replicaset.NewReplicaSetControllerFromClient(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "replicaset-controller")), s.resyncPeriod, replicaset.BurstReplicas, int(s.LookupCacheSizeForRS)).
|
||||
Run(int(s.ConcurrentRSSyncs), wait.NeverStop)
|
||||
}
|
||||
}
|
||||
|
||||
alphaProvisioner, err := kubecontrollermanager.NewAlphaVolumeProvisioner(cloud, s.VolumeConfiguration)
|
||||
if err != nil {
|
||||
glog.Fatalf("An backward-compatible provisioner could not be created: %v, but one was expected. Provisioning will not work. This functionality is considered an early Alpha version.", err)
|
||||
}
|
||||
params := persistentvolumecontroller.ControllerParameters{
|
||||
KubeClient: clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "persistent-volume-binder")),
|
||||
SyncPeriod: s.PVClaimBinderSyncPeriod.Duration,
|
||||
AlphaProvisioner: alphaProvisioner,
|
||||
VolumePlugins: kubecontrollermanager.ProbeControllerVolumePlugins(cloud, s.VolumeConfiguration),
|
||||
Cloud: cloud,
|
||||
ClusterName: s.ClusterName,
|
||||
EnableDynamicProvisioning: s.VolumeConfiguration.EnableDynamicProvisioning,
|
||||
}
|
||||
volumeController := persistentvolumecontroller.NewController(params)
|
||||
volumeController.Run(wait.NeverStop)
|
||||
|
||||
var rootCA []byte
|
||||
|
||||
if s.RootCAFile != "" {
|
||||
rootCA, err = ioutil.ReadFile(s.RootCAFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading root-ca-file at %s: %v", s.RootCAFile, err)
|
||||
}
|
||||
if _, err := certutil.ParseCertsPEM(rootCA); err != nil {
|
||||
return fmt.Errorf("error parsing root-ca-file at %s: %v", s.RootCAFile, err)
|
||||
}
|
||||
} else {
|
||||
rootCA = kubeconfig.CAData
|
||||
}
|
||||
|
||||
if len(s.ServiceAccountKeyFile) > 0 {
|
||||
privateKey, err := serviceaccount.ReadPrivateKey(s.ServiceAccountKeyFile)
|
||||
if err != nil {
|
||||
glog.Errorf("Error reading key for service account token controller: %v", err)
|
||||
} else {
|
||||
go serviceaccountcontroller.NewTokensController(
|
||||
clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "tokens-controller")),
|
||||
serviceaccountcontroller.TokensControllerOptions{
|
||||
TokenGenerator: serviceaccount.JWTTokenGenerator(privateKey),
|
||||
RootCA: rootCA,
|
||||
},
|
||||
).Run(int(s.ConcurrentSATokenSyncs), wait.NeverStop)
|
||||
}
|
||||
}
|
||||
|
||||
serviceaccountcontroller.NewServiceAccountsController(
|
||||
clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "service-account-controller")),
|
||||
serviceaccountcontroller.DefaultServiceAccountsControllerOptions(),
|
||||
).Run()
|
||||
|
||||
select {}
|
||||
}
|
||||
|
||||
func (s *CMServer) createEndpointController(client *clientset.Clientset) kmendpoint.EndpointController {
|
||||
if s.UseHostPortEndpoints {
|
||||
glog.V(2).Infof("Creating hostIP:hostPort endpoint controller")
|
||||
return kmendpoint.NewEndpointController(client)
|
||||
}
|
||||
glog.V(2).Infof("Creating podIP:containerPort endpoint controller")
|
||||
stockEndpointController := endpointcontroller.NewEndpointControllerFromClient(client, s.resyncPeriod)
|
||||
return stockEndpointController
|
||||
}
|
||||
|
||||
func containsVersion(versions *unversioned.APIVersions, version string) bool {
|
||||
for ix := range versions.Versions {
|
||||
if versions.Versions[ix] == version {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func containsResource(resources *unversioned.APIResourceList, resourceName string) bool {
|
||||
for ix := range resources.APIResources {
|
||||
resource := resources.APIResources[ix]
|
||||
if resource.Name == resourceName {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
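For orientation, a minimal sketch of how a main package might drive this CMServer, using only the NewCMServer/AddFlags/Run signatures shown above; the wiring below is an assumption, not the actual contrib/mesos command code.

package main

import (
	"fmt"
	"os"

	"github.com/spf13/pflag"

	"k8s.io/kubernetes/contrib/mesos/pkg/controllermanager"
)

func main() {
	s := controllermanager.NewCMServer()
	s.AddFlags(pflag.CommandLine) // includes the Mesos-specific --host-port-endpoints flag
	pflag.Parse()
	if err := s.Run(pflag.Args()); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}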
@ -1,20 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package controllermanager is largely a clone of the upstream implementation,
// with additional functionality to select between stock or a customized
// endpoints controller.
package controllermanager // import "k8s.io/kubernetes/contrib/mesos/pkg/controllermanager"
@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package election provides interfaces used for master election.
package election // import "k8s.io/kubernetes/contrib/mesos/pkg/election"
@ -1,198 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package election

import (
	"fmt"
	"time"

	etcd "github.com/coreos/etcd/client"
	"github.com/golang/glog"
	"golang.org/x/net/context"

	"k8s.io/kubernetes/pkg/api/unversioned"
	etcdutil "k8s.io/kubernetes/pkg/storage/etcd/util"
	"k8s.io/kubernetes/pkg/util/wait"
	"k8s.io/kubernetes/pkg/watch"
)

// Master is used to announce the current elected master.
type Master string

// GetObjectKind is implemented solely so we can work with the watch package.
// TODO(k8s): Either fix watch so this isn't necessary, or make this a real API Object.
// TODO(k8s): when it becomes clear how this package will be used, move these declarations
// to the proper place.
func (obj Master) GetObjectKind() unversioned.ObjectKind { return unversioned.EmptyObjectKind }

// NewEtcdMasterElector returns an implementation of election.MasterElector backed by etcd.
func NewEtcdMasterElector(h etcd.Client) MasterElector {
	return &etcdMasterElector{etcd: etcd.NewKeysAPI(h)}
}

type empty struct{}

// internal implementation struct
type etcdMasterElector struct {
	etcd   etcd.KeysAPI
	done   chan empty
	events chan watch.Event
}

// Elect implements the election.MasterElector interface.
func (e *etcdMasterElector) Elect(path, id string) watch.Interface {
	e.done = make(chan empty)
	e.events = make(chan watch.Event)
	go wait.Until(func() { e.run(path, id) }, time.Second*5, wait.NeverStop)
	return e
}

func (e *etcdMasterElector) run(path, id string) {
	masters := make(chan string)
	errors := make(chan error)
	go e.master(path, id, 30, masters, errors, e.done) // TODO(jdef) extract constant
	for {
		select {
		case m := <-masters:
			e.events <- watch.Event{
				Type:   watch.Modified,
				Object: Master(m),
			}
		case e := <-errors:
			glog.Errorf("Error in election: %v", e)
		}
	}
}

// ResultChan implements the watch.Interface interface.
func (e *etcdMasterElector) ResultChan() <-chan watch.Event {
	return e.events
}

// extendMaster attempts to extend ownership of a master lock for TTL seconds.
// returns "", nil if extension failed
// returns id, nil if extension succeeded
// returns "", err if an error occurred
func (e *etcdMasterElector) extendMaster(path, id string, ttl uint64, res *etcd.Response) (string, error) {
	// If it matches the passed-in id, extend the lease by writing a new entry.
	// Uses compare and swap, so that if we TTL out in the meantime, the write will fail.
	// We don't handle the TTL delete w/o a write case here, it's handled in the next loop
	// iteration.
	opts := etcd.SetOptions{
		TTL:       time.Duration(ttl) * time.Second,
		PrevValue: "",
		PrevIndex: res.Node.ModifiedIndex,
	}
	_, err := e.etcd.Set(context.TODO(), path, id, &opts)
	if err != nil && !etcdutil.IsEtcdTestFailed(err) {
		return "", err
	}
	if err != nil && etcdutil.IsEtcdTestFailed(err) {
		return "", nil
	}
	return id, nil
}

// becomeMaster attempts to become the master for this lock.
// returns "", nil if the attempt failed
// returns id, nil if the attempt succeeded
// returns "", err if an error occurred
func (e *etcdMasterElector) becomeMaster(path, id string, ttl uint64) (string, error) {
	opts := etcd.SetOptions{
		TTL:       time.Duration(ttl) * time.Second,
		PrevExist: etcd.PrevNoExist,
	}

	_, err := e.etcd.Set(context.TODO(), path, id, &opts)
	if err != nil && !etcdutil.IsEtcdNodeExist(err) {
		// unexpected error
		return "", err
	}
	if err != nil && etcdutil.IsEtcdNodeExist(err) {
		return "", nil
	}
	return id, nil
}

// handleMaster performs one loop of master locking.
// on success it returns <master>, nil
// on error it returns "", err
// in situations where you should try again due to concurrent state changes (e.g. another actor simultaneously acquiring the lock)
// it returns "", nil
func (e *etcdMasterElector) handleMaster(path, id string, ttl uint64) (string, error) {
	res, err := e.etcd.Get(context.TODO(), path, nil)

	// Unexpected error, bail out
	if err != nil && !etcdutil.IsEtcdNotFound(err) {
		return "", err
	}

	// There is no master, try to become the master.
	if err != nil && etcdutil.IsEtcdNotFound(err) {
		return e.becomeMaster(path, id, ttl)
	}

	// This should never happen.
	if res.Node == nil {
		return "", fmt.Errorf("unexpected response: %#v", res)
	}

	// We're not the master, just return the current value
	if res.Node.Value != id {
		return res.Node.Value, nil
	}

	// We are the master, try to extend our lease
	return e.extendMaster(path, id, ttl, res)
}

// master provides a distributed master election lock, maintains the lock until failure, or someone sends something in the done channel.
// The basic algorithm is:
// while !done
//   Get the current master
//   If there is no current master
//     Try to become the master
//   Otherwise
//     If we are the master, extend the lease
//     If the master is different than the last time through the loop, report the master
//   Sleep 80% of TTL
func (e *etcdMasterElector) master(path, id string, ttl uint64, masters chan<- string, errors chan<- error, done <-chan empty) {
	lastMaster := ""
	for {
		master, err := e.handleMaster(path, id, ttl)
		if err != nil {
			errors <- err
		} else if len(master) == 0 {
			continue
		} else if master != lastMaster {
			lastMaster = master
			masters <- master
		}
		// TODO(k8s): Add Watch here, skip the polling for faster reactions
		// If done is closed, break out.
		select {
		case <-done:
			return
		case <-time.After(time.Duration((ttl*8)/10) * time.Second):
		}
	}
}

// Stop implements the watch.Interface interface.
func (e *etcdMasterElector) Stop() {
	close(e.done)
}
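For orientation, a minimal usage sketch of the elector above, assuming an already-configured etcd client c; the election path and candidate id are illustrative values.

// Campaign for the lock and log whoever currently holds it.
elector := NewEtcdMasterElector(c)
w := elector.Elect("/election/k8sm", "node-1")
defer w.Stop()
for ev := range w.ResultChan() {
	if m, ok := ev.Object.(Master); ok {
		glog.Infof("current master: %v", m)
	}
}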
@ -1,78 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package election

import (
	"testing"

	etcd "github.com/coreos/etcd/client"
	"golang.org/x/net/context"

	etcdtesting "k8s.io/kubernetes/pkg/storage/etcd/testing"
	"k8s.io/kubernetes/pkg/watch"
)

func TestEtcdMasterOther(t *testing.T) {
	server := etcdtesting.NewEtcdTestClientServer(t)
	defer server.Terminate(t)

	path := "foo"
	keysAPI := etcd.NewKeysAPI(server.Client)
	if _, err := keysAPI.Set(context.TODO(), path, "baz", nil); err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	master := NewEtcdMasterElector(server.Client)
	w := master.Elect(path, "bar")
	result := <-w.ResultChan()
	if result.Type != watch.Modified || result.Object.(Master) != "baz" {
		t.Errorf("unexpected event: %#v", result)
	}
	w.Stop()
}

func TestEtcdMasterNoOther(t *testing.T) {
	server := etcdtesting.NewEtcdTestClientServer(t)
	defer server.Terminate(t)

	path := "foo"
	master := NewEtcdMasterElector(server.Client)
	w := master.Elect(path, "bar")
	result := <-w.ResultChan()
	if result.Type != watch.Modified || result.Object.(Master) != "bar" {
		t.Errorf("unexpected event: %#v", result)
	}
	w.Stop()
}

func TestEtcdMasterNoOtherThenConflict(t *testing.T) {
	server := etcdtesting.NewEtcdTestClientServer(t)
	defer server.Terminate(t)

	path := "foo"
	master := NewEtcdMasterElector(server.Client)
	leader := NewEtcdMasterElector(server.Client)

	w_ldr := leader.Elect(path, "baz")
	result := <-w_ldr.ResultChan()
	w := master.Elect(path, "bar")
	result = <-w.ResultChan()
	if result.Type != watch.Modified || result.Object.(Master) != "baz" {
		t.Errorf("unexpected event: %#v", result)
	}
	w.Stop()
	w_ldr.Stop()
}
@ -1,53 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package election

import (
	"sync"

	"k8s.io/kubernetes/pkg/watch"
)

// Fake allows for testing of anything consuming a MasterElector.
type Fake struct {
	mux           *watch.Broadcaster
	currentMaster Master
	lock          sync.Mutex // Protect access of currentMaster
}

// NewFake makes a new fake MasterElector.
func NewFake() *Fake {
	// 0 means block for clients.
	return &Fake{mux: watch.NewBroadcaster(0, watch.WaitIfChannelFull)}
}

func (f *Fake) ChangeMaster(newMaster Master) {
	f.lock.Lock()
	defer f.lock.Unlock()
	f.mux.Action(watch.Modified, newMaster)
	f.currentMaster = newMaster
}

func (f *Fake) Elect(path, id string) watch.Interface {
	f.lock.Lock()
	defer f.lock.Unlock()
	w := f.mux.Watch()
	if f.currentMaster != "" {
		f.mux.Action(watch.Modified, f.currentMaster)
	}
	return w
}
@ -1,121 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package election

import (
	"k8s.io/kubernetes/contrib/mesos/pkg/runtime"
	"k8s.io/kubernetes/pkg/watch"

	"github.com/golang/glog"
)

// MasterElector is an interface for services that can elect masters.
// Important Note: MasterElectors are not inter-operable; all participants in the election need to be
// using the same underlying implementation of this interface for correct behavior.
type MasterElector interface {
	// Elect makes the caller represented by 'id' enter into a master election for the
	// distributed lock defined by 'path'.
	// The returned watch.Interface provides a stream of Master objects which
	// contain the current master.
	// Calling Stop on the returned interface relinquishes ownership (if currently possessed)
	// and removes the caller from the election.
	Elect(path, id string) watch.Interface
}

// Service represents anything that can start and stop on demand.
type Service interface {
	Validate(desired, current Master)
	Start()
	Stop()
}

type notifier struct {
	masters chan Master // elected masters arrive here, should be buffered to better deal with rapidly flapping masters

	// for comparison, to see if we are master.
	id Master

	service Service
}

// Notify runs Elect() on m, and calls Start()/Stop() on s when the
// elected master starts/stops matching 'id'. Never returns.
func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
	n := &notifier{id: Master(id), service: s, masters: make(chan Master, 1)}
	finished := runtime.After(func() {
		runtime.Until(func() {
			for {
				w := m.Elect(path, id)
				for {
					select {
					case <-abort:
						return
					case event, open := <-w.ResultChan():
						if !open {
							break
						}
						if event.Type != watch.Modified {
							continue
						}
						electedMaster, ok := event.Object.(Master)
						if !ok {
							glog.Errorf("Unexpected object from election channel: %v", event.Object)
							break
						}

					sendElected:
						for {
							select {
							case <-abort:
								return
							case n.masters <- electedMaster:
								break sendElected
							default: // ring full, discard old value and add the new
								select {
								case <-abort:
									return
								case <-n.masters:
								default: // ring was cleared for us?!
								}
							}
						}
					}
				}
			}
		}, 0, abort)
	})
	runtime.Until(func() { n.serviceLoop(finished) }, 0, abort)
}

// serviceLoop waits for changes, and calls Start()/Stop() as needed.
func (n *notifier) serviceLoop(abort <-chan struct{}) {
	var current Master
	for {
		select {
		case <-abort:
			return
		case desired := <-n.masters:
			if current != n.id && desired == n.id {
				n.service.Validate(desired, current)
				n.service.Start()
			} else if current == n.id && desired != n.id {
				n.service.Stop()
			}
			current = desired
		}
	}
}
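A minimal sketch of driving Notify with the Fake elector defined earlier; loggingService stands in for any Validate/Start/Stop implementation and is hypothetical, as are the path and id values.

m := NewFake()
abort := make(chan struct{})
go Notify(m, "/election/demo", "node-1", &loggingService{}, abort)
m.ChangeMaster(Master("node-1")) // service.Start() fires: we became master
m.ChangeMaster(Master("node-2")) // service.Stop() fires: we lost mastership
close(abort)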
@ -1,106 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package election

import (
	"testing"
	"time"

	"k8s.io/kubernetes/contrib/mesos/pkg/runtime"
)

type slowService struct {
	t  *testing.T
	on bool
	// We explicitly have no lock to prove that
	// Start and Stop are not called concurrently.
	changes chan<- bool
	done    <-chan struct{}
}

func (s *slowService) Validate(d, c Master) {
	// noop
}

func (s *slowService) Start() {
	select {
	case <-s.done:
		return // avoid writing to closed changes chan
	default:
	}
	if s.on {
		s.t.Errorf("started already on service")
	}
	time.Sleep(2 * time.Millisecond)
	s.on = true
	s.changes <- true
}

func (s *slowService) Stop() {
	select {
	case <-s.done:
		return // avoid writing to closed changes chan
	default:
	}
	if !s.on {
		s.t.Errorf("stopped already off service")
	}
	time.Sleep(2 * time.Millisecond)
	s.on = false
	s.changes <- false
}

func Test(t *testing.T) {
	m := NewFake()
	changes := make(chan bool, 1500)
	done := make(chan struct{})
	s := &slowService{t: t, changes: changes, done: done}

	// change the master such that the initial m.Elect call inside Notify
	// will trigger an observable event. We will wait for it to make sure the
	// Notify loop will see those master changes triggered by the goroutine below.
	m.ChangeMaster(Master("me"))
	temporaryWatch := m.mux.Watch()
	ch := temporaryWatch.ResultChan()

	notifyDone := runtime.After(func() { Notify(m, "", "me", s, done) })

	// wait for the event triggered by the initial m.Elect of Notify. Then drain
	// the channel to not block anything.
	<-ch
	temporaryWatch.Stop()
	for i := 0; i < len(ch); i += 1 { // go 1.3 and 1.4 compatible loop
		<-ch
	}

	go func() {
		defer close(done)
		for i := 0; i < 500; i++ {
			for _, key := range []string{"me", "notme", "alsonotme"} {
				m.ChangeMaster(Master(key))
			}
		}
	}()

	<-notifyDone
	close(changes)

	changesNum := len(changes)
	if changesNum > 1000 || changesNum == 0 {
		t.Errorf("unexpected number of changes: %v", changesNum)
	}
}
@ -1,45 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package executor

import (
	"k8s.io/kubernetes/contrib/mesos/pkg/node"
	"k8s.io/kubernetes/pkg/api"
	unversionedcore "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/unversioned"
)

type kubeAPI interface {
	killPod(ns, name string) error
}

type nodeAPI interface {
	createOrUpdate(hostname string, slaveAttrLabels, annotations map[string]string) (*api.Node, error)
}

// clientAPIWrapper implements the kubeAPI and nodeAPI interfaces, which serve to isolate external
// dependencies such that they're easier to mock in unit tests.
type clientAPIWrapper struct {
	client unversionedcore.CoreInterface
}

func (cw *clientAPIWrapper) killPod(ns, name string) error {
	return cw.client.Pods(ns).Delete(name, api.NewDeleteOptions(0))
}

func (cw *clientAPIWrapper) createOrUpdate(hostname string, slaveAttrLabels, annotations map[string]string) (*api.Node, error) {
	return node.CreateOrUpdate(cw.client, hostname, slaveAttrLabels, annotations)
}
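As a sketch of the mocking seam these interfaces create, a test could substitute a recording fake for the real client; fakeKubeAPI below is hypothetical, not part of the package.

// fakeKubeAPI records kill requests instead of calling the apiserver.
type fakeKubeAPI struct {
	killed []string // "namespace/name" of each pod killed
}

func (f *fakeKubeAPI) killPod(ns, name string) error {
	f.killed = append(f.killed, ns+"/"+name)
	return nil
}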
@ -1,29 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package config

import (
	"time"
)

// default values to use when constructing mesos ExecutorInfo messages
const (
	DefaultInfoID            = "k8sm-executor"
	DefaultInfoSource        = "kubernetes"
	DefaultSuicideTimeout    = 20 * time.Minute
	DefaultLaunchGracePeriod = 5 * time.Minute
)
@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package config contains executor configuration constants.
package config // import "k8s.io/kubernetes/contrib/mesos/pkg/executor/config"
@ -1,21 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

/*
Package executor includes a mesos executor, which contains
a kubelet as its member to manage containers.
*/
package executor // import "k8s.io/kubernetes/contrib/mesos/pkg/executor"
@ -1,755 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package executor

import (
	"bytes"
	"encoding/json"
	"fmt"
	"math"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"

	dockertypes "github.com/docker/engine-api/types"
	"github.com/gogo/protobuf/proto"
	log "github.com/golang/glog"
	bindings "github.com/mesos/mesos-go/executor"
	mesos "github.com/mesos/mesos-go/mesosproto"
	mutil "github.com/mesos/mesos-go/mesosutil"
	"k8s.io/kubernetes/contrib/mesos/pkg/executor/messages"
	"k8s.io/kubernetes/contrib/mesos/pkg/node"
	"k8s.io/kubernetes/contrib/mesos/pkg/podutil"
	"k8s.io/kubernetes/contrib/mesos/pkg/runtime"
	"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/executorinfo"
	"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/meta"
	"k8s.io/kubernetes/pkg/api"
	apierrors "k8s.io/kubernetes/pkg/api/errors"
	"k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/kubernetes/pkg/kubelet/dockertools"
	kruntime "k8s.io/kubernetes/pkg/runtime"
	utilruntime "k8s.io/kubernetes/pkg/util/runtime"
)

type stateType int32

const (
	disconnectedState stateType = iota
	connectedState
	suicidalState
	terminalState
)

func (s *stateType) get() stateType {
	return stateType(atomic.LoadInt32((*int32)(s)))
}

func (s *stateType) transition(from, to stateType) bool {
	return atomic.CompareAndSwapInt32((*int32)(s), int32(from), int32(to))
}

func (s *stateType) transitionTo(to stateType, unless ...stateType) bool {
	if len(unless) == 0 {
		atomic.StoreInt32((*int32)(s), int32(to))
		return true
	}
	for {
		state := s.get()
		for _, x := range unless {
			if state == x {
				return false
			}
		}
		if s.transition(state, to) {
			return true
		}
	}
}
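// Usage sketch (illustrative): transitionTo is a lock-free CAS loop whose
// variadic "unless" arguments veto the transition from the listed states,
// e.g. enter suicidalState only if not already suicidal or terminal, as the
// suicide path further below does:
//
//	if (&k.state).transitionTo(suicidalState, suicidalState, terminalState) {
//		// won the race; begin shutdown exactly once
//	}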
// Executor is a Mesos executor that runs pods
// in a minion machine.
type Executor struct {
	state                stateType
	lock                 sync.Mutex
	terminate            chan struct{}                     // signals that the executor is shutting down
	outgoing             chan func() (mesos.Status, error) // outgoing queue to the mesos driver
	dockerClient         dockertools.DockerInterface
	suicideWatch         suicideWatcher
	suicideTimeout       time.Duration
	shutdownAlert        func()          // invoked just prior to executor shutdown
	kubeletFinished      <-chan struct{} // signals that kubelet Run() died
	exitFunc             func(int)
	staticPodsConfigPath string
	staticPodsFilters    podutil.Filters
	launchGracePeriod    time.Duration
	nodeInfos            chan<- NodeInfo
	initCompleted        chan struct{} // closes upon completion of Init()
	registry             Registry
	watcher              *watcher
	kubeAPI              kubeAPI
	nodeAPI              nodeAPI
}

type Config struct {
	APIClient         *clientset.Clientset
	Docker            dockertools.DockerInterface
	ShutdownAlert     func()
	SuicideTimeout    time.Duration
	KubeletFinished   <-chan struct{} // signals that kubelet Run() died
	ExitFunc          func(int)
	LaunchGracePeriod time.Duration
	NodeInfos         chan<- NodeInfo
	Registry          Registry
	Options           []Option // functional options
}

// Option is a functional option type for Executor
type Option func(*Executor)

func (k *Executor) isConnected() bool {
	return connectedState == (&k.state).get()
}

// New creates a new kubernetes executor.
func New(config Config) *Executor {
	launchGracePeriod := config.LaunchGracePeriod
	if launchGracePeriod == 0 {
		// this is the equivalent of saying "the timer never expires" and simplifies nil
		// timer checks elsewhere in the code. it's a little hacky but less code to
		// maintain than alternative approaches.
		launchGracePeriod = time.Duration(math.MaxInt64)
	}
	k := &Executor{
		state:             disconnectedState,
		terminate:         make(chan struct{}),
		outgoing:          make(chan func() (mesos.Status, error), 1024),
		dockerClient:      config.Docker,
		suicideTimeout:    config.SuicideTimeout,
		kubeletFinished:   config.KubeletFinished,
		suicideWatch:      &suicideTimer{},
		shutdownAlert:     config.ShutdownAlert,
		exitFunc:          config.ExitFunc,
		launchGracePeriod: launchGracePeriod,
		nodeInfos:         config.NodeInfos,
		initCompleted:     make(chan struct{}),
		registry:          config.Registry,
	}
	if config.APIClient != nil {
		k.kubeAPI = &clientAPIWrapper{config.APIClient.Core()}
		k.nodeAPI = &clientAPIWrapper{config.APIClient.Core()}
	}

	// apply functional options
	for _, opt := range config.Options {
		opt(k)
	}

	runtime.On(k.initCompleted, k.runSendLoop)

	k.watcher = newWatcher(k.registry.watch())
	runtime.On(k.initCompleted, k.watcher.run)

	return k
}

// StaticPods creates a static pods Option for an Executor
func StaticPods(configPath string, f podutil.Filters) Option {
	return func(k *Executor) {
		k.staticPodsFilters = f
		k.staticPodsConfigPath = configPath
	}
}
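A short sketch of the functional-options construction shown above, from a hypothetical caller outside the package; the path and filters are placeholders, and required fields (API client, Docker client, channels, registry) are omitted for brevity.

var filters podutil.Filters // zero value as a placeholder
exec := executor.New(executor.Config{
	SuicideTimeout:    config.DefaultSuicideTimeout,    // 20m with no pods -> shut down
	LaunchGracePeriod: config.DefaultLaunchGracePeriod, // 5m for a pod to come up
	Options: []executor.Option{
		executor.StaticPods("/etc/kubernetes/static-pods", filters), // illustrative path
	},
})
_ = exec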
// Done returns a chan that closes when the executor is shutting down
|
||||
func (k *Executor) Done() <-chan struct{} {
|
||||
return k.terminate
|
||||
}
|
||||
|
||||
func (k *Executor) Init(driver bindings.ExecutorDriver) {
|
||||
defer close(k.initCompleted)
|
||||
|
||||
k.killKubeletContainers()
|
||||
k.resetSuicideWatch(driver)
|
||||
|
||||
k.watcher.addFilter(func(podEvent *PodEvent) bool {
|
||||
switch podEvent.eventType {
|
||||
case PodEventIncompatibleUpdate:
|
||||
log.Warningf("killing %s because of an incompatible update", podEvent.FormatShort())
|
||||
k.killPodTask(driver, podEvent.taskID)
|
||||
// halt processing of this event; when the pod is deleted we'll receive another
|
||||
// event for that.
|
||||
return false
|
||||
|
||||
case PodEventDeleted:
|
||||
// an active pod-task was deleted, alert mesos:
|
||||
// send back a TASK_KILLED status, we completed the pod-task lifecycle normally.
|
||||
k.resetSuicideWatch(driver)
|
||||
k.sendStatus(driver, newStatus(mutil.NewTaskID(podEvent.taskID), mesos.TaskState_TASK_KILLED, "pod-deleted"))
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
//TODO(jdef) monitor kubeletFinished and shutdown if it happens
|
||||
}
|
||||
|
||||
func (k *Executor) isDone() bool {
|
||||
select {
|
||||
case <-k.terminate:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Registered is called when the executor is successfully registered with the slave.
|
||||
func (k *Executor) Registered(
|
||||
driver bindings.ExecutorDriver,
|
||||
executorInfo *mesos.ExecutorInfo,
|
||||
frameworkInfo *mesos.FrameworkInfo,
|
||||
slaveInfo *mesos.SlaveInfo,
|
||||
) {
|
||||
if k.isDone() {
|
||||
return
|
||||
}
|
||||
|
||||
log.Infof(
|
||||
"Executor %v of framework %v registered with slave %v\n",
|
||||
executorInfo, frameworkInfo, slaveInfo,
|
||||
)
|
||||
|
||||
if !(&k.state).transition(disconnectedState, connectedState) {
|
||||
log.Errorf("failed to register/transition to a connected state")
|
||||
}
|
||||
|
||||
k.initializeStaticPodsSource(executorInfo)
|
||||
|
||||
annotations, err := annotationsFor(executorInfo)
|
||||
if err != nil {
|
||||
log.Errorf(
|
||||
"cannot get node annotations from executor info %v error %v",
|
||||
executorInfo, err,
|
||||
)
|
||||
}
|
||||
|
||||
if slaveInfo != nil {
|
||||
_, err := k.nodeAPI.createOrUpdate(
|
||||
slaveInfo.GetHostname(),
|
||||
node.SlaveAttributesToLabels(slaveInfo.Attributes),
|
||||
annotations,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("cannot update node labels: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
k.lock.Lock()
|
||||
defer k.lock.Unlock()
|
||||
|
||||
if slaveInfo != nil && k.nodeInfos != nil {
|
||||
k.nodeInfos <- nodeInfo(slaveInfo, executorInfo) // leave it behind the upper lock to avoid panics
|
||||
}
|
||||
}
|
||||
|
||||
// Reregistered is called when the executor is successfully re-registered with the slave.
|
||||
// This can happen when the slave fails over.
|
||||
func (k *Executor) Reregistered(driver bindings.ExecutorDriver, slaveInfo *mesos.SlaveInfo) {
|
||||
if k.isDone() {
|
||||
return
|
||||
}
|
||||
log.Infof("Reregistered with slave %v\n", slaveInfo)
|
||||
if !(&k.state).transition(disconnectedState, connectedState) {
|
||||
log.Errorf("failed to reregister/transition to a connected state")
|
||||
}
|
||||
|
||||
if slaveInfo != nil {
|
||||
_, err := k.nodeAPI.createOrUpdate(
|
||||
slaveInfo.GetHostname(),
|
||||
node.SlaveAttributesToLabels(slaveInfo.Attributes),
|
||||
nil, // don't change annotations
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("cannot update node labels: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if slaveInfo != nil && k.nodeInfos != nil {
|
||||
// make sure nodeInfos is not nil and send new NodeInfo
|
||||
k.lock.Lock()
|
||||
defer k.lock.Unlock()
|
||||
if k.isDone() {
|
||||
return
|
||||
}
|
||||
k.nodeInfos <- nodeInfo(slaveInfo, nil)
|
||||
}
|
||||
}
|
||||
|
||||
// initializeStaticPodsSource unzips the data slice into the static-pods directory
|
||||
func (k *Executor) initializeStaticPodsSource(executorInfo *mesos.ExecutorInfo) {
|
||||
if data := executorInfo.GetData(); len(data) > 0 && k.staticPodsConfigPath != "" {
|
||||
log.V(2).Infof("extracting static pods config to %s", k.staticPodsConfigPath)
|
||||
err := podutil.WriteToDir(
|
||||
k.staticPodsFilters.Do(podutil.Gunzip(executorInfo.Data)),
|
||||
k.staticPodsConfigPath,
|
||||
)
|
||||
if err != nil {
|
||||
log.Errorf("failed to initialize static pod configuration: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Disconnected is called when the executor is disconnected from the slave.
|
||||
func (k *Executor) Disconnected(driver bindings.ExecutorDriver) {
|
||||
if k.isDone() {
|
||||
return
|
||||
}
|
||||
log.Infof("Slave is disconnected\n")
|
||||
if !(&k.state).transition(connectedState, disconnectedState) {
|
||||
log.Errorf("failed to disconnect/transition to a disconnected state")
|
||||
}
|
||||
}
|
||||
|
||||
// LaunchTask is called when the executor receives a request to launch a task.
|
||||
// The happens when the k8sm scheduler has decided to schedule the pod
|
||||
// (which corresponds to a Mesos Task) onto the node where this executor
|
||||
// is running, but the binding is not recorded in the Kubernetes store yet.
|
||||
// This function is invoked to tell the executor to record the binding in the
|
||||
// Kubernetes store and start the pod via the Kubelet.
|
||||
func (k *Executor) LaunchTask(driver bindings.ExecutorDriver, taskInfo *mesos.TaskInfo) {
|
||||
if k.isDone() {
|
||||
return
|
||||
}
|
||||
|
||||
log.Infof("Launch task %v\n", taskInfo)
|
||||
|
||||
taskID := taskInfo.GetTaskId().GetValue()
|
||||
if p := k.registry.pod(taskID); p != nil {
|
||||
log.Warningf("task %v already launched", taskID)
|
||||
// Not to send back TASK_RUNNING or TASK_FAILED here, because
|
||||
// may be duplicated messages
|
||||
return
|
||||
}
|
||||
|
||||
if !k.isConnected() {
|
||||
log.Errorf("Ignore launch task because the executor is disconnected\n")
|
||||
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
|
||||
messages.ExecutorUnregistered))
|
||||
return
|
||||
}
|
||||
|
||||
obj, err := kruntime.Decode(api.Codecs.UniversalDecoder(), taskInfo.GetData())
|
||||
if err != nil {
|
||||
log.Errorf("failed to extract yaml data from the taskInfo.data %v", err)
|
||||
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
|
||||
messages.UnmarshalTaskDataFailure))
|
||||
return
|
||||
}
|
||||
pod, ok := obj.(*api.Pod)
|
||||
if !ok {
|
||||
log.Errorf("expected *api.Pod instead of %T: %+v", pod, pod)
|
||||
k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
|
||||
messages.UnmarshalTaskDataFailure))
|
||||
return
|
||||
}
|
||||
|
||||
k.resetSuicideWatch(driver)
|
||||
|
||||
// run the next step aync because it calls out to apiserver and we don't want to block here
|
||||
go k.bindAndWatchTask(driver, taskInfo, time.NewTimer(k.launchGracePeriod), pod)
|
||||
}
|
||||
|
||||
// determine whether we need to start a suicide countdown. if so, then start
|
||||
// a timer that, upon expiration, causes this executor to commit suicide.
|
||||
// this implementation runs asynchronously. callers that wish to wait for the
|
||||
// reset to complete may wait for the returned signal chan to close.
|
||||
func (k *Executor) resetSuicideWatch(driver bindings.ExecutorDriver) <-chan struct{} {
|
||||
ch := make(chan struct{})
|
||||
go func() {
|
||||
defer close(ch)
|
||||
k.lock.Lock()
|
||||
defer k.lock.Unlock()
|
||||
|
||||
if k.suicideTimeout < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
if k.suicideWatch != nil {
|
||||
if !k.registry.empty() {
|
||||
k.suicideWatch.Stop()
|
||||
return
|
||||
}
|
||||
if k.suicideWatch.Reset(k.suicideTimeout) {
|
||||
// valid timer, reset was successful
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
//TODO(jdef) reduce verbosity here once we're convinced that suicide watch is working properly
|
||||
log.Infof("resetting suicide watch timer for %v", k.suicideTimeout)
|
||||
|
||||
k.suicideWatch = k.suicideWatch.Next(k.suicideTimeout, driver, jumper(k.attemptSuicide))
|
||||
}()
|
||||
return ch
|
||||
}
|
||||
|
||||
func (k *Executor) attemptSuicide(driver bindings.ExecutorDriver, abort <-chan struct{}) {
|
||||
k.lock.Lock()
|
||||
defer k.lock.Unlock()
|
||||
|
||||
// this attempt may have been queued and since been aborted
|
||||
select {
|
||||
case <-abort:
|
||||
//TODO(jdef) reduce verbosity once suicide watch is working properly
|
||||
log.Infof("aborting suicide attempt since watch was cancelled")
|
||||
return
|
||||
default: // continue
|
||||
}
|
||||
|
||||
// fail-safe, will abort kamikaze attempts if there are tasks
|
||||
if !k.registry.empty() {
|
||||
log.Errorf("suicide attempt failed, there are still running tasks")
|
||||
return
|
||||
}
|
||||
|
||||
log.Infoln("Attempting suicide")
|
||||
if (&k.state).transitionTo(suicidalState, suicidalState, terminalState) {
|
||||
//TODO(jdef) let the scheduler know?
|
||||
//TODO(jdef) is suicide more graceful than slave-demanded shutdown?
|
||||
k.doShutdown(driver)
|
||||
}
|
||||
}
|
||||
|
||||
func podStatusData(pod *api.Pod, status api.PodStatus) ([]byte, string, error) {
|
||||
podFullName := container.GetPodFullName(pod)
|
||||
data, err := json.Marshal(api.PodStatusResult{
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
Name: podFullName,
|
||||
SelfLink: "/podstatusresult",
|
||||
},
|
||||
Status: status,
|
||||
})
|
||||
return data, podFullName, err
|
||||
}
|
||||
|
||||
// async continuation of LaunchTask
func (k *Executor) bindAndWatchTask(driver bindings.ExecutorDriver, task *mesos.TaskInfo, launchTimer *time.Timer, pod *api.Pod) {
	success := false
	defer func() {
		if !success {
			k.killPodTask(driver, task.TaskId.GetValue())
			k.resetSuicideWatch(driver)
		}
	}()

	// allow a recently failed-over scheduler the chance to recover the task/pod binding:
	// it may have failed and recovered before the apiserver is able to report the updated
	// binding information. replays of this status event will signal to the scheduler that
	// the apiserver should be up-to-date.
	startingData, _, err := podStatusData(pod, api.PodStatus{})
	if err != nil {
		log.Errorf("failed to generate pod-task starting data for task %v pod %v/%v: %v",
			task.TaskId.GetValue(), pod.Namespace, pod.Name, err)
		k.sendStatus(driver, newStatus(task.TaskId, mesos.TaskState_TASK_FAILED, err.Error()))
		return
	}

	err = k.registry.bind(task.TaskId.GetValue(), pod)
	if err != nil {
		log.Errorf("failed to bind task %v pod %v/%v: %v",
			task.TaskId.GetValue(), pod.Namespace, pod.Name, err)
		k.sendStatus(driver, newStatus(task.TaskId, mesos.TaskState_TASK_FAILED, err.Error()))
		return
	}

	// send TASK_STARTING
	k.sendStatus(driver, &mesos.TaskStatus{
		TaskId:  task.TaskId,
		State:   mesos.TaskState_TASK_STARTING.Enum(),
		Message: proto.String(messages.CreateBindingSuccess),
		Data:    startingData,
	})

	// within the launch timeout window we should see a pod-task update via the registry.
	// if we see a Running update then we need to generate a TASK_RUNNING status update for mesos.
	handlerFinished := false
	handler := &watchHandler{
		expiration: watchExpiration{
			timeout: launchTimer.C,
			onEvent: func(taskID string) {
				if !handlerFinished {
					// launch timeout expired
					k.killPodTask(driver, task.TaskId.GetValue())
				}
			},
		},
		onEvent: func(podEvent *PodEvent) (bool, error) {
			switch podEvent.eventType {
			case PodEventUpdated:
				log.V(2).Infof("Found status: '%v' for %s", podEvent.pod.Status, podEvent.FormatShort())

				if podEvent.pod.Status.Phase != api.PodRunning {
					// still waiting for pod to transition to a running state, so
					// we're not done monitoring yet; check back later..
					break
				}

				data, podFullName, err := podStatusData(podEvent.pod, podEvent.pod.Status)
				if err != nil {
					return false, fmt.Errorf("failed to marshal pod status result: %v", err)
				}

				defer k.sendStatus(driver, &mesos.TaskStatus{
					TaskId:  task.TaskId,
					State:   mesos.TaskState_TASK_RUNNING.Enum(),
					Message: proto.String("pod-running:" + podFullName),
					Data:    data,
				})
				fallthrough

			case PodEventDeleted:
				// we're done monitoring because the pod has been deleted
				handlerFinished = true
				launchTimer.Stop()
			}
			return handlerFinished, nil
		},
	}
	k.watcher.forTask(task.TaskId.GetValue(), handler)
	success = true
}

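// A minimal watchHandler, for illustration, using the field shapes of the
// handler constructed above: it finishes on the first registry event and sets
// no expiration deadline. Purely a sketch; it is not registered anywhere.
var exampleHandler = &watchHandler{
	expiration: watchExpiration{
		timeout: nil, // nil channel: never expires
		onEvent: func(taskID string) {},
	},
	onEvent: func(ev *PodEvent) (bool, error) {
		return true, nil // done after the first event
	},
}
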
// KillTask is called when the executor receives a request to kill a task.
func (k *Executor) KillTask(driver bindings.ExecutorDriver, taskId *mesos.TaskID) {
	k.killPodTask(driver, taskId.GetValue())
}

// deletes the pod and task associated with the task identified by taskID and sends a task
// status update to mesos. also attempts to reset the suicide watch.
func (k *Executor) killPodTask(driver bindings.ExecutorDriver, taskID string) {
	pod := k.registry.pod(taskID)
	if pod == nil {
		log.V(1).Infof("Failed to remove task, unknown task %v\n", taskID)
		k.sendStatus(driver, newStatus(&mesos.TaskID{Value: &taskID}, mesos.TaskState_TASK_LOST, "kill-pod-task"))
		return
	}

	// force-delete the pod from the API server
	// TODO(jdef) possibly re-use eviction code from stock k8s once it lands?
	err := k.kubeAPI.killPod(pod.Namespace, pod.Name)
	if err != nil {
		log.V(1).Infof("failed to delete task %v pod %v/%v from apiserver: %+v", taskID, pod.Namespace, pod.Name, err)
		if apierrors.IsNotFound(err) {
			k.sendStatus(driver, newStatus(&mesos.TaskID{Value: &taskID}, mesos.TaskState_TASK_LOST, "kill-pod-task"))
		}
	}
}

// FrameworkMessage is called when the framework sends some message to the executor
func (k *Executor) FrameworkMessage(driver bindings.ExecutorDriver, message string) {
	if k.isDone() {
		return
	}
	if !k.isConnected() {
		log.Warningf("Ignoring framework message because the executor is disconnected\n")
		return
	}

	log.Infof("Received message from framework: %v\n", message)
	//TODO(jdef) master reported a lost task, reconcile this! @see framework.go:handleTaskLost
	if strings.HasPrefix(message, messages.TaskLost+":") {
		taskId := message[len(messages.TaskLost)+1:]
		if taskId != "" {
			// TODO(jdef) would it make more sense to check the status of the task and
			// just replay the last non-terminal message that we sent if the task is
			// still active?

			// clean up pod state
			k.sendStatus(driver, newStatus(&mesos.TaskID{Value: &taskId}, mesos.TaskState_TASK_LOST, messages.TaskLostAck))
			k.killPodTask(driver, taskId)
		}
		return
	}

	switch message {
	case messages.Kamikaze:
		k.attemptSuicide(driver, nil)
	}
}

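// Framework messages are plain strings. The lost-task form handled above is
// the TaskLost constant, a colon, and the task ID; a scheduler-side send would
// look like this (the task ID is illustrative):
//
//	driver.SendFrameworkMessage("task-lost:pod.deadbeef")
//
// while Kamikaze arrives as the bare constant "kamikaze" (see the messages
// package below).
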
// Shutdown is called when the executor receives a shutdown request.
func (k *Executor) Shutdown(driver bindings.ExecutorDriver) {
	k.lock.Lock()
	defer k.lock.Unlock()
	k.doShutdown(driver)
}

// assumes that caller has obtained state lock
func (k *Executor) doShutdown(driver bindings.ExecutorDriver) {
	defer func() {
		log.Errorf("exiting with unclean shutdown: %v", recover())
		if k.exitFunc != nil {
			k.exitFunc(1)
		}
	}()

	(&k.state).transitionTo(terminalState)

	// signal to all listeners that this KubeletExecutor is done!
	close(k.terminate)
	close(k.nodeInfos)

	if k.shutdownAlert != nil {
		func() {
			defer utilruntime.HandleCrash()
			k.shutdownAlert()
		}()
	}

	log.Infoln("Stopping executor driver")
	_, err := driver.Stop()
	if err != nil {
		log.Warningf("failed to stop executor driver: %v", err)
	}

	log.Infoln("Shutting down the executor")

	// according to docs, mesos will generate TASK_LOST updates for us
	// if needed, so don't take extra time to do that here.
	k.registry.shutdown()

	select {
	// the main Run() func may still be running... wait for it to finish: it will
	// clear the pod configuration cleanly, telling k8s "there are no pods" and
	// clean up resources (pods, volumes, etc).
	case <-k.kubeletFinished:

	//TODO(jdef) attempt to wait for events to propagate to API server?

	// TODO(jdef) extract constant, should be smaller than whatever the
	// slave graceful shutdown timeout period is.
	case <-time.After(15 * time.Second):
		log.Errorf("timed out waiting for kubelet Run() to die")
	}
	log.Infoln("exiting")
	if k.exitFunc != nil {
		k.exitFunc(0)
	}
}

// Destroy existing k8s containers
func (k *Executor) killKubeletContainers() {
	if containers, err := dockertools.GetKubeletDockerContainers(k.dockerClient, true); err == nil {
		opts := dockertypes.ContainerRemoveOptions{
			RemoveVolumes: true,
			Force:         true,
		}
		for _, container := range containers {
			log.V(2).Infof("Removing container: %v", container.ID)
			if err := k.dockerClient.RemoveContainer(container.ID, opts); err != nil {
				log.Warning(err)
			}
		}
	} else {
		log.Warningf("Failed to list kubelet docker containers: %v", err)
	}
}

// Error is called when some error happens.
func (k *Executor) Error(driver bindings.ExecutorDriver, message string) {
	log.Errorln(message)
}

func newStatus(taskId *mesos.TaskID, state mesos.TaskState, message string) *mesos.TaskStatus {
	return &mesos.TaskStatus{
		TaskId:  taskId,
		State:   &state,
		Message: proto.String(message),
	}
}

func (k *Executor) sendStatus(driver bindings.ExecutorDriver, status *mesos.TaskStatus) {
	select {
	case <-k.terminate:
	default:
		k.outgoing <- func() (mesos.Status, error) { return driver.SendStatusUpdate(status) }
	}
}

func (k *Executor) sendFrameworkMessage(driver bindings.ExecutorDriver, msg string) {
	select {
	case <-k.terminate:
	default:
		k.outgoing <- func() (mesos.Status, error) { return driver.SendFrameworkMessage(msg) }
	}
}

func (k *Executor) runSendLoop() {
	defer log.V(1).Info("sender loop exiting")
	for {
		select {
		case <-k.terminate:
			return
		default:
			if !k.isConnected() {
				select {
				case <-k.terminate:
				case <-time.After(1 * time.Second):
				}
				continue
			}
			sender, ok := <-k.outgoing
			if !ok {
				// programming error
				panic("someone closed the outgoing channel")
			}
			if status, err := sender(); err == nil {
				continue
			} else {
				log.Error(err)
				if status == mesos.Status_DRIVER_ABORTED {
					return
				}
			}
			// attempt to re-queue the sender
			select {
			case <-k.terminate:
			case k.outgoing <- sender:
			}
		}
	}
}

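// Send-loop behavior above, summarized: queued senders are drained only while
// connected; a failed send is re-queued on k.outgoing unless the driver
// aborted, so updates are retried until delivery or termination. Enqueueing
// work directly looks like this sketch (normally done via sendStatus or
// sendFrameworkMessage above; the message text is illustrative):
//
//	k.outgoing <- func() (mesos.Status, error) {
//		return driver.SendFrameworkMessage("sync")
//	}
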
func annotationsFor(ei *mesos.ExecutorInfo) (annotations map[string]string, err error) {
	annotations = map[string]string{}
	if ei == nil {
		return
	}

	var buf bytes.Buffer
	if err = executorinfo.EncodeResources(&buf, ei.GetResources()); err != nil {
		return
	}

	annotations[meta.ExecutorIdKey] = ei.GetExecutorId().GetValue()
	annotations[meta.ExecutorResourcesKey] = buf.String()

	return
}
@ -1,636 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package executor

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/http/httptest"
	"os"
	"path/filepath"
	"reflect"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	assertext "k8s.io/kubernetes/contrib/mesos/pkg/assert"
	"k8s.io/kubernetes/contrib/mesos/pkg/executor/messages"
	"k8s.io/kubernetes/contrib/mesos/pkg/podutil"
	kmruntime "k8s.io/kubernetes/contrib/mesos/pkg/runtime"
	"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/podtask"
	"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/podtask/hostport"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/testapi"
	"k8s.io/kubernetes/pkg/api/unversioned"
	"k8s.io/kubernetes/pkg/client/cache"
	"k8s.io/kubernetes/pkg/kubelet/dockertools"
	"k8s.io/kubernetes/pkg/runtime"
	utiltesting "k8s.io/kubernetes/pkg/util/testing"
	"k8s.io/kubernetes/pkg/util/wait"
	"k8s.io/kubernetes/pkg/watch"

	"github.com/mesos/mesos-go/mesosproto"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/mock"
)

// TestExecutorRegister ensures that the executor thinks it is connected
// after Register is called.
func TestExecutorRegister(t *testing.T) {
	mockDriver := &MockExecutorDriver{}
	executor := NewTestKubernetesExecutor()

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	assert.Equal(t, true, executor.isConnected(), "executor should be connected")
	mockDriver.AssertExpectations(t)
}

// TestExecutorDisconnect ensures that the executor thinks that it is not
// connected after a call to Disconnected has occurred.
func TestExecutorDisconnect(t *testing.T) {
	mockDriver := &MockExecutorDriver{}
	executor := NewTestKubernetesExecutor()

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)
	executor.Disconnected(mockDriver)

	assert.Equal(t, false, executor.isConnected(),
		"executor should not be connected after Disconnected")
	mockDriver.AssertExpectations(t)
}

// TestExecutorReregister ensures that the executor thinks it is connected
// after a connection problem happens, followed by a call to Reregistered.
func TestExecutorReregister(t *testing.T) {
	mockDriver := &MockExecutorDriver{}
	executor := NewTestKubernetesExecutor()

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)
	executor.Disconnected(mockDriver)
	executor.Reregistered(mockDriver, nil)

	assert.Equal(t, true, executor.isConnected(), "executor should be connected")
	mockDriver.AssertExpectations(t)
}

type fakeRegistry struct {
	sync.Mutex
	boundTasks map[string]*api.Pod
	updates    chan *PodEvent
}

func newFakeRegistry() *fakeRegistry {
	return &fakeRegistry{boundTasks: map[string]*api.Pod{}, updates: make(chan *PodEvent, 100)}
}

func (r *fakeRegistry) empty() bool {
	r.Lock()
	defer r.Unlock()
	return len(r.boundTasks) == 0
}

func (r *fakeRegistry) pod(taskID string) *api.Pod {
	r.Lock()
	defer r.Unlock()
	return r.boundTasks[taskID]
}

func (r *fakeRegistry) watch() <-chan *PodEvent { return r.updates }

func (r *fakeRegistry) shutdown() {
	r.Lock()
	defer r.Unlock()
	r.boundTasks = map[string]*api.Pod{}
}

func (r *fakeRegistry) bind(taskID string, pod *api.Pod) error {
	r.Lock()
	defer r.Unlock()
	pod.Annotations = map[string]string{
		"k8s.mesosphere.io/taskId": taskID,
	}
	r.boundTasks[taskID] = pod

	// the normal registry sends a bind..
	r.updates <- &PodEvent{pod: pod, taskID: taskID, eventType: PodEventBound}
	return nil
}

func (r *fakeRegistry) Update(pod *api.Pod) (*PodEvent, error) {
	r.Lock()
	defer r.Unlock()
	taskID, err := taskIDFor(pod)
	if err != nil {
		return nil, err
	}
	if _, ok := r.boundTasks[taskID]; !ok {
		return nil, errUnknownTask
	}
	rp := &PodEvent{pod: pod, taskID: taskID, eventType: PodEventUpdated}
	r.updates <- rp
	return rp, nil
}

func (r *fakeRegistry) Remove(taskID string) error {
	r.Lock()
	defer r.Unlock()
	pod, ok := r.boundTasks[taskID]
	if !ok {
		return errUnknownTask
	}
	delete(r.boundTasks, taskID)
	r.updates <- &PodEvent{pod: pod, taskID: taskID, eventType: PodEventDeleted}
	return nil
}

// phaseChange simulates a pod source update; normally this update is generated from a watch
func (r *fakeRegistry) phaseChange(pod *api.Pod, phase api.PodPhase) error {
	clone, err := api.Scheme.DeepCopy(pod)
	if err != nil {
		return err
	}

	phasedPod := clone.(*api.Pod)
	phasedPod.Status.Phase = phase
	_, err = r.Update(phasedPod)
	return err
}

// TestExecutorLaunchAndKillTask ensures that the executor is able to launch tasks and generates
// appropriate status messages for mesos. It then kills the task and validates that appropriate
// actions are taken by the executor.
func TestExecutorLaunchAndKillTask(t *testing.T) {
	var (
		mockDriver = &MockExecutorDriver{}
		registry   = newFakeRegistry()
		executor   = New(Config{
			Docker:    dockertools.ConnectToDockerOrDie("fake://", 0),
			NodeInfos: make(chan NodeInfo, 1),
			Registry:  registry,
		})
		mockKubeAPI  = &mockKubeAPI{}
		pod          = NewTestPod(1)
		executorinfo = &mesosproto.ExecutorInfo{}
	)
	executor.kubeAPI = mockKubeAPI
	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	podTask, err := podtask.New(
		api.NewDefaultContext(),
		podtask.Config{
			Prototype:        executorinfo,
			HostPortStrategy: hostport.StrategyWildcard,
		},
		pod,
	)
	assert.Equal(t, nil, err, "must be able to create a task from a pod")

	pod.Annotations = map[string]string{
		"k8s.mesosphere.io/taskId": podTask.ID,
	}

	podTask.Spec = &podtask.Spec{Executor: executorinfo}
	taskInfo, err := podTask.BuildTaskInfo()
	assert.Equal(t, nil, err, "must be able to build task info")

	data, err := runtime.Encode(testapi.Default.Codec(), pod)
	assert.Equal(t, nil, err, "must be able to encode a pod's spec data")

	taskInfo.Data = data
	var statusUpdateCalls sync.WaitGroup
	statusUpdateCalls.Add(1)
	statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }

	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return !registry.empty()
	}, "executor must be able to create a task and a pod")

	// simulate a pod source update; normally this update is generated when binding a pod
	err = registry.phaseChange(pod, api.PodPending)
	assert.NoError(t, err)

	// simulate a pod source update; normally this update is generated by the kubelet once the pod is healthy
	err = registry.phaseChange(pod, api.PodRunning)
	assert.NoError(t, err)

	// Allow some time for asynchronous requests to the driver.
	finished := kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_KILLED,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	// simulate what happens when the apiserver is told to delete a pod
	mockKubeAPI.On("killPod", pod.Namespace, pod.Name).Return(nil).Run(func(_ mock.Arguments) {
		registry.Remove(podTask.ID)
	})

	executor.KillTask(mockDriver, taskInfo.TaskId)
	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return registry.empty()
	}, "executor must be able to kill a created task and pod")

	// Allow some time for asynchronous requests to the driver.
	finished = kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	mockDriver.AssertExpectations(t)
	mockKubeAPI.AssertExpectations(t)
}

// TestExecutorInitializeStaticPodsSource tests that the ExecutorInfo.data is parsed
// as a gzipped archive containing pod definitions.
func TestExecutorInitializeStaticPodsSource(t *testing.T) {
	// create a gzipped archive with static pod definitions
	givenPodsDir, err := utiltesting.MkTmpdir("executor-givenpods")
	assert.NoError(t, err)
	defer os.RemoveAll(givenPodsDir)

	var wg sync.WaitGroup
	reportErrors := func(errCh <-chan error) {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for err := range errCh {
				t.Error(err)
			}
		}()
	}

	createStaticPodFile := func(fileName, name string) {
		spod := `{
			"apiVersion": "v1",
			"kind": "Pod",
			"metadata": {
				"name": "%v",
				"namespace": "staticpods",
				"labels": { "name": "foo", "cluster": "bar" }
			},
			"spec": {
				"containers": [{
					"name": "%v",
					"image": "library/nginx",
					"ports": [{ "containerPort": 80, "name": "http" }]
				}]
			}
		}`
		destfile := filepath.Join(givenPodsDir, fileName)
		err = os.MkdirAll(filepath.Dir(destfile), 0770)
		assert.NoError(t, err)
		err = ioutil.WriteFile(destfile, []byte(fmt.Sprintf(spod, name, name)), 0660)
		assert.NoError(t, err)
	}

	createStaticPodFile("spod.json", "spod-01")
	createStaticPodFile("spod2.json", "spod-02")
	createStaticPodFile("dir/spod.json", "spod-03") // same file name as first one to check for overwriting
	staticpods, errs := podutil.ReadFromDir(givenPodsDir)
	reportErrors(errs)

	gzipped, err := podutil.Gzip(staticpods)
	assert.NoError(t, err)

	expectedStaticPodsNum := 2 // subdirectories are ignored by FileSource, hence only 2

	// temporary directory which is normally located in the executor sandbox
	staticPodsConfigPath, err := utiltesting.MkTmpdir("executor-k8sm-archive")
	assert.NoError(t, err)
	defer os.RemoveAll(staticPodsConfigPath)

	executor := &Executor{
		staticPodsConfigPath: staticPodsConfigPath,
	}

	// extract the pods into staticPodsConfigPath
	executor.initializeStaticPodsSource(&mesosproto.ExecutorInfo{Data: gzipped})

	actualpods, errs := podutil.ReadFromDir(staticPodsConfigPath)
	reportErrors(errs)

	list := podutil.List(actualpods)
	assert.NotNil(t, list)
	assert.Equal(t, expectedStaticPodsNum, len(list.Items))

	var (
		expectedNames = map[string]struct{}{
			"spod-01": {},
			"spod-02": {},
		}
		actualNames = map[string]struct{}{}
	)
	for _, pod := range list.Items {
		actualNames[pod.Name] = struct{}{}
	}
	assert.True(t, reflect.DeepEqual(expectedNames, actualNames), "expected %v instead of %v", expectedNames, actualNames)

	wg.Wait()
}

// TestExecutorFrameworkMessage ensures that the executor is able to
// handle messages from the framework, specifically about lost tasks
// and Kamikaze. When a task is lost, the executor needs to clean up
// its state. When a Kamikaze message is received, the executor should
// attempt suicide.
func TestExecutorFrameworkMessage(t *testing.T) {
	// TODO(jdef): Fix the unexpected call in the mocking system.
	t.Skip("This test started failing when panic catching was disabled.")
	var (
		mockDriver      = &MockExecutorDriver{}
		kubeletFinished = make(chan struct{})
		registry        = newFakeRegistry()
		executor        = New(Config{
			Docker:    dockertools.ConnectToDockerOrDie("fake://", 0),
			NodeInfos: make(chan NodeInfo, 1),
			ShutdownAlert: func() {
				close(kubeletFinished)
			},
			KubeletFinished: kubeletFinished,
			Registry:        registry,
		})
		pod         = NewTestPod(1)
		mockKubeAPI = &mockKubeAPI{}
	)

	executor.kubeAPI = mockKubeAPI
	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)
	executor.FrameworkMessage(mockDriver, "test framework message")

	// set up a pod to then lose
	executorinfo := &mesosproto.ExecutorInfo{}
	podTask, _ := podtask.New(
		api.NewDefaultContext(),
		podtask.Config{
			ID:               "foo",
			Prototype:        executorinfo,
			HostPortStrategy: hostport.StrategyWildcard,
		},
		pod,
	)
	pod.Annotations = map[string]string{
		"k8s.mesosphere.io/taskId": podTask.ID,
	}
	podTask.Spec = &podtask.Spec{
		Executor: executorinfo,
	}

	taskInfo, err := podTask.BuildTaskInfo()
	assert.Equal(t, nil, err, "must be able to build task info")

	data, _ := runtime.Encode(testapi.Default.Codec(), pod)
	taskInfo.Data = data

	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Once()

	called := make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	// must wait for this otherwise phase changes may not apply
	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return !registry.empty()
	}, "executor must be able to create a task and a pod")

	err = registry.phaseChange(pod, api.PodPending)
	assert.NoError(t, err)
	err = registry.phaseChange(pod, api.PodRunning)
	assert.NoError(t, err)

	// waiting until the pod is really running b/c otherwise a TASK_FAILED could be
	// triggered by the asynchronously running executor methods when removing the task
	// from k.tasks through the "task-lost:foo" message below.
	select {
	case <-called:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for SendStatusUpdate for the running task")
	}

	// send task-lost message for it
	called = make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_LOST,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	// simulate what happens when the apiserver is told to delete a pod
	mockKubeAPI.On("killPod", pod.Namespace, pod.Name).Return(nil).Run(func(_ mock.Arguments) {
		registry.Remove(podTask.ID)
	})

	executor.FrameworkMessage(mockDriver, "task-lost:foo")

	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return registry.empty()
	}, "executor must be able to kill a created task and pod")

	select {
	case <-called:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for SendStatusUpdate")
	}

	mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()

	executor.FrameworkMessage(mockDriver, messages.Kamikaze)
	assert.Equal(t, true, executor.isDone(),
		"executor should have shut down after receiving a Kamikaze message")

	mockDriver.AssertExpectations(t)
	mockKubeAPI.AssertExpectations(t)
}

// Create a pod with a given index, requiring one port
func NewTestPod(i int) *api.Pod {
	name := fmt.Sprintf("pod%d", i)
	return &api.Pod{
		TypeMeta: unversioned.TypeMeta{APIVersion: testapi.Default.GroupVersion().String()},
		ObjectMeta: api.ObjectMeta{
			Name:      name,
			Namespace: api.NamespaceDefault,
			SelfLink:  testapi.Default.SelfLink("pods", name),
		},
		Spec: api.PodSpec{
			Containers: []api.Container{
				{
					Name: "foo",
					Ports: []api.ContainerPort{
						{
							ContainerPort: int32(8000 + i),
							Protocol:      api.ProtocolTCP,
						},
					},
				},
			},
		},
		Status: api.PodStatus{
			Conditions: []api.PodCondition{
				{
					Type:   api.PodReady,
					Status: api.ConditionTrue,
				},
			},
		},
	}
}

// Create mock of pods ListWatch, usually listening on the apiserver pods watch endpoint
type MockPodsListWatch struct {
	ListWatch   cache.ListWatch
	fakeWatcher *watch.FakeWatcher
	list        api.PodList
}

// An apiserver mock which partially mocks the pods API
type TestServer struct {
	server *httptest.Server
	Stats  map[string]uint
	lock   sync.Mutex
}

func NewTestServer(t *testing.T, namespace string) *TestServer {
	ts := TestServer{
		Stats: map[string]uint{},
	}
	mux := http.NewServeMux()

	mux.HandleFunc(testapi.Default.ResourcePath("bindings", namespace, ""), func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	})

	ts.server = httptest.NewServer(mux)
	return &ts
}

func NewMockPodsListWatch(initialPodList api.PodList) *MockPodsListWatch {
	lw := MockPodsListWatch{
		fakeWatcher: watch.NewFake(),
		list:        initialPodList,
	}
	lw.ListWatch = cache.ListWatch{
		WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
			return lw.fakeWatcher, nil
		},
		ListFunc: func(options api.ListOptions) (runtime.Object, error) {
			return &lw.list, nil
		},
	}
	return &lw
}

// TestExecutorShutdown ensures that the executor properly shuts down
// when Shutdown is called.
func TestExecutorShutdown(t *testing.T) {
	var (
		mockDriver      = &MockExecutorDriver{}
		kubeletFinished = make(chan struct{})
		exitCalled      = int32(0)
		executor        = New(Config{
			Docker:    dockertools.ConnectToDockerOrDie("fake://", 0),
			NodeInfos: make(chan NodeInfo, 1),
			ShutdownAlert: func() {
				close(kubeletFinished)
			},
			KubeletFinished: kubeletFinished,
			ExitFunc: func(_ int) {
				atomic.AddInt32(&exitCalled, 1)
			},
			Registry: newFakeRegistry(),
		})
	)

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)
	mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()
	executor.Shutdown(mockDriver)

	assert.Equal(t, false, executor.isConnected(),
		"executor should not be connected after Shutdown")
	assert.Equal(t, true, executor.isDone(),
		"executor should be in Done state after Shutdown")
	assert.Equal(t, true, atomic.LoadInt32(&exitCalled) > 0,
		"the executor should call its ExitFunc when it is ready to close down")
	mockDriver.AssertExpectations(t)
}

func TestExecutorsendFrameworkMessage(t *testing.T) {
	mockDriver := &MockExecutorDriver{}
	executor := NewTestKubernetesExecutor()

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	called := make(chan struct{})
	mockDriver.On(
		"SendFrameworkMessage",
		"foo bar baz",
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()
	executor.sendFrameworkMessage(mockDriver, "foo bar baz")

	// guard against data race in mock driver between AssertExpectations and Called
	select {
	case <-called: // expected
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("expected call to SendFrameworkMessage")
	}
	mockDriver.AssertExpectations(t)
}
@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package messages exposes executor event/message names as constants.
package messages // import "k8s.io/kubernetes/contrib/mesos/pkg/executor/messages"
@ -1,36 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package messages

// messages that ship with TaskStatus objects

const (
	ContainersDisappeared    = "containers-disappeared"
	CreateBindingFailure     = "create-binding-failure"
	CreateBindingSuccess     = "create-binding-success"
	ExecutorUnregistered     = "executor-unregistered"
	ExecutorShutdown         = "executor-shutdown"
	LaunchTaskFailed         = "launch-task-failed"
	KubeletPodLaunchFailed   = "kubelet-pod-launch-failed"
	TaskKilled               = "task-killed"
	TaskLost                 = "task-lost"
	UnmarshalTaskDataFailure = "unmarshal-task-data-failure"
	TaskLostAck              = "task-lost-ack" // executor acknowledgment of forwarded TASK_LOST framework message
	Kamikaze                 = "kamikaze"
	WrongSlaveFailure        = "pod-for-wrong-slave-failure"
	AnnotationUpdateFailure  = "annotation-update-failure"
)
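// These constants travel in mesos.TaskStatus.Message; for example, the
// TASK_STARTING update built by the executor's bindAndWatchTask carries
// Message: "create-binding-success" (CreateBindingSuccess), and a forwarded
// TASK_LOST acknowledgment carries "task-lost-ack" (TaskLostAck).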
@ -1,90 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package executor

import (
	"testing"

	"github.com/mesos/mesos-go/mesosproto"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/mock"
	"k8s.io/kubernetes/pkg/kubelet/dockertools"
)

type mockKubeAPI struct {
	mock.Mock
}

func (m *mockKubeAPI) killPod(ns, name string) error {
	args := m.Called(ns, name)
	return args.Error(0)
}

type MockExecutorDriver struct {
	mock.Mock
}

func (m *MockExecutorDriver) Start() (mesosproto.Status, error) {
	args := m.Called()
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func (m *MockExecutorDriver) Stop() (mesosproto.Status, error) {
	args := m.Called()
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func (m *MockExecutorDriver) Abort() (mesosproto.Status, error) {
	args := m.Called()
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func (m *MockExecutorDriver) Join() (mesosproto.Status, error) {
	args := m.Called()
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func (m *MockExecutorDriver) Run() (mesosproto.Status, error) {
	args := m.Called()
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func (m *MockExecutorDriver) SendStatusUpdate(taskStatus *mesosproto.TaskStatus) (mesosproto.Status, error) {
	args := m.Called(*taskStatus.State)
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func (m *MockExecutorDriver) SendFrameworkMessage(msg string) (mesosproto.Status, error) {
	args := m.Called(msg)
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func NewTestKubernetesExecutor() *Executor {
	return New(Config{
		Docker:   dockertools.ConnectToDockerOrDie("fake://", 0),
		Registry: newFakeRegistry(),
	})
}

func TestExecutorNew(t *testing.T) {
	mockDriver := &MockExecutorDriver{}
	executor := NewTestKubernetesExecutor()
	executor.Init(mockDriver)

	assert.Equal(t, executor.isDone(), false, "executor should not be in Done state on initialization")
	assert.Equal(t, executor.isConnected(), false, "executor should not be connected on initialization")
}
@ -1,67 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package executor

import mesos "github.com/mesos/mesos-go/mesosproto"

type NodeInfo struct {
	Cores int
	Mem   uint64 // in bytes
}

func nodeInfo(si *mesos.SlaveInfo, ei *mesos.ExecutorInfo) NodeInfo {
	var executorCPU, executorMem float64

	// get executor resources
	if ei != nil {
		for _, r := range ei.GetResources() {
			if r == nil || r.GetType() != mesos.Value_SCALAR {
				continue
			}
			switch r.GetName() {
			case "cpus":
				executorCPU += r.GetScalar().GetValue()
			case "mem":
				executorMem += r.GetScalar().GetValue()
			}
		}
	}

	// get resource capacity of the node
	ni := NodeInfo{}
	for _, r := range si.GetResources() {
		if r == nil || r.GetType() != mesos.Value_SCALAR {
			continue
		}

		switch r.GetName() {
		case "cpus":
			// We intentionally take the floor of executorCPU because cores are integers
			// and we would lose a complete cpu here if the value is <1.
			// TODO(sttts): switch to float64 when "Machine Allocables" are implemented
			ni.Cores += int(r.GetScalar().GetValue())
		case "mem":
			ni.Mem += uint64(r.GetScalar().GetValue()) * 1024 * 1024
		}
	}

	// TODO(sttts): subtract executorCPU/Mem from static pod resources before subtracting them from the capacity
	ni.Cores -= int(executorCPU)
	ni.Mem -= uint64(executorMem) * 1024 * 1024

	return ni
}
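// Worked example with illustrative numbers: a slave reporting cpus=8 and
// mem=16384 (MB) whose executor reserves cpus=0.25 and mem=128 yields
//
//	Cores = int(8) - int(0.25) = 8 - 0 = 8
//	Mem   = (16384 - 128) * 1024 * 1024 bytes
//
// i.e. fractional executor CPU is floored away, as the comments above note.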
@ -1,340 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package executor

import (
	"encoding/json"
	"errors"
	"sync"

	clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"

	"k8s.io/kubernetes/contrib/mesos/pkg/executor/messages"
	"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/meta"
	"k8s.io/kubernetes/pkg/api"

	log "github.com/golang/glog"
)

type (
	podEventType int

	PodEvent struct {
		pod       *api.Pod
		taskID    string
		eventType podEventType
	}

	// Registry is a state store for pod task metadata. Clients are expected to watch() the
	// event stream to observe changes over time.
	Registry interface {
		// Update modifies the registry's internal representation of the pod; it may also
		// modify the pod argument itself. An update may fail because either a pod isn't
		// labeled with a task ID, the task ID is unknown, or the nature of the update may
		// be incompatible with what's supported in kubernetes-mesos.
		Update(pod *api.Pod) (*PodEvent, error)

		// Remove the task from this registry, returns an error if the taskID is unknown.
		Remove(taskID string) error

		// bind associates a taskID with a pod, triggers the binding API on the k8s apiserver
		// and stores the resulting pod-task metadata.
		bind(taskID string, pod *api.Pod) error

		// watch returns the event stream of the registry. clients are expected to read this
		// stream otherwise the event buffer will fill up and registry ops will block.
		watch() <-chan *PodEvent

		// return true if there are no tasks registered
		empty() bool

		// return the api.Pod registered to the given taskID or else nil
		pod(taskID string) *api.Pod

		// shutdown any related async processing and clear the internal state of the registry
		shutdown()
	}

	registryImpl struct {
		client     *clientset.Clientset
		updates    chan *PodEvent
		m          sync.RWMutex
		boundTasks map[string]*api.Pod
	}
)

var (
	errCreateBindingFailed     = errors.New(messages.CreateBindingFailure)
	errAnnotationUpdateFailure = errors.New(messages.AnnotationUpdateFailure)
	errUnknownTask             = errors.New("unknown task ID")
	errUnsupportedUpdate       = errors.New("pod update allowed by k8s is incompatible with this version of k8s-mesos")
)

const (
	PodEventBound podEventType = iota
	PodEventUpdated
	PodEventDeleted
	PodEventIncompatibleUpdate

	updatesBacklogSize = 200
)

func IsUnsupportedUpdate(err error) bool {
	return err == errUnsupportedUpdate
}

func (rp *PodEvent) Task() string {
	return rp.taskID
}

func (rp *PodEvent) Pod() *api.Pod {
	return rp.pod
}

func (rp *PodEvent) FormatShort() string {
	return "task '" + rp.taskID + "' pod '" + rp.pod.Namespace + "/" + rp.pod.Name + "'"
}

func NewRegistry(client *clientset.Clientset) Registry {
	r := &registryImpl{
		client:     client,
		updates:    make(chan *PodEvent, updatesBacklogSize),
		boundTasks: make(map[string]*api.Pod),
	}
	return r
}

func (r *registryImpl) watch() <-chan *PodEvent {
	return r.updates
}

func taskIDFor(pod *api.Pod) (taskID string, err error) {
	taskID = pod.Annotations[meta.TaskIdKey]
	if taskID == "" {
		err = errUnknownTask
	}
	return
}

func (r *registryImpl) shutdown() {
	//TODO(jdef) flesh this out
	r.m.Lock()
	defer r.m.Unlock()
	r.boundTasks = map[string]*api.Pod{}
}

func (r *registryImpl) empty() bool {
	r.m.RLock()
	defer r.m.RUnlock()
	return len(r.boundTasks) == 0
}

func (r *registryImpl) pod(taskID string) *api.Pod {
	r.m.RLock()
	defer r.m.RUnlock()
	return r.boundTasks[taskID]
}

func (r *registryImpl) Remove(taskID string) error {
	r.m.Lock()
	defer r.m.Unlock()
	pod, ok := r.boundTasks[taskID]
	if !ok {
		return errUnknownTask
	}

	delete(r.boundTasks, taskID)

	r.updates <- &PodEvent{
		pod:       pod,
		taskID:    taskID,
		eventType: PodEventDeleted,
	}

	log.V(1).Infof("unbound task %v from pod %v/%v", taskID, pod.Namespace, pod.Name)
	return nil
}

func (r *registryImpl) Update(pod *api.Pod) (*PodEvent, error) {
	// Don't do anything for pods without a task annotation, which means:
	// - "pre-scheduled" pods which have a NodeName set to this node without being scheduled already.
	// - static/mirror pods: they'll never have a TaskID annotation, and we don't expect them to ever change.
	// - all other pods that haven't passed through the launch-task-binding phase, which would set annotations.
	taskID, err := taskIDFor(pod)
	if err != nil {
		// There also could be a race between the overall launch-task process and this update, but here we
		// will never be able to process such a stale update because the "update pod" that we're receiving
		// in this func won't yet have a task ID annotation. It follows that we can safely drop such a stale
		// update on the floor because we'll get another update later that, in addition to the changes that
		// we're dropping now, will also include the changes from the binding process.
		log.V(5).Infof("ignoring pod update for %s/%s because %s annotation is missing", pod.Namespace, pod.Name, meta.TaskIdKey)
		return nil, err
	}

	// be a good citizen: copy the arg before making any changes to it
	clone, err := api.Scheme.DeepCopy(pod)
	if err != nil {
		return nil, err
	}
	pod = clone.(*api.Pod)

	r.m.Lock()
	defer r.m.Unlock()
	oldPod, ok := r.boundTasks[taskID]
	if !ok {
		return nil, errUnknownTask
	}

	registeredPod := &PodEvent{
		pod:       pod,
		taskID:    taskID,
		eventType: PodEventUpdated,
	}

	// TODO(jdef) would be nice to only execute this logic based on the presence of
	// some particular annotation:
	//   - preserve the original container port spec since the k8sm scheduler
	//     has likely changed it.
	if !copyPorts(pod, oldPod) {
		// TODO(jdef) the state of "pod" is possibly inconsistent at this point.
		// we don't care for the moment - we might later.
		registeredPod.eventType = PodEventIncompatibleUpdate
		r.updates <- registeredPod
		log.Warningf("pod containers changed in an incompatible way; aborting update")
		return registeredPod, errUnsupportedUpdate
	}

	// update our internal copy and broadcast the change
	r.boundTasks[taskID] = pod
	r.updates <- registeredPod

	log.V(1).Infof("updated task %v pod %v/%v", taskID, pod.Namespace, pod.Name)
	return registeredPod, nil
}

// copyPorts copies the container port specs from src to dest and returns
// true if all ports (in both dest and src) are accounted for, otherwise
// false. if returning false then it's possible that only a partial copy
// has been performed.
func copyPorts(dest, src *api.Pod) bool {
	containers := src.Spec.Containers
	ctPorts := make(map[string][]api.ContainerPort, len(containers))
	for i := range containers {
		ctPorts[containers[i].Name] = containers[i].Ports
	}
	containers = dest.Spec.Containers
	for i := range containers {
		name := containers[i].Name
		if ports, found := ctPorts[name]; found {
			containers[i].Ports = ports
			delete(ctPorts, name)
		} else {
			// old pod spec is missing this container?!
			return false
		}
	}
	if len(ctPorts) > 0 {
		// new pod spec has containers that aren't in the old pod spec
		return false
	}
	return true
}

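// copyPorts by example (pod contents hypothetical): if the stored pod has a
// container "web" carrying host-port-rewritten ports from the k8sm scheduler
// and an update arrives with "web" carrying the user's original ports, the
// update's ports are overwritten from the stored pod, preserving the rewrite.
// An update that renames, adds, or drops a container returns false and is
// surfaced as PodEventIncompatibleUpdate by Update above.
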
func (r *registryImpl) bind(taskID string, pod *api.Pod) error {
	// validate taskID matches that of the annotation
	annotatedTaskID, err := taskIDFor(pod)
	if err != nil {
		log.Warning("failed to bind: missing task ID annotation for pod ", pod.Namespace+"/"+pod.Name)
		return errCreateBindingFailed
	}
	if annotatedTaskID != taskID {
		log.Warningf("failed to bind: expected task-id %v instead of %v for pod %v/%v", taskID, annotatedTaskID, pod.Namespace, pod.Name)
		return errCreateBindingFailed
	}

	// record this as a bound task for now so that we can avoid racing with the mesos pod source, who is
	// watching the apiserver for pod updates and will verify pod-task validity with us upon receiving such
	// an update
	boundSuccessfully := false
	defer func() {
		if !boundSuccessfully {
			r.m.Lock()
			defer r.m.Unlock()
			delete(r.boundTasks, taskID)
		}
	}()
	func() {
		r.m.Lock()
		defer r.m.Unlock()
		r.boundTasks[taskID] = pod
	}()

	if pod.Spec.NodeName == "" {
		//HACK(jdef): cloned binding construction from k8s plugin/pkg/scheduler/framework.go
		binding := &api.Binding{
			ObjectMeta: api.ObjectMeta{
				Namespace:   pod.Namespace,
				Name:        pod.Name,
				Annotations: make(map[string]string),
			},
			Target: api.ObjectReference{
				Kind: "Node",
				Name: pod.Annotations[meta.BindingHostKey],
			},
		}

		// forward the annotations that the scheduler wants to apply
		for k, v := range pod.Annotations {
			binding.Annotations[k] = v
		}

		// create binding on apiserver
		log.Infof("Binding task %v pod '%v/%v' to '%v' with annotations %+v...",
			taskID, pod.Namespace, pod.Name, binding.Target.Name, binding.Annotations)
		ctx := api.WithNamespace(api.NewContext(), binding.Namespace)
		err := r.client.CoreClient.Post().Namespace(api.NamespaceValue(ctx)).Resource("bindings").Body(binding).Do().Error()
		if err != nil {
			log.Warningf("failed to bind task %v pod %v/%v: %v", taskID, pod.Namespace, pod.Name, err)
			return errCreateBindingFailed
		}
	} else {
		// post annotations update to apiserver
		patch := struct {
			Metadata struct {
				Annotations map[string]string `json:"annotations"`
			} `json:"metadata"`
		}{}
		patch.Metadata.Annotations = pod.Annotations
		patchJson, _ := json.Marshal(patch)
		log.V(4).Infof("Patching annotations %v of task %v pod %v/%v: %v", pod.Annotations, taskID, pod.Namespace, pod.Name, string(patchJson))
		err := r.client.CoreClient.Patch(api.MergePatchType).RequestURI(pod.SelfLink).Body(patchJson).Do().Error()
		if err != nil {
			log.Errorf("Error updating annotations of ready-to-launch task %v pod %v/%v: %v", taskID, pod.Namespace, pod.Name, err)
			return errAnnotationUpdateFailure
		}
	}

	boundSuccessfully = true

	r.updates <- &PodEvent{
		pod:       pod,
		taskID:    taskID,
		eventType: PodEventBound,
	}

	log.V(1).Infof("bound task %v to pod %v/%v", taskID, pod.Namespace, pod.Name)
	return nil
}
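// The merge patch posted in the annotation branch above has this shape (the
// key matches the task-id annotation used elsewhere; the value is
// illustrative):
//
//	{"metadata":{"annotations":{"k8s.mesosphere.io/taskId":"pod.deadbeef"}}}
//
// which updates only the pod's annotations and leaves the rest of the object
// untouched.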
@ -1,51 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package service

import (
	"k8s.io/kubernetes/pkg/kubelet/cadvisor"

	cadvisorapi "github.com/google/cadvisor/info/v1"
)

type MesosCadvisor struct {
	cadvisor.Interface
	cores int
	mem   uint64
}

func NewMesosCadvisor(cores int, mem uint64, port uint, runtime string) (*MesosCadvisor, error) {
	c, err := cadvisor.New(port, runtime)
	if err != nil {
		return nil, err
	}
	return &MesosCadvisor{c, cores, mem}, nil
}

func (mc *MesosCadvisor) MachineInfo() (*cadvisorapi.MachineInfo, error) {
	mi, err := mc.Interface.MachineInfo()
	if err != nil {
		return nil, err
	}

	// set Mesos provided values
	mesosMi := *mi
	mesosMi.NumCores = mc.cores
	mesosMi.MemoryCapacity = mc.mem

	return &mesosMi, nil
}
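// Hedged usage sketch (the wiring is an assumption, not shown in this file):
// the capacity values would typically come from nodeInfo() in the executor
// package, e.g.
//
//	ni := nodeInfo(slaveInfo, executorInfo)
//	cad, err := NewMesosCadvisor(ni.Cores, ni.Mem, cadvisorPort, containerRuntime)
//
// so that the kubelet advertises Mesos-granted capacity rather than raw
// machine capacity.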
@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package service contains the cmd/k8sm-executor glue code.
package service // import "k8s.io/kubernetes/contrib/mesos/pkg/executor/service"
@ -1,84 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package service

import (
	log "github.com/golang/glog"
	"k8s.io/kubernetes/pkg/kubelet"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/pkg/util/runtime"
	"k8s.io/kubernetes/pkg/util/wait"
)

// executorKubelet decorates the kubelet with a Run function that notifies the
// executor by closing kubeletDone before entering blocking state.
type executorKubelet struct {
	*kubelet.Kubelet
	kubeletDone  chan<- struct{} // closed once kubelet.Run() returns
	executorDone <-chan struct{} // closed when executor terminates
}

// Run runs the main kubelet loop, closing the kubeletFinished chan when the
// loop exits. Like the upstream Run, it will never return.
func (kl *executorKubelet) Run(mergedUpdates <-chan kubetypes.PodUpdate) {
	defer func() {
		// If this Run function was invoked, we close kubeletDone here.
		// Otherwise, KubeletExecutorServer.runKubelet will close it.
		close(kl.kubeletDone)
		runtime.HandleCrash()
		log.Infoln("kubelet run terminated") //TODO(jdef) turn down verbosity
		// important: never return! this is in our contract
		select {}
	}()

	// push merged updates into another, closable update channel which is closed
	// when the executor shuts down.
	closableUpdates := make(chan kubetypes.PodUpdate)
	go func() {
		// closing closableUpdates will cause our patched kubelet's syncLoop() to exit
		defer close(closableUpdates)
	pipeLoop:
		for {
			select {
			case <-kl.executorDone:
				break pipeLoop
			default:
				select {
				case u := <-mergedUpdates:
					select {
					case closableUpdates <- u: // noop
					case <-kl.executorDone:
						break pipeLoop
					}
				case <-kl.executorDone:
					break pipeLoop
				}
			}
		}
	}()

	// we expect that Run() will complete after closableUpdates is closed and the
	// kubelet's syncLoop() has finished processing its backlog, which hopefully
	// will not take very long. Peeking into the future (current k8s master) it
	// seems that the backlog has grown from 1 to 50 -- this may negatively impact
	// us going forward, time will tell.
	wait.Until(func() { kl.Kubelet.Run(closableUpdates) }, 0, kl.executorDone)

	//TODO(jdef) revisit this if/when executor failover lands
	// Force kubelet to delete all pods.
	kl.HandlePodRemoves(kl.GetPods())
}
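The pipe-loop above forwards from a never-closing source into a separate, closable channel so the consumer can terminate. A minimal, self-contained sketch of the same structure (names are illustrative; int stands in for PodUpdate):

package main

import "fmt"

// pipeUntil forwards values from src into a fresh channel until done closes.
// Closing the returned channel is what lets a consumer ranging over it exit,
// even though src itself is never closed. As in the original, delivery of an
// in-flight item races with shutdown and may be dropped.
func pipeUntil(src <-chan int, done <-chan struct{}) <-chan int {
	out := make(chan int)
	go func() {
		defer close(out)
		for {
			select {
			case <-done:
				return
			case v := <-src:
				select {
				case out <- v:
				case <-done:
					return
				}
			}
		}
	}()
	return out
}

func main() {
	src := make(chan int)
	done := make(chan struct{})
	out := pipeUntil(src, done)
	go func() {
		src <- 1
		src <- 2
		close(done)
	}()
	for v := range out {
		fmt.Println(v)
	}
}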
@ -1,200 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podsource

import (
	"k8s.io/kubernetes/contrib/mesos/pkg/executor"
	"k8s.io/kubernetes/contrib/mesos/pkg/podutil"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/client/cache"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"

	log "github.com/golang/glog"
)

type (
	filterType int

	podName struct {
		namespace, name string
	}

	// Filter is invoked for each snapshot of pod state that passes through this source
	Filter interface {
		// Before is invoked before any pods are evaluated
		Before(podCount int)
		// Accept returns true if this pod should be accepted by the source; a value
		// of false results in the pod appearing to have been removed from the apiserver.
		// If true, the caller should use the output pod value for the remainder of
		// the processing task. If false then the output pod value may be nil.
		Accept(*api.Pod) (*api.Pod, bool)
		// After is invoked after all pods have been evaluated
		After()
	}

	// FilterFunc is a simplified Filter implementation that only implements Filter.Accept; its
	// Before and After implementations are noops.
	FilterFunc func(*api.Pod) (*api.Pod, bool)

	Source struct {
		stop    <-chan struct{}
		out     chan<- interface{} // never close this because pkg/util/config.mux doesn't handle that very well
		filters []Filter           // additional filters to apply to pod objects
	}

	Option func(*Source)
)

const (
	// if we don't use this source then the kubelet will do funny, mirror things. we alias
	// this here for convenience. see the docs for Source for additional explanation.
	// @see ConfigSourceAnnotationKey
	MesosSource = kubetypes.ApiserverSource
)

func (f FilterFunc) Before(_ int)                         {}
func (f FilterFunc) After()                               {}
func (f FilterFunc) Accept(pod *api.Pod) (*api.Pod, bool) { return f(pod) }

// Mesos spawns a new pod source that watches the API server for changes and collaborates with
// executor.Registry to generate api.Pod objects in a fashion that's very Mesos-aware.
func Mesos(
	stop <-chan struct{},
	out chan<- interface{},
	podWatch *cache.ListWatch,
	registry executor.Registry,
	options ...Option,
) {
	source := &Source{
		stop: stop,
		out:  out,
		filters: []Filter{
			FilterFunc(filterMirrorPod),
			&registeredPodFilter{registry: registry},
		},
	}
	// note: any filters added by options should be applied after the defaults
	for _, opt := range options {
		opt(source)
	}
	// reflect changes from the watch into a chan, filtered to include only mirror pods
	// (those that have a ConfigMirrorAnnotationKey attr)
	cache.NewReflector(
		podWatch,
		&api.Pod{},
		cache.NewUndeltaStore(source.send, cache.MetaNamespaceKeyFunc),
		0,
	).RunUntil(stop)
}

func filterMirrorPod(p *api.Pod) (*api.Pod, bool) {
	_, ok := (*p).Annotations[kubetypes.ConfigMirrorAnnotationKey]
	return p, ok
}

type registeredPodFilter struct {
	priorPodNames, podNames map[podName]string // maps a podName to a taskID
	registry                executor.Registry
}

func (rpf *registeredPodFilter) Before(podCount int) {
	rpf.priorPodNames = rpf.podNames
	rpf.podNames = make(map[podName]string, podCount)
}

func (rpf *registeredPodFilter) After() {
	// detect when pods are deleted and notify the registry
	for k, taskID := range rpf.priorPodNames {
		if _, found := rpf.podNames[k]; !found {
			rpf.registry.Remove(taskID)
		}
	}
}

func (rpf *registeredPodFilter) Accept(p *api.Pod) (*api.Pod, bool) {
	rpod, err := rpf.registry.Update(p)
	if err == nil {
		// pod is bound to a task, and the update is compatible,
		// so we'll allow it through
		p = rpod.Pod() // use the (possibly) updated pod spec!
		rpf.podNames[podName{p.Namespace, p.Name}] = rpod.Task()
		return p, true
	}
	if rpod != nil {
		// we were able to ID the pod but the update still failed...
		log.Warningf("failed to update registry for task %v pod %v/%v: %v",
			rpod.Task(), p.Namespace, p.Name, err)
	}
	return nil, false
}

// send is an update callback invoked by NewUndeltaStore; it applies all of source.filters
// to the incoming pod snapshot and forwards a PodUpdate that contains a snapshot of all
// the pods that were accepted by the filters.
func (source *Source) send(objs []interface{}) {
	var (
		podCount = len(objs)
		pods     = make([]*api.Pod, 0, podCount)
	)

	for _, f := range source.filters {
		f.Before(podCount)
	}
foreachPod:
	for _, o := range objs {
		p := o.(*api.Pod)
		for _, f := range source.filters {
			if p, ok := f.Accept(p); ok {
				pods = append(pods, p)
				continue foreachPod
			}
		}
		// unrecognized pod
		log.V(2).Infof("skipping pod %v/%v", p.Namespace, p.Name)
	}
	// TODO(jdef) should these be applied in reverse order instead?
	for _, f := range source.filters {
		f.After()
	}

	u := kubetypes.PodUpdate{
		Op:     kubetypes.SET,
		Pods:   pods,
		Source: MesosSource,
	}
	select {
	case <-source.stop:
	case source.out <- u:
		log.V(2).Infof("sent %d pod updates", len(pods))
	}
}

func ContainerEnvOverlay(env []api.EnvVar) Option {
	return func(s *Source) {
		// prepend this filter so that it impacts *all* pods running on the slave
		s.filters = append([]Filter{filterContainerEnvOverlay(env)}, s.filters...)
	}
}

func filterContainerEnvOverlay(env []api.EnvVar) FilterFunc {
	f := podutil.Environment(env)
	return func(pod *api.Pod) (*api.Pod, bool) {
		f(pod)
		// we shouldn't vote; let someone else decide whether the pod gets accepted
		return pod, false
	}
}
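The Filter chain in send() above gives every filter a chance to rewrite a pod and vote on whether it survives the snapshot. A hedged sketch of that accept/rewrite loop, using a simplified Pod type rather than the real api.Pod:

package main

import "fmt"

type Pod struct {
	Name        string
	Annotations map[string]string
}

type FilterFunc func(*Pod) (*Pod, bool)

// apply keeps a pod as soon as any filter accepts it; a pod accepted by no
// filter is silently dropped from the snapshot, as in send() above.
func apply(filters []FilterFunc, pods []*Pod) []*Pod {
	var kept []*Pod
eachPod:
	for _, p := range pods {
		for _, f := range filters {
			if out, ok := f(p); ok {
				kept = append(kept, out)
				continue eachPod
			}
		}
	}
	return kept
}

func main() {
	mirrorOnly := func(p *Pod) (*Pod, bool) {
		_, ok := p.Annotations["mirror"] // stand-in for ConfigMirrorAnnotationKey
		return p, ok
	}
	pods := []*Pod{
		{Name: "a", Annotations: map[string]string{"mirror": "true"}},
		{Name: "b"},
	}
	for _, p := range apply([]FilterFunc{mirrorOnly}, pods) {
		fmt.Println(p.Name) // prints only "a"
	}
}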
@ -1,321 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package service

import (
	"fmt"
	"net"
	"os"
	"path/filepath"
	"time"

	log "github.com/golang/glog"
	bindings "github.com/mesos/mesos-go/executor"
	"github.com/spf13/pflag"
	kubeletapp "k8s.io/kubernetes/cmd/kubelet/app"
	"k8s.io/kubernetes/cmd/kubelet/app/options"
	"k8s.io/kubernetes/contrib/mesos/pkg/executor"
	"k8s.io/kubernetes/contrib/mesos/pkg/executor/config"
	"k8s.io/kubernetes/contrib/mesos/pkg/executor/service/podsource"
	"k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
	"k8s.io/kubernetes/contrib/mesos/pkg/podutil"
	"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/meta"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/apis/componentconfig"
	"k8s.io/kubernetes/pkg/client/cache"
	clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
	"k8s.io/kubernetes/pkg/fields"
	"k8s.io/kubernetes/pkg/kubelet"
	"k8s.io/kubernetes/pkg/kubelet/cm"
	kconfig "k8s.io/kubernetes/pkg/kubelet/config"
	"k8s.io/kubernetes/pkg/kubelet/dockertools"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/pkg/types"
)

// TODO(jdef): passing the value of envContainerID to all docker containers instantiated
// through the kubelet is part of a strategy to enable orphan container GC; this can all
// be ripped out once we have a kubelet runtime that leverages Mesos native containerization.

// envContainerID is the name of the environment variable that contains the
// Mesos-assigned container ID of the Executor.
const envContainerID = "MESOS_EXECUTOR_CONTAINER_UUID"

type KubeletExecutorServer struct {
	*options.KubeletServer
	SuicideTimeout    time.Duration
	LaunchGracePeriod time.Duration

	containerID string
}

func NewKubeletExecutorServer() *KubeletExecutorServer {
	k := &KubeletExecutorServer{
		KubeletServer:     options.NewKubeletServer(),
		SuicideTimeout:    config.DefaultSuicideTimeout,
		LaunchGracePeriod: config.DefaultLaunchGracePeriod,
	}
	if pwd, err := os.Getwd(); err != nil {
		log.Warningf("failed to determine current directory: %v", err)
	} else {
		k.RootDirectory = pwd // mesos sandbox dir
	}
	k.Address = defaultBindingAddress()

	return k
}

func (s *KubeletExecutorServer) AddFlags(fs *pflag.FlagSet) {
	s.KubeletServer.AddFlags(fs)
	fs.DurationVar(&s.SuicideTimeout, "suicide-timeout", s.SuicideTimeout, "Self-terminate after this period of inactivity. Zero disables suicide watch.")
	fs.DurationVar(&s.LaunchGracePeriod, "mesos-launch-grace-period", s.LaunchGracePeriod, "Launch grace period after which launching tasks will be cancelled. Zero disables launch cancellation.")
}

func (s *KubeletExecutorServer) runExecutor(
	nodeInfos chan<- executor.NodeInfo,
	kubeletFinished <-chan struct{},
	staticPodsConfigPath string,
	apiclient *clientset.Clientset,
	registry executor.Registry,
) (<-chan struct{}, error) {
	staticPodFilters := podutil.Filters{
		// annotate the pod with BindingHostKey so that the scheduler will ignore the pod
		// once it appears in the pod registry. the stock kubelet sets the pod host in order
		// to accomplish the same; we do this because the k8sm scheduler works differently.
		podutil.Annotator(map[string]string{
			meta.BindingHostKey: s.HostnameOverride,
		}),
	}
	if s.containerID != "" {
		// tag all pod containers with the containerID so that they can be properly GC'd by Mesos
		staticPodFilters = append(staticPodFilters, podutil.Environment([]api.EnvVar{
			{Name: envContainerID, Value: s.containerID},
		}))
	}
	exec := executor.New(executor.Config{
		Registry:        registry,
		APIClient:       apiclient,
		Docker:          dockertools.ConnectToDockerOrDie(s.DockerEndpoint, 0),
		SuicideTimeout:  s.SuicideTimeout,
		KubeletFinished: kubeletFinished,
		ExitFunc:        os.Exit,
		NodeInfos:       nodeInfos,
		Options: []executor.Option{
			executor.StaticPods(staticPodsConfigPath, staticPodFilters),
		},
	})

	// create the driver and initialize the executor with it
	dconfig := bindings.DriverConfig{
		Executor:         exec,
		HostnameOverride: s.HostnameOverride,
		BindingAddress:   net.ParseIP(s.Address),
	}
	driver, err := bindings.NewMesosExecutorDriver(dconfig)
	if err != nil {
		return nil, fmt.Errorf("failed to create executor driver: %v", err)
	}
	log.V(2).Infof("Initializing executor driver...")
	exec.Init(driver)

	// start the driver
	go func() {
		if _, err := driver.Run(); err != nil {
			log.Fatalf("executor driver failed: %v", err)
		}
		log.Info("executor Run completed")
	}()

	return exec.Done(), nil
}

func (s *KubeletExecutorServer) runKubelet(
	nodeInfos <-chan executor.NodeInfo,
	kubeletDone chan<- struct{},
	staticPodsConfigPath string,
	apiclient *clientset.Clientset,
	podLW *cache.ListWatch,
	registry executor.Registry,
	executorDone <-chan struct{},
) (err error) {
	defer func() {
		if err != nil {
			// close the channel here. When Run returns without error, the executorKubelet is
			// responsible for doing this. If it returns with an error, we are responsible here.
			close(kubeletDone)
		}
	}()

	kubeDeps, err := kubeletapp.UnsecuredKubeletDeps(s.KubeletServer)
	if err != nil {
		return err
	}

	// apply Mesos-specific settings
	kubeDeps.Builder = func(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *kubelet.KubeletDeps, standaloneMode bool) (kubelet.KubeletBootstrap, error) {
		k, err := kubeletapp.CreateAndInitKubelet(kubeCfg, kubeDeps, standaloneMode)
		if err != nil {
			return k, err
		}

		// decorate the kubelet such that it shuts down when the executor does
		decorated := &executorKubelet{
			Kubelet:      k.(*kubelet.Kubelet),
			kubeletDone:  kubeletDone,
			executorDone: executorDone,
		}

		return decorated, nil
	}
	s.RuntimeCgroups = "" // don't move the docker daemon into a cgroup
	kubeDeps.KubeClient = apiclient

	// taken from KubeletServer#Run(*KubeletConfig)
	eventClientConfig, err := kubeletapp.CreateAPIServerClientConfig(s.KubeletServer)
	if err != nil {
		return err
	}

	// make a separate client for events
	eventClientConfig.QPS = float32(s.EventRecordQPS)
	eventClientConfig.Burst = int(s.EventBurst)
	kubeDeps.EventClient, err = clientset.NewForConfig(eventClientConfig)
	if err != nil {
		return err
	}

	kubeDeps.PodConfig = kconfig.NewPodConfig(kconfig.PodConfigNotificationIncremental, kubeDeps.Recorder) // override the default pod source

	s.SystemCgroups = "" // don't take control over other system processes.

	if kubeDeps.Cloud != nil {
		// fail early and hard because having the cloud provider loaded would go unnoticed
		// but break bigger clusters, because accessing state.json from every slave kills the master.
		panic("cloud provider must not be set")
	}

	// create a custom cAdvisor interface which returns the resource values that Mesos reports
	ni := <-nodeInfos
	cAdvisorInterface, err := NewMesosCadvisor(ni.Cores, ni.Mem, uint(s.CAdvisorPort), s.ContainerRuntime)
	if err != nil {
		return err
	}

	kubeDeps.CAdvisorInterface = cAdvisorInterface
	kubeDeps.ContainerManager, err = cm.NewContainerManager(kubeDeps.Mounter, cAdvisorInterface, cm.NodeConfig{
		RuntimeCgroupsName: s.RuntimeCgroups,
		SystemCgroupsName:  s.SystemCgroups,
		KubeletCgroupsName: s.KubeletCgroups,
		ContainerRuntime:   s.ContainerRuntime,
	})
	if err != nil {
		return err
	}

	go func() {
		for ni := range nodeInfos {
			// TODO(sttts): implement with the MachineAllocable mechanism when https://github.com/kubernetes/kubernetes/issues/13984 is finished
			log.V(3).Infof("ignoring updated node resources: %v", ni)
		}
	}()

	// create the main pod source; it will stop generating events once executorDone is closed
	var containerOptions []podsource.Option
	if s.containerID != "" {
		// tag all pod containers with the containerID so that they can be properly GC'd by Mesos
		containerOptions = append(containerOptions, podsource.ContainerEnvOverlay([]api.EnvVar{
			{Name: envContainerID, Value: s.containerID},
		}))
		kubeDeps.ContainerRuntimeOptions = append(kubeDeps.ContainerRuntimeOptions,
			dockertools.PodInfraContainerEnv(map[string]string{
				envContainerID: s.containerID,
			}))
	}

	podsource.Mesos(executorDone, kubeDeps.PodConfig.Channel(podsource.MesosSource), podLW, registry, containerOptions...)

	// create the static-pods directory file source
	log.V(2).Infof("initializing static pods source factory, configured at path %q", staticPodsConfigPath)
	fileSourceUpdates := kubeDeps.PodConfig.Channel(kubetypes.FileSource)
	kconfig.NewSourceFile(staticPodsConfigPath, s.HostnameOverride, s.FileCheckFrequency.Duration, fileSourceUpdates)

	// run the kubelet
	// NOTE: because kubeDeps != nil holds, the upstream Run function will not
	// initialize the cloud provider. We explicitly wouldn't want
	// that because then every kubelet instance would query the master
	// state.json, which does not scale.
	s.KubeletServer.LockFilePath = "" // disable lock file
	err = kubeletapp.Run(s.KubeletServer, kubeDeps)
	return
}

// Run runs the specified KubeletExecutorServer.
func (s *KubeletExecutorServer) Run(hks hyperkube.Interface, _ []string) error {
	// create shared channels
	kubeletFinished := make(chan struct{})
	nodeInfos := make(chan executor.NodeInfo, 1)

	// create the static pods directory
	staticPodsConfigPath := filepath.Join(s.RootDirectory, "static-pods")
	err := os.Mkdir(staticPodsConfigPath, 0750)
	if err != nil {
		return err
	}

	// we're expecting that either Mesos or the minion process will set this for us
	s.containerID = os.Getenv(envContainerID)
	if s.containerID == "" {
		log.Warningf("missing expected environment variable %q", envContainerID)
	}

	// create the apiserver client
	var apiclient *clientset.Clientset
	clientConfig, err := kubeletapp.CreateAPIServerClientConfig(s.KubeletServer)
	if err == nil {
		apiclient, err = clientset.NewForConfig(clientConfig)
	}
	if err != nil {
		// required for k8sm since we need to send api.Binding information back to the apiserver
		return fmt.Errorf("cannot create API client: %v", err)
	}

	var (
		pw = cache.NewListWatchFromClient(apiclient.CoreClient, "pods", api.NamespaceAll,
			fields.OneTermEqualSelector(api.PodHostField, s.HostnameOverride),
		)
		reg = executor.NewRegistry(apiclient)
	)

	// start the executor
	var executorDone <-chan struct{}
	executorDone, err = s.runExecutor(nodeInfos, kubeletFinished, staticPodsConfigPath, apiclient, reg)
	if err != nil {
		return err
	}

	// start the kubelet, blocking
	return s.runKubelet(nodeInfos, kubeletFinished, staticPodsConfigPath, apiclient, pw, reg, executorDone)
}

func defaultBindingAddress() string {
	libProcessIP := os.Getenv("LIBPROCESS_IP")
	if libProcessIP == "" {
		return "0.0.0.0"
	} else {
		return libProcessIP
	}
}
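The deferred close in runKubelet above is a common Go idiom: a deferred closure inspects the named error return and performs cleanup only on the failure path, while the success path hands that responsibility to someone else. A minimal sketch of the idiom, under hypothetical names:

package main

import (
	"errors"
	"fmt"
)

func start(done chan<- struct{}, fail bool) (err error) {
	defer func() {
		if err != nil {
			// on failure we are responsible for closing done;
			// on success the spawned worker closes it instead.
			close(done)
		}
	}()
	if fail {
		return errors.New("boom")
	}
	go func() { close(done) }() // worker signals completion
	return nil
}

func main() {
	done := make(chan struct{})
	err := start(done, true)
	<-done // closed by the deferred cleanup
	fmt.Println("err:", err)
}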
@ -1,65 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package executor

import (
	"time"

	log "github.com/golang/glog"
	bindings "github.com/mesos/mesos-go/executor"
)

// jumper is a func that attempts suicide
type jumper func(bindings.ExecutorDriver, <-chan struct{})

type suicideWatcher interface {
	Next(time.Duration, bindings.ExecutorDriver, jumper) suicideWatcher
	Reset(time.Duration) bool
	Stop() bool
}

// TODO(jdef) add metrics for this?
type suicideTimer struct {
	timer *time.Timer
}

func (w *suicideTimer) Next(d time.Duration, driver bindings.ExecutorDriver, f jumper) suicideWatcher {
	return &suicideTimer{
		timer: time.AfterFunc(d, func() {
			log.Warningf("Suicide timeout (%v) expired", d)
			f(driver, nil)
		}),
	}
}

func (w *suicideTimer) Stop() (result bool) {
	if w != nil && w.timer != nil {
		log.Infoln("stopping suicide watch") //TODO(jdef) debug
		result = w.timer.Stop()
	}
	return
}

// Reset returns true if the timer was successfully reset.
func (w *suicideTimer) Reset(d time.Duration) bool {
	if w != nil && w.timer != nil {
		log.Infoln("resetting suicide watch") //TODO(jdef) debug
		w.timer.Reset(d)
		return true
	}
	return false
}
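suicideTimer above is a thin wrapper over time.Timer implementing an inactivity watchdog: any activity resets the deadline, and only sustained silence lets it fire. A self-contained sketch of that idea using the standard library directly:

package main

import (
	"fmt"
	"time"
)

func main() {
	fired := make(chan struct{})
	// arm a watchdog that "jumps" if nothing resets it in time
	w := time.AfterFunc(50*time.Millisecond, func() { close(fired) })

	// activity arrives: push the deadline out before it expires
	time.Sleep(30 * time.Millisecond)
	if w.Reset(50 * time.Millisecond) {
		fmt.Println("watchdog reset while still pending")
	}

	<-fired // no further activity, so the watchdog eventually fires
	fmt.Println("suicide timeout expired")
}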
@ -1,197 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package executor

import (
	"sync/atomic"
	"testing"
	"time"

	"github.com/golang/glog"
	bindings "github.com/mesos/mesos-go/executor"
	"k8s.io/kubernetes/pkg/api"
)

type suicideTracker struct {
	suicideWatcher
	stops  uint32
	resets uint32
	timers uint32
	jumps  *uint32
}

func (t *suicideTracker) Reset(d time.Duration) bool {
	defer func() { t.resets++ }()
	return t.suicideWatcher.Reset(d)
}

func (t *suicideTracker) Stop() bool {
	defer func() { t.stops++ }()
	return t.suicideWatcher.Stop()
}

func (t *suicideTracker) Next(d time.Duration, driver bindings.ExecutorDriver, f jumper) suicideWatcher {
	tracker := &suicideTracker{
		stops:  t.stops,
		resets: t.resets,
		jumps:  t.jumps,
		timers: t.timers + 1,
	}
	jumper := tracker.makeJumper(f)
	tracker.suicideWatcher = t.suicideWatcher.Next(d, driver, jumper)
	return tracker
}

func (t *suicideTracker) makeJumper(_ jumper) jumper {
	return jumper(func(driver bindings.ExecutorDriver, cancel <-chan struct{}) {
		glog.Warningln("Jumping?!")
		if t.jumps != nil {
			atomic.AddUint32(t.jumps, 1)
		}
	})
}

func TestSuicide_zeroTimeout(t *testing.T) {
	defer glog.Flush()

	k := NewTestKubernetesExecutor()
	tracker := &suicideTracker{suicideWatcher: k.suicideWatch}
	k.suicideWatch = tracker

	ch := k.resetSuicideWatch(nil)

	select {
	case <-ch:
	case <-time.After(2 * time.Second):
		t.Fatalf("timeout waiting for reset of suicide watch")
	}
	if tracker.stops != 0 {
		t.Fatalf("expected no stops since suicideWatchTimeout was never set")
	}
	if tracker.resets != 0 {
		t.Fatalf("expected no resets since suicideWatchTimeout was never set")
	}
	if tracker.timers != 0 {
		t.Fatalf("expected no timers since suicideWatchTimeout was never set")
	}
}

func TestSuicide_WithTasks(t *testing.T) {
	defer glog.Flush()

	k := NewTestKubernetesExecutor()
	k.suicideTimeout = 50 * time.Millisecond

	jumps := uint32(0)
	tracker := &suicideTracker{suicideWatcher: k.suicideWatch, jumps: &jumps}
	k.suicideWatch = tracker

	k.registry.bind("foo", &api.Pod{}) // prevent suicide attempts from succeeding

	// call reset with a nil timer
	glog.Infoln("Resetting suicide watch with 1 task")
	select {
	case <-k.resetSuicideWatch(nil):
		tracker = k.suicideWatch.(*suicideTracker)
		if tracker.stops != 1 {
			t.Fatalf("expected suicide attempt to Stop() since there are registered tasks")
		}
		if tracker.resets != 0 {
			t.Fatalf("expected no resets since there are registered tasks")
		}
		if tracker.timers != 0 {
			t.Fatalf("expected no timers since there are registered tasks")
		}
	case <-time.After(1 * time.Second):
		t.Fatalf("initial suicide watch setup failed")
	}

	k.registry.Remove("foo") // zero remaining tasks
	k.suicideTimeout = 1500 * time.Millisecond
	suicideStart := time.Now()

	// reset the suicide watch, which should actually start a timer now
	glog.Infoln("Resetting suicide watch with 0 tasks")
	select {
	case <-k.resetSuicideWatch(nil):
		tracker = k.suicideWatch.(*suicideTracker)
		if tracker.stops != 1 {
			t.Fatalf("did not expect suicide attempt to Stop() since there are no registered tasks")
		}
		if tracker.resets != 1 {
			t.Fatalf("expected 1 reset instead of %d", tracker.resets)
		}
		if tracker.timers != 1 {
			t.Fatalf("expected 1 timer instead of %d", tracker.timers)
		}
	case <-time.After(1 * time.Second):
		t.Fatalf("2nd suicide watch setup failed")
	}

	k.lock.Lock()
	k.registry.bind("foo", &api.Pod{}) // prevent suicide attempts from succeeding
	k.lock.Unlock()

	// reset the suicide watch, which should stop the existing timer
	glog.Infoln("Resetting suicide watch with 1 task")
	select {
	case <-k.resetSuicideWatch(nil):
		tracker = k.suicideWatch.(*suicideTracker)
		if tracker.stops != 2 {
			t.Fatalf("expected 2 stops instead of %d since there are registered tasks", tracker.stops)
		}
		if tracker.resets != 1 {
			t.Fatalf("expected 1 reset instead of %d", tracker.resets)
		}
		if tracker.timers != 1 {
			t.Fatalf("expected 1 timer instead of %d", tracker.timers)
		}
	case <-time.After(1 * time.Second):
		t.Fatalf("3rd suicide watch setup failed")
	}

	k.lock.Lock()
	k.registry.Remove("foo") // allow suicide attempts to schedule
	k.lock.Unlock()

	// reset the suicide watch, which should reset a stopped timer
	glog.Infoln("Resetting suicide watch with 0 tasks")
	select {
	case <-k.resetSuicideWatch(nil):
		tracker = k.suicideWatch.(*suicideTracker)
		if tracker.stops != 2 {
			t.Fatalf("expected 2 stops instead of %d since there are no registered tasks", tracker.stops)
		}
		if tracker.resets != 2 {
			t.Fatalf("expected 2 resets instead of %d", tracker.resets)
		}
		if tracker.timers != 1 {
			t.Fatalf("expected 1 timer instead of %d", tracker.timers)
		}
	case <-time.After(1 * time.Second):
		t.Fatalf("4th suicide watch setup failed")
	}

	sinceWatch := time.Since(suicideStart)
	time.Sleep(3*time.Second - sinceWatch) // give the first timer a chance to misfire (it shouldn't, since Stop() was called)

	if j := atomic.LoadUint32(&jumps); j != 1 {
		t.Fatalf("expected 1 jump instead of %d since stop was called", j)
	} else {
		glog.Infoln("Jumps verified") // glog so we get a timestamp
	}
}
@ -1,150 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package executor

import (
	"sync"
	"time"

	log "github.com/golang/glog"
)

type (
	// watchFilter filters registration events; return false to abort further processing of the event
	watchFilter func(pod *PodEvent) (accept bool)

	watchExpiration struct {
		// timeout closes when the handler has expired; it delivers at most one Time.
		timeout <-chan time.Time

		// onEvent is an optional callback that is invoked if/when the expired chan
		// closes
		onEvent func(taskID string)
	}

	watchHandler struct {
		// prevent callbacks from being invoked simultaneously
		sync.Mutex

		// onEvent handles registration events; return true to indicate the handler should be
		// de-registered upon completion. If pod is nil then the associated handler
		// has expired.
		onEvent func(pod *PodEvent) (done bool, err error)

		// expiration is an optional configuration that indicates when a handler should
		// be considered to have expired, and what action to take upon expiration
		expiration watchExpiration
	}

	// watcher observes PodEvent events and conditionally executes handlers that
	// have been associated with the taskID of the PodEvent.
	watcher struct {
		updates  <-chan *PodEvent
		rw       sync.RWMutex
		handlers map[string]*watchHandler
		filters  []watchFilter
		runOnce  chan struct{}
	}
)

func newWatcher(updates <-chan *PodEvent) *watcher {
	return &watcher{
		updates:  updates,
		handlers: make(map[string]*watchHandler),
		runOnce:  make(chan struct{}),
	}
}

func (pw *watcher) run() {
	select {
	case <-pw.runOnce:
		log.Error("run() has already been invoked for this pod-watcher")
		return
	default:
		close(pw.runOnce)
	}
updateLoop:
	for u := range pw.updates {
		log.V(2).Info("filtering " + u.FormatShort())
		for _, f := range pw.filters {
			if !f(u) {
				continue updateLoop
			}
		}
		log.V(1).Info("handling " + u.FormatShort())
		h, ok := func() (h *watchHandler, ok bool) {
			pw.rw.RLock()
			defer pw.rw.RUnlock()
			h, ok = pw.handlers[u.taskID]
			return
		}()
		if ok {
			log.V(1).Info("executing action for " + u.FormatShort())
			done, err := func() (bool, error) {
				h.Lock()
				defer h.Unlock()
				return h.onEvent(u)
			}()
			if err != nil {
				log.Error(err)
			}
			if done {
				// de-register the handler upon successful completion of the action
				log.V(1).Info("de-registering handler for " + u.FormatShort())
				func() {
					pw.rw.Lock()
					delete(pw.handlers, u.taskID)
					pw.rw.Unlock()
				}()
			}
		}
	}
}

func (pw *watcher) addFilter(f watchFilter) {
	select {
	case <-pw.runOnce:
		log.Errorf("failed to add filter because pod-watcher is already running")
	default:
		pw.filters = append(pw.filters, f)
	}
}

// forTask associates a handler `h` with the given taskID.
func (pw *watcher) forTask(taskID string, h *watchHandler) {
	pw.rw.Lock()
	pw.handlers[taskID] = h
	pw.rw.Unlock()

	if exp := h.expiration; exp.timeout != nil {
		go func() {
			<-exp.timeout
			log.V(1).Infof("expiring handler for task %v", taskID)

			// de-register the handler upon expiration
			pw.rw.Lock()
			delete(pw.handlers, taskID)
			pw.rw.Unlock()

			if exp.onEvent != nil {
				h.Lock()
				defer h.Unlock()
				exp.onEvent(taskID)
			}
		}()
	}
}
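The watcher above maintains a handler map guarded by an RWMutex, dropping a handler either when its callback reports done or when an expiration deadline fires. A compact sketch of the expiring-handler-registry idea, with types simplified (this is not the real executor API):

package main

import (
	"fmt"
	"sync"
	"time"
)

type registry struct {
	mu       sync.RWMutex
	handlers map[string]func(event string)
}

// forTask registers a handler and arranges for it to be dropped
// automatically once its time-to-live elapses.
func (r *registry) forTask(taskID string, h func(string), ttl time.Duration) {
	r.mu.Lock()
	r.handlers[taskID] = h
	r.mu.Unlock()

	time.AfterFunc(ttl, func() {
		r.mu.Lock()
		delete(r.handlers, taskID)
		r.mu.Unlock()
	})
}

func (r *registry) dispatch(taskID, event string) {
	r.mu.RLock()
	h, ok := r.handlers[taskID]
	r.mu.RUnlock()
	if ok {
		h(event)
	}
}

func main() {
	r := &registry{handlers: map[string]func(string){}}
	r.forTask("task-1", func(e string) { fmt.Println("got", e) }, 50*time.Millisecond)
	r.dispatch("task-1", "running") // handled
	time.Sleep(80 * time.Millisecond)
	r.dispatch("task-1", "finished") // handler expired; ignored
}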
@ -1,47 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package flagutil

import (
	"flag"

	// kubelet attempts to customize default values for some cadvisor flags, so
	// make sure that we pick these up.
	_ "k8s.io/kubernetes/pkg/kubelet/cadvisor"
)

// FlagFunc retrieves a specific flag instance; returns nil if the flag is not configured.
type FlagFunc func() *flag.Flag

// NameValue returns the name and value of a flag, if it exists, otherwise empty strings.
func (ff FlagFunc) NameValue() (name, value string) {
	if f := ff(); f != nil {
		name, value = f.Name, f.Value.String()
	}
	return
}

func flagFunc(name string) FlagFunc { return func() *flag.Flag { return flag.Lookup(name) } }

// Cadvisor fields return the configured values of cadvisor global flags
var Cadvisor = struct {
	HousekeepingInterval       FlagFunc
	GlobalHousekeepingInterval FlagFunc
}{
	flagFunc("housekeeping_interval"),
	flagFunc("global_housekeeping_interval"),
}
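FlagFunc above is essentially a deferred flag.Lookup: the lookup runs only when the value is needed, after the flag set has been parsed. A minimal sketch with the standard flag package (the flag name is illustrative):

package main

import (
	"flag"
	"fmt"
)

var _ = flag.Duration("housekeeping_interval", 0, "demo flag")

// lookup defers flag.Lookup until call time, after parsing has happened.
func lookup(name string) func() *flag.Flag {
	return func() *flag.Flag { return flag.Lookup(name) }
}

func main() {
	ff := lookup("housekeeping_interval")
	flag.CommandLine.Parse([]string{"-housekeeping_interval=10s"})
	if f := ff(); f != nil {
		fmt.Println(f.Name, "=", f.Value.String()) // housekeeping_interval = 10s
	}
}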
@ -1,24 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package flagutil

import (
	// TODO(jdef) kill this once cadvisor flags are no longer configured by
	// global variables. Importing it this way guarantees that the global flag
	// variables are initialized.
	_ "github.com/google/cadvisor/manager"
)
@ -1,21 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package hyperkube facilitates the combination of multiple
// kubernetes-mesos components into a single binary form, providing a
// simple mechanism for intra-component discovery as per the original
// Kubernetes hyperkube package.
package hyperkube // import "k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
@ -1,26 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package hyperkube

const (
	CommandApiserver         = "apiserver"
	CommandControllerManager = "controller-manager"
	CommandExecutor          = "executor"
	CommandMinion            = "minion"
	CommandProxy             = "proxy"
	CommandScheduler         = "scheduler"
)
@ -1,54 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package hyperkube

import (
	"github.com/spf13/pflag"
)

var (
	nilKube = &nilKubeType{}
)

type Interface interface {
	// FindServer will find a specific server named name.
	FindServer(name string) bool

	// The executable name, used for help and soft-link invocation
	Name() string

	// Flags returns a flagset for "global" flags.
	Flags() *pflag.FlagSet
}

type nilKubeType struct{}

func (n *nilKubeType) FindServer(_ string) bool {
	return false
}

func (n *nilKubeType) Name() string {
	return ""
}

func (n *nilKubeType) Flags() *pflag.FlagSet {
	return nil
}

func Nil() Interface {
	return nilKube
}
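nilKubeType above is a null object: it satisfies Interface with inert behavior so callers never need a nil check before calling through it. A tiny sketch of the pattern (Finder is a hypothetical stand-in interface):

package main

import "fmt"

type Finder interface {
	FindServer(name string) bool
	Name() string
}

type nilFinder struct{}

func (nilFinder) FindServer(string) bool { return false }
func (nilFinder) Name() string           { return "" }

// Nil returns a shared, inert implementation.
func Nil() Finder { return nilFinder{} }

func main() {
	var f Finder = Nil() // safe default; no nil-pointer hazards
	fmt.Println(f.FindServer("apiserver")) // false
}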
@ -1,33 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package config

import (
	"k8s.io/kubernetes/pkg/api/resource"
)

const (
	DefaultLogMaxBackups   = 5 // how many backups to keep
	DefaultLogMaxAgeInDays = 7 // after how many days to rotate at most

	DefaultCgroupPrefix = "mesos"
)

// DefaultLogMaxSize returns the maximal log file size before rotation.
func DefaultLogMaxSize() resource.Quantity {
	return *resource.NewQuantity(10*1024*1024, resource.BinarySI)
}
@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package config contains minion configuration constants.
package config // import "k8s.io/kubernetes/contrib/mesos/pkg/minion/config"
@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package minion contains the executor and proxy bootstrap code for a Mesos slave.
package minion // import "k8s.io/kubernetes/contrib/mesos/pkg/minion"
@ -1,25 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package minion

import (
	log "github.com/golang/glog"
)

func enterPrivateMountNamespace() {
	log.Info("Skipping mount namespace, only available on Linux")
}
@ -1,55 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package minion

import (
	"syscall"

	log "github.com/golang/glog"
)

// enterPrivateMountNamespace does just that: the current mount ns is unshared (isolated)
// and then made a slave to the root mount / of the parent mount ns (mount events from /
// or its children that happen in the parent NS propagate to us).
//
// this is not yet compatible with volume plugins as implemented by the kubelet, which
// depends on using host-volume args to 'docker run' to attach plugin volumes to CT's
// at runtime. as such, docker needs to be able to see the volumes mounted by k8s plugins,
// which is impossible if k8s volume plugins are running in an isolated mount ns.
//
// an alternative approach would be to always run the kubelet in the host's mount-ns and
// rely upon mesos to forcibly umount bindings in the task sandbox before rmdir'ing it:
// https://issues.apache.org/jira/browse/MESOS-349.
//
// use at your own risk.
func enterPrivateMountNamespace() {
	log.Warningln("EXPERIMENTAL FEATURE: entering private mount ns")

	// enter a new mount NS, useful for isolating changes to the mount table
	// that are made by the kubelet for storage volumes.
	err := syscall.Unshare(syscall.CLONE_NEWNS)
	if err != nil {
		log.Fatalf("failed to enter private mount NS: %v", err)
	}

	// make the rootfs / rslave to the parent mount NS so that we
	// pick up on any changes made there
	err = syscall.Mount("", "/", "dontcare", syscall.MS_REC|syscall.MS_SLAVE, "")
	if err != nil {
		log.Fatalf("failed to mark / rslave: %v", err)
	}
}
@ -1,381 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package minion

import (
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/signal"
	"path"
	"strconv"
	"strings"
	"syscall"

	log "github.com/golang/glog"
	"github.com/kardianos/osext"
	"github.com/spf13/pflag"
	"gopkg.in/natefinch/lumberjack.v2"
	kubeletapp "k8s.io/kubernetes/cmd/kubelet/app"
	exservice "k8s.io/kubernetes/contrib/mesos/pkg/executor/service"
	"k8s.io/kubernetes/contrib/mesos/pkg/flagutil"
	"k8s.io/kubernetes/contrib/mesos/pkg/hyperkube"
	"k8s.io/kubernetes/contrib/mesos/pkg/minion/config"
	"k8s.io/kubernetes/contrib/mesos/pkg/minion/tasks"
	"k8s.io/kubernetes/pkg/api/resource"
	"k8s.io/kubernetes/pkg/client/restclient"
)

const (
	proxyLogFilename    = "proxy.log"
	executorLogFilename = "executor.log"
)

type MinionServer struct {
	// embed the executor server to be able to use its flags
	// TODO(sttts): get rid of this mixing of the minion and the executor server with a multiflags implementation for km
	KubeletExecutorServer *exservice.KubeletExecutorServer

	privateMountNS bool
	hks            hyperkube.Interface
	clientConfig   *restclient.Config
	kmBinary       string
	tasks          []*tasks.Task

	pathOverride        string // the PATH environment for the sub-processes
	cgroupPrefix        string // e.g. mesos
	cgroupRoot          string // the cgroupRoot that we pass to the kubelet-executor, depends on containPodResources
	mesosCgroup         string // discovered mesos cgroup root, e.g. /mesos/{container-id}
	containPodResources bool

	logMaxSize      resource.Quantity
	logMaxBackups   int
	logMaxAgeInDays int
	logVerbosity    int32 // see glog.Level

	runProxy                       bool
	proxyKubeconfig                string
	proxyLogV                      int
	proxyBindall                   bool
	proxyMode                      string
	conntrackMax                   int
	conntrackTCPTimeoutEstablished int
}

// NewMinionServer creates the MinionServer struct with default values to be used by hyperkube
func NewMinionServer() *MinionServer {
	s := &MinionServer{
		KubeletExecutorServer: exservice.NewKubeletExecutorServer(),
		privateMountNS:        false, // disabled until Docker supports customization of the parent mount namespace
		cgroupPrefix:          config.DefaultCgroupPrefix,
		containPodResources:   true,
		logMaxSize:            config.DefaultLogMaxSize(),
		logMaxBackups:         config.DefaultLogMaxBackups,
		logMaxAgeInDays:       config.DefaultLogMaxAgeInDays,
		runProxy:              true,
		proxyMode:             "userspace", // upstream default is "iptables" post-v1.1
	}

	// cache this for later use
	binary, err := osext.Executable()
	if err != nil {
		log.Fatalf("failed to determine currently running executable: %v", err)
	}
	s.kmBinary = binary

	return s
}

// filterArgsByFlagSet returns a list of args which are parsed by the given flag set
// and another list with those which do not match
func filterArgsByFlagSet(args []string, flags *pflag.FlagSet) ([]string, []string) {
	matched := []string{}
	notMatched := []string{}
	for _, arg := range args {
		err := flags.Parse([]string{arg})
		if err != nil {
			notMatched = append(notMatched, arg)
		} else {
			matched = append(matched, arg)
		}
	}
	return matched, notMatched
}
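filterArgsByFlagSet above partitions a mixed argv between the minion server and the embedded executor server by test-parsing each argument. A self-contained sketch of the same partitioning (the flag names here are illustrative only):

package main

import (
	"fmt"
	"io/ioutil"

	"github.com/spf13/pflag"
)

func filterArgs(args []string, flags *pflag.FlagSet) (matched, notMatched []string) {
	for _, arg := range args {
		// an arg the flag set cannot parse belongs to the other server
		if err := flags.Parse([]string{arg}); err != nil {
			notMatched = append(notMatched, arg)
		} else {
			matched = append(matched, arg)
		}
	}
	return
}

func main() {
	fs := pflag.NewFlagSet("executor", pflag.ContinueOnError)
	fs.SetOutput(ioutil.Discard) // keep parse errors quiet, as above
	fs.String("suicide-timeout", "", "")
	m, n := filterArgs([]string{"--suicide-timeout=5m", "--run-proxy=true"}, fs)
	fmt.Println(m) // [--suicide-timeout=5m]
	fmt.Println(n) // [--run-proxy=true]
}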
func findMesosCgroup(prefix string) (cgroupPath string, containerID string) {
	// derive our cgroup from the MESOS_DIRECTORY environment variable
	mesosDir := os.Getenv("MESOS_DIRECTORY")
	if mesosDir == "" {
		log.V(2).Infof("cannot derive executor's cgroup because MESOS_DIRECTORY is empty")
		return
	}

	containerID = path.Base(mesosDir)
	if containerID == "" {
		log.V(2).Infof("cannot derive executor's cgroup from MESOS_DIRECTORY=%q", mesosDir)
		return
	}

	cgroupPath = path.Join("/", prefix, containerID)
	return
}

func (ms *MinionServer) launchProxyServer() {
	bindAddress := "0.0.0.0"
	if !ms.proxyBindall {
		bindAddress = ms.KubeletExecutorServer.Address
	}
	args := []string{
		fmt.Sprintf("--bind-address=%s", bindAddress),
		fmt.Sprintf("--v=%d", ms.proxyLogV),
		"--logtostderr=true",
		// TODO(jdef) resource-container is going away completely at some point, but
		// we need to override it here to disable the current default behavior
		"--resource-container=", // disable this; mesos slave doesn't like sub-containers yet
		"--proxy-mode=" + ms.proxyMode,
		"--conntrack-max=" + strconv.Itoa(ms.conntrackMax),
		"--conntrack-tcp-timeout-established=" + strconv.Itoa(ms.conntrackTCPTimeoutEstablished),
	}
	if ms.proxyKubeconfig != "" {
		args = append(args, fmt.Sprintf("--kubeconfig=%s", ms.proxyKubeconfig))
	}
	if ms.clientConfig.Host != "" {
		args = append(args, fmt.Sprintf("--master=%s", ms.clientConfig.Host))
	}
	if ms.KubeletExecutorServer.HostnameOverride != "" {
		args = append(args, fmt.Sprintf("--hostname-override=%s", ms.KubeletExecutorServer.HostnameOverride))
	}

	ms.launchHyperkubeServer(hyperkube.CommandProxy, args, proxyLogFilename)
}

// launchExecutorServer returns a chan that closes upon kubelet-executor death. since the kubelet-
// executor doesn't support failover right now, the right thing to do is to fail completely since all
// pods will be lost upon restart and we want mesos to recover the resources from them.
func (ms *MinionServer) launchExecutorServer(containerID string) <-chan struct{} {
	allArgs := os.Args[2:]

	// filter out minion flags, leaving those for the executor
	executorFlags := pflag.NewFlagSet("executor", pflag.ContinueOnError)
	executorFlags.SetOutput(ioutil.Discard)
	ms.AddExecutorFlags(executorFlags)
	executorArgs, _ := filterArgsByFlagSet(allArgs, executorFlags)

	// disable resource-container; mesos slave doesn't like sub-containers yet
	executorArgs = append(executorArgs, "--kubelet-cgroups=")

	appendOptional := func(name, value string) {
		if value != "" {
			executorArgs = append(executorArgs, "--"+name+"="+value)
		}
	}
	appendOptional("cgroup-root", ms.cgroupRoot)

	// forward global cadvisor flag values to the executor
	// TODO(jdef) remove this code once cadvisor global flags have been cleaned up
	appendOptional(flagutil.Cadvisor.HousekeepingInterval.NameValue())
	appendOptional(flagutil.Cadvisor.GlobalHousekeepingInterval.NameValue())

	// forward containerID so that the executor may pass it along to containers that it launches
	var ctidOpt tasks.Option
	ctidOpt = func(t *tasks.Task) tasks.Option {
		oldenv := t.Env[:]
		t.Env = append(t.Env, "MESOS_EXECUTOR_CONTAINER_UUID="+containerID)
		return func(t2 *tasks.Task) tasks.Option {
			t2.Env = oldenv
			return ctidOpt
		}
	}

	// run the executor and quit the minion server when it exits cleanly
	execDied := make(chan struct{})
	ms.launchHyperkubeServer(hyperkube.CommandExecutor, executorArgs, executorLogFilename, tasks.NoRespawn(execDied), ctidOpt)
	return execDied
}

func (ms *MinionServer) launchHyperkubeServer(server string, args []string, logFileName string, options ...tasks.Option) {
	log.V(2).Infof("Spawning hyperkube %v with args '%+v'", server, args)

	kmArgs := append([]string{server}, args...)
	maxSize := ms.logMaxSize.Value()
	if maxSize > 0 {
		// convert to MB
		maxSize = maxSize / 1024 / 1024
		if maxSize == 0 {
			log.Warning("maximal log file size is rounded to 1 MB")
			maxSize = 1
		}
	}

	writerFunc := func() io.WriteCloser {
		return &lumberjack.Logger{
			Filename:   logFileName,
			MaxSize:    int(maxSize),
			MaxBackups: ms.logMaxBackups,
			MaxAge:     ms.logMaxAgeInDays,
		}
	}

	// prepend env, allow later options to customize further
	options = append([]tasks.Option{tasks.Environment(os.Environ()), ms.applyPathOverride()}, options...)

	t := tasks.New(server, ms.kmBinary, kmArgs, writerFunc, options...)
	go t.Start()
	ms.tasks = append(ms.tasks, t)
}

// applyPathOverride overrides PATH and also adds $SANDBOX/bin (needed for locating bundled binary deps
// as well as external deps like iptables)
func (ms *MinionServer) applyPathOverride() tasks.Option {
	return func(t *tasks.Task) tasks.Option {
		kmEnv := make([]string, 0, len(t.Env))
		for _, e := range t.Env {
			if !strings.HasPrefix(e, "PATH=") {
				kmEnv = append(kmEnv, e)
			} else {
				if ms.pathOverride != "" {
					e = "PATH=" + ms.pathOverride
				}
				pwd, err := os.Getwd()
				if err != nil {
					panic(fmt.Errorf("cannot get current directory: %v", err))
				}
				kmEnv = append(kmEnv, fmt.Sprintf("%s:%s", e, path.Join(pwd, "bin")))
			}
		}
		oldenv := t.Env
		t.Env = kmEnv
		return tasks.Environment(oldenv)
	}
}
// runs the main kubelet loop, closing the kubeletFinished chan when the loop exits.
|
||||
// never returns.
|
||||
func (ms *MinionServer) Run(hks hyperkube.Interface, _ []string) error {
|
||||
if ms.privateMountNS {
|
||||
// only the Linux version will do anything
|
||||
enterPrivateMountNamespace()
|
||||
}
|
||||
|
||||
// create apiserver client
|
||||
clientConfig, err := kubeletapp.CreateAPIServerClientConfig(ms.KubeletExecutorServer.KubeletServer)
|
||||
if err != nil {
|
||||
// required for k8sm since we need to send api.Binding information
|
||||
// back to the apiserver
|
||||
log.Fatalf("No API client: %v", err)
|
||||
}
|
||||
ms.clientConfig = clientConfig
|
||||
|
||||
// derive the executor cgroup and use it as:
|
||||
// - pod container cgroup root (e.g. docker cgroup-parent, optionally; see comments below)
|
||||
// - parent of kubelet container
|
||||
// - parent of kube-proxy container
|
||||
containerID := ""
|
||||
ms.mesosCgroup, containerID = findMesosCgroup(ms.cgroupPrefix)
|
||||
log.Infof("discovered mesos cgroup at %q", ms.mesosCgroup)
|
||||
|
||||
// hack alert, this helps to work around systemd+docker+mesos integration problems
|
||||
// when docker's cgroup-parent flag is used (!containPodResources = don't use the docker flag)
|
||||
if ms.containPodResources {
|
||||
ms.cgroupRoot = ms.mesosCgroup
|
||||
}
|
||||
|
||||
cgroupLogger := log.Infof
|
||||
if ms.cgroupRoot == "" {
|
||||
cgroupLogger = log.Warningf
|
||||
}
|
||||
|
||||
cgroupLogger("using cgroup-root %q", ms.cgroupRoot)
|
||||
|
||||
// run subprocesses until ms.done is closed on return of this function
|
||||
if ms.runProxy {
|
||||
ms.launchProxyServer()
|
||||
}
|
||||
|
||||
// abort closes when the kubelet-executor dies
|
||||
abort := ms.launchExecutorServer(containerID)
|
||||
shouldQuit := termSignalListener(abort)
|
||||
te := tasks.MergeOutput(ms.tasks, shouldQuit)
|
||||
|
||||
// TODO(jdef) do something fun here, such as reporting task completion to the apiserver
|
||||
|
||||
<-te.Close().Done() // we don't listen for any specific events yet; wait for all tasks to finish
|
||||
return nil
|
||||
}
|
||||
|
||||
// termSignalListener returns a signal chan that closes when either (a) the process receives a termination
|
||||
// signal: SIGTERM, SIGINT, or SIGHUP; or (b) the abort chan closes.
|
||||
func termSignalListener(abort <-chan struct{}) <-chan struct{} {
|
||||
shouldQuit := make(chan struct{})
|
||||
sigCh := make(chan os.Signal, 1)
|
||||
signal.Notify(sigCh)
|
||||
|
||||
go func() {
|
||||
defer close(shouldQuit)
|
||||
for {
|
||||
select {
|
||||
case <-abort:
|
||||
log.Infof("executor died, aborting")
|
||||
return
|
||||
case s, ok := <-sigCh:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
switch s {
|
||||
case os.Interrupt, os.Signal(syscall.SIGTERM), os.Signal(syscall.SIGINT), os.Signal(syscall.SIGHUP):
|
||||
log.Infof("received signal %q, aborting", s)
|
||||
return
|
||||
case os.Signal(syscall.SIGCHLD): // who cares?
|
||||
default:
|
||||
log.Errorf("unexpected signal: %T %#v", s, s)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}()
|
||||
return shouldQuit
|
||||
}
|
||||
|
||||
func (ms *MinionServer) AddExecutorFlags(fs *pflag.FlagSet) {
|
||||
ms.KubeletExecutorServer.AddFlags(fs)
|
||||
|
||||
// hack to forward log verbosity flag to the executor
|
||||
fs.Int32Var(&ms.logVerbosity, "v", ms.logVerbosity, "log level for V logs")
|
||||
}
|
||||
|
||||
func (ms *MinionServer) AddMinionFlags(fs *pflag.FlagSet) {
|
||||
// general minion flags
|
||||
fs.StringVar(&ms.cgroupPrefix, "mesos-cgroup-prefix", ms.cgroupPrefix, "The cgroup prefix concatenated with MESOS_DIRECTORY must give the executor cgroup set by Mesos")
|
||||
fs.BoolVar(&ms.privateMountNS, "private-mountns", ms.privateMountNS, "Enter a private mount NS before spawning procs (linux only). Experimental, not yet compatible with k8s volumes.")
|
||||
fs.StringVar(&ms.pathOverride, "path-override", ms.pathOverride, "Override the PATH in the environment of the sub-processes.")
|
||||
fs.BoolVar(&ms.containPodResources, "contain-pod-resources", ms.containPodResources, "Allocate pod CPU and memory resources from offers and reparent pod containers into mesos cgroups; disable if you're having strange mesos/docker/systemd interactions.")
|
||||
|
||||
// log file flags
|
||||
fs.Var(resource.NewQuantityFlagValue(&ms.logMaxSize), "max-log-size", "Maximum log file size for the executor and proxy before rotation")
|
||||
fs.IntVar(&ms.logMaxAgeInDays, "max-log-age", ms.logMaxAgeInDays, "Maximum log file age of the executor and proxy in days")
|
||||
fs.IntVar(&ms.logMaxBackups, "max-log-backups", ms.logMaxBackups, "Maximum log file backups of the executor and proxy to keep after rotation")
|
||||
|
||||
// proxy flags
|
||||
fs.BoolVar(&ms.runProxy, "run-proxy", ms.runProxy, "Maintain a running kube-proxy instance as a child proc of this kubelet-executor.")
|
||||
fs.StringVar(&ms.proxyKubeconfig, "proxy-kubeconfig", ms.proxyKubeconfig, "Path to kubeconfig file used by the child kube-proxy.")
|
||||
fs.IntVar(&ms.proxyLogV, "proxy-logv", ms.proxyLogV, "Log verbosity of the child kube-proxy.")
|
||||
fs.BoolVar(&ms.proxyBindall, "proxy-bindall", ms.proxyBindall, "When true will cause kube-proxy to bind to 0.0.0.0.")
|
||||
fs.StringVar(&ms.proxyMode, "proxy-mode", ms.proxyMode, "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
|
||||
fs.IntVar(&ms.conntrackMax, "conntrack-max", ms.conntrackMax, "Maximum number of NAT connections to track on agent nodes (0 to leave as-is)")
|
||||
fs.IntVar(&ms.conntrackTCPTimeoutEstablished, "conntrack-tcp-timeout-established", ms.conntrackTCPTimeoutEstablished, "Idle timeout for established TCP connections on agent nodes (0 to leave as-is)")
|
||||
}
|
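For illustration, a minimal runnable sketch of the PATH rewrite that applyPathOverride performs; the override value and sandbox directory below are hypothetical, not taken from this patch:

package main

import (
	"fmt"
	"path"
)

func main() {
	pathOverride := "/opt/bin"      // assumed --path-override value
	pwd := "/var/lib/mesos/sandbox" // assumed sandbox working directory
	e := "PATH=" + pathOverride     // the override replaces the inherited PATH entry
	// $PWD/bin is appended so bundled binaries (and deps like iptables) resolve from the sandbox:
	fmt.Println(fmt.Sprintf("%s:%s", e, path.Join(pwd, "bin")))
	// prints: PATH=/opt/bin:/var/lib/mesos/sandbox/bin
}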
@ -1,20 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package tasks provides an API for supervising system processes as Tasks.
// It provides stronger guarantees with respect to process lifecycle than a
// standalone kubelet running static pods.
package tasks // import "k8s.io/kubernetes/contrib/mesos/pkg/minion/tasks"
@ -1,98 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tasks

type Events interface {
	// Close stops delivery of events on the completion channel; callers must invoke
	// Close when they no longer intend to read from Completion()
	Close() Events

	// Completion reports Completion events as they happen
	Completion() <-chan *Completion

	// Done returns a signal chan that closes when all tasks have completed and there are no more events to deliver
	Done() <-chan struct{}
}

type eventsImpl struct {
	tc             chan *Completion
	stopForwarding chan struct{}
	done           <-chan struct{}
}

func newEventsImpl(tcin <-chan *Completion, done <-chan struct{}) *eventsImpl {
	ei := &eventsImpl{
		tc:             make(chan *Completion),
		stopForwarding: make(chan struct{}),
		done:           done,
	}
	go func() {
		defer close(ei.tc)
		forwardCompletionUntil(tcin, ei.tc, ei.stopForwarding, done, nil)
	}()
	return ei
}

func (e *eventsImpl) Close() Events                  { close(e.stopForwarding); return e }
func (e *eventsImpl) Completion() <-chan *Completion { return e.tc }
func (e *eventsImpl) Done() <-chan struct{}          { return e.done }

// forwardCompletionUntil is a generic pipe that forwards objects between channels.
// if discard is closed, objects are silently dropped.
// if tap != nil then it's invoked for each object as it's read from tin, but before it's written to tch.
// returns when either reading from tin completes (no more objects, and it is closed), or else
// abort is closed, whichever happens first.
func forwardCompletionUntil(tin <-chan *Completion, tch chan<- *Completion, discard <-chan struct{}, abort <-chan struct{}, tap func(*Completion, bool)) {
	var tc *Completion
	var ok bool
forwardLoop:
	for {
		select {
		case tc, ok = <-tin:
			if !ok {
				return
			}
			if tap != nil {
				tap(tc, false)
			}
			select {
			case <-abort:
				break forwardLoop
			case <-discard:
			case tch <- tc:
			}
		case <-abort:
			// best effort
			select {
			case tc, ok = <-tin:
				if ok {
					if tap != nil {
						tap(tc, true)
					}
					break forwardLoop
				}
			default:
			}
			return
		}
	}
	// best effort
	select {
	case tch <- tc:
	case <-discard:
	default:
	}
}
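A minimal consumer sketch for this Events API; the task slice and quit channel are assumptions about the caller, and MergeOutput is defined in the next file:

func consume(myTasks []*Task, shouldQuit <-chan struct{}) {
	ev := MergeOutput(myTasks, shouldQuit)
	// drain completions until the forwarding goroutine closes the channel
	for tc := range ev.Completion() {
		log.Infof("observed completion: %+v", tc)
	}
	ev.Close()  // stop delivery once we're done reading
	<-ev.Done() // wait until all tasks have terminated
}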
@ -1,431 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tasks

import (
	"fmt"
	"io"
	"io/ioutil"
	"os/exec"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	log "github.com/golang/glog"
	"k8s.io/kubernetes/contrib/mesos/pkg/runtime"
)

const (
	defaultTaskRestartDelay = 5 * time.Second

	// TODO(jdef) there's no easy way for us to discover the grace period that we actually
	// have, from mesos: it's simply a missing core feature. there's a MESOS-xyz ticket for
	// this somewhere. if it was discoverable then we could come up with a better strategy.
	// there are some comments in the executor regarding this as well (because there we're
	// concerned about cleaning up pods within the grace period). we could pick some
	// higher (arbitrary) value but without knowing when the slave will forcibly kill us
	// it seems a somewhat futile exercise.
	defaultKillGracePeriod = 5 * time.Second
)

// Completion represents the termination of a Task process. Each process execution should
// yield (barring drops because of an abort signal) exactly one Completion.
type Completion struct {
	name string // name of the task
	code int    // exit code that the task process completed with
	err  error  // process management errors are reported here
}

// systemProcess is a useful abstraction for testing
type systemProcess interface {
	// Wait works like exec.Cmd.Wait()
	Wait() error

	// Kill returns the pid of the process that was killed
	Kill(force bool) (int, error)
}

type cmdProcess struct {
	delegate *exec.Cmd
}

func (cp *cmdProcess) Wait() error {
	return cp.delegate.Wait()
}

func (cp *cmdProcess) Kill(force bool) (int, error) {
	// kill the entire process group, not just the one process
	pid := cp.delegate.Process.Pid
	processGroup := 0 - pid

	// we send a SIGTERM here for a graceful stop. users of this package should
	// wait for tasks to complete normally. as a fallback/safeguard, child procs
	// are spawned in notStartedTask to receive a SIGKILL when this process dies.
	sig := syscall.SIGTERM
	if force {
		sig = syscall.SIGKILL
	}
	rc := syscall.Kill(processGroup, sig)
	return pid, rc
}

// Task is a specification for running a system process; it provides hooks for customizing
// logging and restart handling as well as event channels for communicating process
// termination and errors related to process management.
type Task struct {
	Env          []string                   // optional: process environment override
	Finished     func(restarting bool) bool // callback invoked when a task process has completed; if `restarting` then it will be restarted if it returns true
	RestartDelay time.Duration              // interval between repeated task restarts

	name         string                // required: unique name for this task
	bin          string                // required: path to executable
	args         []string              // optional: process arguments
	createLogger func() io.WriteCloser // factory func that builds a log writer
	cmd          systemProcess         // process that we started
	completedCh  chan *Completion      // reports exit codes encountered when task processes exit, or errors during process management
	shouldQuit   chan struct{}         // shouldQuit is closed to indicate that the task should stop its running process, if any
	done         chan struct{}         // done closes when all processes related to the task have terminated
	initialState taskStateFn           // prepare and start a new live process, defaults to notStartedTask; should be set by run()
	runLatch     int32                 // guard against multiple Task.run calls
	killFunc     func(bool) (int, error)
}

// New builds a newly initialized task object but does not start any processes for it. callers
// are expected to invoke Start() on their own.
func New(name, bin string, args []string, cl func() io.WriteCloser, options ...Option) *Task {
	t := &Task{
		name:         name,
		bin:          bin,
		args:         args,
		createLogger: cl,
		completedCh:  make(chan *Completion),
		shouldQuit:   make(chan struct{}),
		done:         make(chan struct{}),
		RestartDelay: defaultTaskRestartDelay,
		Finished:     func(restarting bool) bool { return restarting },
	}
	t.killFunc = func(force bool) (int, error) { return t.cmd.Kill(force) }
	for _, opt := range options {
		opt(t)
	}
	return t
}

// Start spawns a goroutine to execute the Task. Panics if invoked more than once.
func (t *Task) Start() {
	go t.run(notStartedTask)
}

// run executes the state machine responsible for starting, monitoring, and possibly restarting
// a system process for the task. The initialState func is the entry point of the state machine.
// Upon returning the done and completedCh chans are all closed.
func (t *Task) run(initialState taskStateFn) {
	if !atomic.CompareAndSwapInt32(&t.runLatch, 0, 1) {
		panic("Task.run() may only be invoked once")
	}
	t.initialState = initialState

	defer close(t.done)
	defer close(t.completedCh)

	state := initialState
	for state != nil {
		next := state(t)
		state = next
	}
}

func (t *Task) tryComplete(tc *Completion) {
	select {
	case <-t.shouldQuit:
		// best effort
		select {
		case t.completedCh <- tc:
		default:
		}
	case t.completedCh <- tc:
	}
}

// tryError is a convenience func that invokes tryComplete with a completion error
func (t *Task) tryError(err error) {
	t.tryComplete(&Completion{err: err})
}

type taskStateFn func(*Task) taskStateFn

func taskShouldRestart(t *Task) taskStateFn {
	// make our best effort to stop here if signalled (shouldQuit). not doing so here
	// could add cost later (a process might be launched).

	// sleep for a bit; then return t.initialState
	tm := time.NewTimer(t.RestartDelay)
	defer tm.Stop()
	select {
	case <-tm.C:
		select {
		case <-t.shouldQuit:
		default:
			if t.Finished(true) {
				select {
				case <-t.shouldQuit:
					// the world has changed, die
					return nil
				default:
				}
				return t.initialState
			}
			// Finished call decided not to respawn, so die
			return nil
		}
	case <-t.shouldQuit:
	}

	// we're quitting, tell the Finished callback and then die
	t.Finished(false)
	return nil
}

func (t *Task) initLogging(r io.Reader) {
	writer := t.createLogger()
	go func() {
		defer writer.Close()
		_, err := io.Copy(writer, r)
		if err != nil && err != io.EOF {
			// using tryComplete is racy because the state machine closes completedCh and
			// so we don't want to attempt to write to a closed/closing chan. so
			// just log this for now.
			log.Errorf("logger for task %q crashed: %v", t.bin, err)
		}
	}()
}

// notStartedTask spawns the given task and transitions to a startedTask state
func notStartedTask(t *Task) taskStateFn {
	log.Infof("starting task process %q with args '%+v'", t.bin, t.args)

	// create command
	cmd := exec.Command(t.bin, t.args...)
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		t.tryError(fmt.Errorf("error getting stdout of %v: %v", t.name, err))
		return taskShouldRestart
	}
	go func() {
		defer stdout.Close()
		io.Copy(ioutil.Discard, stdout) // TODO(jdef) we might want to save this at some point
	}()
	stderrLogs, err := cmd.StderrPipe()
	if err != nil {
		t.tryError(fmt.Errorf("error getting stderr of %v: %v", t.name, err))
		return taskShouldRestart
	}

	t.initLogging(stderrLogs)
	if len(t.Env) > 0 {
		cmd.Env = t.Env
	}
	cmd.SysProcAttr = sysProcAttr()

	// last min check for shouldQuit here
	select {
	case <-t.shouldQuit:
		t.tryError(fmt.Errorf("task execution canceled, aborting process launch"))
		return taskShouldRestart
	default:
	}

	if err := cmd.Start(); err != nil {
		t.tryError(fmt.Errorf("failed to start task process %q: %v", t.bin, err))
		return taskShouldRestart
	}
	log.Infoln("task started", t.name)
	t.cmd = &cmdProcess{delegate: cmd}
	return taskRunning
}

type exitError interface {
	error

	// see os.ProcessState.Sys: returned value can be converted to something like syscall.WaitStatus
	Sys() interface{}
}

func taskRunning(t *Task) taskStateFn {
	// listen for normal process completion in a goroutine; don't block because we need to listen for shouldQuit
	waitCh := make(chan *Completion, 1)
	go func() {
		wr := &Completion{name: t.name}
		defer func() {
			waitCh <- wr
			close(waitCh)
		}()

		if err := t.cmd.Wait(); err != nil {
			if exitError, ok := err.(exitError); ok {
				if waitStatus, ok := exitError.Sys().(syscall.WaitStatus); ok {
					wr.code = waitStatus.ExitStatus()
					return
				}
			}
			wr.err = fmt.Errorf("task wait ended strangely for %q: %v", t.bin, err)
		}
	}()

	select {
	case <-t.shouldQuit:
		t.tryComplete(t.awaitDeath(&realTimer{}, defaultKillGracePeriod, waitCh))
	case wr := <-waitCh:
		t.tryComplete(wr)
	}
	return taskShouldRestart
}

// awaitDeath waits for the process to complete, or else for a "quit" signal on the task,
// at which point we'll attempt to kill it manually.
func (t *Task) awaitDeath(timer timer, gracePeriod time.Duration, waitCh <-chan *Completion) *Completion {
	defer timer.discard()

	select {
	case wr := <-waitCh:
		// got a signal to quit, but we're already finished
		return wr
	default:
	}

	forceKill := false
	wr := &Completion{name: t.name, err: fmt.Errorf("failed to kill process: %q", t.bin)}

	// the loop is here in case we receive a shouldQuit signal; we need to kill the task.
	// in this case, first send a SIGTERM (force=false) to the task and then wait for it
	// to die (within the gracePeriod). if it doesn't die, then we loop around, only this
	// time we'll send a SIGKILL (force=true) and wait for a reduced gracePeriod. There
	// does exist a slim chance that the underlying wait4() syscall won't complete before
	// this process dies, in which case a zombie will rise. Starting the mesos slave with
	// pid namespace isolation should mitigate this.
waitLoop:
	for i := 0; i < 2; i++ {
		log.Infof("killing %s (force=%t) : %s", t.name, forceKill, t.bin)
		pid, err := t.killFunc(forceKill)
		if err != nil {
			log.Warningf("failed to kill process: %q pid %d: %v", t.bin, pid, err)
			break waitLoop
		}

		// Wait for the kill to be processed, and child proc resources cleaned up; try to avoid zombies!
		timer.set(gracePeriod)
		select {
		case wr = <-waitCh:
			break waitLoop
		case <-timer.await():
			// want a timeout, but a shorter one than we used initially.
			// using /= 2 is deterministic and yields the desirable effect.
			gracePeriod /= 2
			forceKill = true
			continue waitLoop
		}
	}
	return wr
}

// forwardUntil forwards task process completion status and errors to the given output
// chan until either the task terminates or abort is closed.
func (t *Task) forwardUntil(tch chan<- *Completion, abort <-chan struct{}) {
	// merge task completion and error until we're told to die, then
	// tell the task to stop
	defer close(t.shouldQuit)
	forwardCompletionUntil(t.completedCh, tch, nil, abort, nil)
}

// MergeOutput waits for the given tasks to complete. meanwhile it logs each time a task
// process completes or generates an error. when shouldQuit closes, tasks are canceled and this
// func eventually returns once all ongoing event handlers have completed running.
func MergeOutput(tasks []*Task, shouldQuit <-chan struct{}) Events {
	tc := make(chan *Completion)

	var waitForTasks sync.WaitGroup
	waitForTasks.Add(len(tasks))

	for _, t := range tasks {
		t := t
		// translate task dead signal into Done
		go func() {
			<-t.done
			waitForTasks.Done()
		}()
		// fan-in task completion and error events to tc
		go t.forwardUntil(tc, shouldQuit)
	}

	tclistener := make(chan *Completion)
	done := runtime.After(func() {
		completionFinished := runtime.After(func() {
			defer close(tclistener)
			forwardCompletionUntil(tc, tclistener, nil, shouldQuit, func(tt *Completion, shutdown bool) {
				prefix := ""
				if shutdown {
					prefix = "(shutdown) "
				}
				log.Infof(prefix+"task %q exited with status %d", tt.name, tt.code)
			})
		})
		waitForTasks.Wait()
		close(tc)
		<-completionFinished
	})
	ei := newEventsImpl(tclistener, done)
	return ei
}

// Option is a functional option type for a Task that returns an "undo" Option upon modifying the Task
type Option func(*Task) Option

// NoRespawn configures the Task lifecycle such that it will not respawn upon termination
func NoRespawn(listener chan<- struct{}) Option {
	return func(t *Task) Option {
		finished, restartDelay := t.Finished, t.RestartDelay

		t.Finished = func(_ bool) bool {
			// this func implements the Task.Finished spec, so when the task exits
			// we return false to indicate that it should not be restarted. we also
			// close the listener chan to signal interested parties.
			if listener != nil {
				close(listener)
				listener = nil
			}
			return false
		}

		// since we only expect to die once, and there is no restart; don't delay any longer than needed
		t.RestartDelay = 0

		return func(t2 *Task) Option {
			t2.Finished, t2.RestartDelay = finished, restartDelay
			return NoRespawn(listener)
		}
	}
}

// Environment customizes the process runtime environment for a Task
func Environment(env []string) Option {
	return func(t *Task) Option {
		oldenv := t.Env
		t.Env = env[:]
		return Environment(oldenv)
	}
}
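A short sketch of the "undo" contract that Option implements; withTemporaryEnv is a hypothetical helper, not part of this package:

// withTemporaryEnv applies env to t, runs f, and then restores the previous
// environment by reapplying the undo Option returned by the first application.
func withTemporaryEnv(t *Task, env []string, f func()) {
	undo := Environment(env)(t) // set the new env, capture the undo Option
	defer undo(t)               // reapplying the returned Option restores the old env
	f()
}

This same pattern is what lets launchExecutorServer in the minion server stack NoRespawn and the container-ID option: each application returns its own inverse.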
@ -1,28 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tasks

import (
	"syscall"
)

func sysProcAttr() *syscall.SysProcAttr {
	return &syscall.SysProcAttr{
		Setpgid:   true,
		Pdeathsig: syscall.SIGKILL, // see cmdProcess.Kill
	}
}
@ -1,38 +0,0 @@
// +build !linux

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tasks

import (
	"syscall"
)

func sysProcAttr() *syscall.SysProcAttr {
	// TODO(jdef)
	// Consequence of not having Pdeathsig is that on non-Linux systems,
	// if SIGTERM doesn't stop child procs then they may "leak" and be
	// reparented 'up the chain' somewhere when the minion process
	// terminates. For example, such child procs end up living indefinitely
	// as children of the mesos slave process (I think the slave could handle
	// this case, but currently doesn't do it very well). Pdeathsig on Linux
	// was a fallback/failsafe mechanism implemented to guard against this. I
	// don't know if OS X has any syscalls that do something similar.
	return &syscall.SysProcAttr{
		Setpgid: true,
	}
}
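Both variants set Setpgid so that cmdProcess.Kill can signal the whole process group via a negative pid. A standalone sketch of that mechanic (the sleep command is just a stand-in):

package main

import (
	"os/exec"
	"syscall"
)

func main() {
	cmd := exec.Command("sleep", "60")
	// place the child in its own process group
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	if err := cmd.Start(); err != nil {
		panic(err)
	}
	// signalling -pid delivers SIGTERM to the child and any grandchildren it spawned
	_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGTERM)
	_ = cmd.Wait()
}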
@ -1,311 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tasks

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"sync"
	"syscall"
	"testing"
	"time"

	log "github.com/golang/glog"
	"github.com/stretchr/testify/assert"
)

type badWriteCloser struct {
	err error
}

func (b *badWriteCloser) Write(_ []byte) (int, error) { return 0, b.err }
func (b *badWriteCloser) Close() error                { return b.err }

type discardCloser int

func (d discardCloser) Write(b []byte) (int, error) { return len(b), nil }
func (d discardCloser) Close() error                { return nil }

var devNull = func() io.WriteCloser { return discardCloser(0) }

type fakeExitError uint32

func (f fakeExitError) Sys() interface{} { return syscall.WaitStatus(f << 8) }
func (f fakeExitError) Error() string    { return fmt.Sprintf("fake-exit-error: %d", f) }

type fakeProcess struct {
	done chan struct{}
	pid  int
	err  error
}

func (f *fakeProcess) Wait() error {
	<-f.done
	return f.err
}
func (f *fakeProcess) Kill(_ bool) (int, error) {
	close(f.done)
	return f.pid, f.err
}
func (f *fakeProcess) exit(code int) {
	f.err = fakeExitError(code)
	close(f.done)
}

func newFakeProcess() *fakeProcess {
	return &fakeProcess{
		done: make(chan struct{}),
	}
}

func TestBadLogger(t *testing.T) {
	err := errors.New("qux")
	fp := newFakeProcess()
	tt := New("foo", "bar", nil, func() io.WriteCloser {
		defer func() {
			fp.pid = 123   // sanity check
			fp.Kill(false) // this causes Wait() to return
		}()
		return &badWriteCloser{err}
	})

	tt.RestartDelay = 0 // don't slow the test down for no good reason

	finishCalled := make(chan struct{})
	tt.Finished = func(ok bool) bool {
		log.Infof("tt.Finished: ok %t", ok)
		if ok {
			close(finishCalled)
		}
		return false // never respawn, this causes t.done to close
	}

	// abuse eventsImpl: we're not going to listen on the task completion or event chans,
	// and we don't want to block the state machine, so discard all task events as they happen
	ei := newEventsImpl(tt.completedCh, tt.done)
	ei.Close()

	go tt.run(func(_ *Task) taskStateFn {
		log.Infof("tt initialized")
		tt.initLogging(bytes.NewBuffer(([]byte)("unlogged bytes")))
		tt.cmd = fp
		return taskRunning
	})

	// if the logger fails the task will be killed:
	// badWriteCloser generates an error immediately and results in a task kill
	<-finishCalled
	<-tt.done

	// this should never data race since the state machine is dead at this point
	if fp.pid != 123 {
		t.Fatalf("incorrect pid, expected 123 not %d", fp.pid)
	}

	// TODO(jdef) would be nice to check for a specific error that indicates the logger died
}

func TestMergeOutput(t *testing.T) {
	var tasksStarted, tasksDone sync.WaitGroup
	tasksDone.Add(2)
	tasksStarted.Add(2)

	t1 := New("foo", "", nil, devNull)
	t1exited := make(chan struct{})
	t1.RestartDelay = 0 // don't slow the test down for no good reason
	t1.Finished = func(ok bool) bool {
		// we expect each of these cases to happen exactly once
		if !ok {
			tasksDone.Done()
		} else {
			close(t1exited)
		}
		return ok
	}
	go t1.run(func(t *Task) taskStateFn {
		defer tasksStarted.Done()
		t.initLogging(bytes.NewBuffer([]byte{}))
		t.cmd = newFakeProcess()
		return taskRunning
	})

	t2 := New("bar", "", nil, devNull)
	t2exited := make(chan struct{})
	t2.RestartDelay = 0 // don't slow the test down for no good reason
	t2.Finished = func(ok bool) bool {
		// we expect each of these cases to happen exactly once
		if !ok {
			tasksDone.Done()
		} else {
			close(t2exited)
		}
		return ok
	}
	go t2.run(func(t *Task) taskStateFn {
		defer tasksStarted.Done()
		t.initLogging(bytes.NewBuffer([]byte{}))
		t.cmd = newFakeProcess()
		return taskRunning
	})

	shouldQuit := make(chan struct{})
	te := MergeOutput([]*Task{t1, t2}, shouldQuit)

	tasksStarted.Wait()
	tasksStarted.Add(2) // recycle the barrier

	// kill each task once, let it restart; make sure that we get the completion status
	t1.cmd.(*fakeProcess).exit(1)
	t2.cmd.(*fakeProcess).exit(2)

	codes := map[int]struct{}{}
	for i := 0; i < 2; i++ {
		switch tc := <-te.Completion(); tc.code {
		case 1, 2:
			codes[tc.code] = struct{}{}
		default:
			if tc.err != nil {
				t.Errorf("unexpected task completion error: %v", tc.err)
			} else {
				t.Errorf("unexpected task completion code: %d", tc.code)
			}
		}
	}

	te.Close() // we're not going to read any other completion or error events

	if len(codes) != 2 {
		t.Fatalf("expected each task process to exit once")
	}

	// each task invokes Finished() once
	<-t1exited
	<-t2exited

	log.Infoln("each task process has completed one round")
	tasksStarted.Wait() // tasks will auto-restart their exited procs

	// assert that the tasks are not dead; TODO(jdef) not sure that these checks are useful
	select {
	case <-t1.done:
		t.Fatalf("t1 is unexpectedly dead")
	default:
	}
	select {
	case <-t2.done:
		t.Fatalf("t2 is unexpectedly dead")
	default:
	}

	log.Infoln("firing quit signal")
	close(shouldQuit) // fire shouldQuit, and everything should terminate gracefully

	log.Infoln("waiting for tasks to die")
	tasksDone.Wait() // our tasks should die

	log.Infoln("waiting for merge to complete")
	<-te.Done() // wait for the merge to complete
}

type fakeTimer struct {
	ch chan time.Time
}

func (t *fakeTimer) set(d time.Duration)     {}
func (t *fakeTimer) discard()                {}
func (t *fakeTimer) await() <-chan time.Time { return t.ch }
func (t *fakeTimer) expire()                 { t.ch = make(chan time.Time); close(t.ch) }
func (t *fakeTimer) reset()                  { t.ch = nil }

func TestAfterDeath(t *testing.T) {
	// test kill escalation since that's not covered by other unit tests
	t1 := New("foo", "", nil, devNull)
	kills := 0
	waitCh := make(chan *Completion, 1)
	timer := &fakeTimer{}
	timer.expire()
	t1.killFunc = func(force bool) (int, error) {
		// > 0 is intentional, multiple calls to close() should panic
		if kills > 0 {
			assert.True(t, force)
			timer.reset() // don't want to race w/ waitCh
			waitCh <- &Completion{name: t1.name, code: 123}
			close(waitCh)
		} else {
			assert.False(t, force)
		}
		kills++
		return 0, nil
	}
	wr := t1.awaitDeath(timer, 0, waitCh)
	assert.Equal(t, "foo", wr.name)
	assert.Equal(t, 123, wr.code)
	assert.NoError(t, wr.err)

	// test tie between shouldQuit and waitCh
	waitCh = make(chan *Completion, 1)
	waitCh <- &Completion{name: t1.name, code: 456}
	close(waitCh)
	t1.killFunc = func(force bool) (int, error) {
		t.Fatalf("should not attempt to kill a task that has already reported completion")
		return 0, nil
	}

	timer.reset() // don't race w/ waitCh
	wr = t1.awaitDeath(timer, 0, waitCh)
	assert.Equal(t, 456, wr.code)
	assert.NoError(t, wr.err)

	// test delayed killFunc failure
	kills = 0
	killFailed := errors.New("for some reason kill failed")
	t1.killFunc = func(force bool) (int, error) {
		// > 0 is intentional, multiple calls to close() should panic
		if kills > 0 {
			assert.True(t, force)
			return -1, killFailed
		} else {
			assert.False(t, force)
		}
		kills++
		return 0, nil
	}
	timer.expire()
	wr = t1.awaitDeath(timer, 0, nil)
	assert.Equal(t, "foo", wr.name)
	assert.Error(t, wr.err)

	// test initial killFunc failure
	kills = 0
	t1.killFunc = func(force bool) (int, error) {
		// > 0 is intentional, multiple calls to close() should panic
		if kills > 0 {
			assert.True(t, force)
			t.Fatalf("killFunc should only be invoked once, not again after it has already failed")
		} else {
			assert.False(t, force)
		}
		kills++
		return 0, killFailed
	}
	timer.expire()
	wr = t1.awaitDeath(timer, 0, nil)
	assert.Equal(t, "foo", wr.name)
	assert.Error(t, wr.err)
}
@ -1,52 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tasks

import (
	"time"
)

type timer interface {
	set(time.Duration)
	discard()
	await() <-chan time.Time
}

type realTimer struct {
	*time.Timer
}

func (t *realTimer) set(d time.Duration) {
	if t.Timer == nil {
		t.Timer = time.NewTimer(d)
	} else {
		t.Reset(d)
	}
}

func (t *realTimer) await() <-chan time.Time {
	if t.Timer == nil {
		return nil
	}
	return t.C
}

func (t *realTimer) discard() {
	if t.Timer != nil {
		t.Stop()
	}
}
@ -1,18 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package node provides utilities to create and update nodes
package node // import "k8s.io/kubernetes/contrib/mesos/pkg/node"
@ -1,226 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node

import (
	"fmt"
	"reflect"
	"strconv"
	"strings"
	"time"

	unversionedcore "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/unversioned"

	log "github.com/golang/glog"
	mesos "github.com/mesos/mesos-go/mesosproto"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/errors"
	"k8s.io/kubernetes/pkg/api/unversioned"
	"k8s.io/kubernetes/pkg/util/validation"
)

const (
	labelPrefix         = "k8s.mesosphere.io/attribute-"
	clientRetryCount    = 5
	clientRetryInterval = time.Second
)

// Create creates a new node api object with the given hostname,
// slave attribute labels and annotations
func Create(
	client unversionedcore.NodesGetter,
	hostName string,
	slaveAttrLabels,
	annotations map[string]string,
) (*api.Node, error) {
	n := api.Node{
		ObjectMeta: api.ObjectMeta{
			Name: hostName,
		},
		Spec: api.NodeSpec{
			ExternalID: hostName,
		},
		Status: api.NodeStatus{
			Phase: api.NodePending,
			// WORKAROUND(sttts): make sure that the Ready condition is the
			// first one. The kube-ui v3 depends on this assumption.
			// TODO(sttts): remove this workaround when kube-ui v4 is used or we
			// merge this with the statusupdate in the controller manager.
			Conditions: []api.NodeCondition{
				{
					Type:              api.NodeReady,
					Status:            api.ConditionTrue,
					Reason:            slaveReadyReason,
					Message:           slaveReadyMessage,
					LastHeartbeatTime: unversioned.Now(),
				},
			},
		},
	}

	n.Labels = mergeMaps(
		map[string]string{"kubernetes.io/hostname": hostName},
		slaveAttrLabels,
	)

	n.Annotations = annotations

	// try to create
	return client.Nodes().Create(&n)
}

// Update updates an existing node api object
// by looking up the given hostname.
// The updated node merges the given slave attribute labels
// and annotations with the found api object.
func Update(
	client unversionedcore.NodesGetter,
	hostname string,
	slaveAttrLabels,
	annotations map[string]string,
) (n *api.Node, err error) {
	for i := 0; i < clientRetryCount; i++ {
		n, err = client.Nodes().Get(hostname)
		if err != nil {
			return nil, fmt.Errorf("error getting node %q: %v", hostname, err)
		}
		if n == nil {
			return nil, fmt.Errorf("no node instance returned for %q", hostname)
		}

		// update labels derived from Mesos slave attributes, keep all other labels
		n.Labels = mergeMaps(
			filterMap(n.Labels, IsNotSlaveAttributeLabel),
			slaveAttrLabels,
		)
		n.Annotations = mergeMaps(n.Annotations, annotations)

		n, err = client.Nodes().Update(n)
		if err == nil {
			return n, nil
		}

		// retry the get/update cycle, e.g. on conflicts caused by concurrent writers
		log.Infof("retry %d/%d: error updating node %v err %v", i, clientRetryCount, n, err)
		time.Sleep(time.Duration(i) * clientRetryInterval)
	}

	return nil, err
}

// CreateOrUpdate creates a node api object or updates an existing one
func CreateOrUpdate(
	client unversionedcore.NodesGetter,
	hostname string,
	slaveAttrLabels,
	annotations map[string]string,
) (*api.Node, error) {
	n, err := Create(client, hostname, slaveAttrLabels, annotations)
	if err == nil {
		return n, nil
	}

	if !errors.IsAlreadyExists(err) {
		return nil, fmt.Errorf("unable to register %q with the apiserver: %v", hostname, err)
	}

	// fall back to update an old node with new labels
	return Update(client, hostname, slaveAttrLabels, annotations)
}

// IsNotSlaveAttributeLabel returns true iff the given label is not derived from a slave attribute
func IsNotSlaveAttributeLabel(key, value string) bool {
	return !IsSlaveAttributeLabel(key, value)
}

// IsSlaveAttributeLabel returns true iff the given label is derived from a slave attribute
func IsSlaveAttributeLabel(key, value string) bool {
	return strings.HasPrefix(key, labelPrefix)
}

// IsUpToDate returns true iff the node's slave labels match the given attribute labels
func IsUpToDate(n *api.Node, labels map[string]string) bool {
	slaveLabels := map[string]string{}
	for k, v := range n.Labels {
		if IsSlaveAttributeLabel(k, "") {
			slaveLabels[k] = v
		}
	}
	return reflect.DeepEqual(slaveLabels, labels)
}

// SlaveAttributesToLabels converts slave attributes into string key/value labels
func SlaveAttributesToLabels(attrs []*mesos.Attribute) map[string]string {
	l := map[string]string{}
	for _, a := range attrs {
		if a == nil {
			continue
		}

		var v string
		k := labelPrefix + a.GetName()

		switch a.GetType() {
		case mesos.Value_TEXT:
			v = a.GetText().GetValue()
		case mesos.Value_SCALAR:
			v = strconv.FormatFloat(a.GetScalar().GetValue(), 'G', -1, 64)
		}

		if errs := validation.IsQualifiedName(k); len(errs) != 0 {
			log.V(3).Infof("ignoring invalid node label %q: %v", k, errs)
			continue
		}

		if errs := validation.IsValidLabelValue(v); len(errs) != 0 {
			log.V(3).Infof("ignoring invalid node label value %s=%q: %v", k, v, errs)
			continue
		}

		l[k] = v
	}
	return l
}

// filterMap filters the given map and returns a new map
// containing all original elements matching the given key-value predicate.
func filterMap(m map[string]string, predicate func(string, string) bool) map[string]string {
	result := make(map[string]string, len(m))
	for k, v := range m {
		if predicate(k, v) {
			result[k] = v
		}
	}
	return result
}

// mergeMaps merges all given maps into a single map.
// There is no advanced key conflict resolution.
// The last key from the given maps wins.
func mergeMaps(ms ...map[string]string) map[string]string {
	var l int
	for _, m := range ms {
		l += len(m)
	}

	result := make(map[string]string, l)
	for _, m := range ms {
		for k, v := range m {
			result[k] = v
		}
	}
	return result
}
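For reference, a small sketch of the attribute-to-label conversion; the attribute name and value are hypothetical, and the mesos-go protobuf field names are assumed:

func ExampleSlaveAttributesToLabels() {
	name, text := "rack", "a1"
	attr := &mesos.Attribute{
		Name: &name,
		Type: mesos.Value_TEXT.Enum(),
		Text: &mesos.Value_Text{Value: &text},
	}
	fmt.Println(SlaveAttributesToLabels([]*mesos.Attribute{attr}))
	// Output: map[k8s.mesosphere.io/attribute-rack:a1]
}

Note that mergeMaps gives later maps precedence, so in Create and Update the slave attribute labels overwrite any colliding keys from the base map.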
@ -1,151 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node

import (
	"fmt"
	"time"

	unversionedcore "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/unversioned"

	log "github.com/golang/glog"
	"k8s.io/kubernetes/contrib/mesos/pkg/queue"
	"k8s.io/kubernetes/contrib/mesos/pkg/runtime"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/errors"
)

type Registrator interface {
	// Register checks whether the node is registered with the given labels. If it
	// is not, it is created or updated on the apiserver. If the node was already
	// up-to-date, false is returned.
	Register(hostName string, labels map[string]string) (bool, error)

	// Run starts the registration loop and returns immediately.
	Run(terminate <-chan struct{}) error
}

type registration struct {
	hostName string
	labels   map[string]string
}

func (r *registration) Copy() queue.Copyable {
	return &registration{
		hostName: r.hostName,
		labels:   r.labels, // labels are never changed, no need to clone
	}
}

func (r *registration) GetUID() string {
	return r.hostName
}

func (r *registration) Value() queue.UniqueCopyable {
	return r
}

type LookupFunc func(hostName string) *api.Node

type clientRegistrator struct {
	lookupNode LookupFunc
	client     unversionedcore.NodesGetter
	queue      *queue.HistoricalFIFO
}

func NewRegistrator(client unversionedcore.NodesGetter, lookupNode LookupFunc) *clientRegistrator {
	return &clientRegistrator{
		lookupNode: lookupNode,
		client:     client,
		queue:      queue.NewHistorical(nil),
	}
}

func (r *clientRegistrator) Run(terminate <-chan struct{}) error {
	loop := func() {
	RegistrationLoop:
		for {
			obj := r.queue.Pop(terminate)
			log.V(3).Infof("registration event observed")
			if obj == nil {
				break RegistrationLoop
			}
			select {
			case <-terminate:
				break RegistrationLoop
			default:
			}

			rg := obj.(*registration)
			n, needsUpdate := r.updateNecessary(rg.hostName, rg.labels)
			if !needsUpdate {
				log.V(2).Infof("no update needed, skipping for %s: %v", rg.hostName, rg.labels)
				continue
			}

			if n == nil {
				log.V(2).Infof("creating node %s with labels %v", rg.hostName, rg.labels)
				_, err := CreateOrUpdate(r.client, rg.hostName, rg.labels, nil)
				if err != nil {
					log.Errorf("error creating node %s (labels %v): %v", rg.hostName, rg.labels, err)
				}
			} else {
				log.V(2).Infof("updating node %s with labels %v", rg.hostName, rg.labels)
				_, err := Update(r.client, rg.hostName, rg.labels, nil)
				if err != nil && errors.IsNotFound(err) {
					// last chance when our store was out of date
					_, err = Create(r.client, rg.hostName, rg.labels, nil)
				}
				if err != nil {
					log.Errorf("error updating node %s (labels %v): %v", rg.hostName, rg.labels, err)
				}
			}
		}
	}
	go runtime.Until(loop, time.Second, terminate)

	return nil
}

func (r *clientRegistrator) Register(hostName string, labels map[string]string) (bool, error) {
	_, needsUpdate := r.updateNecessary(hostName, labels)

	if needsUpdate {
		log.V(5).Infof("queuing registration for node %s with labels %v", hostName, labels)
		err := r.queue.Update(&registration{
			hostName: hostName,
			labels:   labels,
		})
		if err != nil {
			return false, fmt.Errorf("cannot register node %s: %v", hostName, err)
		}
		return true, nil
	}

	return false, nil
}

// updateNecessary retrieves the node with the given hostname and checks whether the given
// labels would mean any update to the node. The unmodified node is returned, plus
// true iff an update is necessary.
func (r *clientRegistrator) updateNecessary(hostName string, labels map[string]string) (*api.Node, bool) {
	if r.lookupNode == nil {
		return nil, true
	}
	n := r.lookupNode(hostName)
	return n, n == nil || !IsUpToDate(n, labels)
}
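A minimal wiring sketch for this interface; the client, lookup store, hostname and termination channel are assumptions about the caller:

func runRegistration(client unversionedcore.NodesGetter, lookup LookupFunc, terminate <-chan struct{}) {
	r := NewRegistrator(client, lookup)
	if err := r.Run(terminate); err != nil {
		log.Fatalf("registrator failed to start: %v", err)
	}
	// Register is cheap and idempotent: it only queues work when the cached
	// node is missing or its slave attribute labels have changed.
	if queued, err := r.Register("slave-1", map[string]string{
		labelPrefix + "rack": "a1",
	}); err != nil {
		log.Errorf("registration error: %v", err)
	} else if queued {
		log.Infof("registration queued")
	}
}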
@ -1,160 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node

import (
	"testing"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/errors"
	"k8s.io/kubernetes/pkg/api/unversioned"
	unversionedcore "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/unversioned"
	"k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/unversioned/fake"
	"k8s.io/kubernetes/pkg/client/testing/core"
	"k8s.io/kubernetes/pkg/runtime"
)

type fakeNodes struct {
	*fake.FakeNodes
}

func (f *fakeNodes) Nodes() unversionedcore.NodeInterface {
	return f
}

// calledOnce returns a channel that is closed on the first invocation of the
// returned reactor func, which panics if it is ever invoked a second time.
func calledOnce(h bool, ret runtime.Object, err error) (<-chan struct{}, func(core.Action) (bool, runtime.Object, error)) {
	ch := make(chan struct{})
	return ch, func(_ core.Action) (bool, runtime.Object, error) {
		select {
		case <-ch:
			panic("called more than once")
		default:
			close(ch)
		}
		return h, ret, err
	}
}

func TestRegister_withUnknownNode(t *testing.T) {
	fc := &core.Fake{}
	nodes := &fakeNodes{&fake.FakeNodes{Fake: &fake.FakeCore{Fake: fc}}}
	createCalled, createOnce := calledOnce(true, nil, nil)
	fc.AddReactor("create", "nodes", createOnce)

	lookup := func(hostName string) *api.Node {
		select {
		case <-createCalled:
			return &api.Node{ObjectMeta: api.ObjectMeta{Name: "foo"}}
		default:
			return nil
		}
	}

	r := NewRegistrator(nodes, lookup)
	ch := make(chan struct{})
	defer close(ch)
	r.Run(ch)

	t.Logf("registering node foo")
	ok, err := r.Register("foo", nil)
	if !ok {
		t.Fatalf("registration failed without error")
	} else if err != nil {
		t.Fatalf("registration failed with error %v", err)
	}

	// wait for node creation
	t.Logf("awaiting node creation")
	<-createCalled
}

func TestRegister_withKnownNode(t *testing.T) {
	fc := &core.Fake{}
	nodes := &fakeNodes{&fake.FakeNodes{Fake: &fake.FakeCore{Fake: fc}}}
	updateCalled, updateOnce := calledOnce(true, nil, nil)
	fc.AddReactor("update", "nodes", updateOnce)

	lookup := func(hostName string) *api.Node {
		select {
		case <-updateCalled:
			return &api.Node{ObjectMeta: api.ObjectMeta{Name: "foo"}}
		default:
			// this node needs an update because it has labels; the updated version doesn't
			return &api.Node{ObjectMeta: api.ObjectMeta{Name: "foo", Labels: map[string]string{"a": "b"}}}
		}
	}

	r := NewRegistrator(nodes, lookup)
	ch := make(chan struct{})
	defer close(ch)
	r.Run(ch)

	t.Logf("registering node foo")
	ok, err := r.Register("foo", nil)
	if !ok {
		t.Fatalf("registration failed without error")
	} else if err != nil {
		t.Fatalf("registration failed with error %v", err)
	}

	// wait for node update
	t.Logf("awaiting node update")
	<-updateCalled
}

func TestRegister_withSemiKnownNode(t *testing.T) {
	// semi-known because the lookup func doesn't see a very newly created node,
	// but our apiserver "create" call returns an already-exists error. In this case
	// CreateOrUpdate should proceed to attempt an update.

	fc := &core.Fake{}
	nodes := &fakeNodes{&fake.FakeNodes{Fake: &fake.FakeCore{Fake: fc}}}

	createCalled, createOnce := calledOnce(true, nil, errors.NewAlreadyExists(unversioned.GroupResource{Group: "", Resource: ""}, "nodes"))
	fc.AddReactor("create", "nodes", createOnce)

	updateCalled, updateOnce := calledOnce(true, nil, nil)
	fc.AddReactor("update", "nodes", updateOnce)

	lookup := func(hostName string) *api.Node {
		select {
		case <-updateCalled:
			return &api.Node{ObjectMeta: api.ObjectMeta{Name: "foo"}}
		default:
			// this makes the node semi-known: the apiserver knows it but the store/cache doesn't
			return nil
		}
	}

	r := NewRegistrator(nodes, lookup)
	ch := make(chan struct{})
	defer close(ch)
	r.Run(ch)

	t.Logf("registering node foo")
	ok, err := r.Register("foo", nil)
	if !ok {
		t.Fatalf("registration failed without error")
	} else if err != nil {
		t.Fatalf("registration failed with error %v", err)
	}

	// wait for the rejected create and the subsequent update
	t.Logf("awaiting node creation attempt and node update")
	<-createCalled
	<-updateCalled
}
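The semi-known test above pins down the create-or-update fall-through. Roughly, the behavior it asserts looks like this; a sketch of the expected control flow only, not the package's actual CreateOrUpdate implementation, with hostName and labels standing in for the caller's inputs:

	n := &api.Node{ObjectMeta: api.ObjectMeta{Name: hostName, Labels: labels}}
	if _, err := nodes.Nodes().Create(n); err != nil {
		if !errors.IsAlreadyExists(err) {
			return err
		}
		// the apiserver already knows the node even though the local cache does not:
		// fall through to an update instead of failing
		if _, err := nodes.Nodes().Update(n); err != nil {
			return err
		}
	}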
@ -1,190 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node

import (
	"fmt"
	"time"

	clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"

	log "github.com/golang/glog"
	"k8s.io/kubernetes/cmd/kubelet/app/options"
	"k8s.io/kubernetes/contrib/mesos/pkg/runtime"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/errors"
	"k8s.io/kubernetes/pkg/api/unversioned"
	"k8s.io/kubernetes/pkg/client/cache"
	"k8s.io/kubernetes/pkg/cloudprovider/providers/mesos"
	"k8s.io/kubernetes/pkg/fields"
	"k8s.io/kubernetes/pkg/util/sets"
)

const (
	nodeStatusUpdateRetry = 5
	slaveReadyReason      = "SlaveReady"
	slaveReadyMessage     = "mesos reports ready status"
)

type StatusUpdater struct {
	client          *clientset.Clientset
	relistPeriod    time.Duration
	heartBeatPeriod time.Duration
	nowFunc         func() time.Time
}

func NewStatusUpdater(client *clientset.Clientset, relistPeriod time.Duration, nowFunc func() time.Time) *StatusUpdater {
	kubecfg := options.NewKubeletServer() // created only to read its config; this has no side effects
	return &StatusUpdater{
		client:          client,
		relistPeriod:    relistPeriod,
		heartBeatPeriod: kubecfg.NodeStatusUpdateFrequency.Duration,
		nowFunc:         nowFunc,
	}
}

func (u *StatusUpdater) Run(terminate <-chan struct{}) error {
	nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	nodeLW := cache.NewListWatchFromClient(u.client.CoreClient, "nodes", api.NamespaceAll, fields.Everything())
	cache.NewReflector(nodeLW, &api.Node{}, nodeStore, u.relistPeriod).Run()

	monitor := func() {
		// build up a set of listed slave nodes without a kubelet
		slaves, err := mesos.CloudProvider.ListWithoutKubelet()
		if err != nil {
			log.Errorf("Error listing slaves without kubelet: %v", err)
			return
		}
		slavesWithoutKubelet := sets.NewString(slaves...)

		// Update the status of nodes which do not have a kubelet running but
		// which still exist as slaves. This status update must happen before
		// the node controller counts down the NodeMonitorGracePeriod.
		nodes := nodeStore.List()

		for _, n := range nodes {
			node := n.(*api.Node)
			if !slavesWithoutKubelet.Has(node.Spec.ExternalID) {
				// let the kubelet do its job of updating the status; the node
				// controller will remove this node if it does not exist anymore
				continue
			}

			err := u.updateStatus(node)
			if err != nil {
				log.Errorf("Error updating node status: %v", err)
			}
		}
	}

	go runtime.Until(monitor, u.heartBeatPeriod, terminate)
	return nil
}

func (u *StatusUpdater) updateStatus(n *api.Node) error {
	for i := 0; i < nodeStatusUpdateRetry; i++ {
		if err := u.tryUpdateStatus(n); err != nil && !errors.IsConflict(err) {
			log.Errorf("Error updating node status, will retry: %v", err)
		} else {
			return nil
		}
	}
	return fmt.Errorf("update of node status exceeded retry count")
}

// nodeWithUpdatedStatus clones the given node and updates the NodeReady condition.
// The updated node is returned, along with a boolean indicating whether the node
// was changed at all.
func (u *StatusUpdater) nodeWithUpdatedStatus(n *api.Node) (*api.Node, bool, error) {
	readyCondition := getCondition(&n.Status, api.NodeReady)
	currentTime := unversioned.NewTime(u.nowFunc())

	// Avoid flapping by waiting at least twice the kubelet update frequency, i.e.
	// give the kubelet two chances to update the heartbeat. This is necessary
	// because we only poll the Mesos master state.json once in a while, and we
	// know that the information from there can easily be outdated.
	gracePeriod := u.heartBeatPeriod * 2
	if readyCondition != nil && !currentTime.After(readyCondition.LastHeartbeatTime.Add(gracePeriod)) {
		return n, false, nil
	}

	clone, err := api.Scheme.DeepCopy(n)
	if err != nil {
		return nil, false, err
	}
	n = clone.(*api.Node)

	newNodeReadyCondition := api.NodeCondition{
		Type:              api.NodeReady,
		Status:            api.ConditionTrue,
		Reason:            slaveReadyReason,
		Message:           slaveReadyMessage,
		LastHeartbeatTime: currentTime,
	}

	found := false
	for i := range n.Status.Conditions {
		c := &n.Status.Conditions[i]
		if c.Type == api.NodeReady {
			if c.Status == newNodeReadyCondition.Status {
				newNodeReadyCondition.LastTransitionTime = c.LastTransitionTime
			} else {
				newNodeReadyCondition.LastTransitionTime = currentTime
			}
			n.Status.Conditions[i] = newNodeReadyCondition
			found = true
			break
		}
	}

	if !found {
		newNodeReadyCondition.LastTransitionTime = currentTime
		n.Status.Conditions = append(n.Status.Conditions, newNodeReadyCondition)
	}

	return n, true, nil
}

// tryUpdateStatus updates the status of the given node and tries to persist it
// on the apiserver.
func (u *StatusUpdater) tryUpdateStatus(n *api.Node) error {
	n, updated, err := u.nodeWithUpdatedStatus(n)
	if err != nil {
		return err
	}
	if !updated {
		return nil
	}

	_, err = u.client.Nodes().UpdateStatus(n)
	return err
}

// getCondition returns the condition object for the given condition type,
// or nil if the condition is not set.
func getCondition(status *api.NodeStatus, conditionType api.NodeConditionType) *api.NodeCondition {
	if status == nil {
		return nil
	}
	for i := range status.Conditions {
		if status.Conditions[i].Type == conditionType {
			return &status.Conditions[i]
		}
	}
	return nil
}
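To make the anti-flapping arithmetic in nodeWithUpdatedStatus concrete: assuming the kubelet's NodeStatusUpdateFrequency is at its default of 10s (an assumption here; the code reads the real value from options.NewKubeletServer()), the grace period is 20s, so a NodeReady heartbeat is refreshed only once the previous one is more than 20s old. A self-contained sketch of the same staleness check:

	package main

	import (
		"fmt"
		"time"
	)

	func main() {
		heartBeatPeriod := 10 * time.Second // assumption: kubelet default NodeStatusUpdateFrequency
		gracePeriod := 2 * heartBeatPeriod  // same doubling as in nodeWithUpdatedStatus above

		lastHeartbeat := time.Now().Add(-25 * time.Second) // pretend the last heartbeat is 25s old
		if time.Now().After(lastHeartbeat.Add(gracePeriod)) {
			fmt.Println("stale: refresh the NodeReady heartbeat")
		} else {
			fmt.Println("fresh: skip the update to avoid flapping")
		}
	}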