Add cluster directory and health-monitor.sh.

Signed-off-by: Lantao Liu <lantaol@google.com>
This commit is contained in:
Lantao Liu 2017-12-11 22:48:52 +00:00 committed by Derek McGowan
parent 8a3f1c99e0
commit 0512d1e0b2
No known key found for this signature in database
GPG Key ID: F58C5D0A4405ACDB
5 changed files with 594 additions and 0 deletions

18
contrib/env Normal file
View File

@ -0,0 +1,18 @@
#!/bin/bash
CLUSTER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# TODO(random-liu): Upload release tarball to user's own GCS, and use it. We should
# not let all nodes of all users download tarball from cri-containerd-release.
export KUBE_MASTER_EXTRA_METADATA="user-data=${CLUSTER_DIR}/gce/cloud-init/master.yaml,cri-containerd-configure-sh=${CLUSTER_DIR}/gce/configure.sh"
export KUBE_NODE_EXTRA_METADATA="user-data=${CLUSTER_DIR}/gce/cloud-init/node.yaml,cri-containerd-configure-sh=${CLUSTER_DIR}/gce/configure.sh"
if [ -n "${VERSION}" ]; then
version=$(mktemp /tmp/version.XXXX)
echo "${VERSION}" > "$version"
export KUBE_MASTER_EXTRA_METADATA="${KUBE_MASTER_EXTRA_METADATA},version=${version}"
export KUBE_NODE_EXTRA_METADATA="${KUBE_NODE_EXTRA_METADATA},version=${version}"
fi
export KUBE_CONTAINER_RUNTIME="remote"
export KUBE_CONTAINER_RUNTIME_ENDPOINT="/var/run/cri-containerd.sock"
export KUBE_LOAD_IMAGE_COMMAND="/home/cri-containerd/usr/local/bin/cri-containerd load"
export NETWORK_POLICY_PROVIDER="calico"
export NON_MASQUERADE_CIDR="0.0.0.0/0"

View File

@ -0,0 +1,232 @@
#cloud-config
write_files:
# Setup cri-containerd.
- path: /etc/systemd/system/cri-containerd-installation.service
permissions: 0644
owner: root
content: |
# installed by cloud-init
[Unit]
Description=Download and install cri-containerd binaries and configurations.
After=network-online.target
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStartPre=/bin/mkdir -p /home/cri-containerd
ExecStartPre=/bin/mount --bind /home/cri-containerd /home/cri-containerd
ExecStartPre=/bin/mount -o remount,exec /home/cri-containerd
ExecStartPre=/usr/bin/curl --fail --retry 5 --retry-delay 3 --silent --show-error -H "X-Google-Metadata-Request: True" -o /home/cri-containerd/configure.sh http://metadata.google.internal/computeMetadata/v1/instance/attributes/cri-containerd-configure-sh
ExecStartPre=/bin/chmod 544 /home/cri-containerd/configure.sh
ExecStart=/home/cri-containerd/configure.sh
[Install]
WantedBy=cri-containerd.target
- path: /etc/containerd/config.toml
permissions: 0644
owner: root
content: |
# installed by cloud-init
oom_score = -999
[plugins.linux]
shim = "/home/cri-containerd/usr/local/bin/containerd-shim"
runtime = "/home/cri-containerd/usr/local/sbin/runc"
- path: /etc/systemd/system/containerd.service
permissions: 0644
owner: root
content: |
# installed by cloud-init
[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=cri-containerd-installation.service
[Service]
Restart=always
RestartSec=5
Delegate=yes
KillMode=process
LimitNOFILE=1048576
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
ExecStartPre=/sbin/modprobe overlay
ExecStart=/home/cri-containerd/usr/local/bin/containerd --log-level debug
[Install]
WantedBy=cri-containerd.target
- path: /etc/systemd/system/cri-containerd.service
permissions: 0644
owner: root
content: |
# installed by cloud-init
[Unit]
Description=Kubernetes containerd CRI shim
Requires=network-online.target
After=cri-containerd-installation.service
[Service]
Restart=always
RestartSec=5
LimitNOFILE=1048576
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
# cri-containerd on master uses the cni binary and config in the
# release tarball.
ExecStart=/home/cri-containerd/usr/local/bin/cri-containerd \
--logtostderr --v=4 \
--network-bin-dir=/home/cri-containerd/opt/cni/bin \
--network-conf-dir=/home/cri-containerd/etc/cni/net.d
[Install]
WantedBy=cri-containerd.target
- path: /etc/systemd/system/cri-containerd-monitor.service
permissions: 0644
owner: root
content: |
[Unit]
Description=Kubernetes health monitoring for cri-containerd and containerd
After=containerd.service cri-containerd.service
[Service]
Restart=always
RestartSec=10
RemainAfterExit=yes
RemainAfterExit=yes
ExecStartPre=/bin/chmod 544 /home/cri-containerd/opt/cri-containerd/cluster/health-monitor.sh
ExecStart=/bin/bash -c 'CRICTL=/home/cri-containerd/usr/local/bin/crictl \
/home/cri-containerd/opt/cri-containerd/cluster/health-monitor.sh'
[Install]
WantedBy=cri-containerd.target
# TODO(random-liu): Guarantee order.
- path: /etc/systemd/system/cri-containerd.target
permissions: 0644
owner: root
content: |
[Unit]
Description=CRI Containerd
[Install]
WantedBy=kubernetes.target
# Setup kubernetes.
- path: /etc/systemd/system/kube-master-installation.service
permissions: 0644
owner: root
content: |
[Unit]
Description=Download and install k8s binaries and configurations
After=network-online.target
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStartPre=/bin/mkdir -p /home/kubernetes/bin
ExecStartPre=/bin/mount --bind /home/kubernetes/bin /home/kubernetes/bin
ExecStartPre=/bin/mount -o remount,exec /home/kubernetes/bin
ExecStartPre=/usr/bin/curl --fail --retry 5 --retry-delay 3 --silent --show-error -H "X-Google-Metadata-Request: True" -o /home/kubernetes/bin/configure.sh http://metadata.google.internal/computeMetadata/v1/instance/attributes/configure-sh
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/configure.sh
ExecStart=/home/kubernetes/bin/configure.sh
[Install]
WantedBy=kubernetes.target
- path: /etc/systemd/system/kube-master-configuration.service
permissions: 0644
owner: root
content: |
[Unit]
Description=Configure kubernetes master
After=kube-master-installation.service
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/configure-helper.sh
ExecStart=/home/kubernetes/bin/configure-helper.sh
[Install]
WantedBy=kubernetes.target
- path: /etc/systemd/system/kubelet-monitor.service
permissions: 0644
owner: root
content: |
[Unit]
Description=Kubernetes health monitoring for kubelet
After=kube-master-configuration.service
[Service]
Restart=always
RestartSec=10
RemainAfterExit=yes
RemainAfterExit=yes
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/health-monitor.sh
ExecStart=/home/kubernetes/bin/health-monitor.sh kubelet
[Install]
WantedBy=kubernetes.target
- path: /etc/systemd/system/kube-logrotate.timer
permissions: 0644
owner: root
content: |
[Unit]
Description=Hourly kube-logrotate invocation
[Timer]
OnCalendar=hourly
[Install]
WantedBy=kubernetes.target
- path: /etc/systemd/system/kube-logrotate.service
permissions: 0644
owner: root
content: |
[Unit]
Description=Kubernetes log rotation
After=kube-master-configuration.service
[Service]
Type=oneshot
ExecStart=-/usr/sbin/logrotate /etc/logrotate.conf
[Install]
WantedBy=kubernetes.target
- path: /etc/systemd/system/kubernetes.target
permissions: 0644
owner: root
content: |
[Unit]
Description=Kubernetes
[Install]
WantedBy=multi-user.target
runcmd:
- systemctl daemon-reload
- systemctl enable containerd.service
- systemctl enable cri-containerd-installation.service
- systemctl enable cri-containerd.service
- systemctl enable cri-containerd-monitor.service
- systemctl enable cri-containerd.target
- systemctl enable kube-master-installation.service
- systemctl enable kube-master-configuration.service
- systemctl enable kubelet-monitor.service
- systemctl enable kube-logrotate.timer
- systemctl enable kube-logrotate.service
- systemctl enable kubernetes.target
- systemctl start kubernetes.target

View File

@ -0,0 +1,234 @@
#cloud-config
write_files:
# Setup cri-containerd.
- path: /etc/systemd/system/cri-containerd-installation.service
permissions: 0644
owner: root
content: |
# installed by cloud-init
[Unit]
Description=Download and install cri-containerd binaries and configurations.
After=network-online.target
[Service]
Type=oneshot
RemainAfterExit=yes
# cri-containerd requires the existence of cni config directory.
# TODO(random-liu): Eliminate the requirement in ocicni.
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /home/cri-containerd
ExecStartPre=/bin/mount --bind /home/cri-containerd /home/cri-containerd
ExecStartPre=/bin/mount -o remount,exec /home/cri-containerd
ExecStartPre=/usr/bin/curl --fail --retry 5 --retry-delay 3 --silent --show-error -H "X-Google-Metadata-Request: True" -o /home/cri-containerd/configure.sh http://metadata.google.internal/computeMetadata/v1/instance/attributes/cri-containerd-configure-sh
ExecStartPre=/bin/chmod 544 /home/cri-containerd/configure.sh
ExecStart=/home/cri-containerd/configure.sh
[Install]
WantedBy=cri-containerd.target
- path: /etc/containerd/config.toml
permissions: 0644
owner: root
content: |
# installed by cloud-init
oom_score = -999
[plugins.linux]
shim = "/home/cri-containerd/usr/local/bin/containerd-shim"
runtime = "/home/cri-containerd/usr/local/sbin/runc"
- path: /etc/systemd/system/containerd.service
permissions: 0644
owner: root
content: |
# installed by cloud-init
[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=cri-containerd-installation.service
[Service]
Restart=always
RestartSec=5
Delegate=yes
KillMode=process
LimitNOFILE=1048576
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
ExecStartPre=/sbin/modprobe overlay
ExecStart=/home/cri-containerd/usr/local/bin/containerd --log-level debug
[Install]
WantedBy=cri-containerd.target
- path: /etc/systemd/system/cri-containerd.service
permissions: 0644
owner: root
content: |
# installed by cloud-init
[Unit]
Description=Kubernetes containerd CRI shim
Requires=network-online.target
After=cri-containerd-installation.service
[Service]
Restart=always
RestartSec=5
LimitNOFILE=1048576
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
# Point to /home/kubernetes/bin where calico setup cni binary in kube-up.sh.
# Point to /etc/cni/net.d where calico put cni config in kube-up.sh.
ExecStart=/home/cri-containerd/usr/local/bin/cri-containerd \
--logtostderr --v=4 \
--network-bin-dir=/home/kubernetes/bin \
--network-conf-dir=/etc/cni/net.d
[Install]
WantedBy=cri-containerd.target
- path: /etc/systemd/system/cri-containerd-monitor.service
permissions: 0644
owner: root
content: |
[Unit]
Description=Kubernetes health monitoring for cri-containerd and containerd
After=containerd.service cri-containerd.service
[Service]
Restart=always
RestartSec=10
RemainAfterExit=yes
RemainAfterExit=yes
ExecStartPre=/bin/chmod 544 /home/cri-containerd/opt/cri-containerd/cluster/health-monitor.sh
ExecStart=/bin/bash -c 'CRICTL=/home/cri-containerd/usr/local/bin/crictl \
/home/cri-containerd/opt/cri-containerd/cluster/health-monitor.sh'
[Install]
WantedBy=cri-containerd.target
- path: /etc/systemd/system/cri-containerd.target
permissions: 0644
owner: root
content: |
[Unit]
Description=CRI Containerd
[Install]
WantedBy=kubernetes.target
# Setup kubernetes.
- path: /etc/systemd/system/kube-node-installation.service
permissions: 0644
owner: root
content: |
[Unit]
Description=Download and install k8s binaries and configurations
After=network-online.target
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStartPre=/bin/mkdir -p /home/kubernetes/bin
ExecStartPre=/bin/mount --bind /home/kubernetes/bin /home/kubernetes/bin
ExecStartPre=/bin/mount -o remount,exec /home/kubernetes/bin
ExecStartPre=/usr/bin/curl --fail --retry 5 --retry-delay 3 --silent --show-error -H "X-Google-Metadata-Request: True" -o /home/kubernetes/bin/configure.sh http://metadata.google.internal/computeMetadata/v1/instance/attributes/configure-sh
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/configure.sh
ExecStart=/home/kubernetes/bin/configure.sh
[Install]
WantedBy=kubernetes.target
- path: /etc/systemd/system/kube-node-configuration.service
permissions: 0644
owner: root
content: |
[Unit]
Description=Configure kubernetes node
After=kube-node-installation.service
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/configure-helper.sh
ExecStart=/home/kubernetes/bin/configure-helper.sh
[Install]
WantedBy=kubernetes.target
- path: /etc/systemd/system/kubelet-monitor.service
permissions: 0644
owner: root
content: |
[Unit]
Description=Kubernetes health monitoring for kubelet
After=kube-node-configuration.service
[Service]
Restart=always
RestartSec=10
RemainAfterExit=yes
RemainAfterExit=yes
ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/health-monitor.sh
ExecStart=/home/kubernetes/bin/health-monitor.sh kubelet
[Install]
WantedBy=kubernetes.target
- path: /etc/systemd/system/kube-logrotate.timer
permissions: 0644
owner: root
content: |
[Unit]
Description=Hourly kube-logrotate invocation
[Timer]
OnCalendar=hourly
[Install]
WantedBy=kubernetes.target
- path: /etc/systemd/system/kube-logrotate.service
permissions: 0644
owner: root
content: |
[Unit]
Description=Kubernetes log rotation
After=kube-node-configuration.service
[Service]
Type=oneshot
ExecStart=-/usr/sbin/logrotate /etc/logrotate.conf
[Install]
WantedBy=kubernetes.target
- path: /etc/systemd/system/kubernetes.target
permissions: 0644
owner: root
content: |
[Unit]
Description=Kubernetes
[Install]
WantedBy=multi-user.target
runcmd:
- systemctl daemon-reload
- systemctl enable containerd.service
- systemctl enable cri-containerd-installation.service
- systemctl enable cri-containerd.service
- systemctl enable cri-containerd-monitor.service
- systemctl enable cri-containerd.target
- systemctl enable kube-node-installation.service
- systemctl enable kube-node-configuration.service
- systemctl enable kubelet-monitor.service
- systemctl enable kube-logrotate.timer
- systemctl enable kube-logrotate.service
- systemctl enable kubernetes.target
- systemctl start kubernetes.target

68
contrib/gce/configure.sh Executable file
View File

@ -0,0 +1,68 @@
#!/bin/bash
# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o xtrace
set -o errexit
set -o nounset
set -o pipefail
# CRI_CONTAINERD_HOME is the directory for cri-containerd.
CRI_CONTAINERD_HOME="/home/cri-containerd"
cd "${CRI_CONTAINERD_HOME}"
# fetch_metadata fetches metadata from GCE metadata server.
# Var set:
# 1. Metadata key: key of the metadata.
fetch_metadata() {
local -r key=$1
local -r attributes="http://metadata.google.internal/computeMetadata/v1/instance/attributes"
if curl --fail --retry 5 --retry-delay 3 --silent --show-error -H "X-Google-Metadata-Request: True" "${attributes}/" | \
grep -q "${key}"; then
curl --fail --retry 5 --retry-delay 3 --silent --show-error -H "X-Google-Metadata-Request: True" \
"${attributes}/${key}"
fi
}
# DEPLOY_PATH is the gcs path where cri-containerd tarball is stored.
DEPLOY_PATH=${DEPLOY_PATH:-"cri-containerd-release"}
# PKG_PREFIX is the prefix of the cri-containerd tarball name.
# By default use the release tarball with cni built in.
PKG_PREFIX=${PKG_PREFIX:-"cri-containerd-cni"}
# VERSION is the cri-containerd version to use. If not specified,
# the latest version will be used.
VERSION_METADATA="version"
VERSION=$(fetch_metadata "${VERSION_METADATA}")
if [ -z "${VERSION}" ]; then
VERSION=$(curl -f --ipv4 --retry 6 --retry-delay 3 --silent --show-error \
https://storage.googleapis.com/${DEPLOY_PATH}/latest)
fi
# TARBALL_GCS_PATH is the path to download cri-containerd tarball for node e2e.
TARBALL_GCS_PATH="https://storage.googleapis.com/${DEPLOY_PATH}/${PKG_PREFIX}-${VERSION}.tar.gz"
# TARBALL is the name of the tarball after being downloaded.
TARBALL="cri-containerd.tar.gz"
# Download and untar the release tar ball.
curl -f --ipv4 -Lo "${TARBALL}" --connect-timeout 20 --max-time 300 --retry 6 --retry-delay 10 "${TARBALL_GCS_PATH}"
tar xvf "${TARBALL}"
# Copy crictl config.
cp "${CRI_CONTAINERD_HOME}/etc/crictl.yaml" /etc
echo "export PATH=${CRI_CONTAINERD_HOME}/usr/local/bin/:${CRI_CONTAINERD_HOME}/usr/local/sbin/:\$PATH" > \
/etc/profile.d/cri-containerd_env.sh

42
contrib/health-monitor.sh Executable file
View File

@ -0,0 +1,42 @@
#!/bin/bash
# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o nounset
set -o pipefail
# CRICTL is the path of crictl
CRICTL=${CRICTL:-"crictl"}
# COMMAND_TIMEOUT is the timeout for the health check command.
COMMAND_TIMEOUT=${COMMAND_TIMEOUT:-60}
# CHECK_PERIOD is the health check period.
CHECK_PERIOD=${CHECK_PERIOD:-10}
# SLEEP_SECONDS is the time to sleep after killing cri-containerd
# and containerd.
SLEEP_SECONDS=${SLEEP_SECONDS:-120}
while true; do
# Use crictl sandboxes because it requires both containerd and
# cri-containerd to be working.
if ! timeout ${COMMAND_TIMEOUT} ${CRICTL} sandboxes > /dev/null; then
echo "crictl sandboxes timeout!"
pkill -9 containerd
pkill cri-containerd
# Wait for a while, as we don't want to kill it again before it is really up.
sleep ${SLEEP_SECONDS}
else
sleep ${CHECK_PERIOD}
fi
done