kubernetes/cluster/photon-controller/util.sh
Alain Roy fa9d79df75 Initial kube-up support for VMware's Photon Controller
This is for: https://github.com/kubernetes/kubernetes/issues/24121

Photon Controller is an open-source cloud management platform. More
information is available at:
http://vmware.github.io/photon-controller/

This commit provides initial support for Photon Controller. The
following features are tested and working:
- kube-up and kube-down
- Basic pod and service management
- Networking within the Kubernetes cluster
- UI and DNS addons

It has been tested with a Kubernetes cluster of up to 10
nodes. Further work on scaling is planned for the near future.

Internally we have implemented continuous integration testing and will
run it multiple times per day against the Kubernetes master branch
once this is integrated so we can quickly react to problems.

A few things have not yet been implemented, but are planned:
- Support for kube-push
- Support for test-build-release, test-setup, test-teardown

Assuming this is accepted for inclusion, we will write documentation
for the kubernetes.io site.

We have included a script to help users configure Photon Controller
for use with Kubernetes. While not required, it will help some
users get started more quickly. It will be documented.

We are aware of the kube-deploy efforts and will track them and
support them as appropriate.
2016-04-25 13:24:16 -07:00

1093 lines
33 KiB
Bash
Executable File

#!/bin/bash
# Copyright 2016 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Strict mode: stop on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails. Some functions below temporarily turn
# pipefail off and restore it before returning.
set -o errexit
set -o nounset
set -o pipefail
# A library of helper functions that each provider hosting Kubernetes must implement to use cluster/kube-*.sh scripts.
# KUBE_ROOT is computed relative to this file: two directories above it.
KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
# shellcheck source=./config-common.sh
source "${KUBE_ROOT}/cluster/photon-controller/config-common.sh"
# KUBE_CONFIG_FILE, when set, names the config file to load from this
# directory; when it is unset, config-default.sh is used.
# shellcheck source=./config-default.sh
source "${KUBE_ROOT}/cluster/photon-controller/${KUBE_CONFIG_FILE-"config-default.sh"}"
# shellcheck source=../common.sh
source "${KUBE_ROOT}/cluster/common.sh"
# The photon command line. It is expanded unquoted at its call sites so that
# it word-splits into the program name and its -n flag.
readonly PHOTON="photon -n"
# Naming scheme for VMs (masters & nodes)
readonly MASTER_NAME="${INSTANCE_PREFIX}-master"
# NODE_NAMES holds <INSTANCE_PREFIX>-node-1 .. <INSTANCE_PREFIX>-node-<NUM_NODES>.
# shell check claims this doesn't work because you can't use a variable in a brace
# range. It does work because we're calling eval.
# shellcheck disable=SC2051
readonly NODE_NAMES=($(eval echo "${INSTANCE_PREFIX}"-node-{1.."${NUM_NODES}"}))
#####################################################################
#
# Public API
#
#####################################################################
#
# detect-master will query Photon Controller for the Kubernetes master.
# It assumes that the VM name for the master is unique.
# It will set KUBE_MASTER_ID to be the VM ID of the master
# It will set KUBE_MASTER_IP to be the IP address of the master
# If the silent parameter is passed, it will not print when the master
# is found: this is used internally just to find the MASTER
#
function detect-master {
  # Locates the master VM and publishes KUBE_MASTER, KUBE_MASTER_ID and
  # KUBE_MASTER_IP. Values already present in KUBE_MASTER_ID/KUBE_MASTER_IP
  # are kept and the corresponding lookup is skipped.
  #   $1 - optional; any non-empty value suppresses the status message
  # Exits the script with status 1 when the ID or IP cannot be determined.
  local silent=${1:-""}
  local tenant_args="--tenant ${PHOTON_TENANT} --project ${PHOTON_PROJECT}"

  KUBE_MASTER=${MASTER_NAME}
  KUBE_MASTER_ID=${KUBE_MASTER_ID:-""}
  KUBE_MASTER_IP=${KUBE_MASTER_IP:-""}

  # We don't want silent failure: with pipefail on, a grep that matches
  # nothing would abort the script under errexit before the explicit checks
  # below could print a useful message.
  set +o pipefail

  if [[ -z ${KUBE_MASTER_ID} ]]; then
    # Match on the configured master name (not a hard-coded one) so that a
    # non-default INSTANCE_PREFIX works. -F treats the name literally.
    # tenant_args is intentionally unquoted so it splits into options.
    # shellcheck disable=SC2086
    KUBE_MASTER_ID=$(${PHOTON} vm list ${tenant_args} | grep -F $'\t'"${MASTER_NAME}"$'\t' | awk '{print $1}')
  fi
  if [[ -z ${KUBE_MASTER_ID} ]]; then
    kube::log::error "Could not find Kubernetes master node ID. Make sure you've launched a cluster with kube-up.sh"
    exit 1
  fi

  if [[ -z "${KUBE_MASTER_IP-}" ]]; then
    # Make sure to ignore lines where it's not attached to a portgroup
    # Make sure to ignore lines that have a network interface but no address
    KUBE_MASTER_IP=$(${PHOTON} vm networks "${KUBE_MASTER_ID}" | grep -v "^-" | grep -E '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -1 | awk -F'\t' '{print $3}')
  fi
  if [[ -z "${KUBE_MASTER_IP-}" ]]; then
    kube::log::error "Could not find Kubernetes master node IP. Make sure you've launched a cluster with 'kube-up.sh'" >&2
    exit 1
  fi

  if [[ -z ${silent} ]]; then
    kube::log::status "Master: $KUBE_MASTER ($KUBE_MASTER_IP)"
  fi

  # Reset default set in common.sh
  set -o pipefail
}
#
# detect-nodes will query Photon Controller for the Kubernetes nodes
# It assumes that the VM name for the nodes are unique.
# It assumes that NODE_NAMES has been set
# It will set KUBE_NODE_IP_ADDRESSES to be the VM IPs of the nodes
# It will set the KUBE_NODE_IDS to be the VM IDs of the nodes
# If the silent parameter is passed, it will not print when the nodes
# are found: this is used internally just to find the nodes
#
function detect-nodes {
  # Looks up every VM named in NODE_NAMES and fills KUBE_NODE_IDS and
  # KUBE_NODE_IP_ADDRESSES (same order as NODE_NAMES).
  #   $1 - optional; any non-empty value suppresses the per-node status line
  # Exits the script with status 1 if any node cannot be found.
  local silent=${1:-""}
  local failure=0
  local tenant_args="--tenant ${PHOTON_TENANT} --project ${PHOTON_PROJECT}"
  local i
  local node_id
  local node_ip

  KUBE_NODE_IP_ADDRESSES=()
  KUBE_NODE_IDS=()

  # We don't want silent failure: we check for failure
  set +o pipefail

  for (( i=0; i<${#NODE_NAMES[@]}; i++ )); do
    # tenant_args is intentionally unquoted so it splits into options.
    # shellcheck disable=SC2086
    node_id=$(${PHOTON} vm list ${tenant_args} | grep -F $'\t'"${NODE_NAMES[${i}]}"$'\t' | awk '{print $1}')
    node_ip=""
    if [[ -z ${node_id} ]]; then
      kube::log::error "Could not find ${NODE_NAMES[${i}]}"
      failure=1
    else
      # Make sure to ignore lines where it's not attached to a portgroup
      # Make sure to ignore lines that have a network interface but no address
      node_ip=$(${PHOTON} vm networks "${node_id}" | grep -v "^-" | grep -E '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -1 | awk -F'\t' '{print $3}')
    fi

    # Always append (even empty values) so both arrays stay index-aligned
    # with NODE_NAMES.
    KUBE_NODE_IDS+=("${node_id}")
    KUBE_NODE_IP_ADDRESSES+=("${node_ip}")

    if [[ -z ${silent} ]]; then
      kube::log::status "Node: ${NODE_NAMES[${i}]} (${KUBE_NODE_IP_ADDRESSES[${i}]})"
    fi
  done

  if [[ ${failure} -ne 0 ]]; then
    exit 1
  fi

  # Reset default set in common.sh
  set -o pipefail
}
# Get node names if they are not static.
function detect-node-names {
  # Placeholder: only reports on stderr that this hook is not implemented.
  printf '%s\n' "TODO: detect-node-names" >&2
}
#
# Verifies that this computer has sufficient software installed
# so that it can run the rest of the script.
#
function verify-prereqs {
  # Checks, in order, that each external tool this script relies on can be
  # found; verify-cmd-in-path stops the script on the first missing one.
  local tool
  for tool in photon ssh scp ssh-add openssl mkisofs; do
    verify-cmd-in-path "${tool}"
  done
}
#
# The entry point for bringing up a Kubernetes cluster
#
function kube-up {
  # Brings a cluster up by running each provisioning step in a fixed order.
  # Steps not defined in this file (find-release-tars,
  # load-or-gen-kube-basicauth, create-kubeconfig) are presumably provided by
  # the sourced common.sh -- TODO confirm.
  local _kube_up_step

  # Local checks, temp dir, release artifacts, image lookup, credentials and
  # the cloud-init ISO.
  for _kube_up_step in \
    verify-prereqs \
    verify-ssh-prereqs \
    verify-photon-config \
    ensure-temp-dir \
    find-release-tars \
    find-image-id \
    load-or-gen-kube-basicauth \
    gen-cloud-init-iso; do
    "${_kube_up_step}"
  done

  # Master VM first: the node start scripts embed the master's name and IP.
  for _kube_up_step in gen-master-start create-master-vm install-salt-on-master; do
    "${_kube_up_step}"
  done

  # Node VMs, then a silent lookup of their IDs and IP addresses.
  for _kube_up_step in gen-node-start install-salt-on-nodes; do
    "${_kube_up_step}"
  done
  detect-nodes -s

  # Install Kubernetes via salt, wait for the APIs, wire up pod routing and
  # fetch the credentials.
  for _kube_up_step in \
    install-kubernetes-on-master \
    install-kubernetes-on-nodes \
    wait-master-api \
    wait-node-apis \
    setup-pod-routes \
    copy-kube-certs; do
    "${_kube_up_step}"
  done

  kube::log::status "Creating kubeconfig..."
  create-kubeconfig
}
# Delete a kubernetes cluster
function kube-down {
  # Deletes the master VM and then every node VM. detect-master and
  # detect-nodes supply the names and IDs (and exit the script themselves if
  # a VM cannot be found).
  local node

  detect-master
  detect-nodes

  pc-delete-vm "${KUBE_MASTER}" "${KUBE_MASTER_ID}"
  for (( node=0; node<${#KUBE_NODE_IDS[@]}; node++ )); do
    pc-delete-vm "${NODE_NAMES[${node}]}" "${KUBE_NODE_IDS[${node}]}"
  done
}
# Update a kubernetes cluster
function kube-push {
  # Placeholder: only reports on stderr that this hook is not implemented.
  printf '%s\n' "TODO: kube-push" >&2
}
# Prepare to update a Kubernetes component
function prepare-push {
  # Placeholder: only reports on stderr that this hook is not implemented.
  printf '%s\n' "TODO: prepare-push" >&2
}
# Update a kubernetes master
function push-master {
  # Placeholder: only reports on stderr that this hook is not implemented.
  printf '%s\n' "TODO: push-master" >&2
}
# Update a kubernetes node
function push-node {
  # Placeholder: only reports on stderr that this hook is not implemented.
  printf '%s\n' "TODO: push-node" >&2
}
# Execute prior to running tests to build a release if required for env
function test-build-release {
  # Placeholder: only reports on stderr that this hook is not implemented.
  printf '%s\n' "TODO: test-build-release" >&2
}
# Execute prior to running tests to initialize required structure
function test-setup {
  # Placeholder: only reports on stderr that this hook is not implemented.
  printf '%s\n' "TODO: test-setup" >&2
}
# Execute after running tests to perform any required clean-up
function test-teardown {
  # Placeholder: only reports on stderr that this hook is not implemented.
  printf '%s\n' "TODO: test-teardown" >&2
}
#####################################################################
#
# Internal functions
#
#####################################################################
#
# Uses Photon Controller to make a VM
# Takes two parameters:
# - The name of the VM (Assumed to be unique)
# - The name of the flavor to create the VM (Assumed to be unique)
#
# It assumes that the variables in config-common.sh (PHOTON_TENANT, etc)
# are set correctly.
#
# It also assumes the cloud-init ISO has been generated
#
# When it completes, it sets two environment variables for use by the
# caller: _VM_ID (the ID of the created VM) and _VM_IP (the IP address
# of the created VM)
#
function pc-create-vm {
  # Creates and starts a VM, then waits for it to report a connected network.
  #   $1 - VM name
  #   $2 - VM flavor name
  # Reads PHOTON_TENANT, PHOTON_PROJECT, PHOTON_IMAGE_ID, PHOTON_DISK_FLAVOR
  # and KUBE_TEMP (which must already contain cloud-init.iso).
  # Sets _VM_ID and _VM_IP for the caller. Exits the script with status 1 if
  # the VM cannot be created or no connected network shows up in time.
  local vm_name="${1}"
  local vm_flavor="${2}"
  local rc=0
  local attempt
  local networks=""
  local connected=""

  # Create the VM
  local tenant_args="--tenant ${PHOTON_TENANT} --project ${PHOTON_PROJECT}"
  local vm_args="--name ${vm_name} --image ${PHOTON_IMAGE_ID} --flavor ${vm_flavor}"
  local disk_args="disk-1 ${PHOTON_DISK_FLAVOR} boot=true"

  # tenant_args and vm_args are intentionally unquoted so they split into
  # individual options.
  # shellcheck disable=SC2086
  _VM_ID=$(${PHOTON} vm create ${tenant_args} ${vm_args} --disks "${disk_args}" 2>&1) || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    kube::log::error "Failed to create VM. Error output:"
    echo "${_VM_ID}"
    exit 1
  fi
  kube::log::status "Created VM ${vm_name}: ${_VM_ID}"

  # Start the VM
  # Note that the VM has cloud-init in it, and we attach an ISO that
  # contains a user-data.txt file for cloud-init. When the VM starts,
  # cloud-init will temporarily mount the ISO and configure the VM
  # Our user-data will configure the 'kube' user and set up the ssh
  # authorized keys to allow us to ssh to the VM and do further work.
  run-cmd "${PHOTON} vm attach-iso -p ${KUBE_TEMP}/cloud-init.iso ${_VM_ID}"
  run-cmd "${PHOTON} vm start ${_VM_ID}"
  kube::log::status "Started VM ${vm_name}, waiting for network address..."

  # Wait for the VM to be started and connected to the network: poll once a
  # second, at most 120 times.
  for (( attempt=0; attempt<120; attempt++ )); do
    # photon -n vm networks print several fields:
    # NETWORK MAC IP GATEWAY CONNECTED?
    # We wait until CONNECTED is True
    rc=0
    networks=$(${PHOTON} vm networks "${_VM_ID}") || rc=$?
    if [[ ${rc} -ne 0 ]]; then
      kube::log::error "'${PHOTON} vm networks ${_VM_ID}' failed. Error output: "
      echo "${networks}"
    else
      # Remember the first connected line so the IP is read from that
      # interface rather than from whichever line happens to be listed first.
      connected=$(echo "${networks}" | grep True | head -1) || true
      if [[ -n ${connected} ]]; then
        break
      fi
    fi
    sleep 1
  done

  # Fail if the VM didn't come up
  if [[ -z ${connected} ]]; then
    kube::log::error "VM ${vm_name} failed to start up: no IP was found"
    exit 1
  fi

  # Find the IP address of the VM (third tab-separated field)
  _VM_IP=$(echo "${connected}" | awk -F'\t' '{print $3}')
  kube::log::status "VM ${vm_name} has IP: ${_VM_IP}"
}
#
# Delete one of our VMs
# If it is STARTED, it will be stopped first.
#
function pc-delete-vm {
  # Deletes a VM, stopping it first when the first line of 'vm show' says
  # STARTED. Failures are logged, not fatal.
  #   $1 - VM name (used in messages only)
  #   $2 - VM ID
  local vm_name="${1}"
  local vm_id="${2}"

  kube::log::status "Deleting VM ${vm_name}"

  if ${PHOTON} vm show "${vm_id}" | head -1 | grep STARTED > /dev/null 2>&1; then
    if ! ${PHOTON} vm stop "${vm_id}" > /dev/null 2>&1; then
      # A VM that cannot be stopped is left alone; deleting is not attempted.
      kube::log::error "Error: could not stop ${vm_name} ($vm_id)"
      kube::log::error "Please investigate and stop manually"
      return
    fi
  fi

  if ! ${PHOTON} vm delete "${vm_id}" > /dev/null 2>&1; then
    kube::log::error "Error: could not delete ${vm_name} ($vm_id)"
    kube::log::error "Please investigate and delete manually"
  fi
}
#
# Looks for the image named PHOTON_IMAGE
# Sets PHOTON_IMAGE_ID to be the id of that image.
# We currently assume there is exactly one READY image with that name
#
function find-image-id {
  # Sets PHOTON_IMAGE_ID to the ID (first tab-separated field) of the first
  # READY image named PHOTON_IMAGE. Exits the script with status 1 and an
  # error message when there is no such image.
  #
  # READY is filtered before 'head -1' so that a non-READY image with the
  # same name listed earlier cannot hide a usable one. '|| true' keeps
  # errexit/pipefail from aborting silently when nothing matches; the empty
  # check below reports it instead.
  PHOTON_IMAGE_ID=$(${PHOTON} image list | grep -F $'\t'"${PHOTON_IMAGE}"$'\t' | grep READY | head -1 | awk -F'\t' '{print $1}') || true
  if [[ -z ${PHOTON_IMAGE_ID} ]]; then
    kube::log::error "Cannot find image \"${PHOTON_IMAGE}\""
    exit 1
  fi
}
#
# Generate an ISO with a single file called user-data.txt
# This ISO will be used to configure cloud-init (which is already
# on the VM). We will tell cloud-init to create the kube user/group
# and give ourselves the ability to ssh to the VM with ssh. We also
# allow people to ssh with the same password that was randomly
# generated for access to Kubernetes as a backup method.
#
# Assumes environment variables:
# - VM_USER
# - KUBE_PASSWORD (randomly generated password)
#
function gen-cloud-init-iso {
  # Writes ${KUBE_TEMP}/user-data.txt (a cloud-config creating the VM_USER
  # user/group with a password hash and one ssh key) and packs it into
  # ${KUBE_TEMP}/cloud-init.iso with mkisofs.
  local user_data="${KUBE_TEMP}/user-data.txt"
  local password_hash
  local ssh_key

  password_hash=$(openssl passwd -1 "${KUBE_PASSWORD}")
  # Only the first key listed by the ssh agent is authorized.
  ssh_key=$(ssh-add -L | head -1)

  # Make the user-data file that will be used by cloud-init
  {
    printf '%s\n' "#cloud-config"
    printf '%s\n' ""
    printf '%s\n' "groups:"
    printf '%s\n' " - ${VM_USER}"
    printf '%s\n' ""
    printf '%s\n' "users:"
    printf '%s\n' " - name: ${VM_USER}"
    printf '%s\n' " gecos: Kubernetes"
    printf '%s\n' " primary-group: ${VM_USER}"
    printf '%s\n' " lock-passwd: false"
    printf '%s\n' " passwd: ${password_hash}"
    printf '%s\n' " ssh-authorized-keys: "
    printf '%s\n' " - ${ssh_key}"
    printf '%s\n' " sudo: ALL=(ALL) NOPASSWD:ALL"
    printf '%s\n' " shell: /bin/bash"
    printf '%s\n' ""
    printf '%s\n' "hostname:"
    printf '%s\n' " - hostname: kube"
  } > "${user_data}"

  # Make the ISO that will contain the user-data
  # The -rock option means that we'll generate real filenames (long and with case)
  run-cmd "mkisofs -rock -o ${KUBE_TEMP}/cloud-init.iso ${KUBE_TEMP}/user-data.txt"
}
#
# Generate a script used to install salt on the master
# It is placed into $KUBE_TEMP/master-start.sh
#
function gen-master-start {
  # Writes ${KUBE_TEMP}/master-start.sh: a bash script made of readonly
  # settings followed by the non-comment lines of several template scripts.
  # Also creates ${KUBE_TEMP}/htpasswd and prepends the service IP and the
  # master name to the global MASTER_EXTRA_SANS.
  local templates="${KUBE_ROOT}/cluster/photon-controller/templates"
  local htpasswd_file="${KUBE_TEMP}/htpasswd"

  python "${KUBE_ROOT}/third_party/htpasswd/htpasswd.py" \
    -b -c "${htpasswd_file}" "${KUBE_USER}" "${KUBE_PASSWORD}"
  local htpasswd
  htpasswd=$(< "${htpasswd_file}")

  # The service IP is the network address of SERVICE_CLUSTER_IP_RANGE with
  # its last octet incremented by one (e.g. 10.244.240.0/20 -> 10.244.240.1).
  # If you choose an alternate subnet, there's a small chance you'd need to
  # modify this.
  local network="${SERVICE_CLUSTER_IP_RANGE%%/*}"
  local -a octets
  IFS='.' read -r -a octets <<< "${network}"
  ((octets[3]+=1))
  local service_ip
  service_ip=$(IFS='.'; echo "${octets[*]}")
  MASTER_EXTRA_SANS="IP:${service_ip},DNS:${MASTER_NAME},${MASTER_EXTRA_SANS}"

  {
    printf '%s\n' "#! /bin/bash"
    printf '%s\n' "readonly MY_NAME=${MASTER_NAME}"
    grep -v "^#" "${templates}/hostname.sh"
    printf '%s\n' "cd /home/kube/cache/kubernetes-install"
    printf '%s\n' "readonly MASTER_NAME='${MASTER_NAME}'"
    printf '%s\n' "readonly MASTER_IP_RANGE='${MASTER_IP_RANGE}'"
    printf '%s\n' "readonly INSTANCE_PREFIX='${INSTANCE_PREFIX}'"
    printf '%s\n' "readonly NODE_INSTANCE_PREFIX='${INSTANCE_PREFIX}-node'"
    printf '%s\n' "readonly NODE_IP_RANGES='${NODE_IP_RANGES}'"
    printf '%s\n' "readonly SERVICE_CLUSTER_IP_RANGE='${SERVICE_CLUSTER_IP_RANGE}'"
    printf '%s\n' "readonly ENABLE_NODE_LOGGING='${ENABLE_NODE_LOGGING:-false}'"
    printf '%s\n' "readonly LOGGING_DESTINATION='${LOGGING_DESTINATION:-}'"
    printf '%s\n' "readonly ENABLE_CLUSTER_DNS='${ENABLE_CLUSTER_DNS:-false}'"
    printf '%s\n' "readonly ENABLE_CLUSTER_UI='${ENABLE_CLUSTER_UI:-false}'"
    printf '%s\n' "readonly DNS_SERVER_IP='${DNS_SERVER_IP:-}'"
    printf '%s\n' "readonly DNS_DOMAIN='${DNS_DOMAIN:-}'"
    printf '%s\n' "readonly KUBE_USER='${KUBE_USER:-}'"
    printf '%s\n' "readonly KUBE_PASSWORD='${KUBE_PASSWORD:-}'"
    printf '%s\n' "readonly SERVER_BINARY_TAR='${SERVER_BINARY_TAR##*/}'"
    printf '%s\n' "readonly SALT_TAR='${SALT_TAR##*/}'"
    printf '%s\n' "readonly MASTER_HTPASSWD='${htpasswd}'"
    printf '%s\n' "readonly E2E_STORAGE_TEST_ENVIRONMENT='${E2E_STORAGE_TEST_ENVIRONMENT:-}'"
    printf '%s\n' "readonly MASTER_EXTRA_SANS='${MASTER_EXTRA_SANS:-}'"
    grep -v "^#" "${templates}/create-dynamic-salt-files.sh"
    grep -v "^#" "${templates}/install-release.sh"
    grep -v "^#" "${templates}/salt-master.sh"
  } > "${KUBE_TEMP}/master-start.sh"
}
#
# Generate the scripts for each node to install salt
#
function gen-node-start {
  # Writes one ${KUBE_TEMP}/node-start-<index>.sh per entry of NODE_NAMES.
  # Each script sets the node's name and the master's name/IP, and includes
  # the non-comment lines of the hostname and salt-minion templates.
  local templates="${KUBE_ROOT}/cluster/photon-controller/templates"
  local idx

  for idx in "${!NODE_NAMES[@]}"; do
    {
      printf '%s\n' "#! /bin/bash"
      printf '%s\n' "readonly MY_NAME=${NODE_NAMES[${idx}]}"
      grep -v "^#" "${templates}/hostname.sh"
      printf '%s\n' "KUBE_MASTER=${KUBE_MASTER}"
      printf '%s\n' "KUBE_MASTER_IP=${KUBE_MASTER_IP}"
      printf '%s\n' "NODE_IP_RANGE=$NODE_IP_RANGES"
      grep -v "^#" "${templates}/salt-minion.sh"
    } > "${KUBE_TEMP}/node-start-${idx}.sh"
  done
}
#
# Create a script that will run on the Kubernetes master and will run salt
# to configure the master. We make it a script instead of just running a
# single ssh command so that we can get logging.
#
function gen-master-salt {
  # Generates the salt install script for the master; gen-salt names the
  # output file after the argument (kubernetes-master-salt.sh).
  gen-salt 'kubernetes-master'
}
#
# Create scripts that will be run on the Kubernetes master. Each of these
# will invoke salt to configure one of the nodes
#
function gen-node-salt {
  # Generates one salt install script per node, in NODE_NAMES order.
  local name
  for name in "${NODE_NAMES[@]}"; do
    gen-salt "${name}"
  done
}
#
# Shared implementation for gen-master-salt and gen-node-salt
# Writes a script that installs Kubernetes with salt
# The core of the script is simple (run 'salt ... state.highstate')
# We also do a bit of logging so we can debug problems
#
# There is also a funky workaround for an issue with docker 1.9
# (elsewhere we peg ourselves to docker 1.9). It's fixed in 1.10,
# so we should be able to remove it in the future
# https://github.com/docker/docker/issues/18113
# The problem is that sometimes the install (with apt-get) of
# docker fails. Deleting a file and retrying fixes it.
#
# Tell shellcheck to ignore our variables within single quotes:
# We're writing a script, not executing it, so this is normal
# shellcheck disable=SC2016
function gen-salt {
  # Writes ${KUBE_TEMP}/<name>-salt.sh, a script that runs
  # 'salt <name> state.highstate', logs the output under /tmp, and exits
  # non-zero unless salt reported "Failed: 0".
  #   $1 - salt target name (also used in the generated file names)
  #
  # The single-quoted lines are written literally: their ${...} references
  # are expanded when the generated script runs, not here.
  local node_name=${1}
  (
    echo '#!/bin/bash'
    echo ''
    echo "node=${node_name}"
    echo 'out=/tmp/${node}-salt.out'
    echo 'log=/tmp/${node}-salt.log'
    echo ''
    echo 'echo $(date) >> $log'
    echo 'salt ${node} state.highstate -t 30 --no-color > ${out}'
    echo 'grep -E "Failed:[[:space:]]+0" ${out}'
    echo 'success=$?'
    echo 'cat ${out} >> ${log}'
    echo ''
    echo 'if [[ ${success} -ne 0 ]]; then'
    echo ' # Did we try to install docker-engine?'
    echo ' attempted=$(grep docker-engine ${out} | wc -l)'
    echo ' # Is docker-engine installed?'
    echo ' installed=$(salt --output=txt ${node} pkg.version docker-engine | wc -l)'
    echo ' if [[ ${attempted} -ne 0 && ${installed} -eq 0 ]]; then'
    echo ' echo "Unwedging docker-engine install" >> ${log}'
    echo ' salt ${node} cmd.run "rm -f /var/lib/docker/network/files/local-kv.db"'
    echo ' fi'
    echo 'fi'
    echo 'exit ${success}'
  ) > "${KUBE_TEMP}/${node_name}-salt.sh"
}
#
# Create the Kubernetes master VM
# Sets global variables:
# - KUBE_MASTER (Name)
# - KUBE_MASTER_ID (Photon VM ID)
# - KUBE_MASTER_IP (IP address)
#
function create-master-vm {
  # Creates the master VM and records its identity in the KUBE_MASTER*
  # globals.
  kube::log::status "Starting master VM..."
  pc-create-vm "${MASTER_NAME}" "${PHOTON_MASTER_FLAVOR}"

  # pc-create-vm reports its results through _VM_ID and _VM_IP.
  KUBE_MASTER="${MASTER_NAME}"
  KUBE_MASTER_ID="${_VM_ID}"
  KUBE_MASTER_IP="${_VM_IP}"
}
#
# Install salt on the Kubernetes master
# Relies on the master-start.sh script created in gen-master-start
#
function install-salt-on-master {
  # Uploads the release tarballs to the master and launches master-start.sh
  # there.
  local start_script="${KUBE_TEMP}/master-start.sh"

  kube::log::status "Installing salt on master..."
  upload-server-tars "${MASTER_NAME}" "${KUBE_MASTER_IP}"
  run-script-remotely "${KUBE_MASTER_IP}" "${start_script}"
}
#
# Installs salt on Kubernetes nodes in parallel
# Relies on the node-start script created in gen-node-start
#
function install-salt-on-nodes {
  # Creates every node VM and launches its node-start-<index>.sh, one
  # background subshell per node, then waits for all of them. Exits the
  # script with status 1 if any of the background jobs failed.
  kube::log::status "Creating nodes and installing salt on them..."

  # Start each of the VMs in parallel
  # In the future, we'll batch this because it doesn't scale well
  # past 10 or 20 nodes
  local idx
  for idx in "${!NODE_NAMES[@]}"; do
    (
      # _VM_IP is set by pc-create-vm inside this subshell only.
      pc-create-vm "${NODE_NAMES[${idx}]}" "${PHOTON_NODE_FLAVOR}"
      run-script-remotely "${_VM_IP}" "${KUBE_TEMP}/node-start-${idx}.sh"
    ) &
  done

  # Wait for the node VM startups to complete
  local failed=0
  local pid
  for pid in $(jobs -p); do
    if ! wait "${pid}"; then
      failed=$((failed + 1))
    fi
  done

  if [[ ${failed} -ne 0 ]]; then
    kube::log::error "Failed to start ${failed}/${NUM_NODES} nodes"
    exit 1
  fi
}
#
# Install Kubernetes on the master.
# This uses the kubernetes-master-salt.sh script created by gen-master-salt
# That script uses salt to install Kubernetes
#
function install-kubernetes-on-master {
  # Waits for salt-master to run on the master, uploads the generated
  # kubernetes-master-salt.sh and runs it there (retrying until it succeeds).

  # Wait until salt-master is running: it may take a bit
  try-until-success-ssh "${KUBE_MASTER_IP}" \
    "Waiting for salt-master to start on ${KUBE_MASTER}" \
    "pgrep salt-master"

  gen-master-salt

  # Address the master explicitly through KUBE_MASTER_IP. _VM_IP is just
  # whatever the most recent pc-create-vm call in this shell left behind, and
  # is unset (fatal under nounset) if no VM was created by this process.
  copy-file-to-vm "${KUBE_MASTER_IP}" "${KUBE_TEMP}/kubernetes-master-salt.sh" "/tmp/kubernetes-master-salt.sh"

  try-until-success-ssh "${KUBE_MASTER_IP}" \
    "Installing Kubernetes on ${KUBE_MASTER} via salt" \
    "sudo /bin/bash /tmp/kubernetes-master-salt.sh"
}
#
# Install Kubernetes on the nodes in parallel
# This uses the <node-name>-salt.sh scripts created by gen-node-salt
# That script uses salt to install Kubernetes
#
function install-kubernetes-on-nodes {
  # For every node, in parallel: upload its <node-name>-salt.sh to the
  # master, wait for salt-minion to run on the node, then run the script on
  # the master (retrying until it succeeds). Exits the script with status 1
  # if any of the background jobs failed.
  gen-node-salt

  # Run in parallel to bring up the cluster faster
  # TODO: Batch this so that we run up to N in parallel, so
  # we don't overload this machine or the salt master
  local node
  for (( node=0; node<${#NODE_NAMES[@]}; node++ )); do
    (
      # The per-node salt scripts are executed on the master (see the last
      # step below), so that is where they are copied. Use KUBE_MASTER_IP
      # rather than the leftover _VM_IP global.
      copy-file-to-vm "${KUBE_MASTER_IP}" "${KUBE_TEMP}/${NODE_NAMES[${node}]}-salt.sh" "/tmp/${NODE_NAMES[${node}]}-salt.sh"
      try-until-success-ssh "${KUBE_NODE_IP_ADDRESSES[${node}]}" \
        "Waiting for salt-minion to start on ${NODE_NAMES[${node}]}" \
        "pgrep salt-minion"
      try-until-success-ssh "${KUBE_MASTER_IP}" \
        "Installing Kubernetes on ${NODE_NAMES[${node}]} via salt" \
        "sudo /bin/bash /tmp/${NODE_NAMES[${node}]}-salt.sh"
    ) &
  done

  # Wait for the Kubernetes installations to complete
  local fail=0
  local job
  for job in $(jobs -p); do
    wait "${job}" || fail=$((fail + 1))
  done
  if (( fail != 0 )); then
    kube::log::error "Failed to install Kubernetes on ${fail} out of ${NUM_NODES} nodes"
    exit 1
  fi
}
#
# Upload the Kubernetes tarballs to the master
#
function upload-server-tars {
  # Copies SERVER_BINARY_TAR and SALT_TAR into
  # /home/kube/cache/kubernetes-install on a VM, creating that directory
  # first.
  #   $1 - VM name (used in messages only)
  #   $2 - VM IP address
  local vm_name=${1}
  local vm_ip=${2}
  local install_dir="/home/kube/cache/kubernetes-install"
  local tar
  local base_tar

  run-ssh-cmd "${vm_ip}" "mkdir -p ${install_dir}"

  for tar in "${SERVER_BINARY_TAR}" "${SALT_TAR}"; do
    base_tar=$(basename "${tar}")
    kube::log::status "Uploading ${base_tar} to ${vm_name}..."
    copy-file-to-vm "${vm_ip}" "${tar}" "${install_dir}/${base_tar}"
  done
}
#
# Wait for the Kubernetes healthz API to be responsive on the master
#
function wait-master-api {
  # Polls https://<master>/healthz with curl (basic auth, certificate checks
  # disabled, 1 second per request) through try-until-success.
  local description="Waiting for Kubernetes API on ${KUBE_MASTER}"
  local curl_cmd="curl"

  curl_cmd+=" --insecure --user ${KUBE_USER}:${KUBE_PASSWORD}"
  curl_cmd+=" --fail --output /dev/null --silent"
  curl_cmd+=" --max-time 1"
  curl_cmd+=" https://${KUBE_MASTER_IP}/healthz"

  try-until-success "${description}" "${curl_cmd}"
}
#
# Wait for the Kubernetes healthz API to be responsive on each node
#
function wait-node-apis {
  # Polls http://<node>:10250/healthz on every node, one node after another,
  # through try-until-success. Expects KUBE_NODE_IP_ADDRESSES to be
  # index-aligned with NODE_NAMES.
  local curl_output="--fail --output /dev/null --silent"
  local curl_net="--max-time 1"
  # Keep the loop counter local so a caller's "i" is not clobbered.
  local i

  for (( i=0; i<${#NODE_NAMES[@]}; i++ )); do
    try-until-success "Waiting for Kubernetes API on ${NODE_NAMES[${i}]}..." \
      "curl ${curl_output} ${curl_net} http://${KUBE_NODE_IP_ADDRESSES[${i}]}:10250/healthz"
  done
}
#
# Configure the nodes so the pods can communicate
# Each node will have a bridge named cbr0 for the NODE_IP_RANGES
# defined in config-default.sh. This finds the IP subnet (assigned
# by Kubernetes) to nodes and configures routes so they can communicate
#
# Also configure the master to be able to talk to the nodes. This is
# useful so that you can get to the UI from the master.
#
function setup-pod-routes {
  # Discovers each node's cbr0 network (stored in KUBE_NODE_BRIDGE_NETWORK)
  # and then adds routes: on the master towards every node's network, and on
  # every node towards every other node's network.
  local idx
  local node_name
  local node_addr

  KUBE_NODE_BRIDGE_NETWORK=()
  for idx in "${!NODE_NAMES[@]}"; do
    node_name="${NODE_NAMES[${idx}]}"
    node_addr="${KUBE_NODE_IP_ADDRESSES[${idx}]}"

    # This happens in two steps (wait for an address, wait for a non 172.x.x.x address)
    # because it's both simpler and more clear what's happening.
    try-until-success-ssh "${node_addr}" \
      "Waiting for cbr0 bridge on ${node_name} to have an address" \
      'sudo ifconfig cbr0 | grep -oP "inet addr:\K\S+"'
    try-until-success-ssh "${node_addr}" \
      "Waiting for cbr0 bridge on ${node_name} to have correct address" \
      'sudo ifconfig cbr0 | grep -oP "inet addr:\K\S+" | grep -v "^172."'

    # run-ssh-cmd leaves the command output in _OUTPUT.
    run-ssh-cmd "${node_addr}" 'sudo ip route show | grep -E "dev cbr0" | cut -d " " -f1'
    # shellcheck disable=SC2206
    KUBE_NODE_BRIDGE_NETWORK+=(${_OUTPUT})
    kube::log::status "cbr0 on ${node_name} is ${_OUTPUT}"
  done

  local src
  local dst
  for src in "${!NODE_NAMES[@]}"; do
    kube::log::status "Configuring pod routes on ${NODE_NAMES[${src}]}..."
    run-ssh-cmd "${KUBE_MASTER_IP}" "sudo route add -net ${KUBE_NODE_BRIDGE_NETWORK[${src}]} gw ${KUBE_NODE_IP_ADDRESSES[${src}]}"
    for dst in "${!NODE_NAMES[@]}"; do
      # A node needs no route to its own bridge network.
      if [[ "${src}" == "${dst}" ]]; then
        continue
      fi
      run-ssh-cmd "${KUBE_NODE_IP_ADDRESSES[${src}]}" "sudo route add -net ${KUBE_NODE_BRIDGE_NETWORK[${dst}]} gw ${KUBE_NODE_IP_ADDRESSES[${dst}]}"
    done
  done
}
#
# Copy the certificate/key from the Kubernetes master
# These are used to create the kubeconfig file, which allows
# users to use kubectl easily
#
# We also set KUBE_CERT, KUBE_KEY, CA_CERT, and CONTEXT because they
# are needed by create-kubeconfig from common.sh to generate
# the kube config file.
#
function copy-kube-certs {
  # Downloads kubecfg.crt, kubecfg.key and ca.crt from /srv/kubernetes on
  # the master into KUBE_TEMP, and exports KUBE_CERT, KUBE_KEY, CA_CERT and
  # CONTEXT. The remote files are made mode 644 for the copy and set to 600
  # afterwards.
  local cert_dir="/srv/kubernetes"
  local -a files=("kubecfg.crt" "kubecfg.key" "ca.crt")
  local file

  kube::log::status "Copying credentials from ${KUBE_MASTER}"

  # Set global environment variables: needed by create-kubeconfig
  # in common.sh
  export KUBE_CERT="${KUBE_TEMP}/kubecfg.crt"
  export KUBE_KEY="${KUBE_TEMP}/kubecfg.key"
  export CA_CERT="${KUBE_TEMP}/ca.crt"
  export CONTEXT="photon-${INSTANCE_PREFIX}"

  for file in "${files[@]}"; do
    run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 644 ${cert_dir}/${file}"
  done
  for file in "${files[@]}"; do
    copy-file-from-vm "${KUBE_MASTER_IP}" "${cert_dir}/${file}" "${KUBE_TEMP}/${file}"
  done
  for file in "${files[@]}"; do
    run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 600 ${cert_dir}/${file}"
  done
}
#
# Copies a script to a VM and runs it
# Parameters:
# - IP of VM
# - Path to local file
#
function run-script-remotely {
  # Uploads a local script to /tmp/<basename> on a VM, makes it executable
  # for its owner, and starts it under sudo in the background with its
  # output redirected to <remote path>.out.
  #   $1 - VM IP address
  #   $2 - path to the local script
  local vm_ip=${1}
  local local_file="${2}"
  local remote_file
  remote_file="/tmp/$(basename "${local_file}")"

  copy-file-to-vm "${vm_ip}" "${local_file}" "${remote_file}"
  run-ssh-cmd "${vm_ip}" "chmod 700 ${remote_file}"
  run-ssh-cmd "${vm_ip}" "nohup sudo ${remote_file} < /dev/null 1> ${remote_file}.out 2>&1 &"
}
#
# Runs a command on a VM using ssh
# Parameters:
# - (optional) -i to ignore failure
# - IP address of the VM
# - Command to run
# Assumes environment variables:
# - VM_USER
# - SSH_OPTS
#
function run-ssh-cmd {
  # Runs a command on a VM over ssh via run-cmd, and returns run-cmd's
  # status.
  #   -i (optional, first) - passed through to run-cmd to ignore failure
  #   next argument        - VM IP address
  #   next argument        - command to run remotely
  local ignore_failure=""
  if [[ "${1}" == "-i" ]]; then
    ignore_failure="-i"
    shift
  fi

  local vm_ip=${1}
  local cmd=${2}

  # ignore_failure is left unquoted so that an empty value adds no argument.
  # shellcheck disable=SC2086
  run-cmd ${ignore_failure} "ssh ${SSH_OPTS} $VM_USER@${vm_ip} ${cmd}"
}
#
# Uses scp to copy file to VM
# Parameters:
# - IP address of the VM
# - Path to local file
# - Path to remote file
# Assumes environment variables:
# - VM_USER
# - SSH_OPTS
#
function copy-file-to-vm {
  # Copies a local file to a VM with scp (through run-cmd).
  #   $1 - VM IP address
  #   $2 - local path
  #   $3 - remote path
  local vm_ip=${1}
  local local_file=${2}
  local remote_file=${3}
  local destination="${VM_USER}@${vm_ip}:${remote_file}"

  run-cmd "scp ${SSH_OPTS} ${local_file} ${destination}"
}
function copy-file-from-vm {
  # Copies a file from a VM to a local path with scp (through run-cmd).
  #   $1 - VM IP address
  #   $2 - remote path
  #   $3 - local path
  local vm_ip=${1}
  local remote_file=${2}
  local local_file=${3}
  local origin="${VM_USER}@${vm_ip}:${remote_file}"

  run-cmd "scp ${SSH_OPTS} ${origin} ${local_file}"
}
#
# Run a command, print nice error output
# Used by copy-file-to-vm and run-ssh-cmd
#
function run-cmd {
  # Runs a command given as a single string (it is word-split, not parsed by
  # a shell) and stores its combined stdout/stderr in _OUTPUT.
  #   -i (optional, first) - on failure, return the command's status instead
  #                          of exiting the script
  #   next argument        - the command string
  # Without -i, a failing command is reported and the script exits with
  # status 1.
  local ignore_failure=""
  if [[ "${1}" == "-i" ]]; then
    ignore_failure=${1}
    shift
  fi

  local cmd=$1
  local output
  local rc
  # shellcheck disable=SC2086
  if output=$(${cmd} 2>&1); then
    rc=0
  else
    rc=$?
  fi

  if (( rc != 0 )) && [[ -z "${ignore_failure}" ]]; then
    kube::log::error "Failed to run command: ${cmd} Output:"
    echo "${output}"
    exit 1
  fi

  _OUTPUT=${output}
  return ${rc}
}
#
# After the initial VM setup, we use SSH with keys to access the VMs
# This requires an SSH agent, so we verify that it's running
#
function verify-ssh-prereqs {
  # Makes sure an ssh agent with at least one identity is available:
  # starts an agent if 'ssh-add -L' exits with 2, tries 'ssh-add' if it
  # exits with 1, and exits the script with status 1 if there is still no
  # identity afterwards.
  kube::log::status "Validating SSH configuration..."

  local agent_rc=0
  ssh-add -L > /dev/null 2>&1 || agent_rc=$?
  # "Could not open a connection to your authentication agent."
  if (( agent_rc == 2 )); then
    # ssh agent wasn't running, so start it and ensure we stop it
    eval "$(ssh-agent)" > /dev/null
    trap-add "kill ${SSH_AGENT_PID}" EXIT
  fi

  agent_rc=0
  ssh-add -L > /dev/null 2>&1 || agent_rc=$?
  # "The agent has no identities."
  if (( agent_rc == 1 )); then
    # Try adding one of the default identities, with or without passphrase.
    ssh-add || true
  fi

  # Expect at least one identity to be available.
  if ! ssh-add -L > /dev/null 2>&1; then
    kube::log::error "Could not find or add an SSH identity."
    kube::log::error "Please start ssh-agent, add your identity, and retry."
    exit 1
  fi
}
#
# Verify that Photon Controller has been configured in the way we expect. Specifically
# - Have the flavors been created?
# - Has the image been uploaded?
# TODO: Check the tenant and project as well.
function verify-photon-config {
  # Runs the flavor, image and tenant/project checks with pipefail turned
  # off, then turns pipefail back on.
  kube::log::status "Validating Photon configuration..."

  # We don't want silent failure: we check for failure
  set +o pipefail

  local check
  for check in verify-photon-flavors verify-photon-image verify-photon-tenant; do
    "${check}"
  done

  # Reset default set in common.sh
  set -o pipefail
}
#
# Verify that the VM and disk flavors have been created
#
function verify-photon-flavors {
  # Checks that the master, node and disk flavor names each appear as the
  # second tab-separated field of 'flavor list'. Exits the script with
  # status 1 on the first name that is missing.
  local -a vm_flavors=("${PHOTON_MASTER_FLAVOR}")
  # The node flavor only needs its own lookup when it differs from the
  # master flavor.
  if [[ "${PHOTON_MASTER_FLAVOR}" != "${PHOTON_NODE_FLAVOR}" ]]; then
    vm_flavors+=("${PHOTON_NODE_FLAVOR}")
  fi

  local flavor
  for flavor in "${vm_flavors[@]}"; do
    if ! ${PHOTON} flavor list | awk -F'\t' '{print $2}' | grep -q "^${flavor}$" > /dev/null 2>&1; then
      kube::log::error "ERROR: Cannot find VM flavor named ${flavor}"
      exit 1
    fi
  done

  if ! ${PHOTON} flavor list | awk -F'\t' '{print $2}' | grep -q "^${PHOTON_DISK_FLAVOR}$" > /dev/null 2>&1; then
    kube::log::error "ERROR: Cannot find disk flavor named ${PHOTON_DISK_FLAVOR}"
    exit 1
  fi
}
#
# Verify that we have the image we need, and it's not in error state or
# multiple copies
#
function verify-photon-image {
  # Checks that exactly one image named PHOTON_IMAGE is in the READY state.
  # Warns (without failing) about copies in the ERROR state. Exits the
  # script with status 1 when the image is missing, when no copy is READY,
  # or when more than one copy is READY.
  local images
  local num_ready

  # One listing serves all three checks. '|| true' keeps a non-matching grep
  # from aborting the script under errexit before a message is printed.
  images=$(${PHOTON} image list | grep -F $'\t'"${PHOTON_IMAGE}"$'\t') || true
  if [[ -z ${images} ]]; then
    kube::log::error "ERROR: Cannot find image \"${PHOTON_IMAGE}\""
    exit 1
  fi

  if grep -q ERROR <<< "${images}"; then
    echo "Warning: You have at least one ${PHOTON_IMAGE} image in the ERROR state. You may want to investigate."
    echo "Images in the ERROR state will be ignored."
  fi

  # grep -c prints 0 but returns non-zero when nothing matches.
  num_ready=$(grep -c READY <<< "${images}") || true
  if (( num_ready > 1 )); then
    echo "ERROR: You have more than one READY ${PHOTON_IMAGE} image. Ensure there is only one"
    exit 1
  fi
  if (( num_ready < 1 )); then
    kube::log::error "ERROR: No ${PHOTON_IMAGE} image is in the READY state"
    exit 1
  fi
}
function verify-photon-tenant {
  # Checks that PHOTON_TENANT appears in 'tenant list' and that
  # PHOTON_PROJECT appears in that tenant's 'project list'. Exits the script
  # with status 1 on the first one that is missing.
  if ! ${PHOTON} tenant list | grep -q $'\t'"${PHOTON_TENANT}" > /dev/null 2>&1; then
    echo "ERROR: Cannot find tenant \"${PHOTON_TENANT}\""
    exit 1
  fi

  if ! ${PHOTON} project list --tenant "${PHOTON_TENANT}" | grep -q $'\t'"${PHOTON_PROJECT}"$'\t' > /dev/null 2>&1; then
    echo "ERROR: Cannot find project \"${PHOTON_PROJECT}\""
    exit 1
  fi
}
#
# Verifies that a given command is in the PATH
#
function verify-cmd-in-path {
  # Exits the script with status 1 unless the named command is an
  # executable found on PATH.
  #   $1 - command name
  local cmd=${1}

  # 'type -P' is a bash builtin that searches PATH only (it ignores
  # functions, aliases and builtins), so no external 'which' is required.
  if ! type -P "${cmd}" > /dev/null; then
    kube::log::error "Can't find ${cmd} in PATH, please install and retry."
    exit 1
  fi
}
#
# Checks that KUBE_TEMP is set, or sets it
# If it sets it, it also creates the temporary directory
# and sets up a trap so that we delete it when we exit
#
function ensure-temp-dir {
  # Leaves KUBE_TEMP alone when it is already set and non-empty. Otherwise
  # creates a fresh temporary directory, stores its path in KUBE_TEMP and
  # registers an EXIT trap that removes it.
  if [[ -n ${KUBE_TEMP-} ]]; then
    return 0
  fi

  KUBE_TEMP=$(mktemp -d -t kubernetes.XXXXXX)
  trap-add "rm -rf '${KUBE_TEMP}'" EXIT
}
#
# Repeatedly try a command over ssh until it succeeds or until ten minutes have passed
# The timeout isn't exact, since we assume the command runs instantaneously, and
# it doesn't.
#
function try-until-success-ssh {
  # Retries a command on a VM over ssh, sleeping 5 seconds between tries,
  # until it succeeds. Exits the script with status 1 once the retry budget
  # (600 seconds / 5 seconds per try) is used up.
  #   $1 - VM IP address
  #   $2 - description used in the status message
  #   $3 - command to run remotely
  local vm_ip=${1}
  local cmd_description=${2}
  local cmd=${3}
  local timeout=600
  local sleep_time=5
  local max_attempts=$((timeout / sleep_time))
  local attempt=0

  kube::log::status "${cmd_description} for up to 10 minutes..."

  until run-ssh-cmd -i "${vm_ip}" "${cmd}"; do
    if (( attempt == max_attempts )); then
      kube::log::error "Failed, cannot proceed: you may need to retry to log into the VM to debug"
      exit 1
    fi
    attempt=$((attempt + 1))
    sleep ${sleep_time}
  done
}
function try-until-success {
  # Retries a local command (through run-cmd), sleeping 5 seconds between
  # tries, until it succeeds. Exits the script with status 1 once the retry
  # budget (600 seconds / 5 seconds per try) is used up.
  #   $1 - description used in the status message
  #   $2 - command string to run
  local cmd_description=${1}
  local cmd=${2}
  local timeout=600
  local sleep_time=5
  local max_attempts=$((timeout / sleep_time))
  local attempt=0

  kube::log::status "${cmd_description} for up to 10 minutes..."

  until run-cmd -i "${cmd}"; do
    if (( attempt == max_attempts )); then
      kube::log::error "Failed, cannot proceed"
      exit 1
    fi
    attempt=$((attempt + 1))
    sleep ${sleep_time}
  done
}
#
# Sets up a trap handler
#
function trap-add {
# Adds a handler to a signal's trap without discarding the handler that is
# already installed: the new handler is appended after the existing one.
#   $1 - handler command string
#   $2 - signal name; EXIT when omitted
local handler="${1}"
local signal="${2-EXIT}"
local cur
# 'trap -p <signal>' prints the installed trap as a trap command line
# (nothing when no trap is set). Its words are handed to 'sh -c' as
# positional parameters, and $3 is echoed back as the current handler text.
cur="$(eval "sh -c 'echo \$3' -- $(trap -p ${signal})")"
if [[ -n "${cur}" ]]; then
# Run the existing handler first, then the new one.
handler="${cur}; ${handler}"
fi
# We want ${handler} to expand now, so tell shellcheck
# shellcheck disable=SC2064
trap "${handler}" ${signal}
}