Add configure script logging instrumentation.
- Add log functions to facilitate debug logging. - Wrap commands called in main with debug logging. - Configure a systemd service to forward the logs to the serial port. - Add a 'retry-forever' function to harden download steps. - Add default value support to 'get-metadata-value' function. - Fix some spellcheck lints.
This commit is contained in:
@@ -2316,19 +2316,34 @@ function download-extra-addons {
|
||||
}
|
||||
|
||||
# A function that fetches a GCE metadata value and echoes it out.
|
||||
# Args:
|
||||
# $1 : URL path after /computeMetadata/v1/ (without heading slash).
|
||||
# $2 : An optional default value to echo out if the fetch fails.
|
||||
#
|
||||
# $1: URL path after /computeMetadata/v1/ (without heading slash).
|
||||
# NOTE: this function is duplicated in configure.sh, any changes here should be
|
||||
# duplicated there as well.
|
||||
function get-metadata-value {
|
||||
# We do not want quotes for CURL_RETRY_CONNREFUSED
|
||||
# shellcheck disable=SC2086
|
||||
curl \
|
||||
--retry 5 \
|
||||
--retry-delay 3 \
|
||||
${CURL_RETRY_CONNREFUSED} \
|
||||
--fail \
|
||||
--silent \
|
||||
-H 'Metadata-Flavor: Google' \
|
||||
"http://metadata/computeMetadata/v1/${1}"
|
||||
local default="${2:-}"
|
||||
|
||||
local status
|
||||
# We do not want quotes for CURL_RETRY_CONNREFUSED
|
||||
# shellcheck disable=SC2086
|
||||
curl \
|
||||
--retry 5 \
|
||||
--retry-delay 3 \
|
||||
${CURL_RETRY_CONNREFUSED} \
|
||||
--fail \
|
||||
--silent \
|
||||
-H 'Metadata-Flavor: Google' \
|
||||
"http://metadata/computeMetadata/v1/${1}" \
|
||||
|| status="$?"
|
||||
status="${status:-0}"
|
||||
|
||||
if [[ "${status}" -eq 0 || -z "${default}" ]]; then
|
||||
return "${status}"
|
||||
else
|
||||
echo "${default}"
|
||||
fi
|
||||
}
|
||||
|
||||
# A helper function for copying manifests and setting dir/files
|
||||
@@ -3104,10 +3119,164 @@ function detect_host_info() {
|
||||
esac
|
||||
}
|
||||
|
||||
# Initializes variables used by the log-* functions.
|
||||
#
|
||||
# get-metadata-value must be defined before calling this function.
|
||||
#
|
||||
# NOTE: this function is duplicated in configure.sh, any changes here should be
|
||||
# duplicated there as well.
|
||||
function log-init {
|
||||
# CURL_RETRY_CONNREFUSED needs to be defined before calling get-metadata-value.
|
||||
# Use --retry-connrefused opt only if it's supported by curl.
|
||||
CURL_RETRY_CONNREFUSED=""
|
||||
if curl --help | grep -q -- '--retry-connrefused'; then
|
||||
CURL_RETRY_CONNREFUSED='--retry-connrefused'
|
||||
fi
|
||||
|
||||
# Used by log-* functions.
|
||||
LOG_CLUSTER_ID=$(get-metadata-value 'instance/attributes/cluster-uid' 'get-metadata-value-error')
|
||||
LOG_INSTANCE_NAME=$(hostname)
|
||||
LOG_BOOT_ID=$(journalctl --list-boots | grep -E '^ *0' | awk '{print $2}')
|
||||
declare -Ag LOG_START_TIMES
|
||||
declare -ag LOG_TRAP_STACK
|
||||
|
||||
LOG_STATUS_STARTED='STARTED'
|
||||
LOG_STATUS_COMPLETED='COMPLETED'
|
||||
LOG_STATUS_ERROR='ERROR'
|
||||
}
|
||||
|
||||
# Sets an EXIT trap.
|
||||
# Args:
|
||||
# $1:... : the trap command.
|
||||
#
|
||||
# NOTE: this function is duplicated in configure.sh, any changes here should be
|
||||
# duplicated there as well.
|
||||
function log-trap-push {
|
||||
local t="${*:1}"
|
||||
LOG_TRAP_STACK+=("${t}")
|
||||
# shellcheck disable=2064
|
||||
trap "${t}" EXIT
|
||||
}
|
||||
|
||||
# Removes and restores an EXIT trap.
|
||||
#
|
||||
# NOTE: this function is duplicated in configure.sh, any changes here should be
|
||||
# duplicated there as well.
|
||||
function log-trap-pop {
|
||||
# Remove current trap.
|
||||
unset 'LOG_TRAP_STACK[-1]'
|
||||
|
||||
# Restore previous trap.
|
||||
if [ ${#LOG_TRAP_STACK[@]} -ne 0 ]; then
|
||||
local t="${LOG_TRAP_STACK[-1]}"
|
||||
# shellcheck disable=2064
|
||||
trap "${t}" EXIT
|
||||
else
|
||||
# If no traps in stack, clear.
|
||||
trap EXIT
|
||||
fi
|
||||
}
|
||||
|
||||
# Logs the end of a bootstrap step that errored.
|
||||
# Args:
|
||||
# $1 : bootstrap step name.
|
||||
#
|
||||
# NOTE: this function is duplicated in configure.sh, any changes here should be
|
||||
# duplicated there as well.
|
||||
function log-error {
|
||||
local bootstep="$1"
|
||||
|
||||
log-proto "${bootstep}" "${LOG_STATUS_ERROR}" "error calling '${BASH_COMMAND}'"
|
||||
}
|
||||
|
||||
# Wraps a command with bootstrap logging.
|
||||
# Args:
|
||||
# $1 : bootstrap step name.
|
||||
# $2... : the command to run.
|
||||
#
|
||||
# NOTE: this function is duplicated in configure.sh, any changes here should be
|
||||
# duplicated there as well.
|
||||
function log-wrap {
|
||||
local bootstep="$1"
|
||||
local command="${*:2}"
|
||||
|
||||
log-trap-push "log-error ${bootstep}"
|
||||
log-proto "${bootstep}" "${LOG_STATUS_STARTED}"
|
||||
$command
|
||||
log-proto "${bootstep}" "${LOG_STATUS_COMPLETED}"
|
||||
log-trap-pop
|
||||
}
|
||||
|
||||
# Logs a bootstrap step start. Prefer log-wrap.
|
||||
# Args:
|
||||
# $1 : bootstrap step name.
|
||||
#
|
||||
# NOTE: this function is duplicated in configure.sh, any changes here should be
|
||||
# duplicated there as well.
|
||||
function log-start {
|
||||
local bootstep="$1"
|
||||
|
||||
log-trap-push "log-error ${bootstep}"
|
||||
log-proto "${bootstep}" "${LOG_STATUS_STARTED}"
|
||||
}
|
||||
|
||||
# Logs a bootstrap step end. Prefer log-wrap.
|
||||
# Args:
|
||||
# $1 : bootstrap step name.
|
||||
#
|
||||
# NOTE: this function is duplicated in configure.sh, any changes here should be
|
||||
# duplicated there as well.
|
||||
function log-end {
|
||||
local bootstep="$1"
|
||||
|
||||
log-proto "${bootstep}" "${LOG_STATUS_COMPLETED}"
|
||||
log-trap-pop
|
||||
}
|
||||
|
||||
# Writes a log proto to stdout.
|
||||
# Args:
|
||||
# $1: bootstrap step name.
|
||||
# $2: status. Either 'STARTED', 'COMPLETED', or 'ERROR'.
|
||||
# $3: optional status reason.
|
||||
#
|
||||
# NOTE: this function is duplicated in configure.sh, any changes here should be
|
||||
# duplicated there as well.
|
||||
function log-proto {
|
||||
local bootstep="$1"
|
||||
local status="$2"
|
||||
local status_reason="${3:-}"
|
||||
|
||||
# Get current time.
|
||||
local current_time
|
||||
current_time="$(date --utc '+%s.%N')"
|
||||
# ...formatted as UTC RFC 3339.
|
||||
local timestamp
|
||||
timestamp="$(date --utc --date="@${current_time}" '+%FT%T.%NZ')"
|
||||
|
||||
# Calculate latency.
|
||||
local latency='null'
|
||||
if [ "${status}" == "${LOG_STATUS_STARTED}" ]; then
|
||||
LOG_START_TIMES["${bootstep}"]="${current_time}"
|
||||
else
|
||||
local start_time="${LOG_START_TIMES["${bootstep}"]}"
|
||||
unset 'LOG_START_TIMES['"${bootstep}"']'
|
||||
|
||||
# Bash cannot do non-integer math, shell out to awk.
|
||||
latency="$(echo "${current_time} ${start_time}" | awk '{print $1 - $2}')s"
|
||||
|
||||
# The default latency is null which cannot be wrapped as a string so we must
|
||||
# do it here instead of the printf.
|
||||
latency="\"${latency}\""
|
||||
fi
|
||||
|
||||
printf '[cloud.kubernetes.monitoring.proto.SerialportLog] {"cluster_hash":"%s","vm_instance_name":"%s","boot_id":"%s","timestamp":"%s","bootstrap_status":{"step_name":"%s","status":"%s","status_reason":"%s","latency":%s}}\n' \
|
||||
"${LOG_CLUSTER_ID}" "${LOG_INSTANCE_NAME}" "${LOG_BOOT_ID}" "${timestamp}" "${bootstep}" "${status}" "${status_reason}" "${latency}"
|
||||
}
|
||||
|
||||
########### Main Function ###########
|
||||
function main() {
|
||||
echo "Start to configure instance for kubernetes"
|
||||
detect_host_info
|
||||
log-wrap 'DetectHostInfo' detect_host_info
|
||||
|
||||
readonly UUID_MNT_PREFIX="/mnt/disks/by-uuid/google-local-ssds"
|
||||
readonly UUID_BLOCK_PREFIX="/dev/disk/by-uuid/google-local-ssds"
|
||||
@@ -3118,17 +3287,12 @@ function main() {
|
||||
KUBE_CONTROLLER_MANAGER_CPU_REQUEST="${KUBE_CONTROLLER_MANAGER_CPU_REQUEST:-200m}"
|
||||
KUBE_SCHEDULER_CPU_REQUEST="${KUBE_SCHEDULER_CPU_REQUEST:-75m}"
|
||||
|
||||
# Use --retry-connrefused opt only if it's supported by curl.
|
||||
CURL_RETRY_CONNREFUSED=""
|
||||
if curl --help | grep -q -- '--retry-connrefused'; then
|
||||
CURL_RETRY_CONNREFUSED='--retry-connrefused'
|
||||
fi
|
||||
|
||||
KUBE_HOME="/home/kubernetes"
|
||||
KUBE_BIN=${KUBE_HOME}/bin
|
||||
CONTAINERIZED_MOUNTER_HOME="${KUBE_HOME}/containerized_mounter"
|
||||
PV_RECYCLER_OVERRIDE_TEMPLATE="${KUBE_HOME}/kube-manifests/kubernetes/pv-recycler-template.yaml"
|
||||
|
||||
log-start 'SetPythonVersion'
|
||||
if [[ "$(python -V 2>&1)" =~ "Python 2" ]]; then
|
||||
# found python2, just use that
|
||||
PYTHON="python"
|
||||
@@ -3144,13 +3308,15 @@ function main() {
|
||||
fi
|
||||
fi
|
||||
echo "Version : $(${PYTHON} -V 2>&1)"
|
||||
log-end 'SetPythonVersion'
|
||||
|
||||
log-start 'SourceKubeEnv'
|
||||
if [[ ! -e "${KUBE_HOME}/kube-env" ]]; then
|
||||
echo "The ${KUBE_HOME}/kube-env file does not exist!! Terminate cluster initialization."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
source "${KUBE_HOME}/kube-env"
|
||||
log-end 'SourceKubeEnv'
|
||||
|
||||
if [[ -f "${KUBE_HOME}/kubelet-config.yaml" ]]; then
|
||||
echo "Found Kubelet config file at ${KUBE_HOME}/kubelet-config.yaml"
|
||||
@@ -3158,16 +3324,19 @@ function main() {
|
||||
fi
|
||||
|
||||
if [[ -e "${KUBE_HOME}/kube-master-certs" ]]; then
|
||||
source "${KUBE_HOME}/kube-master-certs"
|
||||
log-wrap 'SourceKubeMasterCerts' source "${KUBE_HOME}/kube-master-certs"
|
||||
fi
|
||||
|
||||
log-start 'VerifyKubeUser'
|
||||
if [[ -n "${KUBE_USER:-}" ]]; then
|
||||
if ! [[ "${KUBE_USER}" =~ ^[-._@a-zA-Z0-9]+$ ]]; then
|
||||
echo "Bad KUBE_USER format."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
log-end 'VerifyKubeUser'
|
||||
|
||||
log-start 'GenerateTokens'
|
||||
KUBE_CONTROLLER_MANAGER_TOKEN="$(secure_random 32)"
|
||||
KUBE_SCHEDULER_TOKEN="$(secure_random 32)"
|
||||
KUBE_CLUSTER_AUTOSCALER_TOKEN="$(secure_random 32)"
|
||||
@@ -3184,35 +3353,36 @@ function main() {
|
||||
if [[ "${ENABLE_MONITORING_TOKEN:-false}" == "true" ]]; then
|
||||
MONITORING_TOKEN="$(secure_random 32)"
|
||||
fi
|
||||
log-end 'GenerateTokens'
|
||||
|
||||
setup-os-params
|
||||
config-ip-firewall
|
||||
create-dirs
|
||||
ensure-local-ssds
|
||||
setup-kubelet-dir
|
||||
setup-logrotate
|
||||
log-wrap 'SetupOSParams' setup-os-params
|
||||
log-wrap 'ConfigIPFirewall' config-ip-firewall
|
||||
log-wrap 'CreateDirs' create-dirs
|
||||
log-wrap 'EnsureLocalSSDs' ensure-local-ssds
|
||||
log-wrap 'SetupKubeletDir' setup-kubelet-dir
|
||||
log-wrap 'SetupLogrotate' setup-logrotate
|
||||
if [[ "${KUBERNETES_MASTER:-}" == "true" ]]; then
|
||||
mount-master-pd
|
||||
create-node-pki
|
||||
create-master-pki
|
||||
create-master-auth
|
||||
ensure-master-bootstrap-kubectl-auth
|
||||
create-master-kubelet-auth
|
||||
create-master-etcd-auth
|
||||
create-master-etcd-apiserver-auth
|
||||
override-pv-recycler
|
||||
gke-master-start
|
||||
log-wrap 'MountMasterPD' mount-master-pd
|
||||
log-wrap 'CreateNodePKI' create-node-pki
|
||||
log-wrap 'CreateMasterPKI' create-master-pki
|
||||
log-wrap 'CreateMasterAuth' create-master-auth
|
||||
log-wrap 'EnsureMasterBootstrapKubectlAuth' ensure-master-bootstrap-kubectl-auth
|
||||
log-wrap 'CreateMasterKubeletAuth' create-master-kubelet-auth
|
||||
log-wrap 'CreateMasterEtcdAuth' create-master-etcd-auth
|
||||
log-wrap 'CreateMasterEtcdApiserverAuth' create-master-etcd-apiserver-auth
|
||||
log-wrap 'OverridePVRecycler' override-pv-recycler
|
||||
log-wrap 'GKEMasterStart' gke-master-start
|
||||
else
|
||||
create-node-pki
|
||||
create-kubelet-kubeconfig "${KUBERNETES_MASTER_NAME}"
|
||||
log-wrap 'CreateNodePKI' create-node-pki
|
||||
log-wrap 'CreateKubeletKubeconfig' create-kubelet-kubeconfig "${KUBERNETES_MASTER_NAME}"
|
||||
if [[ "${KUBE_PROXY_DAEMONSET:-}" != "true" ]] && [[ "${KUBE_PROXY_DISABLE:-}" != "true" ]]; then
|
||||
create-kubeproxy-user-kubeconfig
|
||||
log-wrap 'CreateKubeproxyUserKubeconfig' create-kubeproxy-user-kubeconfig
|
||||
fi
|
||||
if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then
|
||||
if [[ -n "${NODE_PROBLEM_DETECTOR_TOKEN:-}" ]]; then
|
||||
create-node-problem-detector-kubeconfig "${KUBERNETES_MASTER_NAME}"
|
||||
log-wrap 'CreateNodeProblemDetectorKubeconfig' create-node-problem-detector-kubeconfig "${KUBERNETES_MASTER_NAME}"
|
||||
elif [[ -f "/var/lib/kubelet/kubeconfig" ]]; then
|
||||
create-node-problem-detector-kubeconfig-from-kubelet
|
||||
log-wrap 'CreateNodeProblemDetectorKubeconfigFromKubelet' create-node-problem-detector-kubeconfig-from-kubelet
|
||||
else
|
||||
echo "Either NODE_PROBLEM_DETECTOR_TOKEN or /var/lib/kubelet/kubeconfig must be set"
|
||||
exit 1
|
||||
@@ -3220,60 +3390,71 @@ function main() {
|
||||
fi
|
||||
fi
|
||||
|
||||
override-kubectl
|
||||
log-wrap 'OverrideKubectl' override-kubectl
|
||||
container_runtime="${CONTAINER_RUNTIME:-docker}"
|
||||
# Run the containerized mounter once to pre-cache the container image.
|
||||
if [[ "${container_runtime}" == "docker" ]]; then
|
||||
assemble-docker-flags
|
||||
log-wrap 'AssembleDockerFlags' assemble-docker-flags
|
||||
elif [[ "${container_runtime}" == "containerd" ]]; then
|
||||
if docker-installed; then
|
||||
# We still need to configure docker so it wouldn't reserver the 172.17.0/16 subnet
|
||||
# And if somebody will start docker to build or pull something, logging will also be set up
|
||||
assemble-docker-flags
|
||||
log-wrap 'AssembleDockerFlags' assemble-docker-flags
|
||||
# stop docker if it is present as we want to use just containerd
|
||||
systemctl stop docker || echo "unable to stop docker"
|
||||
log-wrap 'StopDocker' systemctl stop docker || echo "unable to stop docker"
|
||||
fi
|
||||
setup-containerd
|
||||
log-wrap 'SetupContainerd' setup-containerd
|
||||
fi
|
||||
|
||||
log-start 'SetupKubePodLogReadersGroupDir'
|
||||
if [[ -n "${KUBE_POD_LOG_READERS_GROUP:-}" ]]; then
|
||||
mkdir -p /var/log/pods/
|
||||
chgrp -R "${KUBE_POD_LOG_READERS_GROUP:-}" /var/log/pods/
|
||||
chmod -R g+s /var/log/pods/
|
||||
fi
|
||||
log-end 'SetupKubePodLogReadersGroupDir'
|
||||
|
||||
start-kubelet
|
||||
log-wrap 'StartKubelet' start-kubelet
|
||||
|
||||
if [[ "${KUBERNETES_MASTER:-}" == "true" ]]; then
|
||||
compute-master-manifest-variables
|
||||
log-wrap 'ComputeMasterManifestVariables' compute-master-manifest-variables
|
||||
if [[ -z "${ETCD_SERVERS:-}" ]]; then
|
||||
start-etcd-servers
|
||||
log-wrap 'StartEtcdServers' start-etcd-servers
|
||||
fi
|
||||
source ${KUBE_BIN}/configure-kubeapiserver.sh
|
||||
start-kube-apiserver
|
||||
log-wrap 'SourceConfigureKubeApiserver' source ${KUBE_BIN}/configure-kubeapiserver.sh
|
||||
log-wrap 'StartKubeApiserver' start-kube-apiserver
|
||||
if [[ "${RUN_KONNECTIVITY_PODS:-false}" == "true" ]]; then
|
||||
start-konnectivity-server
|
||||
log-wrap 'StartKonnectivityServer' start-konnectivity-server
|
||||
fi
|
||||
start-kube-controller-manager
|
||||
start-kube-scheduler
|
||||
wait-till-apiserver-ready
|
||||
start-kube-addons
|
||||
start-cluster-autoscaler
|
||||
start-lb-controller
|
||||
update-legacy-addon-node-labels &
|
||||
log-wrap 'StartKubeControllerManager' start-kube-controller-manager
|
||||
log-wrap 'StartKubeScheduler' start-kube-scheduler
|
||||
log-wrap 'WaitTillApiserverReady' wait-till-apiserver-ready
|
||||
log-wrap 'StartKubeAddons' start-kube-addons
|
||||
log-wrap 'StartClusterAutoscaler' start-cluster-autoscaler
|
||||
log-wrap 'StartLBController' start-lb-controller
|
||||
log-wrap 'UpdateLegacyAddonNodeLabels' update-legacy-addon-node-labels &
|
||||
else
|
||||
if [[ "${KUBE_PROXY_DAEMONSET:-}" != "true" ]] && [[ "${KUBE_PROXY_DISABLE:-}" != "true" ]]; then
|
||||
start-kube-proxy
|
||||
log-wrap 'StartKubeProxy' start-kube-proxy
|
||||
fi
|
||||
if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then
|
||||
start-node-problem-detector
|
||||
log-wrap 'StartNodeProblemDetector' start-node-problem-detector
|
||||
fi
|
||||
fi
|
||||
reset-motd
|
||||
prepare-mounter-rootfs
|
||||
log-wrap 'ResetMotd' reset-motd
|
||||
log-wrap 'PrepareMounterRootfs' prepare-mounter-rootfs
|
||||
|
||||
# Wait for all background jobs to finish.
|
||||
wait
|
||||
echo "Done for the configuration for kubernetes"
|
||||
}
|
||||
|
||||
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
|
||||
main "${@}"
|
||||
log-init
|
||||
log-wrap 'ConfigureHelperMain' main "${@}"
|
||||
|
||||
if [[ "${KUBERNETES_MASTER:-}" == "true" ]]; then
|
||||
# Give kube-bootstrap-logs-forwarder.service some time to write all logs.
|
||||
sleep 3
|
||||
fi
|
||||
fi
|
||||
|
Reference in New Issue
Block a user