
The staging images are now created with image families, so we can get rid of the image indices stored in GCS. Also, get images based on milestone number instead of "image type".
#!/bin/bash

# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Run e2e tests using environment variables exported in e2e.sh.

set -o errexit
set -o nounset
set -o pipefail
set -o xtrace

: ${KUBE_GCS_RELEASE_BUCKET:="kubernetes-release"}

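# Return success if this script is running inside a Docker container (detected
# via /proc/self/cgroup).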
function running_in_docker() {
    grep -q docker /proc/self/cgroup
}

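# Use release tars built locally under _output instead of downloading them.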
function fetch_output_tars() {
    echo "Using binaries from _output."
    cp _output/release-tars/kubernetes*.tar.gz .
    unpack_binaries
}

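# Look up the GKE server's default cluster version and fetch release/test tars
# from the latest CI build of that version's branch.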
function fetch_server_version_tars() {
    local -r server_version="$(gcloud ${CMD_GROUP:-} container get-server-config --project=${PROJECT} --zone=${ZONE} --format='value(defaultClusterVersion)')"
    # Use latest build of the server version's branch for test files.
    fetch_published_version_tars "ci/latest-${server_version:0:3}"
    # Unset cluster api version; we want to use server default for the cluster
    # version.
    unset CLUSTER_API_VERSION
}

# Use a published version like "ci/latest" (default), "release/latest",
# "release/latest-1", or "release/stable"
function fetch_published_version_tars() {
    local -r published_version="${1}"
    IFS='/' read -a varr <<< "${published_version}"
    bucket="${varr[0]}"
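    # The published version pointer is a small text file in GCS (e.g.
    # gs://kubernetes-release/ci/latest.txt) whose contents are a single
    # version string such as "v1.4.0-alpha.1.100+abcdef0123456" (illustrative).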
    build_version=$(gsutil cat gs://${KUBE_GCS_RELEASE_BUCKET}/${published_version}.txt)
    echo "Using published version $bucket/$build_version (from ${published_version})"
    fetch_tars_from_gcs "${bucket}" "${build_version}"
    unpack_binaries
    # Set CLUSTER_API_VERSION for GKE CI
    export CLUSTER_API_VERSION=$(echo ${build_version} | cut -c 2-)
}

# TODO(ihmccreery) I'm not sure if this is necessary, with the workspace check
# below.
function clean_binaries() {
    echo "Cleaning up binaries."
    rm -rf kubernetes*
}

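# Download the kubernetes release and test tarballs for the given bucket and
# build version from GCS.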
function fetch_tars_from_gcs() {
    local -r bucket="${1}"
    local -r build_version="${2}"
    echo "Pulling binaries from GCS; using server version ${bucket}/${build_version}."
    gsutil -mq cp \
        "gs://${KUBE_GCS_RELEASE_BUCKET}/${bucket}/${build_version}/kubernetes.tar.gz" \
        "gs://${KUBE_GCS_RELEASE_BUCKET}/${bucket}/${build_version}/kubernetes-test.tar.gz" \
        .
}

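# Log checksums and unpack the release and test tarballs into the current
# directory.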
function unpack_binaries() {
    md5sum kubernetes*.tar.gz
    tar -xzf kubernetes.tar.gz
    tar -xzf kubernetes-test.tar.gz
}

# Get the latest GCI image in a family.
function get_latest_gci_image() {
    local -r image_project="$1"
    local -r image_family="$2"
    echo "$(gcloud compute images describe-from-family ${image_family} --project=${image_project} --format='value(name)')"
}

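# Query the GitHub API for the latest Docker release and print its version
# number (without the leading "v").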
function get_latest_docker_release() {
    # Typical Docker release versions look like v1.11.2-rc1, v1.11.2, etc.
    local -r version_re='.*\"tag_name\":[[:space:]]+\"v([0-9\.r|c-]+)\",.*'
    local -r latest_release="$(curl -fsSL --retry 3 https://api.github.com/repos/docker/docker/releases/latest)"
    if [[ "${latest_release}" =~ ${version_re} ]]; then
        echo "${BASH_REMATCH[1]}"
    else
        echo "Malformed Docker API response for latest release: ${latest_release}"
        exit 1
    fi
}

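# Unpack a Google Cloud SDK tarball into install_dir, run its installer
# non-interactively, and prepend its bin directory to PATH.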
function install_google_cloud_sdk_tarball() {
    local -r tarball=$1
    local -r install_dir=$2
    mkdir -p "${install_dir}"
    tar xzf "${tarball}" -C "${install_dir}"

    export CLOUDSDK_CORE_DISABLE_PROMPTS=1
    "${install_dir}/google-cloud-sdk/install.sh" --disable-installation-options --bash-completion=false --path-update=false --usage-reporting=false
    export PATH=${install_dir}/google-cloud-sdk/bin:${PATH}
}

# Only call after attempting to bring the cluster up. Don't call after
# bringing the cluster down.
function dump_cluster_logs_and_exit() {
    local -r exit_status=$?
    dump_cluster_logs
    if [[ "${E2E_DOWN,,}" == "true" ]]; then
        # If we tried to bring the cluster up, make a courtesy attempt
        # to bring the cluster down so we're not leaving resources
        # around. Unlike later, don't sleep beforehand, though. (We're
        # just trying to tear down as many resources as we can as fast
        # as possible and don't even know if we brought the master up.)
        go run ./hack/e2e.go ${E2E_OPT:-} -v --down || true
    fi
    exit ${exit_status}
}

# Only call after attempting to bring the cluster up. Don't call after
# bringing the cluster down.
function dump_cluster_logs() {
    if [[ -x "cluster/log-dump.sh" ]]; then
        ./cluster/log-dump.sh "${ARTIFACTS}"
    fi
}

### Pre Set Up ###
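# When running inside Docker, download and install the Google Cloud SDK (rapid
# channel) so that gcloud and gsutil are available.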
if running_in_docker; then
    curl -fsSL --retry 3 -o "${WORKSPACE}/google-cloud-sdk.tar.gz" 'https://dl.google.com/dl/cloudsdk/channels/rapid/google-cloud-sdk.tar.gz'
    install_google_cloud_sdk_tarball "${WORKSPACE}/google-cloud-sdk.tar.gz" /
fi

# Install gcloud from a custom path if provided. Used to test GKE with gcloud
# at HEAD or with a release candidate.
# TODO: figure out how to avoid installing the cloud sdk twice if run inside Docker.
if [[ -n "${CLOUDSDK_BUCKET:-}" ]]; then
    # Retry the download a few times to mitigate transient server errors and
    # race conditions where the bucket contents change under us as we download.
    for n in $(seq 3); do
        gsutil -mq cp -r "${CLOUDSDK_BUCKET}" ~ && break || sleep 1
        # Delete any temporary files from the download so that we start from
        # scratch when we retry.
        rm -rf ~/.gsutil
    done
    rm -rf ~/repo ~/cloudsdk
    mv ~/$(basename "${CLOUDSDK_BUCKET}") ~/repo
    export CLOUDSDK_COMPONENT_MANAGER_SNAPSHOT_URL=file://${HOME}/repo/components-2.json
    install_google_cloud_sdk_tarball ~/repo/google-cloud-sdk.tar.gz ~/cloudsdk
    # TODO: is this necessary? this won't work inside Docker currently.
    export CLOUDSDK_CONFIG=/var/lib/jenkins/.config/gcloud
fi

# We get the image project and name for GCI dynamically.
if [[ -n "${JENKINS_GCI_IMAGE_FAMILY:-}" ]]; then
    GCI_STAGING_PROJECT=container-vm-image-staging
    export KUBE_GCE_MASTER_PROJECT="${GCI_STAGING_PROJECT}"
    export KUBE_GCE_MASTER_IMAGE="$(get_latest_gci_image "${GCI_STAGING_PROJECT}" "${JENKINS_GCI_IMAGE_FAMILY}")"
    export KUBE_OS_DISTRIBUTION="gci"
    if [[ "${JENKINS_GCI_IMAGE_TYPE}" == preview-test ]]; then
        export KUBE_GCI_DOCKER_VERSION="$(get_latest_docker_release)"
    fi
fi

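# Run the e2e suite via hack/e2e.go, passing any ginkgo test args through to
# the test runner.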
function e2e_test() {
    local -r ginkgo_test_args="${1}"
    # Check to make sure the cluster is up before running tests, and fail if it's not.
    go run ./hack/e2e.go ${E2E_OPT:-} -v --isup
    # Jenkins will look at the junit*.xml files for test failures, so don't exit with a nonzero
    # error code if it was only tests that failed.
    go run ./hack/e2e.go ${E2E_OPT:-} -v --test \
        ${ginkgo_test_args:+--test_args="${ginkgo_test_args}"} \
        && exitcode=0 || exitcode=$?
    if [[ "${E2E_PUBLISH_GREEN_VERSION:-}" == "true" && ${exitcode} == 0 ]]; then
        # Use plaintext version file packaged with kubernetes.tar.gz
        echo "Publish version to ci/latest-green.txt: $(cat version)"
        gsutil cp ./version gs://kubernetes-release/ci/latest-green.txt
    fi
}

echo "--------------------------------------------------------------------------------"
|
|
echo "Test Environment:"
|
|
printenv | sort
|
|
echo "--------------------------------------------------------------------------------"
|
|
|
|
# We get the Kubernetes tarballs unless we are going to use old ones
if [[ "${JENKINS_USE_EXISTING_BINARIES:-}" =~ ^[yY]$ ]]; then
    echo "Using existing binaries; not cleaning, fetching, or unpacking new ones."
elif [[ "${KUBE_RUN_FROM_OUTPUT:-}" =~ ^[yY]$ ]]; then
    # TODO(spxtr) This should probably be JENKINS_USE_BINARIES_FROM_OUTPUT or
    # something, rather than being prepended with KUBE, since it's sort of a
    # meta-thing.
    clean_binaries
    fetch_output_tars
elif [[ "${JENKINS_USE_SERVER_VERSION:-}" =~ ^[yY]$ ]]; then
    # This is for test, staging, and prod jobs on GKE, where we want to
    # test what's running in GKE by default rather than some CI build.
    clean_binaries
    fetch_server_version_tars
else
    # use JENKINS_PUBLISHED_VERSION, default to 'ci/latest', since that's
    # usually what we're testing.
    clean_binaries
    fetch_published_version_tars "${JENKINS_PUBLISHED_VERSION:-ci/latest}"
fi

# Copy GCE keys so we don't keep cycling them.
# To set this up, you must know the <project>, <zone>, and <instance>
# on which your jenkins jobs are running. Then do:
#
#   # SSH from your computer into the instance.
#   $ gcloud compute ssh --project="<prj>" --zone="<zone>" <instance>
#
#   # Generate a key by ssh'ing from the instance into itself, then exit.
#   $ gcloud compute ssh --project="<prj>" --zone="<zone>" <instance>
#   $ ^D
#
#   # Copy the keys to the desired location (e.g. /var/lib/jenkins/gce_keys/).
#   $ sudo mkdir -p /var/lib/jenkins/gce_keys/
#   $ sudo cp ~/.ssh/google_compute_engine /var/lib/jenkins/gce_keys/
#   $ sudo cp ~/.ssh/google_compute_engine.pub /var/lib/jenkins/gce_keys/
#
#   # Move the permissions for the keys to Jenkins.
#   $ sudo chown -R jenkins /var/lib/jenkins/gce_keys/
#   $ sudo chgrp -R jenkins /var/lib/jenkins/gce_keys/
case "${KUBERNETES_PROVIDER}" in
|
|
gce|gke|kubemark)
|
|
if ! running_in_docker; then
|
|
mkdir -p ${WORKSPACE}/.ssh/
|
|
cp /var/lib/jenkins/gce_keys/google_compute_engine ${WORKSPACE}/.ssh/
|
|
cp /var/lib/jenkins/gce_keys/google_compute_engine.pub ${WORKSPACE}/.ssh/
|
|
fi
|
|
echo 'Checking existence of private ssh key'
|
|
gce_key="${WORKSPACE}/.ssh/google_compute_engine"
|
|
if [[ ! -f "${gce_key}" || ! -f "${gce_key}.pub" ]]; then
|
|
echo 'google_compute_engine ssh key missing!'
|
|
exit 1
|
|
fi
|
|
echo "Checking presence of public key in ${PROJECT}"
|
|
if ! gcloud compute --project="${PROJECT}" project-info describe |
|
|
grep "$(cat "${gce_key}.pub")" >/dev/null; then
|
|
echo 'Uploading public ssh key to project metadata...'
|
|
gcloud compute --project="${PROJECT}" config-ssh
|
|
fi
|
|
;;
|
|
default)
|
|
echo "Not copying ssh keys for ${KUBERNETES_PROVIDER}"
|
|
;;
|
|
esac
|
|
|
|
cd kubernetes

# Upload build start time and k8s version to GCS, but not on PR Jenkins.
# On PR Jenkins this is done before the build.
if [[ ! "${JOB_NAME}" =~ -pull- ]]; then
    JENKINS_BUILD_STARTED=true bash <(curl -fsS --retry 3 "https://raw.githubusercontent.com/kubernetes/kubernetes/master/hack/jenkins/upload-to-gcs.sh")
fi

# Have cmd/e2e run by goe2e.sh generate JUnit report in ${WORKSPACE}/junit*.xml
ARTIFACTS=${WORKSPACE}/_artifacts
mkdir -p ${ARTIFACTS}
# When run inside Docker, we need to make sure all files are world-readable
# (since they will be owned by root on the host).
trap "chmod -R o+r '${ARTIFACTS}'" EXIT SIGINT SIGTERM
export E2E_REPORT_DIR=${ARTIFACTS}
declare -r gcp_list_resources_script="./cluster/gce/list-resources.sh"
declare -r gcp_resources_before="${ARTIFACTS}/gcp-resources-before.txt"
declare -r gcp_resources_cluster_up="${ARTIFACTS}/gcp-resources-cluster-up.txt"
declare -r gcp_resources_after="${ARTIFACTS}/gcp-resources-after.txt"
if [[ ( ${KUBERNETES_PROVIDER} == "gce" || ${KUBERNETES_PROVIDER} == "gke" ) && -x "${gcp_list_resources_script}" ]]; then
    gcp_list_resources="true"
    # Always pull the script from HEAD, overwriting the local one if it exists.
    # We do this to pick up fixes if we are running tests from a branch or tag.
    curl -fsS --retry 3 "https://raw.githubusercontent.com/kubernetes/kubernetes/master/cluster/gce/list-resources.sh" > "${gcp_list_resources_script}"
else
    gcp_list_resources="false"
fi

### Set up ###
if [[ "${E2E_UP,,}" == "true" ]]; then
    go run ./hack/e2e.go ${E2E_OPT:-} -v --down
fi
if [[ "${gcp_list_resources}" == "true" ]]; then
    ${gcp_list_resources_script} > "${gcp_resources_before}"
fi
if [[ "${E2E_UP,,}" == "true" ]]; then
    # We want to try to gather logs even if kube-up fails, so collect the
    # result here and fail after dumping logs if it's nonzero.
    go run ./hack/e2e.go ${E2E_OPT:-} -v --up || dump_cluster_logs_and_exit
    go run ./hack/e2e.go -v --ctl="version --match-server-version=false"
    if [[ "${gcp_list_resources}" == "true" ]]; then
        ${gcp_list_resources_script} > "${gcp_resources_cluster_up}"
    fi
fi

# Allow download & unpack of alternate version of tests, for cross-version & upgrade testing.
#
# JENKINS_PUBLISHED_SKEW_VERSION downloads an alternate version of Kubernetes
# for testing, moving the old one to kubernetes_old.
#
# E2E_UPGRADE_TEST=true triggers a run of the e2e tests, to do something like
# upgrade the cluster, before the main test run. It uses
# GINKGO_UPGRADE_TEST_ARGS for the test run.
#
# JENKINS_USE_SKEW_TESTS=true will run tests from the skewed version rather
# than the original version.
if [[ -n "${JENKINS_PUBLISHED_SKEW_VERSION:-}" ]]; then
    cd ..
    mv kubernetes kubernetes_old
    fetch_published_version_tars "${JENKINS_PUBLISHED_SKEW_VERSION}"
    cd kubernetes
    # Upgrade the cluster before running other tests
    if [[ "${E2E_UPGRADE_TEST:-}" == "true" ]]; then
        # Add a report prefix for the e2e tests so that the tests don't get overwritten when we run
        # the rest of the e2es.
        E2E_REPORT_PREFIX='upgrade' e2e_test "${GINKGO_UPGRADE_TEST_ARGS:-}"
    fi
    if [[ "${JENKINS_USE_SKEW_TESTS:-}" != "true" ]]; then
        # Back out into the old tests now that we've downloaded & maybe upgraded.
        cd ../kubernetes_old
        # Append kubectl-path of skewed kubectl to test args, since we always
        # want that to use the skewed kubectl version:
        #
        # - for upgrade jobs, we want kubectl to be at the same version as master.
        # - for client skew tests, we want to use the skewed kubectl (that's what we're testing).
        GINKGO_TEST_ARGS="${GINKGO_TEST_ARGS:-} --kubectl-path=$(pwd)/../kubernetes/cluster/kubectl.sh"
    fi
fi

if [[ "${E2E_TEST,,}" == "true" ]]; then
|
|
e2e_test "${GINKGO_TEST_ARGS:-}"
|
|
fi
|
|
|
|
### Start Kubemark ###
if [[ "${USE_KUBEMARK:-}" == "true" ]]; then
    export RUN_FROM_DISTRO=true
    NUM_NODES_BKP=${NUM_NODES}
    MASTER_SIZE_BKP=${MASTER_SIZE}
    ./test/kubemark/stop-kubemark.sh
    NUM_NODES=${KUBEMARK_NUM_NODES:-$NUM_NODES}
    MASTER_SIZE=${KUBEMARK_MASTER_SIZE:-$MASTER_SIZE}
    # If start-kubemark fails, dump logs from the base cluster and exit.
    ./test/kubemark/start-kubemark.sh || dump_cluster_logs_and_exit
    # Similarly, if the tests fail, dump logs from the base cluster.
    # We intentionally overwrite the exit code from `run-e2e-tests.sh` because we want jenkins to look at the
    # junit.xml results for test failures and not process the exit code. This is needed by jenkins to more gracefully
    # handle blocking the merge queue as a result of test failure flakes. Infrastructure failures should continue to
    # exit non-0.
    ./test/kubemark/run-e2e-tests.sh --ginkgo.focus="${KUBEMARK_TESTS:-starting\s30\spods}" "${KUBEMARK_TEST_ARGS:-}" || dump_cluster_logs
    ./test/kubemark/stop-kubemark.sh
    NUM_NODES=${NUM_NODES_BKP}
    MASTER_SIZE=${MASTER_SIZE_BKP}
    unset RUN_FROM_DISTRO
    unset NUM_NODES_BKP
    unset MASTER_SIZE_BKP
fi

### Clean up ###
if [[ "${E2E_DOWN,,}" == "true" ]]; then
    # Sleep before deleting the cluster to give the controller manager time to
    # delete any cloudprovider resources still around from the last test.
    # This is calibrated to allow enough time for 3 attempts to delete the
    # resources. Each attempt is allocated 5 seconds for requests to the
    # cloudprovider plus the processingRetryInterval from servicecontroller.go
    # for the wait between attempts.
    sleep 30
    go run ./hack/e2e.go ${E2E_OPT:-} -v --down
fi
if [[ "${gcp_list_resources}" == "true" ]]; then
    ${gcp_list_resources_script} > "${gcp_resources_after}"
fi

# Compare resources if either the cluster was
# * started and destroyed (normal e2e)
# * neither started nor destroyed (soak test)
if [[ "${E2E_UP:-}" == "${E2E_DOWN:-}" && -f "${gcp_resources_before}" && -f "${gcp_resources_after}" ]]; then
    difference=$(diff -sw -U0 -F'^\[.*\]$' "${gcp_resources_before}" "${gcp_resources_after}") || true
    if [[ -n $(echo "${difference}" | tail -n +3 | grep -E "^\+") ]] && [[ "${FAIL_ON_GCP_RESOURCE_LEAK:-}" == "true" ]]; then
        echo "${difference}"
        echo "!!! FAIL: Google Cloud Platform resources leaked while running tests!"
        exit 1
    fi
fi