@@ -18,7 +18,8 @@
 
 # Use the config file specified in $KUBE_CONFIG_FILE, or default to
 # config-default.sh.
-source $(dirname ${BASH_SOURCE})/${KUBE_CONFIG_FILE-"config-default.sh"}
+KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
+source "${KUBE_ROOT}/cluster/aws/${KUBE_CONFIG_FILE-"config-default.sh"}"
 
 export AWS_DEFAULT_REGION=${ZONE}
 AWS_CMD="aws --output json ec2"
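The ${KUBE_CONFIG_FILE-"config-default.sh"} expansion above substitutes config-default.sh only when the variable is unset, so a caller can point the script at an alternate config without editing it. A minimal sketch of the behavior (config-test.sh is only an illustrative name):

    unset KUBE_CONFIG_FILE
    source "${KUBE_ROOT}/cluster/aws/${KUBE_CONFIG_FILE-"config-default.sh"}"  # loads config-default.sh
    KUBE_CONFIG_FILE=config-test.sh
    source "${KUBE_ROOT}/cluster/aws/${KUBE_CONFIG_FILE-"config-default.sh"}"  # loads config-test.sh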
@@ -30,7 +31,7 @@ function json_val {
 # TODO (ayurchuk) Refactor the get_* functions to use filters
 # TODO (bburns) Parameterize this for multiple clusters per project
 function get_instance_ids {
-  python -c 'import json,sys; lst = [str(instance["InstanceId"]) for reservation in json.load(sys.stdin)["Reservations"] for instance in reservation["Instances"] for tag in instance.get("Tags", []) if tag["Value"].startswith("kubernetes-minion") or tag["Value"].startswith("kubernetes-master")]; print " ".join(lst)'
+  python -c "import json,sys; lst = [str(instance['InstanceId']) for reservation in json.load(sys.stdin)['Reservations'] for instance in reservation['Instances'] for tag in instance.get('Tags', []) if tag['Value'].startswith('${MASTER_TAG}') or tag['Value'].startswith('${MINION_TAG}')]; print ' '.join(lst)"
 }
 
 function get_vpc_id {
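The inline python walks Reservations into Instances into Tags and keeps any instance whose tag value matches the master or minion prefix; the change above parameterizes those prefixes instead of hard-coding them. The first TODO points at the cleaner server-side version. A hedged sketch of that refactor, assuming the Role tags applied during kube-up and relying on aws-cli's --filters and --query options:

    aws ec2 describe-instances \
      --filters "Name=tag:Role,Values=${MASTER_TAG},${MINION_TAG}" \
      --query "Reservations[].Instances[].InstanceId" \
      --output text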
@@ -106,20 +107,6 @@ function ensure-temp-dir {
   fi
 }
 
-function setup-monitoring-firewall {
-  if [[ "${ENABLE_CLUSTER_MONITORING:-false}" == "true" ]]; then
-    # TODO: Implement this.
-    echo "Monitoring not currently supported on AWS"
-  fi
-}
-
-function teardown-monitoring-firewall {
-  if [[ "${ENABLE_CLUSTER_MONITORING:-false}" == "true" ]]; then
-    # TODO: Implement this.
-    echo "Monitoring not currently supported on AWS"
-  fi
-}
-
 # Verify and find the various tar files that we are going to use on the server.
 #
 # Vars set:
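The removed stubs guarded on ${ENABLE_CLUSTER_MONITORING:-false}, a defaulting expansion worth keeping in mind when comparing against the re-added versions near the end of this diff, which test the bare variable. With :-false, an unset or empty variable quietly counts as disabled:

    unset ENABLE_CLUSTER_MONITORING
    if [[ "${ENABLE_CLUSTER_MONITORING:-false}" == "true" ]]; then
      echo "monitoring on"   # not reached: the expansion yields "false"
    fi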
@@ -342,10 +329,56 @@ function kube-up {
   add-tag $master_id Name $MASTER_NAME
   add-tag $master_id Role $MASTER_TAG
 
-  echo "Waiting 1 minute for master to be ready"
-  # TODO(justinsb): Actually poll for the master being ready
-  # (we at least need the salt-master to be up before the minions come up)
-  sleep 60
+  echo "Waiting for master to be ready"
+
+  local attempt=0
+
+  while true; do
+    echo -n Attempt "$(($attempt+1))" to check for master node
+    local ip=$($AWS_CMD describe-instances | get_instance_public_ip $MASTER_NAME)
+    if [[ -z "${ip}" ]]; then
+      if (( attempt > 30 )); then
+        echo
+        echo -e "${color_red}master failed to start. Your cluster is unlikely" >&2
+        echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
+        echo -e "cluster. (sorry!)${color_norm}" >&2
+        exit 1
+      fi
+    else
+      KUBE_MASTER=${MASTER_NAME}
+      KUBE_MASTER_IP=${ip}
+
+      echo -e " ${color_green}[master running @${KUBE_MASTER_IP}]${color_norm}"
+      break
+    fi
+    echo -e " ${color_yellow}[master not working yet]${color_norm}"
+    attempt=$(($attempt+1))
+    sleep 10
+  done
+
+  # We need the salt-master to be up for the minions to work
+  attempt=0
+  while true; do
+    echo -n Attempt "$(($attempt+1))" to check for salt-master
+    local output
+    output=$(ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} pgrep salt-master 2> $LOG) || output=""
+    if [[ -z "${output}" ]]; then
+      if (( attempt > 30 )); then
+        echo
+        echo -e "${color_red}salt-master failed to start on ${KUBE_MASTER_IP}. Your cluster is unlikely" >&2
+        echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
+        echo -e "cluster. (sorry!)${color_norm}" >&2
+        exit 1
+      fi
+    else
+      echo -e " ${color_green}[salt-master running]${color_norm}"
+      break
+    fi
+    echo -e " ${color_yellow}[salt-master not working yet]${color_norm}"
+    attempt=$(($attempt+1))
+    sleep 10
+  done
+
 
   for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
     echo "Starting Minion (${MINION_NAMES[$i]})"
@@ -413,7 +446,7 @@ function kube-up {
     sleep 10
   done
   echo "Re-running salt highstate"
-  ssh -oStrictHostKeyChecking=no -i ~/.ssh/kube_aws_rsa ubuntu@${KUBE_MASTER_IP} sudo salt '*' state.highstate > $LOG
+  ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} sudo salt '*' state.highstate > $LOG
 
   echo "Waiting for cluster initialization."
   echo
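The highstate rerun tells the salt master to push the full configured state to every minion: salt's first positional argument is a target pattern, and '*' matches all minion ids. Run on the master itself, the targeting looks like this (a sketch; the specific minion id is hypothetical):

    sudo salt '*' state.highstate                    # reapply state on every minion
    sudo salt 'kubernetes-minion-1' state.highstate  # reapply on a single minion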
@@ -439,7 +472,7 @@ function kube-up {
   # Basic sanity checking
   for i in ${KUBE_MINION_IP_ADDRESSES[@]}; do
     # Make sure docker is installed
-    ssh -oStrictHostKeyChecking=no ubuntu@$i -i ~/.ssh/kube_aws_rsa which docker > $LOG 2>&1
+    ssh -oStrictHostKeyChecking=no ubuntu@$i -i ${AWS_SSH_KEY} which docker > $LOG 2>&1
     if [ "$?" != "0" ]; then
       echo "Docker failed to install on $i. Your cluster is unlikely to work correctly."
       echo "Please run ./cluster/aws/kube-down.sh and re-create the cluster. (sorry!)"
@@ -461,9 +494,9 @@ function kube-up {
   # config file. Distribute the same way the htpasswd is done.
   (
     umask 077
-    ssh -oStrictHostKeyChecking=no -i ~/.ssh/kube_aws_rsa ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/kubecfg.crt >"${HOME}/${kube_cert}" 2>$LOG
-    ssh -oStrictHostKeyChecking=no -i ~/.ssh/kube_aws_rsa ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/kubecfg.key >"${HOME}/${kube_key}" 2>$LOG
-    ssh -oStrictHostKeyChecking=no -i ~/.ssh/kube_aws_rsa ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/ca.crt >"${HOME}/${ca_cert}" 2>$LOG
+    ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/kubecfg.crt >"${HOME}/${kube_cert}" 2>$LOG
+    ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/kubecfg.key >"${HOME}/${kube_key}" 2>$LOG
+    ssh -oStrictHostKeyChecking=no -i ${AWS_SSH_KEY} ubuntu@${KUBE_MASTER_IP} sudo cat /srv/kubernetes/ca.crt >"${HOME}/${ca_cert}" 2>$LOG
 
     cat << EOF > ~/.kubernetes_auth
 {
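The surrounding parentheses give the umask a subshell to die in: the key material lands on disk owner-read/write only, while the caller's umask is untouched afterwards. A minimal illustration (the path is only an example):

    umask 0022                 # a typical default
    (
      umask 077
      touch /tmp/example.key   # created rw------- (0600)
    )
    umask                      # prints 0022 again; the tightened umask did not leak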
@@ -482,33 +515,41 @@ EOF
 
 function kube-down {
   instance_ids=$($AWS_CMD describe-instances | get_instance_ids)
-  $AWS_CMD terminate-instances --instance-ids $instance_ids > $LOG
-  echo "Waiting for instances deleted"
-  while true; do
-    instance_states=$($AWS_CMD describe-instances --instance-ids $instance_ids | expect_instance_states terminated)
-    if [[ "$instance_states" == "" ]]; then
-      echo "All instances terminated"
-      break
-    else
-      echo "Instances not yet terminated: $instance_states"
-      echo "Sleeping for 3 seconds..."
-      sleep 3
-    fi
-  done
+  if [[ -n ${instance_ids} ]]; then
+    $AWS_CMD terminate-instances --instance-ids $instance_ids > $LOG
+    echo "Waiting for instances deleted"
+    while true; do
+      instance_states=$($AWS_CMD describe-instances --instance-ids $instance_ids | expect_instance_states terminated)
+      if [[ "$instance_states" == "" ]]; then
+        echo "All instances terminated"
+        break
+      else
+        echo "Instances not yet terminated: $instance_states"
+        echo "Sleeping for 3 seconds..."
+        sleep 3
+      fi
+    done
+  fi
 
   echo "Deleting VPC"
-  vpc_id=$($AWS_CMD describe-vpcs | get_vpc_id)
-  subnet_id=$($AWS_CMD describe-subnets | get_subnet_id $vpc_id)
-  igw_id=$($AWS_CMD describe-internet-gateways | get_igw_id $vpc_id)
-  route_table_id=$($AWS_CMD describe-route-tables | get_route_table_id $vpc_id)
   sec_group_id=$($AWS_CMD describe-security-groups | get_sec_group_id)
+  if [[ -n "${sec_group_id}" ]]; then
+    $AWS_CMD delete-security-group --group-id $sec_group_id > $LOG
+  fi
 
-  $AWS_CMD delete-subnet --subnet-id $subnet_id > $LOG
-  $AWS_CMD detach-internet-gateway --internet-gateway-id $igw_id --vpc-id $vpc_id > $LOG
-  $AWS_CMD delete-internet-gateway --internet-gateway-id $igw_id > $LOG
-  $AWS_CMD delete-security-group --group-id $sec_group_id > $LOG
-  $AWS_CMD delete-route --route-table-id $route_table_id --destination-cidr-block 0.0.0.0/0 > $LOG
-  $AWS_CMD delete-vpc --vpc-id $vpc_id > $LOG
+  vpc_id=$($AWS_CMD describe-vpcs | get_vpc_id)
+  if [[ -n "${vpc_id}" ]]; then
+    subnet_id=$($AWS_CMD describe-subnets | get_subnet_id $vpc_id)
+    igw_id=$($AWS_CMD describe-internet-gateways | get_igw_id $vpc_id)
+    route_table_id=$($AWS_CMD describe-route-tables | get_route_table_id $vpc_id)
+
+    $AWS_CMD delete-subnet --subnet-id $subnet_id > $LOG
+    $AWS_CMD detach-internet-gateway --internet-gateway-id $igw_id --vpc-id $vpc_id > $LOG
+    $AWS_CMD delete-internet-gateway --internet-gateway-id $igw_id > $LOG
+    $AWS_CMD delete-route --route-table-id $route_table_id --destination-cidr-block 0.0.0.0/0 > $LOG
+
+    $AWS_CMD delete-vpc --vpc-id $vpc_id > $LOG
+  fi
 }
 
 function setup-logging-firewall {
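Guarding on [[ -n ${instance_ids} ]] (and likewise on the security-group and VPC ids) is what makes kube-down safe to re-run: with an empty expansion, --instance-ids would receive no arguments at all and the CLI rejects the call with a usage error rather than treating it as a no-op. A sketch of the failure mode the guard avoids:

    instance_ids=""
    # expands to: aws --output json ec2 terminate-instances --instance-ids
    # which exits non-zero complaining that --instance-ids needs a value
    $AWS_CMD terminate-instances --instance-ids $instance_ids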
@@ -518,3 +559,127 @@ function setup-logging-firewall {
 function teardown-logging-firewall {
   echo "TODO: teardown logging"
 }
+
+# -----------------------------------------------------------------------------
+# Cluster specific test helpers used from hack/e2e-test.sh
+
+# Execute prior to running tests to build a release if required for env.
+#
+# Assumed Vars:
+#   KUBE_ROOT
+function test-build-release {
+  # Make a release
+  "${KUBE_ROOT}/build/release.sh"
+}
+
+# Execute prior to running tests to initialize required structure. This is
+# called from hack/e2e.go only when running -up (it is run after kube-up).
+#
+# Assumed vars:
+#   Variables from config.sh
+function test-setup {
+  echo "test-setup complete"
+}
+
+# Execute after running tests to perform any required clean-up. This is called
+# from hack/e2e.go
+function test-teardown {
+  # detect-project
+  # echo "Shutting down test cluster in background."
+  # gcloud compute firewall-rules delete \
+  #   --project "${PROJECT}" \
+  #   --quiet \
+  #   "${MINION_TAG}-${INSTANCE_PREFIX}-http-alt" || true
+  echo "Shutting down test cluster."
+  "${KUBE_ROOT}/cluster/kube-down.sh"
+}
+
+# SSH to a node by name ($1) and run a command ($2).
+function ssh-to-node {
+  local node="$1"
+  local cmd="$2"
+  for try in $(seq 1 5); do
+    if gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "${cmd}"; then
+      break
+    fi
+  done
+}
+
+# Restart the kube-proxy on a node ($1)
+function restart-kube-proxy {
+  ssh-to-node "$1" "sudo /etc/init.d/kube-proxy restart"
+}
+
+# Setup monitoring firewalls using heapster and InfluxDB
+function setup-monitoring-firewall {
+  if [[ "${ENABLE_CLUSTER_MONITORING}" != "true" ]]; then
+    return
+  fi
+
+  # TODO: Support monitoring firewall
+  echo "Cluster monitoring setup is not (yet) supported on AWS"
+}
+
+function teardown-monitoring-firewall {
+  if [[ "${ENABLE_CLUSTER_MONITORING}" != "true" ]]; then
+    return
+  fi
+
+  # TODO: Support monitoring firewall
+}
+
+function setup-logging-firewall {
+  # If logging with Fluentd to Elasticsearch is enabled then create pods
+  # and services for Elasticsearch (for ingesting logs) and Kibana (for
+  # viewing logs).
+  if [[ "${ENABLE_NODE_LOGGING-}" != "true" ]] || \
+     [[ "${LOGGING_DESTINATION-}" != "elasticsearch" ]] || \
+     [[ "${ENABLE_CLUSTER_LOGGING-}" != "true" ]]; then
+    return
+  fi
+
+  # TODO: Support logging
+  echo "Logging setup is not (yet) supported on AWS"
+
+  # detect-project
+  # gcloud compute firewall-rules create "${INSTANCE_PREFIX}-fluentd-elasticsearch-logging" --project "${PROJECT}" \
+  #   --allow tcp:5601 tcp:9200 tcp:9300 --target-tags "${MINION_TAG}" --network="${NETWORK}"
+  #
+  # # This should be nearly instant once kube-addons gets a chance to
+  # # run, and we already know we can hit the apiserver, but it's still
+  # # worth checking.
+  # echo "waiting for logging services to be created by the master."
+  # local kubectl="${KUBE_ROOT}/cluster/kubectl.sh"
+  # for i in `seq 1 10`; do
+  #   if "${kubectl}" get services -l name=kibana-logging -o template -t {{range.items}}{{.id}}{{end}} | grep -q kibana-logging &&
+  #      "${kubectl}" get services -l name=elasticsearch-logging -o template -t {{range.items}}{{.id}}{{end}} | grep -q elasticsearch-logging; then
+  #     break
+  #   fi
+  #   sleep 10
+  # done
+  #
+  # local -r region="${ZONE::-2}"
+  # local -r es_ip=$(gcloud compute forwarding-rules --project "${PROJECT}" describe --region "${region}" elasticsearch-logging | grep IPAddress | awk '{print $2}')
+  # local -r kibana_ip=$(gcloud compute forwarding-rules --project "${PROJECT}" describe --region "${region}" kibana-logging | grep IPAddress | awk '{print $2}')
+  # echo
+  # echo -e "${color_green}Cluster logs are ingested into Elasticsearch running at ${color_yellow}http://${es_ip}:9200"
+  # echo -e "${color_green}Kibana logging dashboard will be available at ${color_yellow}http://${kibana_ip}:5601${color_norm}"
+  # echo
+}
+
+function teardown-logging-firewall {
+  if [[ "${ENABLE_NODE_LOGGING-}" != "true" ]] || \
+     [[ "${LOGGING_DESTINATION-}" != "elasticsearch" ]] || \
+     [[ "${ENABLE_CLUSTER_LOGGING-}" != "true" ]]; then
+    return
+  fi
+
+  # TODO: Support logging
+}
+
+# Perform preparations required to run e2e tests
+function prepare-e2e() {
+  # (AWS runs detect-project, I don't think we need to do anything)
+  # Note: we can't print anything here, or else the test tools will break with the extra output
+  return
+}
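One quirk worth noting: after this hunk the file contains both the old TODO stubs (kept as context above) and the new implementations of setup-logging-firewall and teardown-logging-firewall. That is legal bash; when a script defines the same function twice, the later definition silently replaces the earlier one, so the implemented versions win. A two-line demonstration:

    function setup-logging-firewall { echo "TODO: setup logging"; }
    function setup-logging-firewall { echo "Logging setup is not (yet) supported on AWS"; }
    setup-logging-firewall   # prints the second message; the later definition wins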