Removing old shell-based monitoring test.
This commit is contained in:
parent
425dd7e3ee
commit
cbb3c96f31
@ -1,134 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright 2014 Google Inc. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Assumes a running Kubernetes test cluster; verifies that the monitoring setup
|
||||
# works. Assumes that we're being called by hack/e2e-test.sh (we use some env
|
||||
# vars it sets up).
|
||||
|
||||
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit
set -o nounset
set -o pipefail

# Repository root, resolved relative to the location of this script.
KUBE_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."

# Defaults; each may be overridden from the caller's environment. The
# expansions are quoted so the no-op ':' never word-splits or globs the values.
: "${KUBE_VERSION_ROOT:=${KUBE_ROOT}}"
: "${KUBECTL:=${KUBE_VERSION_ROOT}/cluster/kubectl.sh}"
: "${KUBE_CONFIG_FILE:=config-test.sh}"

export KUBECTL KUBE_CONFIG_FILE

source "${KUBE_ROOT}/cluster/kube-env.sh"
source "${KUBE_VERSION_ROOT}/cluster/${KUBERNETES_PROVIDER}/util.sh"

prepare-e2e

# Directory holding the monitoring addon manifests.
MONITORING="${KUBE_ROOT}/cluster/addons/cluster-monitoring"
# NOTE(review): this unconditionally replaces the KUBECTL value defaulted and
# exported above (which honours KUBE_VERSION_ROOT and caller overrides) --
# confirm that is intended.
KUBECTL="${KUBE_ROOT}/cluster/kubectl.sh"
# Random hex suffix built from two $RANDOM draws (each 0..32767), used to keep
# the firewall rule name unique per run.
BIGRAND=$(printf "%x\n" $(( RANDOM << 16 | RANDOM )))
MONITORING_FIREWALL_RULE="monitoring-test-${BIGRAND}"
|
||||
|
||||
# Prepares the cluster for the monitoring test.
# Globals:   KUBERNETES_PROVIDER, GCLOUD, PROJECT, NETWORK,
#            MONITORING_FIREWALL_RULE, KUBECTL, MONITORING (all read)
# Outputs:   prints a message if the firewall rule cannot be created
# Returns:   status of the final kubectl create; a firewall failure ends in
#            `false`, which aborts the script under errexit
function setup {
  # Only gce and gke need a firewall rule; every other provider goes straight
  # to creating the monitoring resources.
  case "${KUBERNETES_PROVIDER}" in
    gce|gke)
      detect-project
      local -a firewall_args=(
        compute firewall-rules create "${MONITORING_FIREWALL_RULE}"
        --project "${PROJECT}"
        --network "${NETWORK}"
        --quiet
        --allow tcp:80 tcp:8083 tcp:8086 tcp:9200
      )
      if ! "${GCLOUD}" "${firewall_args[@]}"; then
        echo "Failed to set up firewall for monitoring" && false
      fi
      ;;
  esac

  # Create everything described under the monitoring addon directory.
  "${KUBECTL}" create -f "${MONITORING}/"
}
|
||||
|
||||
# Tears down everything the monitoring test may have created.
# Globals:   KUBECTL, MONITORING, KUBERNETES_PROVIDER, GCLOUD, PROJECT,
#            MONITORING_FIREWALL_RULE (all read)
# Every kubectl/gcloud removal step is best-effort: this function is called
# before setup as well as from the EXIT trap, so the resources may not exist.
function cleanup {
  "${KUBECTL}" stop rc monitoring-influx-grafana-controller &> /dev/null || true
  "${KUBECTL}" stop rc monitoring-heapster-controller &> /dev/null || true
  "${KUBECTL}" delete -f "${MONITORING}/" &> /dev/null || true

  # This only has work to do on gce and gke
  if [[ "${KUBERNETES_PROVIDER}" == "gce" || "${KUBERNETES_PROVIDER}" == "gke" ]]; then
    detect-project
    # Look the rule up in the same project it is deleted from; without
    # --project the lookup uses gcloud's configured default project, which can
    # miss the rule and leave it behind.
    if "${GCLOUD}" compute firewall-rules describe \
        --project "${PROJECT}" \
        "${MONITORING_FIREWALL_RULE}" &> /dev/null; then
      "${GCLOUD}" compute firewall-rules delete \
        --project "${PROJECT}" \
        --quiet \
        "${MONITORING_FIREWALL_RULE}" || true
    fi
  fi
}
|
||||
|
||||
# Verifies that InfluxDB answers queries against both the "stats" and
# "machine" series.
# Globals:   KUBECTL (read)
# Outputs:   progress/failure messages on stdout (plus curl's own output)
# Returns:   0 once both queries succeed; exits the script with status 1 if
#            the InfluxDB host cannot be determined or the queries never succeed
function influx-data-exists {
  local max_retries=10  # passed to curl --retry
  local retry_delay=30  # seconds, passed to curl --retry-delay
  local attempts=10     # outer query attempts, 5 seconds apart
  local influx_ip
  local influx_url
  local i

  # Declared separately from the assignment so a failed lookup is not hidden
  # behind the exit status of `local`; a failure is folded into the empty-IP
  # check below so the script fails with a message instead of a bare errexit.
  influx_ip=$("${KUBECTL}" get pods -l name=influxGrafana -o template \
    -t '{{range.items}}{{.currentState.hostIP}}:{{end}}' | sed 's/://g') \
    || influx_ip=""
  if [[ -z "${influx_ip}" ]]; then
    echo "failed to determine the InfluxDB host IP. monitoring test failed"
    exit 1
  fi

  # Quoted everywhere below: the URL contains '?' and '&'.
  influx_url="http://${influx_ip}:8086/db/k8s/series?u=root&p=root"

  for (( i = 1; i <= attempts; i++ )); do
    if curl --retry "${max_retries}" --retry-delay "${retry_delay}" -G "${influx_url}" \
          --data-urlencode "q=select * from stats limit 1" \
        && curl --retry "${max_retries}" --retry-delay "${retry_delay}" -G "${influx_url}" \
          --data-urlencode "q=select * from machine limit 1"; then
      echo "retrieved data from InfluxDB."
      return 0
    fi
    sleep 5
  done

  echo "failed to retrieve stats from InfluxDB. monitoring test failed"
  exit 1
}
|
||||
|
||||
# Blocks until both the influxGrafana and heapster pods report "Running".
# Globals:   KUBECTL (read)
# Outputs:   a failure message on stdout when giving up
# Returns:   0 once both pods are running; exits the script with status 1
#            after 20 polls (20 seconds apart) without success
function wait-for-pods {
  local running=false
  local attempts=20
  local poll_interval=20 # seconds
  local i

  for (( i = 1; i <= attempts; i++ )); do
    sleep "${poll_interval}"
    # grep output is discarded rather than using `grep -q`: -q exits on the
    # first match, which could SIGPIPE kubectl and fail the pipeline under
    # pipefail.
    if "${KUBECTL}" get pods -l name=influxGrafana -o template \
          -t '{{range.items}}{{.currentState.status}}:{{end}}' | grep Running &> /dev/null \
        && "${KUBECTL}" get pods -l name=heapster -o template \
          -t '{{range.items}}{{.currentState.status}}:{{end}}' | grep Running &> /dev/null; then
      running=true
      break
    fi
  done

  # Compare the variable's value; the bare word `running` would never equal
  # "false", so a timeout would go undetected.
  if [[ "${running}" == "false" ]]; then
    echo "giving up waiting on monitoring pods to be active. monitoring test failed"
    exit 1
  fi
}
|
||||
|
||||
# Run cleanup on every exit path, successful or not.
trap cleanup EXIT

# Start from a clean slate: drop any monitoring resources left over from an
# earlier run.
cleanup

# Bring up the monitoring pods and services.
setup

# wait-for-pods polls up to 20 times, 20 seconds apart, and exits the script
# if the pods never report Running.
printf '%s\n' "waiting for monitoring pods to be running"
wait-for-pods

# Give heapster a minute to push its first stats into InfluxDB.
printf '%s\n' "monitoring pods are running. waiting for stats to be pushed to InfluxDB"
sleep 60

# Query InfluxDB; influx-data-exists exits the script on failure.
printf '%s\n' "checking if stats exist in InfluxDB"
influx-data-exists

printf '%s\n' "monitoring setup works"
exit 0
|
@ -39,7 +39,12 @@ var _ = Describe("Monitoring", func() {
|
||||
expectNoError(err)
|
||||
})
|
||||
|
||||
It("pod and node resource usage metrics are available on influxdb using heapster.", func() {
|
||||
It("verify monitoring pods and all cluster nodes are available on influxdb using heapster.", func() {
|
||||
if testContext.provider != "gce" {
|
||||
By(fmt.Sprintf("Skipping Monitoring test, which is only supported for provider gce (not %s)",
|
||||
testContext.provider))
|
||||
return
|
||||
}
|
||||
testMonitoringUsingHeapsterInfluxdb(c)
|
||||
})
|
||||
})
|
||||
@ -51,8 +56,8 @@ const (
|
||||
influxdbPW = "root"
|
||||
podlistQuery = "select distinct(pod) from stats"
|
||||
nodelistQuery = "select distinct(hostname) from machine"
|
||||
sleepBetweenAttempts = 30 * time.Second
|
||||
maxAttempts = 10 // Total sleep time of 5 minutes for this test.
|
||||
sleepBetweenAttempts = 5 * time.Second
|
||||
testTimeout = 5 * time.Minute
|
||||
)
|
||||
|
||||
var (
|
||||
@ -67,27 +72,40 @@ var (
|
||||
}
|
||||
)
|
||||
|
||||
func expectedRcsExist(c *client.Client) {
|
||||
func verifyExpectedRcsExistAndGetExpectedPods(c *client.Client) ([]string, error) {
|
||||
rcList, err := c.ReplicationControllers(api.NamespaceDefault).List(labels.Everything())
|
||||
expectNoError(err)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
expectedPods := []string{}
|
||||
for _, rc := range rcList.Items {
|
||||
if _, ok := expectedRcs[rc.Name]; ok {
|
||||
if rc.Status.Replicas != 1 {
|
||||
Failf("expected to find only one replica for rc %q, found %d", rc.Name, rc.Status.Replicas)
|
||||
return nil, fmt.Errorf("expected to find only one replica for rc %q, found %d", rc.Name, rc.Status.Replicas)
|
||||
}
|
||||
expectedRcs[rc.Name] = true
|
||||
podList, err := c.Pods(api.NamespaceDefault).List(labels.Set(rc.Spec.Selector).AsSelector())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, pod := range podList.Items {
|
||||
expectedPods = append(expectedPods, pod.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
for rc, found := range expectedRcs {
|
||||
if !found {
|
||||
Failf("Replication Controller %q not found.", rc)
|
||||
return nil, fmt.Errorf("Replication Controller %q not found.", rc)
|
||||
}
|
||||
}
|
||||
return expectedPods, nil
|
||||
}
|
||||
|
||||
func expectedServicesExist(c *client.Client) {
|
||||
func expectedServicesExist(c *client.Client) error {
|
||||
serviceList, err := c.Services(api.NamespaceDefault).List(labels.Everything())
|
||||
expectNoError(err)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, service := range serviceList.Items {
|
||||
if _, ok := expectedServices[service.Name]; ok {
|
||||
expectedServices[service.Name] = true
|
||||
@ -95,29 +113,22 @@ func expectedServicesExist(c *client.Client) {
|
||||
}
|
||||
for service, found := range expectedServices {
|
||||
if !found {
|
||||
Failf("Service %q not found", service)
|
||||
return fmt.Errorf("Service %q not found", service)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getAllPodsInCluster(c *client.Client) []string {
|
||||
podList, err := c.Pods(api.NamespaceAll).List(labels.Everything())
|
||||
expectNoError(err)
|
||||
result := []string{}
|
||||
for _, pod := range podList.Items {
|
||||
result = append(result, pod.Name)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func getAllNodesInCluster(c *client.Client) []string {
|
||||
func getAllNodesInCluster(c *client.Client) ([]string, error) {
|
||||
nodeList, err := c.Nodes().List()
|
||||
expectNoError(err)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result := []string{}
|
||||
for _, node := range nodeList.Items {
|
||||
result = append(result, node.Name)
|
||||
}
|
||||
return result
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func getInfluxdbData(c *influxdb.Client, query string) (map[string]bool, error) {
|
||||
@ -133,6 +144,9 @@ func getInfluxdbData(c *influxdb.Client, query string) (map[string]bool, error)
|
||||
}
|
||||
result := map[string]bool{}
|
||||
for _, point := range series[0].GetPoints() {
|
||||
if len(point) != 2 {
|
||||
Failf("Expected only two entries in a point for query %q. Got %v", query, point)
|
||||
}
|
||||
name, ok := point[1].(string)
|
||||
if !ok {
|
||||
Failf("expected %v to be a string, but it is %T", point[1], point[1])
|
||||
@ -143,6 +157,9 @@ func getInfluxdbData(c *influxdb.Client, query string) (map[string]bool, error)
|
||||
}
|
||||
|
||||
func expectedItemsExist(expectedItems []string, actualItems map[string]bool) bool {
|
||||
if len(actualItems) < len(expectedItems) {
|
||||
return false
|
||||
}
|
||||
for _, item := range expectedItems {
|
||||
if _, found := actualItems[item]; !found {
|
||||
return false
|
||||
@ -182,8 +199,9 @@ func getMasterHost() string {
|
||||
|
||||
func testMonitoringUsingHeapsterInfluxdb(c *client.Client) {
|
||||
// Check if heapster pods and services are up.
|
||||
expectedRcsExist(c)
|
||||
expectedServicesExist(c)
|
||||
expectedPods, err := verifyExpectedRcsExistAndGetExpectedPods(c)
|
||||
expectNoError(err)
|
||||
expectNoError(expectedServicesExist(c))
|
||||
// TODO: Wait for all pods and services to be running.
|
||||
kubeMasterHttpClient, ok := c.Client.(*http.Client)
|
||||
if !ok {
|
||||
@ -202,14 +220,14 @@ func testMonitoringUsingHeapsterInfluxdb(c *client.Client) {
|
||||
influxdbClient, err := influxdb.NewClient(config)
|
||||
expectNoError(err, "failed to create influxdb client")
|
||||
|
||||
expectedPods := getAllPodsInCluster(c)
|
||||
expectedNodes := getAllNodesInCluster(c)
|
||||
attempt := maxAttempts
|
||||
expectedNodes, err := getAllNodesInCluster(c)
|
||||
expectNoError(err)
|
||||
startTime := time.Now()
|
||||
for {
|
||||
if validatePodsAndNodes(influxdbClient, expectedPods, expectedNodes) {
|
||||
return
|
||||
}
|
||||
if attempt--; attempt <= 0 {
|
||||
if time.Since(startTime) >= testTimeout {
|
||||
break
|
||||
}
|
||||
time.Sleep(sleepBetweenAttempts)
|
||||
|
Loading…
Reference in New Issue
Block a user