Node e2e Makefile support for running remote tests against kubernetes-node-e2e-images.

Also includes other improvements:
- Makefile rule to run tests against remote instance using existing host or image
- Makefile will reuse an instance created from an image if it was not torn down
- Runner starts gce instances in parallel with building source
- Runner uses the instance IP instead of the hostname so that the hostname doesn't need to resolve
- Runner supports cleaning up files and processes on an instance without stopping / deleting it
- Runner runs tests using `ginkgo` binary to support running tests in parallel
This commit is contained in:
Phillip Wittrock
2016-06-03 17:50:21 -07:00
parent 0d3be6a316
commit e94e1c6e3d
10 changed files with 461 additions and 192 deletions

View File

@@ -120,7 +120,7 @@ func getK8sNodeTestDir() (string, error) {
// getKubeletServerBin returns the location of the "kubelet" binary as reported by getK8sBin.
// It does not return normally when getK8sBin reports an error.
func getKubeletServerBin() string {
bin, err := getK8sBin("kubelet")
if err != nil {
panic(fmt.Sprintf("Could not locate kubelet binary."))
glog.Fatalf("Could not locate kubelet binary %v.", err)
}
return bin
}
@@ -128,7 +128,7 @@ func getKubeletServerBin() string {
// getApiServerBin returns the location of the "kube-apiserver" binary as reported by getK8sBin.
// It does not return normally when getK8sBin reports an error.
func getApiServerBin() string {
bin, err := getK8sBin("kube-apiserver")
if err != nil {
panic(fmt.Sprintf("Could not locate kube-apiserver binary."))
glog.Fatalf("Could not locate kube-apiserver binary %v.", err)
}
return bin
}

View File

@@ -26,6 +26,7 @@ import (
"os/user"
"path/filepath"
"strings"
"sync"
"github.com/golang/glog"
utilerrors "k8s.io/kubernetes/pkg/util/errors"
@@ -41,6 +42,11 @@ var sshOptionsMap map[string]string
const archiveName = "e2e_node_test.tar.gz"
// hostnameIpOverrides maps a hostname to the ip address that should be used in its place.
// The embedded RWMutex guards m: AddHostnameIp writes entries under the write lock and
// GetHostnameOrIp reads them under the read lock.
var hostnameIpOverrides = struct {
sync.RWMutex
m map[string]string
}{m: make(map[string]string)}
func init() {
usr, err := user.Current()
if err != nil {
@@ -51,9 +57,24 @@ func init() {
}
}
// AddHostnameIp records ip as the address to use in place of hostname.
// An existing entry for the same hostname is overwritten.
func AddHostnameIp(hostname, ip string) {
	table := &hostnameIpOverrides

	table.Lock()
	defer table.Unlock()

	table.m[hostname] = ip
}
// GetHostnameOrIp returns the ip registered for hostname through AddHostnameIp,
// or hostname itself when no override has been registered.
func GetHostnameOrIp(hostname string) string {
	table := &hostnameIpOverrides

	table.RLock()
	defer table.RUnlock()

	ip, registered := table.m[hostname]
	if !registered {
		return hostname
	}
	return ip
}
// CreateTestArchive builds the local source and creates a tar archive e2e_node_test.tar.gz containing
// the binaries k8s required for node e2e tests
func CreateTestArchive() string {
func CreateTestArchive() (string, error) {
// Build the executables
buildGo()
@@ -65,50 +86,57 @@ func CreateTestArchive() string {
ginkgoTest := filepath.Join(buildOutputDir, "e2e_node.test")
if _, err := os.Stat(ginkgoTest); err != nil {
glog.Fatalf("Failed to locate test binary %s", ginkgoTest)
return "", fmt.Errorf("failed to locate test binary %s", ginkgoTest)
}
kubelet := filepath.Join(buildOutputDir, "kubelet")
if _, err := os.Stat(kubelet); err != nil {
glog.Fatalf("Failed to locate binary %s", kubelet)
return "", fmt.Errorf("failed to locate binary %s", kubelet)
}
apiserver := filepath.Join(buildOutputDir, "kube-apiserver")
if _, err := os.Stat(apiserver); err != nil {
glog.Fatalf("Failed to locate binary %s", apiserver)
return "", fmt.Errorf("failed to locate binary %s", apiserver)
}
ginkgo := filepath.Join(buildOutputDir, "ginkgo")
// Stat the ginkgo binary itself (the previous check re-tested the apiserver path), so a
// missing ginkgo is reported here instead of surfacing later as a failed "cp".
if _, err := os.Stat(ginkgo); err != nil {
	return "", fmt.Errorf("failed to locate binary %s", ginkgo)
}
glog.Infof("Building archive...")
tardir, err := ioutil.TempDir("", "node-e2e-archive")
if err != nil {
glog.Fatalf("Failed to create temporary directory %v.", err)
return "", fmt.Errorf("failed to create temporary directory %v.", err)
}
defer os.RemoveAll(tardir)
// Copy binaries
out, err := exec.Command("cp", ginkgoTest, filepath.Join(tardir, "e2e_node.test")).CombinedOutput()
if err != nil {
glog.Fatalf("Failed to copy e2e_node.test %v.", err)
return "", fmt.Errorf("failed to copy e2e_node.test %v.", err)
}
out, err = exec.Command("cp", kubelet, filepath.Join(tardir, "kubelet")).CombinedOutput()
if err != nil {
glog.Fatalf("Failed to copy kubelet %v.", err)
return "", fmt.Errorf("failed to copy kubelet %v.", err)
}
out, err = exec.Command("cp", apiserver, filepath.Join(tardir, "kube-apiserver")).CombinedOutput()
if err != nil {
glog.Fatalf("Failed to copy kube-apiserver %v.", err)
return "", fmt.Errorf("failed to copy kube-apiserver %v.", err)
}
out, err = exec.Command("cp", ginkgo, filepath.Join(tardir, "ginkgo")).CombinedOutput()
if err != nil {
return "", fmt.Errorf("failed to copy ginkgo %v.", err)
}
// Build the tar
out, err = exec.Command("tar", "-zcvf", archiveName, "-C", tardir, ".").CombinedOutput()
if err != nil {
glog.Fatalf("Failed to build tar %v. Output:\n%s", err, out)
return "", fmt.Errorf("failed to build tar %v. Output:\n%s", err, out)
}
dir, err := os.Getwd()
if err != nil {
glog.Fatalf("Failed to get working directory %v.", err)
return "", fmt.Errorf("failed to get working directory %v.", err)
}
return filepath.Join(dir, archiveName)
return filepath.Join(dir, archiveName), nil
}
// Returns the command output, whether the exit was ok, and any errors
@@ -118,31 +146,31 @@ func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, s
if err != nil {
return "", false, fmt.Errorf("could not find username: %v", err)
}
output, err := RunSshCommand("ssh", host, "--", "sudo", "usermod", "-a", "-G", "docker", uname.Username)
output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sudo", "usermod", "-a", "-G", "docker", uname.Username)
if err != nil {
return "", false, fmt.Errorf("Instance %s not running docker daemon - Command failed: %s", host, output)
return "", false, fmt.Errorf("instance %s not running docker daemon - Command failed: %s", host, output)
}
}
// Create the temp staging directory
glog.Infof("Staging test binaries on %s", host)
tmp := fmt.Sprintf("/tmp/gcloud-e2e-%d", rand.Int31())
_, err := RunSshCommand("ssh", host, "--", "mkdir", tmp)
_, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "mkdir", tmp)
if err != nil {
// Exit failure with the error
return "", false, err
}
if cleanup {
defer func() {
output, err := RunSshCommand("ssh", host, "--", "rm", "-rf", tmp)
output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "rm", "-rf", tmp)
if err != nil {
glog.Errorf("Failed to cleanup tmp directory %s on host %v. Output:\n%s", tmp, err, output)
glog.Errorf("failed to cleanup tmp directory %s on host %v. Output:\n%s", tmp, err, output)
}
}()
}
// Copy the archive to the staging directory
_, err = RunSshCommand("scp", archive, fmt.Sprintf("%s:%s/", host, tmp))
_, err = RunSshCommand("scp", archive, fmt.Sprintf("%s:%s/", GetHostnameOrIp(host), tmp))
if err != nil {
// Exit failure with the error
return "", false, err
@@ -158,12 +186,12 @@ func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, s
// If we are unable to stop existing running k8s processes, we should see messages in the kubelet/apiserver/etcd
// logs about failing to bind the required ports.
glog.Infof("Killing any existing node processes on %s", host)
RunSshCommand("ssh", host, "--", "sh", "-c", cmd)
RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd)
// Extract the archive
cmd = getSshCommand(" && ", fmt.Sprintf("cd %s", tmp), fmt.Sprintf("tar -xzvf ./%s", archiveName))
glog.Infof("Extracting tar on %s", host)
output, err := RunSshCommand("ssh", host, "--", "sh", "-c", cmd)
output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd)
if err != nil {
// Exit failure with the error
return "", false, err
@@ -172,12 +200,13 @@ func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, s
// Run the tests
cmd = getSshCommand(" && ",
fmt.Sprintf("cd %s", tmp),
fmt.Sprintf("timeout -k 30s %ds ./e2e_node.test --logtostderr --v 2 --build-services=false --stop-services=%t --node-name=%s --report-dir=%s/results --junit-file-number=%d %s", *testTimeoutSeconds, cleanup, host, tmp, junitFileNumber, *ginkgoFlags),
fmt.Sprintf("timeout -k 30s %ds ./ginkgo %s ./e2e_node.test -- --logtostderr --v 2 --build-services=false --stop-services=%t --node-name=%s --report-dir=%s/results --junit-file-number=%d", *testTimeoutSeconds, *ginkgoFlags, cleanup, host, tmp, junitFileNumber),
)
aggErrs := []error{}
glog.Infof("Starting tests on %s", host)
output, err = RunSshCommand("ssh", host, "--", "sh", "-c", cmd)
output, err = RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd)
if err != nil {
aggErrs = append(aggErrs, err)
}
@@ -195,13 +224,13 @@ func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, s
}
func getTestArtifacts(host, testDir string) error {
_, err := RunSshCommand("scp", "-r", fmt.Sprintf("%s:%s/results/", host, testDir), fmt.Sprintf("%s/%s", *resultsDir, host))
_, err := RunSshCommand("scp", "-r", fmt.Sprintf("%s:%s/results/", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/%s", *resultsDir, host))
if err != nil {
return err
}
// Copy junit to the top of artifacts
_, err = RunSshCommand("scp", fmt.Sprintf("%s:%s/results/junit*", host, testDir), fmt.Sprintf("%s/", *resultsDir))
_, err = RunSshCommand("scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/", *resultsDir))
if err != nil {
return err
}
@@ -223,7 +252,7 @@ func RunSshCommand(cmd string, args ...string) (string, error) {
}
output, err := exec.Command(cmd, args...).CombinedOutput()
if err != nil {
return fmt.Sprintf("%s", output), fmt.Errorf("Command [%s %s] failed with error: %v and output:\n%s", cmd, strings.Join(args, " "), err, output)
return fmt.Sprintf("%s", output), fmt.Errorf("command [%s %s] failed with error: %v and output:\n%s", cmd, strings.Join(args, " "), err, output)
}
return fmt.Sprintf("%s", output), nil
}

View File

@@ -29,7 +29,10 @@ set -x
. $1
go build test/e2e_node/environment/conformance.go
WORKSPACE=${WORKSPACE:-"/tmp/"}
ARTIFACTS=${WORKSPACE}/_artifacts
mkdir -p ${ARTIFACTS}
go run test/e2e_node/runner/run_e2e.go --logtostderr --vmodule=*=2 --ssh-env="gce" \
--zone="$GCE_ZONE" --project="$GCE_PROJECT" --image-project="$GCE_IMAGE_PROJECT" \

View File

@@ -11,6 +11,6 @@ GCE_ZONE=us-central1-f
GCE_PROJECT=kubernetes-jenkins
GCE_IMAGE_PROJECT=kubernetes-jenkins
CLEANUP=true
GINKGO_FLAGS=--ginkgo.skip=FLAKY
GINKGO_FLAGS=--skip=FLAKY
SETUP_NODE=false

View File

@@ -11,5 +11,5 @@ GCE_ZONE=us-central1-f
GCE_PROJECT=kubernetes-jenkins-pull
GCE_IMAGE_PROJECT=kubernetes-jenkins-pull
CLEANUP=true
GINKGO_FLAGS=--ginkgo.skip=FLAKY
GINKGO_FLAGS=--skip=FLAKY
SETUP_NODE=false

View File

@@ -27,6 +27,7 @@ import (
"net/http"
"os"
"strings"
"sync"
"time"
"k8s.io/kubernetes/test/e2e_node"
@@ -45,11 +46,20 @@ var imageProject = flag.String("image-project", "", "gce project the hosts live
var images = flag.String("images", "", "images to test")
var hosts = flag.String("hosts", "", "hosts to test")
var cleanup = flag.Bool("cleanup", true, "If true remove files from remote hosts and delete temporary instances")
var deleteInstances = flag.Bool("delete-instances", true, "If true, delete any instances created")
var buildOnly = flag.Bool("build-only", false, "If true, build e2e_node_test.tar.gz and exit.")
var setupNode = flag.Bool("setup-node", false, "When true, current user will be added to docker group on the test machine")
var computeService *compute.Service
// Archive holds the outcome of building the test archive.
// The embedded sync.Once is what getArchive uses to invoke e2e_node.CreateTestArchive,
// so the build is performed at most once and its result is shared by all callers.
type Archive struct {
sync.Once
// path is the first value returned by e2e_node.CreateTestArchive.
path string
// err is the error returned by e2e_node.CreateTestArchive.
err error
}
// arc is the package-level archive used by main and testHost.
var arc Archive
type TestResult struct {
output string
err error
@@ -94,35 +104,22 @@ func main() {
noColour = "\033[0m"
}
archive := e2e_node.CreateTestArchive()
defer os.Remove(archive)
go arc.getArchive()
defer arc.deleteArchive()
var err error
computeService, err = getComputeClient()
if err != nil {
glog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err)
}
results := make(chan *TestResult)
running := 0
if *images != "" {
// Setup the gce client for provisioning instances
// Getting credentials on gce jenkins is flaky, so try a couple times
var err error
for i := 0; i < 10; i++ {
var client *http.Client
client, err = google.DefaultClient(oauth2.NoContext, compute.ComputeScope)
if err != nil {
continue
}
computeService, err = compute.New(client)
if err != nil {
continue
}
time.Sleep(time.Second * 6)
}
if err != nil {
glog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err)
}
for _, image := range strings.Split(*images, ",") {
running++
fmt.Printf("Initializing e2e tests using image %s.\n", image)
go func(image string, junitFileNum int) { results <- testImage(image, archive, junitFileNum) }(image, running)
go func(image string, junitFileNum int) { results <- testImage(image, junitFileNum) }(image, running)
}
}
if *hosts != "" {
@@ -130,7 +127,7 @@ func main() {
fmt.Printf("Initializing e2e tests using host %s.\n", host)
running++
go func(host string, junitFileNum int) {
results <- testHost(host, archive, *cleanup, junitFileNum, *setupNode)
results <- testHost(host, *cleanup, junitFileNum, *setupNode)
}(host, running)
}
}
@@ -159,9 +156,51 @@ func main() {
}
}
// getArchive returns the path of the test archive and any error from building it.
// The build (e2e_node.CreateTestArchive) is run through the embedded sync.Once, so
// every call after the first returns the stored result.
func (a *Archive) getArchive() (string, error) {
	build := func() {
		a.path, a.err = e2e_node.CreateTestArchive()
	}
	a.Do(build)
	return a.path, a.err
}
// deleteArchive removes the archive file returned by getArchive.
// Nothing is removed when getArchive reports an error; the result of os.Remove is ignored.
func (a *Archive) deleteArchive() {
	if archivePath, buildErr := a.getArchive(); buildErr == nil {
		os.Remove(archivePath)
	}
}
// Run tests in archive against host
func testHost(host, archive string, deleteFiles bool, junitFileNum int, setupNode bool) *TestResult {
output, exitOk, err := e2e_node.RunRemote(archive, host, deleteFiles, junitFileNum, setupNode)
func testHost(host string, deleteFiles bool, junitFileNum int, setupNode bool) *TestResult {
instance, err := computeService.Instances.Get(*project, *zone, host).Do()
if err != nil {
return &TestResult{
err: err,
host: host,
exitOk: false,
}
}
if strings.ToUpper(instance.Status) != "RUNNING" {
err = fmt.Errorf("instance %s not in state RUNNING, was %s.", host, instance.Status)
return &TestResult{
err: err,
host: host,
exitOk: false,
}
}
externalIp := getExternalIp(instance)
if len(externalIp) > 0 {
e2e_node.AddHostnameIp(host, externalIp)
}
path, err := arc.getArchive()
if err != nil {
// Don't log fatal because we need to do any needed cleanup contained in "defer" statements
return &TestResult{
err: fmt.Errorf("unable to create test archive %v.", err),
}
}
output, exitOk, err := e2e_node.RunRemote(path, host, deleteFiles, junitFileNum, setupNode)
return &TestResult{
output: output,
err: err,
@@ -172,17 +211,21 @@ func testHost(host, archive string, deleteFiles bool, junitFileNum int, setupNod
// testImage provisions a gce instance from image via createInstance and runs the tests against
// it through testHost.
// The instance is deleted afterward only when the guarding flag is set. The deferred
// deleteInstance call is registered before the createInstance error is checked, so it also
// runs when instance creation fails.
func testImage(image, archive string, junitFileNum int) *TestResult {
func testImage(image string, junitFileNum int) *TestResult {
host, err := createInstance(image)
if *cleanup {
if *deleteInstances {
defer deleteInstance(image)
}
if err != nil {
return &TestResult{
err: fmt.Errorf("Unable to create gce instance with running docker daemon for image %s. %v", image, err),
err: fmt.Errorf("unable to create gce instance with running docker daemon for image %s. %v", image, err),
}
}
return testHost(host, archive, false, junitFileNum, *setupNode)
// Only delete the remote files if we are keeping the instance and want it cleaned up.
// If we are going to delete the instance, don't bother with cleaning up the files.
deleteFiles := !*deleteInstances && *cleanup
return testHost(host, deleteFiles, junitFileNum, *setupNode)
}
// Provision a gce instance using image
@@ -216,7 +259,7 @@ func createInstance(image string) (string, error) {
return "", err
}
if op.Error != nil {
return "", fmt.Errorf("Could not create instance %s: %+v", name, op.Error)
return "", fmt.Errorf("could not create instance %s: %+v", name, op.Error)
}
instanceRunning := false
@@ -230,17 +273,21 @@ func createInstance(image string) (string, error) {
continue
}
if strings.ToUpper(instance.Status) != "RUNNING" {
err = fmt.Errorf("Instance %s not in state RUNNING, was %s.", name, instance.Status)
err = fmt.Errorf("instance %s not in state RUNNING, was %s.", name, instance.Status)
continue
}
externalIp := getExternalIp(instance)
if len(externalIp) > 0 {
e2e_node.AddHostnameIp(name, externalIp)
}
var output string
output, err = e2e_node.RunSshCommand("ssh", name, "--", "sudo", "docker", "version")
output, err = e2e_node.RunSshCommand("ssh", e2e_node.GetHostnameOrIp(name), "--", "sudo", "docker", "version")
if err != nil {
err = fmt.Errorf("Instance %s not running docker daemon - Command failed: %s", name, output)
err = fmt.Errorf("instance %s not running docker daemon - Command failed: %s", name, output)
continue
}
if !strings.Contains(output, "Server") {
err = fmt.Errorf("Instance %s not running docker daemon - Server not found: %s", name, output)
err = fmt.Errorf("instance %s not running docker daemon - Server not found: %s", name, output)
continue
}
instanceRunning = true
@@ -248,6 +295,47 @@ func createInstance(image string) (string, error) {
return name, err
}
// getExternalIp returns the first non-empty NatIP found while walking the access configs of
// the instance's network interfaces in order, or "" when none of them has one.
func getExternalIp(instance *compute.Instance) string {
	for _, iface := range instance.NetworkInterfaces {
		for _, access := range iface.AccessConfigs {
			if access.NatIP != "" {
				return access.NatIP
			}
		}
	}
	return ""
}
// getComputeClient builds the gce compute service used for provisioning instances.
// Getting credentials on gce jenkins is flaky, so creation is attempted up to maxAttempts
// times with a fixed pause before every attempt after the first. The error of the last
// failed attempt is returned when all attempts fail.
func getComputeClient() (*compute.Service, error) {
	const (
		maxAttempts = 10
		pause       = 6 * time.Second
	)

	var (
		client  *http.Client
		service *compute.Service
		err     error
	)
	for attempt := 0; attempt < maxAttempts; attempt++ {
		if attempt != 0 {
			time.Sleep(pause)
		}
		if client, err = google.DefaultClient(oauth2.NoContext, compute.ComputeScope); err != nil {
			continue
		}
		if service, err = compute.New(client); err == nil {
			return service, nil
		}
	}
	return nil, err
}
func deleteInstance(image string) {
_, err := computeService.Instances.Delete(*project, *zone, imageToInstanceName(image)).Do()
if err != nil {