Merge pull request #47467 from mindprince/issue-47388-e2e-gke-gpu
Automatic merge from submit-queue
Update GPU e2e tests.
* Use nvidia driver installer from external repo.
    
    That installer is decoupled from the COS image version (as long as the
    image version is cos-stable-59-9460-60-0 or newer).
    
    A separate commit in the test-infra repo will update the cos version
    used for this test to cos-stable-59-9460-60-0.
* Use cos-stable-59-9460-60-0 and newer installer for GPU node e2e tests.
This is to enable #47388.
This supersedes #47091.
**Release note**:
```release-note
NONE
```
/sig node
			
			
This commit is contained in:
		@@ -17,6 +17,8 @@ limitations under the License.
 | 
				
			|||||||
package e2e
 | 
					package e2e
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import (
 | 
					import (
 | 
				
			||||||
 | 
						"io/ioutil"
 | 
				
			||||||
 | 
						"net/http"
 | 
				
			||||||
	"strings"
 | 
						"strings"
 | 
				
			||||||
	"time"
 | 
						"time"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -29,7 +31,6 @@ import (
 | 
				
			|||||||
	"k8s.io/kubernetes/pkg/api/v1"
 | 
						"k8s.io/kubernetes/pkg/api/v1"
 | 
				
			||||||
	extensions "k8s.io/kubernetes/pkg/apis/extensions/v1beta1"
 | 
						extensions "k8s.io/kubernetes/pkg/apis/extensions/v1beta1"
 | 
				
			||||||
	"k8s.io/kubernetes/test/e2e/framework"
 | 
						"k8s.io/kubernetes/test/e2e/framework"
 | 
				
			||||||
	"k8s.io/kubernetes/test/e2e/generated"
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	. "github.com/onsi/ginkgo"
 | 
						. "github.com/onsi/ginkgo"
 | 
				
			||||||
	. "github.com/onsi/gomega"
 | 
						. "github.com/onsi/gomega"
 | 
				
			||||||
@@ -42,7 +43,7 @@ const (
 | 
				
			|||||||
	// Nvidia driver installation can take upwards of 5 minutes.
 | 
						// Nvidia driver installation can take upwards of 5 minutes.
 | 
				
			||||||
	driverInstallTimeout = 10 * time.Minute
 | 
						driverInstallTimeout = 10 * time.Minute
 | 
				
			||||||
	// Nvidia COS driver installer daemonset.
 | 
						// Nvidia COS driver installer daemonset.
 | 
				
			||||||
	cosNvidiaDriverInstallerPath = "cluster/gce/gci/nvidia-gpus/cos-installer-daemonset.yaml"
 | 
						cosNvidiaDriverInstallerUrl = "https://raw.githubusercontent.com/ContainerEngine/accelerators/stable/cos-nvidia-gpu-installer/daemonset.yaml"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func makeCudaAdditionTestPod() *v1.Pod {
 | 
					func makeCudaAdditionTestPod() *v1.Pod {
 | 
				
			||||||
@@ -135,7 +136,7 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
 | 
				
			|||||||
	// GPU drivers might have already been installed.
 | 
						// GPU drivers might have already been installed.
 | 
				
			||||||
	if !areGPUsAvailableOnAllSchedulableNodes(f) {
 | 
						if !areGPUsAvailableOnAllSchedulableNodes(f) {
 | 
				
			||||||
		// Install Nvidia Drivers.
 | 
							// Install Nvidia Drivers.
 | 
				
			||||||
		ds := dsFromManifest(cosNvidiaDriverInstallerPath)
 | 
							ds := dsFromManifest(cosNvidiaDriverInstallerUrl)
 | 
				
			||||||
		ds.Namespace = f.Namespace.Name
 | 
							ds.Namespace = f.Namespace.Name
 | 
				
			||||||
		_, err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Create(ds)
 | 
							_, err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Create(ds)
 | 
				
			||||||
		framework.ExpectNoError(err, "failed to create daemonset")
 | 
							framework.ExpectNoError(err, "failed to create daemonset")
 | 
				
			||||||
@@ -158,10 +159,25 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// dsFromManifest reads a .json/yaml file and returns the daemonset in it.
 | 
					// dsFromManifest reads a .json/yaml file and returns the daemonset in it.
 | 
				
			||||||
func dsFromManifest(fileName string) *extensions.DaemonSet {
 | 
					func dsFromManifest(url string) *extensions.DaemonSet {
 | 
				
			||||||
	var controller extensions.DaemonSet
 | 
						var controller extensions.DaemonSet
 | 
				
			||||||
	framework.Logf("Parsing ds from %v", fileName)
 | 
						framework.Logf("Parsing ds from %v", url)
 | 
				
			||||||
	data := generated.ReadOrDie(fileName)
 | 
					
 | 
				
			||||||
 | 
						var response *http.Response
 | 
				
			||||||
 | 
						var err error
 | 
				
			||||||
 | 
						for i := 1; i <= 5; i++ {
 | 
				
			||||||
 | 
							response, err = http.Get(url)
 | 
				
			||||||
 | 
							if err == nil && response.StatusCode == 200 {
 | 
				
			||||||
 | 
								break
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							time.Sleep(time.Duration(i) * time.Second)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						Expect(err).NotTo(HaveOccurred())
 | 
				
			||||||
 | 
						Expect(response.StatusCode).To(Equal(200))
 | 
				
			||||||
 | 
						defer response.Body.Close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						data, err := ioutil.ReadAll(response.Body)
 | 
				
			||||||
 | 
						Expect(err).NotTo(HaveOccurred())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	json, err := utilyaml.ToJSON(data)
 | 
						json, err := utilyaml.ToJSON(data)
 | 
				
			||||||
	Expect(err).NotTo(HaveOccurred())
 | 
						Expect(err).NotTo(HaveOccurred())
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2,7 +2,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
runcmd:
 | 
					runcmd:
 | 
				
			||||||
  - modprobe configs
 | 
					  - modprobe configs
 | 
				
			||||||
  - docker run -v /dev:/dev -v /home/kubernetes/bin/nvidia:/rootfs/nvidia -v /etc/os-release:/rootfs/etc/os-release -v /proc/sysrq-trigger:/sysrq -e LAKITU_KERNEL_SHA1=26481563cb3788ad254c2bf2126b843c161c7e48 -e BASE_DIR=/rootfs/nvidia --privileged gcr.io/google_containers/cos-nvidia-driver-install@sha256:ad83ede6e0c6d768bf7cf69a7dec972aa5e8f88778142ca46afd3286ad58cfc8
 | 
					  - docker run -v /dev:/dev -v /home/kubernetes/bin/nvidia:/rootfs/nvidia -v /etc/os-release:/rootfs/etc/os-release -v /proc/sysrq-trigger:/sysrq -e BASE_DIR=/rootfs/nvidia --privileged gcr.io/google_containers/cos-nvidia-driver-install@sha256:cb55c7971c337fece62f2bfe858662522a01e43ac9984a2dd1dd5c71487d225c
 | 
				
			||||||
  - mount /tmp /tmp -o remount,exec,suid
 | 
					  - mount /tmp /tmp -o remount,exec,suid
 | 
				
			||||||
  - usermod -a -G docker jenkins
 | 
					  - usermod -a -G docker jenkins
 | 
				
			||||||
  - mkdir -p /var/lib/kubelet
 | 
					  - mkdir -p /var/lib/kubelet
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -16,11 +16,10 @@ images:
 | 
				
			|||||||
    image: e2e-node-containervm-v20161208-image # docker 1.11.2
 | 
					    image: e2e-node-containervm-v20161208-image # docker 1.11.2
 | 
				
			||||||
    project: kubernetes-node-e2e-images
 | 
					    project: kubernetes-node-e2e-images
 | 
				
			||||||
  gci:
 | 
					  gci:
 | 
				
			||||||
    image_regex: cos-beta-59-9460-20-0 # docker 1.11.2
 | 
					    image_regex: cos-stable-59-9460-60-0 # docker 1.11.2
 | 
				
			||||||
    project: cos-cloud
 | 
					    project: cos-cloud
 | 
				
			||||||
    metadata: "user-data<test/e2e_node/jenkins/gci-init-gpu.yaml,gci-update-strategy=update_disabled"
 | 
					    metadata: "user-data<test/e2e_node/jenkins/gci-init-gpu.yaml,gci-update-strategy=update_disabled"
 | 
				
			||||||
    resources:
 | 
					    resources:
 | 
				
			||||||
      accelerators:
 | 
					      accelerators:
 | 
				
			||||||
        - type: nvidia-tesla-k80
 | 
					        - type: nvidia-tesla-k80
 | 
				
			||||||
          count: 2
 | 
					          count: 2
 | 
				
			||||||
          
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user