Merge pull request #86708 from aojea/tcpClose
fix flakiness on e2e test TCP CLOSE_WAIT timeout
@@ -26,11 +26,11 @@ import (
 
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
 
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
-	e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
 	"k8s.io/kubernetes/test/images/agnhost/net/nat"
 	imageutils "k8s.io/kubernetes/test/utils/image"
 
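As context for the new import: wait.PollImmediate (from k8s.io/apimachinery/pkg/util/wait) runs a condition function once immediately and then once per interval until it succeeds, returns an error, or times out. A minimal sketch of that behavior, with illustrative intervals and condition:

	package main

	import (
		"fmt"
		"time"

		"k8s.io/apimachinery/pkg/util/wait"
	)

	func main() {
		start := time.Now()
		// Poll every 5s for up to 30s; the first attempt runs immediately.
		// Returning (false, nil) means "not ready yet, try again";
		// a non-nil error aborts the poll early.
		err := wait.PollImmediate(5*time.Second, 30*time.Second, func() (bool, error) {
			return time.Since(start) > 12*time.Second, nil
		})
		fmt.Println("poll finished, err:", err)
	}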
@@ -44,8 +44,8 @@ var _ = SIGDescribe("Network", func() {
 	const (
 		testDaemonHTTPPort     = 11301
 		testDaemonTCPPort      = 11302
-		timeoutSeconds        = 10
-		postFinTimeoutSeconds = 5
+		deadlineTimeoutSeconds = 10
+		postFinTimeoutSeconds  = 30
 	)
 
 	fr := framework.NewDefaultFramework("network")
@@ -81,16 +81,63 @@ var _ = SIGDescribe("Network", func() {
 
 		zero := int64(0)
 
+		// Create a pod to check the conntrack entries on the host node.
+		// It mounts the host /proc/net folder to be able to access
+		// the nf_conntrack file with the host conntrack entries.
+		privileged := true
+
+		hostExecPod := &v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "e2e-net-exec",
+				Namespace: fr.Namespace.Name,
+				Labels:    map[string]string{"app": "e2e-net-exec"},
+			},
+			Spec: v1.PodSpec{
+				HostNetwork: true,
+				NodeName:    clientNodeInfo.name,
+				Containers: []v1.Container{
+					{
+						Name:            "e2e-net-exec",
+						Image:           kubeProxyE2eImage,
+						ImagePullPolicy: "Always",
+						Args:            []string{"pause"},
+						VolumeMounts: []v1.VolumeMount{
+							{
+								Name:      "proc-net",
+								MountPath: "/rootfs/proc/net",
+								ReadOnly:  true,
+							},
+						},
+						SecurityContext: &v1.SecurityContext{
+							Privileged: &privileged,
+						},
+					},
+				},
+				Volumes: []v1.Volume{
+					{
+						Name: "proc-net",
+						VolumeSource: v1.VolumeSource{
+							HostPath: &v1.HostPathVolumeSource{
+								Path: "/proc/net",
+							},
+						},
+					},
+				},
+				TerminationGracePeriodSeconds: &zero,
+			},
+		}
+		fr.PodClient().CreateSync(hostExecPod)
+
 		// Some distributions (Ubuntu 16.04 etc.) don't support the proc file.
-		_, err = e2essh.IssueSSHCommandWithResult(
-			"ls /proc/net/nf_conntrack",
-			framework.TestContext.Provider,
-			clientNodeInfo.node)
+		_, err = framework.RunHostCmd(fr.Namespace.Name, "e2e-net-exec",
+			"ls /rootfs/proc/net/nf_conntrack")
 		if err != nil && strings.Contains(err.Error(), "No such file or directory") {
-			framework.Skipf("The node %s does not support /proc/net/nf_conntrack", clientNodeInfo.name)
+			framework.Skipf("The node %s does not support /proc/net/nf_conntrack",
+				clientNodeInfo.name)
 		}
 		framework.ExpectNoError(err)
 
+		// Create the client and server pods
 		clientPodSpec := &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "e2e-net-client",
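The pod above is what removes the SSH dependency: the host's /proc/net is mounted read-only at /rootfs/proc/net, so conntrack state can be read with plain file I/O from inside the pod. A hedged sketch of that kind of lookup (the path and dport filter come from the diff; the standalone helper program is invented for illustration):

	package main

	import (
		"bufio"
		"fmt"
		"os"
		"strings"
	)

	// conntrackEntriesForPort is a hypothetical helper: it scans the host's
	// conntrack table, exposed inside the pod at /rootfs/proc/net, and
	// returns every entry whose destination port matches.
	func conntrackEntriesForPort(dport int) ([]string, error) {
		f, err := os.Open("/rootfs/proc/net/nf_conntrack")
		if err != nil {
			// Some distributions (e.g. Ubuntu 16.04) don't expose this file.
			return nil, err
		}
		defer f.Close()

		needle := fmt.Sprintf("dport=%d", dport)
		var matches []string
		scanner := bufio.NewScanner(f)
		for scanner.Scan() {
			if strings.Contains(scanner.Text(), needle) {
				matches = append(matches, scanner.Text())
			}
		}
		return matches, scanner.Err()
	}

	func main() {
		entries, err := conntrackEntriesForPort(11302)
		fmt.Println(entries, err)
	}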
@@ -105,7 +152,7 @@ var _ = SIGDescribe("Network", func() {
 						Image:           kubeProxyE2eImage,
 						ImagePullPolicy: "Always",
 						Args: []string{
-							"net", "--serve", fmt.Sprintf("0.0.0.0:%d", testDaemonHTTPPort),
+							"net", "--serve", fmt.Sprintf(":%d", testDaemonHTTPPort),
 						},
 					},
 				},
@@ -130,7 +177,7 @@ var _ = SIGDescribe("Network", func() {
 							"net",
 							"--runner", "nat-closewait-server",
 							"--options",
-							fmt.Sprintf(`{"LocalAddr":"0.0.0.0:%v", "PostFindTimeoutSeconds":%v}`,
+							fmt.Sprintf(`{"LocalAddr":":%v", "PostFinTimeoutSeconds":%v}`,
 								testDaemonTCPPort,
 								postFinTimeoutSeconds),
 						},
@@ -166,8 +213,8 @@ var _ = SIGDescribe("Network", func() {
 		options := nat.CloseWaitClientOptions{
 			RemoteAddr: fmt.Sprintf("%v:%v",
 				serverNodeInfo.nodeIP, testDaemonTCPPort),
-			TimeoutSeconds:        timeoutSeconds,
-			PostFinTimeoutSeconds: 0,
+			TimeoutSeconds:        deadlineTimeoutSeconds,
+			PostFinTimeoutSeconds: postFinTimeoutSeconds,
 			LeakConnection:        true,
 		}
 
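These options are marshaled to JSON (the jsonBytes used in the next hunk) and delivered to the agnhost client daemon over its HTTP control port. A small sketch of that serialization, assuming a local mirror of nat.CloseWaitClientOptions with the field names shown above and default JSON tags:

	package main

	import (
		"encoding/json"
		"fmt"
	)

	// Mirror of nat.CloseWaitClientOptions as used in the test; the field
	// names are taken from the diff, the JSON encoding is assumed.
	type CloseWaitClientOptions struct {
		RemoteAddr            string
		TimeoutSeconds        int
		PostFinTimeoutSeconds int
		LeakConnection        bool
	}

	func main() {
		options := CloseWaitClientOptions{
			RemoteAddr:            "10.0.0.2:11302", // illustrative node IP
			TimeoutSeconds:        10,
			PostFinTimeoutSeconds: 30,
			LeakConnection:        true, // leave the socket half-closed in CLOSE_WAIT
		}
		jsonBytes, err := json.Marshal(options)
		if err != nil {
			panic(err)
		}
		// The test pipes this JSON to the "net" runner via curl.
		fmt.Println(string(jsonBytes))
	}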
@@ -179,47 +226,52 @@ var _ = SIGDescribe("Network", func() {
 				`'%v' 2>/dev/null`,
 			testDaemonHTTPPort,
 			string(jsonBytes))
+		// Run the closewait command in a goroutine so it keeps waiting during postFinTimeoutSeconds;
+		// otherwise the pod is deleted and the connection is closed, losing the conntrack entry.
+		go func() {
 			framework.RunHostCmdOrDie(fr.Namespace.Name, "e2e-net-client", cmd)
+		}()
 
 		<-time.After(time.Duration(1) * time.Second)
 
 		ginkgo.By("Checking /proc/net/nf_conntrack for the timeout")
-		// If test flakes occur here, then this check should be performed
-		// in a loop as there may be a race with the client connecting.
-		e2essh.IssueSSHCommandWithResult(
-			fmt.Sprintf("sudo cat /proc/net/nf_conntrack | grep 'dport=%v'",
-				testDaemonTCPPort),
-			framework.TestContext.Provider,
-			clientNodeInfo.node)
-
-		// Timeout in seconds is available as the fifth column from
-		// /proc/net/nf_conntrack.
-		result, err := e2essh.IssueSSHCommandWithResult(
-			fmt.Sprintf(
-				"sudo cat /proc/net/nf_conntrack "+
-					"| grep 'CLOSE_WAIT.*dst=%v.*dport=%v' "+
-					"| tail -n 1"+
-					"| awk '{print $5}' ",
-				serverNodeInfo.nodeIP,
-				testDaemonTCPPort),
-			framework.TestContext.Provider,
-			clientNodeInfo.node)
-		framework.ExpectNoError(err)
-
-		timeoutSeconds, err := strconv.Atoi(strings.TrimSpace(result.Stdout))
-		framework.ExpectNoError(err)
-
 		// These must be synchronized from the default values set in
 		// pkg/apis/../defaults.go ConntrackTCPCloseWaitTimeout. The
 		// current defaults are hidden in the initialization code.
 		const epsilonSeconds = 60
 		const expectedTimeoutSeconds = 60 * 60
-
-		framework.Logf("conntrack entry timeout was: %v, expected: %v",
-			timeoutSeconds, expectedTimeoutSeconds)
-
-		gomega.Expect(math.Abs(float64(timeoutSeconds - expectedTimeoutSeconds))).Should(
-			gomega.BeNumerically("<", (epsilonSeconds)))
+		// Obtain the corresponding conntrack entry on the host by checking
+		// the nf_conntrack file from the pod e2e-net-exec.
+		// It retries in a loop if the entry is not found.
+		cmd = fmt.Sprintf("cat /rootfs/proc/net/nf_conntrack "+
+			"| grep -m 1 'CLOSE_WAIT.*dst=%v.*dport=%v' ",
+			serverNodeInfo.nodeIP,
+			testDaemonTCPPort)
+		if err := wait.PollImmediate(5*time.Second, 30*time.Second, func() (bool, error) {
+			result, err := framework.RunHostCmd(fr.Namespace.Name, "e2e-net-exec", cmd)
+			// Retry if we can't obtain the conntrack entry.
+			if err != nil {
+				framework.Logf("failed to obtain conntrack entry: %v %v", result, err)
+				return false, nil
+			}
+			framework.Logf("conntrack entry for node %v and port %v: %v", serverNodeInfo.nodeIP, testDaemonTCPPort, result)
+			// The timeout in seconds is available as the fifth column of
+			// the matched entry in /proc/net/nf_conntrack.
+			line := strings.Fields(result)
+			if len(line) < 5 {
+				return false, fmt.Errorf("conntrack entry does not have a timeout field: %v", line)
+			}
+			timeoutSeconds, err := strconv.Atoi(line[4])
+			if err != nil {
+				return false, fmt.Errorf("failed to convert matched timeout %s to integer: %v", line[4], err)
+			}
+			if math.Abs(float64(timeoutSeconds-expectedTimeoutSeconds)) < epsilonSeconds {
+				return true, nil
+			}
+			return false, fmt.Errorf("wrong TCP CLOSE_WAIT timeout: %v expected: %v", timeoutSeconds, expectedTimeoutSeconds)
+		}); err != nil {
+			framework.Failf("no conntrack entry for port %d on node %s", testDaemonTCPPort, serverNodeInfo.nodeIP)
+		}
 	})
 
 	// Regression test for #74839, where:
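For reference, the fifth whitespace-separated field that the new polling loop parses is the entry's remaining timeout in seconds. A self-contained sketch of that extraction and the epsilon check (the sample nf_conntrack line is illustrative, not captured from a real run):

	package main

	import (
		"fmt"
		"math"
		"strconv"
		"strings"
	)

	func main() {
		// Illustrative nf_conntrack entry; field 5 (here 3600) is the
		// remaining timeout in seconds of the CLOSE_WAIT entry.
		entry := "ipv4     2 tcp      6 3600 CLOSE_WAIT src=10.244.1.3 dst=10.0.0.2 " +
			"sport=40636 dport=11302 src=10.0.0.2 dst=10.244.1.3 sport=11302 dport=40636 [ASSURED]"

		fields := strings.Fields(entry)
		timeoutSeconds, err := strconv.Atoi(fields[4])
		if err != nil {
			panic(err)
		}

		// Same tolerance as the test: within 60s of kube-proxy's
		// one-hour ConntrackTCPCloseWaitTimeout default.
		const expectedTimeoutSeconds = 60 * 60
		const epsilonSeconds = 60
		ok := math.Abs(float64(timeoutSeconds-expectedTimeoutSeconds)) < epsilonSeconds
		fmt.Printf("timeout=%ds within %ds of expected %ds: %v\n",
			timeoutSeconds, epsilonSeconds, expectedTimeoutSeconds, ok)
	}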