Merge pull request #53318 from sjenning/fix-http-probe-conn-pools
Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

**create separate transports for liveness and readiness probes**

There is currently an issue with the http2 connection pools in golang such that two GETs to the same host:port using the same Transport can collide, and one gets rejected with `http2: no cached connection was available`. This happens with readiness and liveness probes when their intervals line up so that worker goroutines invoke the two probes at exactly the same time. The result is a transient probe error that appears in the events. If the `failureThreshold` is 1 (an unusually aggressive setting), it would also cause a pod restart.

This PR creates a separate `httprobe` instance for the readiness and liveness probes so that they do not share a Transport and connection pool.

Fixes https://github.com/kubernetes/kubernetes/issues/49740

@smarterclayton @jhorwit2
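For illustration, here is a minimal sketch of the failure mode described above, written against plain `net/http` rather than the kubelet's prober types: two goroutines share one `http.Transport` and issue GETs to the same host:port at the same instant, which on the affected Go/http2 versions could race on the shared HTTP/2 connection pool and surface `http2: no cached connection was available`. The endpoint URL and the TLS settings are assumptions made for the sketch, not taken from this PR, and the race is timing-dependent rather than guaranteed to reproduce.

```go
package main

import (
	"crypto/tls"
	"fmt"
	"net/http"
	"sync"
)

func main() {
	// One shared Transport => one shared (HTTP/2) connection pool.
	shared := &http.Transport{
		ForceAttemptHTTP2: true, // make sure HTTP/2 is attempted (Go 1.13+)
		TLSClientConfig:   &tls.Config{InsecureSkipVerify: true}, // assumption: probe-style TLS, no CA check
	}
	client := &http.Client{Transport: shared}

	var wg sync.WaitGroup
	for _, name := range []string{"liveness", "readiness"} {
		wg.Add(1)
		go func(name string) {
			defer wg.Done()
			// Hypothetical pod health endpoint; both "probes" hit the same host:port.
			resp, err := client.Get("https://10.0.0.1:8443/healthz")
			if err != nil {
				fmt.Printf("%s probe failed: %v\n", name, err)
				return
			}
			resp.Body.Close()
			fmt.Printf("%s probe: %s\n", name, resp.Status)
		}(name)
	}
	wg.Wait()
}
```

With the change in this PR, the liveness and readiness probers each own their own Transport, so a simultaneous pair of GETs like the one above no longer contends for a single connection pool.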
@@ -46,10 +46,14 @@ const maxProbeRetries = 3
 
 // Prober helps to check the liveness/readiness of a container.
 type prober struct {
-	exec   execprobe.ExecProber
-	http   httprobe.HTTPProber
-	tcp    tcprobe.TCPProber
-	runner kubecontainer.ContainerCommandRunner
+	exec execprobe.ExecProber
+	// probe types need different httprobe instances so they don't
+	// share a connection pool, which can cause collisions on the
+	// same host:port and transient failures. See #49740.
+	readinessHttp httprobe.HTTPProber
+	livenessHttp  httprobe.HTTPProber
+	tcp           tcprobe.TCPProber
+	runner        kubecontainer.ContainerCommandRunner
 
 	refManager *kubecontainer.RefManager
 	recorder   record.EventRecorder
@@ -63,12 +67,13 @@ func newProber(
 	recorder record.EventRecorder) *prober {
 
 	return &prober{
-		exec:       execprobe.New(),
-		http:       httprobe.New(),
-		tcp:        tcprobe.New(),
-		runner:     runner,
-		refManager: refManager,
-		recorder:   recorder,
+		exec:          execprobe.New(),
+		readinessHttp: httprobe.New(),
+		livenessHttp:  httprobe.New(),
+		tcp:           tcprobe.New(),
+		runner:        runner,
+		refManager:    refManager,
+		recorder:      recorder,
 	}
 }
 
@@ -90,7 +95,7 @@ func (pb *prober) probe(probeType probeType, pod *v1.Pod, status v1.PodStatus, c
 		return results.Success, nil
 	}
 
-	result, output, err := pb.runProbeWithRetries(probeSpec, pod, status, container, containerID, maxProbeRetries)
+	result, output, err := pb.runProbeWithRetries(probeType, probeSpec, pod, status, container, containerID, maxProbeRetries)
 	if err != nil || result != probe.Success {
 		// Probe failed in one way or another.
 		ref, hasRef := pb.refManager.GetRef(containerID)
@@ -116,12 +121,12 @@ func (pb *prober) probe(probeType probeType, pod *v1.Pod, status v1.PodStatus, c
 
 // runProbeWithRetries tries to probe the container in a finite loop, it returns the last result
 // if it never succeeds.
-func (pb *prober) runProbeWithRetries(p *v1.Probe, pod *v1.Pod, status v1.PodStatus, container v1.Container, containerID kubecontainer.ContainerID, retries int) (probe.Result, string, error) {
+func (pb *prober) runProbeWithRetries(probeType probeType, p *v1.Probe, pod *v1.Pod, status v1.PodStatus, container v1.Container, containerID kubecontainer.ContainerID, retries int) (probe.Result, string, error) {
 	var err error
 	var result probe.Result
 	var output string
 	for i := 0; i < retries; i++ {
-		result, output, err = pb.runProbe(p, pod, status, container, containerID)
+		result, output, err = pb.runProbe(probeType, p, pod, status, container, containerID)
 		if err == nil {
 			return result, output, nil
 		}
@@ -139,7 +144,7 @@ func buildHeader(headerList []v1.HTTPHeader) http.Header {
 	return headers
 }
 
-func (pb *prober) runProbe(p *v1.Probe, pod *v1.Pod, status v1.PodStatus, container v1.Container, containerID kubecontainer.ContainerID) (probe.Result, string, error) {
+func (pb *prober) runProbe(probeType probeType, p *v1.Probe, pod *v1.Pod, status v1.PodStatus, container v1.Container, containerID kubecontainer.ContainerID) (probe.Result, string, error) {
 	timeout := time.Duration(p.TimeoutSeconds) * time.Second
 	if p.Exec != nil {
 		glog.V(4).Infof("Exec-Probe Pod: %v, Container: %v, Command: %v", pod, container, p.Exec.Command)
@@ -161,7 +166,11 @@ func (pb *prober) runProbe(p *v1.Probe, pod *v1.Pod, status v1.PodStatus, contai
 		url := formatURL(scheme, host, port, path)
 		headers := buildHeader(p.HTTPGet.HTTPHeaders)
 		glog.V(4).Infof("HTTP-Probe Headers: %v", headers)
-		return pb.http.Probe(url, headers, timeout)
+		if probeType == liveness {
+			return pb.livenessHttp.Probe(url, headers, timeout)
+		} else { // readiness
+			return pb.readinessHttp.Probe(url, headers, timeout)
+		}
 	}
 	if p.TCPSocket != nil {
 		port, err := extractPort(p.TCPSocket.Port, container)
|   | ||||