Add activeDeadlineSeconds to kubeadm upgrade-health-check job
With https://github.com/kubernetes/kubernetes/pull/122079, kubeadm now relies on `ttlSecondsAfterFinished` to clean up the `upgrade-health-check` job once its pod reaches a terminal state. However, there is a case where the pod never reaches a terminal state, so the job never registers a terminal state either and is therefore never garbage collected. For example, if the pause image is not present, `ErrImagePull` makes the pod keep retrying to pull the image, and the pod never reaches a terminal state on its own. The job then keeps waiting for the pod to reach a terminal state, which will not happen. So we need to set `activeDeadlineSeconds` to prevent the job from waiting forever for the pod to reach a terminal state. Without this, users invoking `kubeadm upgrade plan` need to clean up the job outside of kubeadm even if they ignore the preflight result, because the job still runs when the result is configured to be ignored via the `--ignore-preflight-errors=CreateJob` flag. Since the timeout for the polling in the `CreateJob` step in kubeadm is 15 seconds, we should derive `activeDeadlineSeconds` from that same timeout (this change adds a 5-second margin on top of it).
This commit is contained in:
		| @@ -98,6 +98,7 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration) | |||||||
| 		fieldSelector = "spec.unschedulable=false" | 		fieldSelector = "spec.unschedulable=false" | ||||||
| 		ns            = metav1.NamespaceSystem | 		ns            = metav1.NamespaceSystem | ||||||
| 		timeout       = 15 * time.Second | 		timeout       = 15 * time.Second | ||||||
|  | 		timeoutMargin = 5 * time.Second | ||||||
| 	) | 	) | ||||||
| 	var ( | 	var ( | ||||||
| 		err, lastError error | 		err, lastError error | ||||||
| @@ -132,6 +133,9 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration) | |||||||
| 		return nil | 		return nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	// Adding a margin of error to the polling timeout. | ||||||
|  | 	timeoutWithMargin := timeout.Seconds() + timeoutMargin.Seconds() | ||||||
|  |  | ||||||
| 	// Prepare Job | 	// Prepare Job | ||||||
| 	job := &batchv1.Job{ | 	job := &batchv1.Job{ | ||||||
| 		ObjectMeta: metav1.ObjectMeta{ | 		ObjectMeta: metav1.ObjectMeta{ | ||||||
| @@ -140,7 +144,8 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration) | |||||||
| 		}, | 		}, | ||||||
| 		Spec: batchv1.JobSpec{ | 		Spec: batchv1.JobSpec{ | ||||||
| 			BackoffLimit:            ptr.To[int32](0), | 			BackoffLimit:            ptr.To[int32](0), | ||||||
| 			TTLSecondsAfterFinished: ptr.To[int32](int32(timeout.Seconds()) + 5), // Make sure it's more than 'timeout'. | 			TTLSecondsAfterFinished: ptr.To[int32](int32(timeoutWithMargin)), | ||||||
|  | 			ActiveDeadlineSeconds:   ptr.To[int64](int64(timeoutWithMargin)), | ||||||
| 			Template: v1.PodTemplateSpec{ | 			Template: v1.PodTemplateSpec{ | ||||||
| 				Spec: v1.PodSpec{ | 				Spec: v1.PodSpec{ | ||||||
| 					RestartPolicy: v1.RestartPolicyNever, | 					RestartPolicy: v1.RestartPolicyNever, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Yuya Yabe
					Yuya Yabe