Merge pull request #122079 from carlory/kubeadm-upgrade-health
refactor healthCheck's CreateJob with TTLSecondsAfterFinished
This commit is contained in:
		@@ -93,7 +93,7 @@ func CheckClusterHealth(client clientset.Interface, cfg *kubeadmapi.ClusterConfi
 | 
				
			|||||||
// createJob is a check that verifies that a Job can be created in the cluster
 | 
					// createJob is a check that verifies that a Job can be created in the cluster
 | 
				
			||||||
func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration) (lastError error) {
 | 
					func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration) (lastError error) {
 | 
				
			||||||
	const (
 | 
						const (
 | 
				
			||||||
		jobName = "upgrade-health-check"
 | 
							prefix  = "upgrade-health-check"
 | 
				
			||||||
		ns      = metav1.NamespaceSystem
 | 
							ns      = metav1.NamespaceSystem
 | 
				
			||||||
		timeout = 15 * time.Second
 | 
							timeout = 15 * time.Second
 | 
				
			||||||
	)
 | 
						)
 | 
				
			||||||
@@ -101,18 +101,19 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 | 
				
			|||||||
	// If client.Discovery().RESTClient() is nil, the fake client is used.
 | 
						// If client.Discovery().RESTClient() is nil, the fake client is used.
 | 
				
			||||||
	// Return early because the kubeadm dryrun dynamic client only handles the core/v1 GroupVersion.
 | 
						// Return early because the kubeadm dryrun dynamic client only handles the core/v1 GroupVersion.
 | 
				
			||||||
	if client.Discovery().RESTClient() == nil {
 | 
						if client.Discovery().RESTClient() == nil {
 | 
				
			||||||
		fmt.Printf("[upgrade/health] Would create the Job %q in namespace %q and wait until it completes\n", jobName, ns)
 | 
							fmt.Printf("[upgrade/health] Would create the Job with the prefix %q in namespace %q and wait until it completes\n", prefix, ns)
 | 
				
			||||||
		return nil
 | 
							return nil
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Prepare Job
 | 
						// Prepare Job
 | 
				
			||||||
	job := &batchv1.Job{
 | 
						job := &batchv1.Job{
 | 
				
			||||||
		ObjectMeta: metav1.ObjectMeta{
 | 
							ObjectMeta: metav1.ObjectMeta{
 | 
				
			||||||
			Name:      jobName,
 | 
								GenerateName: prefix + "-",
 | 
				
			||||||
			Namespace:    ns,
 | 
								Namespace:    ns,
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
		Spec: batchv1.JobSpec{
 | 
							Spec: batchv1.JobSpec{
 | 
				
			||||||
			BackoffLimit:            ptr.To[int32](0),
 | 
								BackoffLimit:            ptr.To[int32](0),
 | 
				
			||||||
 | 
								TTLSecondsAfterFinished: ptr.To[int32](2),
 | 
				
			||||||
			Template: v1.PodTemplateSpec{
 | 
								Template: v1.PodTemplateSpec{
 | 
				
			||||||
				Spec: v1.PodSpec{
 | 
									Spec: v1.PodSpec{
 | 
				
			||||||
					RestartPolicy: v1.RestartPolicyNever,
 | 
										RestartPolicy: v1.RestartPolicyNever,
 | 
				
			||||||
@@ -129,7 +130,7 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 | 
				
			|||||||
					},
 | 
										},
 | 
				
			||||||
					Containers: []v1.Container{
 | 
										Containers: []v1.Container{
 | 
				
			||||||
						{
 | 
											{
 | 
				
			||||||
							Name:  jobName,
 | 
												Name:  prefix,
 | 
				
			||||||
							Image: images.GetPauseImage(cfg),
 | 
												Image: images.GetPauseImage(cfg),
 | 
				
			||||||
							Args:  []string{"-v"},
 | 
												Args:  []string{"-v"},
 | 
				
			||||||
						},
 | 
											},
 | 
				
			||||||
@@ -139,38 +140,29 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 | 
				
			|||||||
		},
 | 
							},
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Check if the Job already exists and delete it
 | 
						ctx := context.Background()
 | 
				
			||||||
	if _, err := client.BatchV1().Jobs(ns).Get(context.TODO(), jobName, metav1.GetOptions{}); err == nil {
 | 
					 | 
				
			||||||
		if err = deleteHealthCheckJob(client, ns, jobName); err != nil {
 | 
					 | 
				
			||||||
			return err
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Cleanup the Job on exit
 | 
						// Create the Job, but retry if it fails
 | 
				
			||||||
	defer func() {
 | 
						klog.V(2).Infof("Creating a Job with the prefix %q in the namespace %q", prefix, ns)
 | 
				
			||||||
		lastError = deleteHealthCheckJob(client, ns, jobName)
 | 
						var jobName string
 | 
				
			||||||
	}()
 | 
						err := wait.PollUntilContextTimeout(ctx, time.Second*1, timeout, true, func(ctx context.Context) (bool, error) {
 | 
				
			||||||
 | 
							createdJob, err := client.BatchV1().Jobs(ns).Create(ctx, job, metav1.CreateOptions{})
 | 
				
			||||||
	// Create the Job, but retry in case it is being currently deleted
 | 
							if err != nil {
 | 
				
			||||||
	klog.V(2).Infof("Creating Job %q in the namespace %q", jobName, ns)
 | 
								klog.V(2).Infof("Could not create a Job with the prefix %q in the namespace %q, retrying: %v", prefix, ns, err)
 | 
				
			||||||
	err := wait.PollImmediate(time.Second*1, timeout, func() (bool, error) {
 | 
					 | 
				
			||||||
		if _, err := client.BatchV1().Jobs(ns).Create(context.TODO(), job, metav1.CreateOptions{}); err != nil {
 | 
					 | 
				
			||||||
			klog.V(2).Infof("Could not create Job %q in the namespace %q, retrying: %v", jobName, ns, err)
 | 
					 | 
				
			||||||
			lastError = err
 | 
								lastError = err
 | 
				
			||||||
			return false, nil
 | 
								return false, nil
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							jobName = createdJob.Name
 | 
				
			||||||
		return true, nil
 | 
							return true, nil
 | 
				
			||||||
	})
 | 
						})
 | 
				
			||||||
	if err != nil {
 | 
						if err != nil {
 | 
				
			||||||
		return errors.Wrapf(lastError, "could not create Job %q in the namespace %q", jobName, ns)
 | 
							return errors.Wrapf(lastError, "could not create a Job with the prefix %q in the namespace %q", prefix, ns)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Waiting and manually deleting the Job is a workaround to not enabling the TTL controller.
 | 
					 | 
				
			||||||
	// TODO: refactor this if the TTL controller is enabled in kubeadm once it goes Beta.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// Wait for the Job to complete
 | 
						// Wait for the Job to complete
 | 
				
			||||||
	err = wait.PollImmediate(time.Second*1, timeout, func() (bool, error) {
 | 
						err = wait.PollUntilContextTimeout(ctx, time.Second*1, timeout, true, func(ctx context.Context) (bool, error) {
 | 
				
			||||||
		job, err := client.BatchV1().Jobs(ns).Get(context.TODO(), jobName, metav1.GetOptions{})
 | 
							job, err := client.BatchV1().Jobs(ns).Get(ctx, jobName, metav1.GetOptions{})
 | 
				
			||||||
		if err != nil {
 | 
							if err != nil {
 | 
				
			||||||
			lastError = err
 | 
								lastError = err
 | 
				
			||||||
			klog.V(2).Infof("could not get Job %q in the namespace %q, retrying: %v", jobName, ns, err)
 | 
								klog.V(2).Infof("could not get Job %q in the namespace %q, retrying: %v", jobName, ns, err)
 | 
				
			||||||
@@ -194,15 +186,6 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 | 
				
			|||||||
	return nil
 | 
						return nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func deleteHealthCheckJob(client clientset.Interface, ns, jobName string) error {
 | 
					 | 
				
			||||||
	klog.V(2).Infof("Deleting Job %q in the namespace %q", jobName, ns)
 | 
					 | 
				
			||||||
	propagation := metav1.DeletePropagationForeground
 | 
					 | 
				
			||||||
	if err := client.BatchV1().Jobs(ns).Delete(context.TODO(), jobName, metav1.DeleteOptions{PropagationPolicy: &propagation}); err != nil {
 | 
					 | 
				
			||||||
		return errors.Wrapf(err, "could not delete Job %q in the namespace %q", jobName, ns)
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return nil
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// controlPlaneNodesReady checks whether all control-plane Nodes in the cluster are in the Running state
 | 
					// controlPlaneNodesReady checks whether all control-plane Nodes in the cluster are in the Running state
 | 
				
			||||||
func controlPlaneNodesReady(client clientset.Interface, _ *kubeadmapi.ClusterConfiguration) error {
 | 
					func controlPlaneNodesReady(client clientset.Interface, _ *kubeadmapi.ClusterConfiguration) error {
 | 
				
			||||||
	selectorControlPlane := labels.SelectorFromSet(map[string]string{
 | 
						selectorControlPlane := labels.SelectorFromSet(map[string]string{
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user