Merge pull request #124503 from neolit123/1.31-convert-create-job-preflight-to-warning
kubeadm: check for available nodes during 'CreateJob' preflight
This commit is contained in:
commit
bae83009d3
@ -64,7 +64,7 @@ func (c *healthCheck) Name() string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// CheckClusterHealth makes sure:
|
// CheckClusterHealth makes sure:
|
||||||
// - the API /healthz endpoint is healthy
|
// - the cluster can accept a workload
|
||||||
// - all control-plane Nodes are Ready
|
// - all control-plane Nodes are Ready
|
||||||
// - (if static pod-hosted) that all required Static Pod manifests exist on disk
|
// - (if static pod-hosted) that all required Static Pod manifests exist on disk
|
||||||
func CheckClusterHealth(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration, ignoreChecksErrors sets.Set[string], printer output.Printer) error {
|
func CheckClusterHealth(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration, ignoreChecksErrors sets.Set[string], printer output.Printer) error {
|
||||||
@ -92,11 +92,18 @@ func CheckClusterHealth(client clientset.Interface, cfg *kubeadmapi.ClusterConfi
|
|||||||
}
|
}
|
||||||
|
|
||||||
// createJob is a check that verifies that a Job can be created in the cluster
|
// createJob is a check that verifies that a Job can be created in the cluster
|
||||||
func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration) (lastError error) {
|
func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration) error {
|
||||||
const (
|
const (
|
||||||
prefix = "upgrade-health-check"
|
prefix = "upgrade-health-check"
|
||||||
ns = metav1.NamespaceSystem
|
fieldSelector = "spec.unschedulable=false"
|
||||||
timeout = 15 * time.Second
|
ns = metav1.NamespaceSystem
|
||||||
|
timeout = 15 * time.Second
|
||||||
|
)
|
||||||
|
var (
|
||||||
|
err, lastError error
|
||||||
|
ctx = context.Background()
|
||||||
|
nodes *v1.NodeList
|
||||||
|
listOptions = metav1.ListOptions{Limit: 1, FieldSelector: fieldSelector}
|
||||||
)
|
)
|
||||||
|
|
||||||
// If client.Discovery().RESTClient() is nil, the fake client is used.
|
// If client.Discovery().RESTClient() is nil, the fake client is used.
|
||||||
@ -106,6 +113,25 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if there is at least one Node where a Job's Pod can be scheduled. If not, skip this preflight check.
|
||||||
|
err = wait.PollUntilContextTimeout(ctx, time.Second*1, timeout, true, func(_ context.Context) (bool, error) {
|
||||||
|
nodes, err = client.CoreV1().Nodes().List(context.Background(), listOptions)
|
||||||
|
if err != nil {
|
||||||
|
klog.V(2).Infof("Could not list Nodes with field selector %q: %v", fieldSelector, err)
|
||||||
|
lastError = err
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
return true, nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return errors.Wrap(lastError, "could not check if there is at least one Node that can schedule a test Pod")
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(nodes.Items) == 0 {
|
||||||
|
klog.Warning("The preflight check \"CreateJob\" was skipped because there are no schedulable Nodes in the cluster.")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// Prepare Job
|
// Prepare Job
|
||||||
job := &batchv1.Job{
|
job := &batchv1.Job{
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
@ -114,7 +140,7 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
|
|||||||
},
|
},
|
||||||
Spec: batchv1.JobSpec{
|
Spec: batchv1.JobSpec{
|
||||||
BackoffLimit: ptr.To[int32](0),
|
BackoffLimit: ptr.To[int32](0),
|
||||||
TTLSecondsAfterFinished: ptr.To[int32](2),
|
TTLSecondsAfterFinished: ptr.To[int32](int32(timeout.Seconds()) + 5), // Make sure it's more than 'timeout'.
|
||||||
Template: v1.PodTemplateSpec{
|
Template: v1.PodTemplateSpec{
|
||||||
Spec: v1.PodSpec{
|
Spec: v1.PodSpec{
|
||||||
RestartPolicy: v1.RestartPolicyNever,
|
RestartPolicy: v1.RestartPolicyNever,
|
||||||
@ -141,13 +167,11 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
// Create the Job, but retry if it fails
|
// Create the Job, but retry if it fails
|
||||||
klog.V(2).Infof("Creating a Job with the prefix %q in the namespace %q", prefix, ns)
|
klog.V(2).Infof("Creating a Job with the prefix %q in the namespace %q", prefix, ns)
|
||||||
var jobName string
|
var jobName string
|
||||||
err := wait.PollUntilContextTimeout(ctx, time.Second*1, timeout, true, func(ctx context.Context) (bool, error) {
|
err = wait.PollUntilContextTimeout(ctx, time.Second*1, timeout, true, func(_ context.Context) (bool, error) {
|
||||||
createdJob, err := client.BatchV1().Jobs(ns).Create(ctx, job, metav1.CreateOptions{})
|
createdJob, err := client.BatchV1().Jobs(ns).Create(context.Background(), job, metav1.CreateOptions{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.V(2).Infof("Could not create a Job with the prefix %q in the namespace %q, retrying: %v", prefix, ns, err)
|
klog.V(2).Infof("Could not create a Job with the prefix %q in the namespace %q, retrying: %v", prefix, ns, err)
|
||||||
lastError = err
|
lastError = err
|
||||||
@ -162,8 +186,8 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Wait for the Job to complete
|
// Wait for the Job to complete
|
||||||
err = wait.PollUntilContextTimeout(ctx, time.Second*1, timeout, true, func(ctx context.Context) (bool, error) {
|
err = wait.PollUntilContextTimeout(ctx, time.Second*1, timeout, true, func(_ context.Context) (bool, error) {
|
||||||
job, err := client.BatchV1().Jobs(ns).Get(ctx, jobName, metav1.GetOptions{})
|
job, err := client.BatchV1().Jobs(ns).Get(context.Background(), jobName, metav1.GetOptions{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
lastError = err
|
lastError = err
|
||||||
klog.V(2).Infof("could not get Job %q in the namespace %q, retrying: %v", jobName, ns, err)
|
klog.V(2).Infof("could not get Job %q in the namespace %q, retrying: %v", jobName, ns, err)
|
||||||
@ -192,7 +216,7 @@ func controlPlaneNodesReady(client clientset.Interface, _ *kubeadmapi.ClusterCon
|
|||||||
selectorControlPlane := labels.SelectorFromSet(map[string]string{
|
selectorControlPlane := labels.SelectorFromSet(map[string]string{
|
||||||
constants.LabelNodeRoleControlPlane: "",
|
constants.LabelNodeRoleControlPlane: "",
|
||||||
})
|
})
|
||||||
nodes, err := client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{
|
nodes, err := client.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{
|
||||||
LabelSelector: selectorControlPlane.String(),
|
LabelSelector: selectorControlPlane.String(),
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
Loading…
Reference in New Issue
Block a user