Use BindingHostKey annotation to detect scheduled pods in k8sm-scheduler

Previously, the NodeName field in the pod spec was used to detect whether a pod had already been scheduled. As a result, pods with a fixed, pre-set NodeName were never scheduled by the k8sm-scheduler, which caused, for example, the e2e intra-pod test to fail. Fixes mesosphere/kubernetes-mesos#388
2015-07-13 16:34:43 +02:00
parent b0d31fb794
commit f59b5f503b
7 changed files with 199 additions and 84 deletions
--- a/contrib/mesos/pkg/executor/executor.go
+++ b/contrib/mesos/pkg/executor/executor.go
@@ -437,25 +437,6 @@ func (k *KubernetesExecutor) attemptSuicide(driver bindings.ExecutorDriver, abor

 // async continuation of LaunchTask
 func (k *KubernetesExecutor) launchTask(driver bindings.ExecutorDriver, taskId string, pod *api.Pod) {
-
-	//HACK(jdef): cloned binding construction from k8s plugin/pkg/scheduler/scheduler.go
-	binding := &api.Binding{
-		ObjectMeta: api.ObjectMeta{
-			Namespace:   pod.Namespace,
-			Name:        pod.Name,
-			Annotations: make(map[string]string),
-		},
-		Target: api.ObjectReference{
-			Kind: "Node",
-			Name: pod.Annotations[meta.BindingHostKey],
-		},
-	}
-
-	// forward the annotations that the scheduler wants to apply
-	for k, v := range pod.Annotations {
-		binding.Annotations[k] = v
-	}
-
 	deleteTask := func() {
 		k.lock.Lock()
 		defer k.lock.Unlock()
@@ -463,17 +444,57 @@ func (k *KubernetesExecutor) launchTask(driver bindings.ExecutorDriver, taskId s
 		k.resetSuicideWatch(driver)
 	}

-	log.Infof("Binding '%v/%v' to '%v' with annotations %+v...", pod.Namespace, pod.Name, binding.Target.Name, binding.Annotations)
-	ctx := api.WithNamespace(api.NewContext(), binding.Namespace)
 	// TODO(k8s): use Pods interface for binding once clusters are upgraded
 	// return b.Pods(binding.Namespace).Bind(binding)
-	err := k.client.Post().Namespace(api.NamespaceValue(ctx)).Resource("bindings").Body(binding).Do().Error()
-	if err != nil {
-		deleteTask()
-		k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED,
-			messages.CreateBindingFailure))
-		return
+	if pod.Spec.NodeName == "" {
+		//HACK(jdef): cloned binding construction from k8s plugin/pkg/scheduler/scheduler.go
+		binding := &api.Binding{
+			ObjectMeta: api.ObjectMeta{
+				Namespace:   pod.Namespace,
+				Name:        pod.Name,
+				Annotations: make(map[string]string),
+			},
+			Target: api.ObjectReference{
+				Kind: "Node",
+				Name: pod.Annotations[meta.BindingHostKey],
+			},
+		}
+
+		// forward the annotations that the scheduler wants to apply
+		for k, v := range pod.Annotations {
+			binding.Annotations[k] = v
+		}
+
+		// create binding on apiserver
+		log.Infof("Binding '%v/%v' to '%v' with annotations %+v...", pod.Namespace, pod.Name, binding.Target.Name, binding.Annotations)
+		ctx := api.WithNamespace(api.NewContext(), binding.Namespace)
+		err := k.client.Post().Namespace(api.NamespaceValue(ctx)).Resource("bindings").Body(binding).Do().Error()
+		if err != nil {
+			deleteTask()
+			k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED,
+				messages.CreateBindingFailure))
+			return
+		}
+	} else {
+		// post annotations update to apiserver
+		patch := struct {
+			Metadata struct {
+				Annotations map[string]string `json:"annotations"`
+			} `json:"metadata"`
+		}{}
+		patch.Metadata.Annotations = pod.Annotations
+		patchJson, _ := json.Marshal(patch)
+		log.V(4).Infof("Patching annotations %v of pod %v/%v: %v", pod.Annotations, pod.Namespace, pod.Name, string(patchJson))
+		err := k.client.Patch(api.MergePatchType).RequestURI(pod.SelfLink).Body(patchJson).Do().Error()
+		if err != nil {
+			log.Errorf("Error updating annotations of ready-to-launch pod %v/%v: %v", pod.Namespace, pod.Name, err)
+			deleteTask()
+			k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED,
+				messages.AnnotationUpdateFailure))
+			return
+		}
 	}
+
 	podFullName := container.GetPodFullName(pod)

 	// allow a recently failed-over scheduler the chance to recover the task/pod binding: