From 0e296c3dcd018cfbfdb80eddcbf55ea9d7d31f67 Mon Sep 17 00:00:00 2001
From: Ravi L R
Date: Wed, 14 Oct 2015 23:10:46 -0700
Subject: [PATCH 1/2] while deleting pod on terminal task status update, delete
 with grace_period=0 as the corresponding pod is already gone

---
 contrib/mesos/pkg/scheduler/scheduler.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/mesos/pkg/scheduler/scheduler.go b/contrib/mesos/pkg/scheduler/scheduler.go
index fcb4c901c4a..1a2bc767f17 100644
--- a/contrib/mesos/pkg/scheduler/scheduler.go
+++ b/contrib/mesos/pkg/scheduler/scheduler.go
@@ -446,7 +446,7 @@ func (k *KubernetesScheduler) reconcileTerminalTask(driver bindings.SchedulerDri
 		// TODO(jdef) for case #2 don't delete the pod, just update it's status to Failed
 		pod := &task.Pod
 		log.Warningf("deleting rogue pod %v/%v for lost task %v", pod.Namespace, pod.Name, task.ID)
-		if err := k.client.Pods(pod.Namespace).Delete(pod.Name, nil); err != nil && !errors.IsNotFound(err) {
+		if err := k.client.Pods(pod.Namespace).Delete(pod.Name, api.NewDeleteOptions(0)); err != nil && !errors.IsNotFound(err) {
 			log.Errorf("failed to delete pod %v/%v for terminal task %v: %v", pod.Namespace, pod.Name, task.ID, err)
 		}
 	} else if taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED || taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED {

From 99b03baa9bf3c7c63b7a505bdfc16098198504d6 Mon Sep 17 00:00:00 2001
From: Ravi L R
Date: Wed, 14 Oct 2015 23:14:22 -0700
Subject: [PATCH 2/2] handle task lost status update from executor on docker
 daemon crash/restart by deleting the pod, so that the RC creates a new one,
 if the pod is backed by RC

---
 contrib/mesos/pkg/scheduler/scheduler.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/contrib/mesos/pkg/scheduler/scheduler.go b/contrib/mesos/pkg/scheduler/scheduler.go
index 1a2bc767f17..3fe9d8e0da6 100644
--- a/contrib/mesos/pkg/scheduler/scheduler.go
+++ b/contrib/mesos/pkg/scheduler/scheduler.go
@@ -437,11 +437,13 @@ func (k *KubernetesScheduler) reconcileTerminalTask(driver bindings.SchedulerDri
 	if (state == podtask.StateRunning || state == podtask.StatePending) &&
 		((taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER && taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION) ||
 			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED) ||
-			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED)) {
+			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED) ||
+			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.ContainersDisappeared)) {
 		//--
 		// pod-task has metadata that refers to:
 		// (1) a task that Mesos no longer knows about, or else
 		// (2) a pod that the Kubelet will never report as "failed"
+		// (3) a pod that the kubeletExecutor reported as lost (likely due to docker daemon crash/restart)
 		// For now, destroy the pod and hope that there's a replication controller backing it up.
 		// TODO(jdef) for case #2 don't delete the pod, just update it's status to Failed
 		pod := &task.Pod
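
Note (not part of the patches above): a minimal, self-contained Go sketch of the delete-with-zero-grace-period pattern these patches rely on. The DeleteOptions struct, PodDeleter interface, and ErrNotFound sentinel below are hypothetical stand-ins for the in-tree client used in scheduler.go (k.client.Pods(ns).Delete, api.NewDeleteOptions(0), errors.IsNotFound); they are not the real Kubernetes API.

package sketch

import (
	"errors"
	"fmt"
)

// DeleteOptions mirrors the intent of api.NewDeleteOptions(0): a zero grace
// period asks the API server to remove the pod immediately, which is safe
// here because the backing container is already gone.
type DeleteOptions struct {
	GracePeriodSeconds int64
}

// PodDeleter is a hypothetical stand-in for the pod client used in scheduler.go.
type PodDeleter interface {
	Delete(namespace, name string, opts *DeleteOptions) error
}

// ErrNotFound is a hypothetical stand-in for the NotFound condition checked
// via errors.IsNotFound in the real code.
var ErrNotFound = errors.New("pod not found")

// deleteRoguePod deletes the pod for a terminal task with grace_period=0 and
// treats "already deleted" as success, mirroring the guard in
// reconcileTerminalTask.
func deleteRoguePod(c PodDeleter, namespace, name string) error {
	opts := &DeleteOptions{GracePeriodSeconds: 0} // analogous to api.NewDeleteOptions(0)
	if err := c.Delete(namespace, name, opts); err != nil && !errors.Is(err, ErrNotFound) {
		return fmt.Errorf("failed to delete pod %v/%v: %v", namespace, name, err)
	}
	return nil
}

Deleting with a zero grace period and tolerating NotFound keeps the reconciliation path idempotent: if the pod object was already removed, the terminal status update is still treated as handled, and any replication controller backing the pod is left to create a replacement.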