Revert "Revert "Gracefully delete pods from the Kubelet""
This reverts commit 98115facfd.
This commit is contained in:
@@ -56,12 +56,21 @@ import (
|
||||
const (
	// maxReasonCacheEntries is the size limit applied to the reason cache.
	maxReasonCacheEntries = 200

	// ndots specifies the minimum number of dots that a domain name must contain for the resolver to consider it as FQDN (fully-qualified)
	// we want to be able to consider SRV lookup names like _dns._udp.kube-dns.default.svc to be considered relative.
	// hence, setting ndots to be 5.
	ndotsDNSOption = "options ndots:5\n"

	// In order to avoid unnecessary SIGKILLs, give every container a minimum grace
	// period after SIGTERM. Docker will guarantee the termination, but SIGTERM is
	// potentially dangerous.
	// TODO: evaluate whether there are scenarios in which SIGKILL is preferable to
	// SIGTERM for certain process types, which may justify setting this to 0.
	minimumGracePeriodInSeconds = 2

	// Label keys written on Docker containers so that pod information can be
	// recovered from the container itself. Each key is declared exactly once;
	// redeclaring a name inside the same block does not compile.
	kubernetesNameLabel                   = "io.kubernetes.pod.name"
	kubernetesPodLabel                    = "io.kubernetes.pod.data"
	kubernetesTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod"
	kubernetesContainerLabel              = "io.kubernetes.container.name"
)
|
||||
|
||||
// DockerManager implements the Runtime interface.
|
||||
@@ -588,12 +597,19 @@ func (dm *DockerManager) runContainer(
|
||||
if len(containerHostname) > hostnameMaxLen {
|
||||
containerHostname = containerHostname[:hostnameMaxLen]
|
||||
}
|
||||
|
||||
// Pod information is recorded on the container as labels to preserve it in the event the pod is deleted
|
||||
// while the Kubelet is down and there is no information available to recover the pod. This includes
|
||||
// termination information like the termination grace period and the pre stop hooks.
|
||||
// TODO: keep these labels up to date if the pod changes
|
||||
namespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
|
||||
labels := map[string]string{
|
||||
"io.kubernetes.pod.name": namespacedName.String(),
|
||||
kubernetesNameLabel: namespacedName.String(),
|
||||
}
|
||||
if pod.Spec.TerminationGracePeriodSeconds != nil {
|
||||
labels[kubernetesTerminationGracePeriodLabel] = strconv.FormatInt(*pod.Spec.TerminationGracePeriodSeconds, 10)
|
||||
}
|
||||
if container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
|
||||
glog.V(1).Infof("Setting preStop hook")
|
||||
// TODO: This is kind of hacky, we should really just encode the bits we need.
|
||||
data, err := latest.Codec.Encode(pod)
|
||||
if err != nil {
|
||||
@@ -1104,40 +1120,56 @@ func (dm *DockerManager) PortForward(pod *kubecontainer.Pod, port uint16, stream
|
||||
}
|
||||
|
||||
// Kills all containers in the specified pod
|
||||
func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error {
|
||||
func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
|
||||
// Send the kills in parallel since they may take a long time. Len + 1 since there
|
||||
// can be Len errors + the networkPlugin teardown error.
|
||||
errs := make(chan error, len(pod.Containers)+1)
|
||||
errs := make(chan error, len(runningPod.Containers)+1)
|
||||
wg := sync.WaitGroup{}
|
||||
var networkID types.UID
|
||||
for _, container := range pod.Containers {
|
||||
var (
|
||||
networkContainer *kubecontainer.Container
|
||||
networkSpec *api.Container
|
||||
)
|
||||
for _, container := range runningPod.Containers {
|
||||
wg.Add(1)
|
||||
go func(container *kubecontainer.Container) {
|
||||
defer util.HandleCrash()
|
||||
defer wg.Done()
|
||||
|
||||
var containerSpec *api.Container
|
||||
if pod != nil {
|
||||
for i, c := range pod.Spec.Containers {
|
||||
if c.Name == container.Name {
|
||||
containerSpec = &pod.Spec.Containers[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Handle this without signaling the pod infra container to
|
||||
// adapt to the generic container runtime.
|
||||
if container.Name == PodInfraContainerName {
|
||||
// Store the container runtime for later deletion.
|
||||
// We do this so that PreStop handlers can run in the network namespace.
|
||||
networkID = container.ID
|
||||
networkContainer = container
|
||||
networkSpec = containerSpec
|
||||
return
|
||||
}
|
||||
if err := dm.killContainer(container.ID); err != nil {
|
||||
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, pod.ID)
|
||||
|
||||
err := dm.killContainer(container.ID, containerSpec, pod)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID)
|
||||
errs <- err
|
||||
}
|
||||
}(container)
|
||||
}
|
||||
wg.Wait()
|
||||
if len(networkID) > 0 {
|
||||
if err := dm.networkPlugin.TearDownPod(pod.Namespace, pod.Name, kubeletTypes.DockerID(networkID)); err != nil {
|
||||
if networkContainer != nil {
|
||||
if err := dm.networkPlugin.TearDownPod(runningPod.Namespace, runningPod.Name, kubeletTypes.DockerID(networkContainer.ID)); err != nil {
|
||||
glog.Errorf("Failed tearing down the infra container: %v", err)
|
||||
errs <- err
|
||||
}
|
||||
if err := dm.killContainer(networkID); err != nil {
|
||||
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, pod.ID)
|
||||
if err := dm.killContainer(networkContainer.ID, networkSpec, pod); err != nil {
|
||||
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID)
|
||||
errs <- err
|
||||
}
|
||||
}
|
||||
@@ -1152,75 +1184,128 @@ func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// KillContainerInPod kills a container in the pod.
|
||||
func (dm *DockerManager) KillContainerInPod(container api.Container, pod *api.Pod) error {
|
||||
// Locate the container.
|
||||
pods, err := dm.GetPods(false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID)
|
||||
targetContainer := targetPod.FindContainerByName(container.Name)
|
||||
if targetContainer == nil {
|
||||
return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name)
|
||||
}
|
||||
return dm.killContainer(targetContainer.ID)
|
||||
}
|
||||
|
||||
// TODO(vmarmol): Unexport this as it is no longer used externally.
|
||||
// KillContainer kills a container identified by containerID.
|
||||
// Internally, it invokes docker's StopContainer API with a timeout of 10s.
|
||||
// TODO: Deprecate this function in favor of KillContainerInPod.
|
||||
func (dm *DockerManager) KillContainer(containerID types.UID) error {
|
||||
return dm.killContainer(containerID)
|
||||
}
|
||||
|
||||
func (dm *DockerManager) killContainer(containerID types.UID) error {
|
||||
ID := string(containerID)
|
||||
glog.V(2).Infof("Killing container with id %q", ID)
|
||||
inspect, err := dm.client.InspectContainer(ID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var found bool
|
||||
var preStop string
|
||||
if inspect != nil && inspect.Config != nil && inspect.Config.Labels != nil {
|
||||
preStop, found = inspect.Config.Labels[kubernetesPodLabel]
|
||||
}
|
||||
if found {
|
||||
var pod api.Pod
|
||||
err := latest.Codec.DecodeInto([]byte(preStop), &pod)
|
||||
// KillContainerInPod kills a container in the pod. It must be passed either a container ID or a container and pod,
|
||||
// and will attempt to lookup the other information if missing.
|
||||
func (dm *DockerManager) KillContainerInPod(containerID types.UID, container *api.Container, pod *api.Pod) error {
|
||||
switch {
|
||||
case len(containerID) == 0:
|
||||
// Locate the container.
|
||||
pods, err := dm.GetPods(false)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to decode prestop: %s, %s", preStop, ID)
|
||||
} else {
|
||||
name := inspect.Config.Labels[kubernetesContainerLabel]
|
||||
var container *api.Container
|
||||
return err
|
||||
}
|
||||
targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID)
|
||||
targetContainer := targetPod.FindContainerByName(container.Name)
|
||||
if targetContainer == nil {
|
||||
return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name)
|
||||
}
|
||||
containerID = targetContainer.ID
|
||||
|
||||
case container == nil || pod == nil:
|
||||
// Read information about the container from labels
|
||||
inspect, err := dm.client.InspectContainer(string(containerID))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
storedPod, storedContainer, cerr := containerAndPodFromLabels(inspect)
|
||||
if cerr != nil {
|
||||
glog.Errorf("unable to access pod data from container: %v", err)
|
||||
}
|
||||
if container == nil {
|
||||
container = storedContainer
|
||||
}
|
||||
if pod == nil {
|
||||
pod = storedPod
|
||||
}
|
||||
}
|
||||
return dm.killContainer(containerID, container, pod)
|
||||
}
|
||||
|
||||
// killContainer accepts a containerID and an optional container or pod containing shutdown policies. Invoke
|
||||
// KillContainerInPod if information must be retrieved first.
|
||||
func (dm *DockerManager) killContainer(containerID types.UID, container *api.Container, pod *api.Pod) error {
|
||||
ID := string(containerID)
|
||||
name := ID
|
||||
if container != nil {
|
||||
name = fmt.Sprintf("%s %s", name, container.Name)
|
||||
}
|
||||
if pod != nil {
|
||||
name = fmt.Sprintf("%s %s/%s", name, pod.Namespace, pod.Name)
|
||||
}
|
||||
|
||||
gracePeriod := int64(minimumGracePeriodInSeconds)
|
||||
if pod != nil && pod.DeletionGracePeriodSeconds != nil {
|
||||
gracePeriod = *pod.DeletionGracePeriodSeconds
|
||||
}
|
||||
glog.V(2).Infof("Killing container %q with %d second grace period", name, gracePeriod)
|
||||
|
||||
if pod != nil && container != nil && container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
|
||||
glog.V(4).Infof("Running preStop hook for container %q", name)
|
||||
start := util.Now()
|
||||
// TODO: timebox PreStop execution to at most gracePeriod
|
||||
if err := dm.runner.Run(ID, pod, container, container.Lifecycle.PreStop); err != nil {
|
||||
glog.Errorf("preStop hook for container %q failed: %v", name, err)
|
||||
}
|
||||
gracePeriod -= int64(util.Now().Sub(start.Time).Seconds())
|
||||
}
|
||||
|
||||
dm.readinessManager.RemoveReadiness(ID)
|
||||
|
||||
// always give containers a minimal shutdown window to avoid unnecessary SIGKILLs
|
||||
if gracePeriod < minimumGracePeriodInSeconds {
|
||||
gracePeriod = minimumGracePeriodInSeconds
|
||||
}
|
||||
err := dm.client.StopContainer(ID, uint(gracePeriod))
|
||||
ref, ok := dm.containerRefManager.GetRef(ID)
|
||||
if !ok {
|
||||
glog.Warningf("No ref for pod '%q'", name)
|
||||
} else {
|
||||
// TODO: pass reason down here, and state, or move this call up the stack.
|
||||
dm.recorder.Eventf(ref, "Killing", "Killing with docker id %v", util.ShortenString(ID, 12))
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
var errNoPodOnContainer = fmt.Errorf("no pod information labels on Docker container")
|
||||
|
||||
// containerAndPodFromLabels tries to load the appropriate container info off of a Docker container's labels
|
||||
func containerAndPodFromLabels(inspect *docker.Container) (pod *api.Pod, container *api.Container, err error) {
|
||||
if inspect == nil && inspect.Config == nil && inspect.Config.Labels == nil {
|
||||
return nil, nil, errNoPodOnContainer
|
||||
}
|
||||
labels := inspect.Config.Labels
|
||||
|
||||
// the pod data may not be set
|
||||
if body, found := labels[kubernetesPodLabel]; found {
|
||||
pod = &api.Pod{}
|
||||
if err = latest.Codec.DecodeInto([]byte(body), pod); err == nil {
|
||||
name := labels[kubernetesContainerLabel]
|
||||
for ix := range pod.Spec.Containers {
|
||||
if pod.Spec.Containers[ix].Name == name {
|
||||
container = &pod.Spec.Containers[ix]
|
||||
break
|
||||
}
|
||||
}
|
||||
if container != nil {
|
||||
glog.V(1).Infof("Running preStop hook")
|
||||
if err := dm.runner.Run(ID, &pod, container, container.Lifecycle.PreStop); err != nil {
|
||||
glog.Errorf("failed to run preStop hook: %v", err)
|
||||
}
|
||||
} else {
|
||||
glog.Errorf("unable to find container %v, %s", pod, name)
|
||||
if container == nil {
|
||||
err = fmt.Errorf("unable to find container %s in pod %v", name, pod)
|
||||
}
|
||||
} else {
|
||||
pod = nil
|
||||
}
|
||||
}
|
||||
|
||||
// attempt to find the default grace period if we didn't commit a pod, but set the generic metadata
|
||||
// field (the one used by kill)
|
||||
if pod == nil {
|
||||
if period, ok := labels[kubernetesTerminationGracePeriodLabel]; ok {
|
||||
if seconds, err := strconv.ParseInt(period, 10, 64); err == nil {
|
||||
pod = &api.Pod{}
|
||||
pod.DeletionGracePeriodSeconds = &seconds
|
||||
}
|
||||
}
|
||||
}
|
||||
dm.readinessManager.RemoveReadiness(ID)
|
||||
err = dm.client.StopContainer(ID, 10)
|
||||
ref, ok := dm.containerRefManager.GetRef(ID)
|
||||
if !ok {
|
||||
glog.Warningf("No ref for pod '%v'", ID)
|
||||
} else {
|
||||
// TODO: pass reason down here, and state, or move this call up the stack.
|
||||
dm.recorder.Eventf(ref, "Killing", "Killing with docker id %v", util.ShortenString(ID, 12))
|
||||
}
|
||||
return err
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Run a single container from a pod. Returns the docker container ID
|
||||
@@ -1253,7 +1338,7 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
|
||||
if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
|
||||
handlerErr := dm.runner.Run(id, pod, container, container.Lifecycle.PostStart)
|
||||
if handlerErr != nil {
|
||||
dm.killContainer(types.UID(id))
|
||||
dm.killContainer(types.UID(id), container, pod)
|
||||
return kubeletTypes.DockerID(""), fmt.Errorf("failed to call event handler: %v", handlerErr)
|
||||
}
|
||||
}
|
||||
@@ -1413,6 +1498,11 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub
|
||||
containersToKeep := make(map[kubeletTypes.DockerID]int)
|
||||
createPodInfraContainer := false
|
||||
|
||||
if pod.DeletionTimestamp != nil {
|
||||
glog.V(4).Infof("Pod is terminating %q", podFullName)
|
||||
return PodContainerChangesSpec{}, nil
|
||||
}
|
||||
|
||||
var err error
|
||||
var podInfraContainerID kubeletTypes.DockerID
|
||||
var changed bool
|
||||
@@ -1547,7 +1637,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
|
||||
}
|
||||
|
||||
// Killing phase: if we want to start new infra container, or nothing is running kill everything (including infra container)
|
||||
err = dm.KillPod(runningPod)
|
||||
err = dm.KillPod(pod, runningPod)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1557,7 +1647,15 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
|
||||
_, keep := containerChanges.ContainersToKeep[kubeletTypes.DockerID(container.ID)]
|
||||
if !keep {
|
||||
glog.V(3).Infof("Killing unwanted container %+v", container)
|
||||
err = dm.KillContainer(container.ID)
|
||||
// attempt to find the appropriate container policy
|
||||
var podContainer *api.Container
|
||||
for i, c := range pod.Spec.Containers {
|
||||
if c.Name == container.Name {
|
||||
podContainer = &pod.Spec.Containers[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
err = dm.KillContainerInPod(container.ID, podContainer, pod)
|
||||
if err != nil {
|
||||
glog.Errorf("Error killing container: %v", err)
|
||||
}
|
||||
|
@@ -405,7 +405,7 @@ func TestKillContainerInPod(t *testing.T) {
|
||||
manager.readinessManager.SetReadiness(c.ID, true)
|
||||
}
|
||||
|
||||
if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil {
|
||||
if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
// Assert the container has been stopped.
|
||||
@@ -478,14 +478,14 @@ func TestKillContainerInPodWithPreStop(t *testing.T) {
|
||||
manager.readinessManager.SetReadiness(c.ID, true)
|
||||
}
|
||||
|
||||
if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil {
|
||||
if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
// Assert the container has been stopped.
|
||||
if err := fakeDocker.AssertStopped([]string{containerToKill.ID}); err != nil {
|
||||
t.Errorf("container was not stopped correctly: %v", err)
|
||||
}
|
||||
verifyCalls(t, fakeDocker, []string{"list", "inspect_container", "create_exec", "start_exec", "stop"})
|
||||
verifyCalls(t, fakeDocker, []string{"list", "create_exec", "start_exec", "stop"})
|
||||
if !reflect.DeepEqual(expectedCmd, fakeDocker.execCmd) {
|
||||
t.Errorf("expected: %v, got %v", expectedCmd, fakeDocker.execCmd)
|
||||
}
|
||||
@@ -522,7 +522,7 @@ func TestKillContainerInPodWithError(t *testing.T) {
|
||||
manager.readinessManager.SetReadiness(c.ID, true)
|
||||
}
|
||||
|
||||
if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err == nil {
|
||||
if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err == nil {
|
||||
t.Errorf("expected error, found nil")
|
||||
}
|
||||
|
||||
@@ -1021,7 +1021,7 @@ func TestSyncPodDeletesWithNoPodInfraContainer(t *testing.T) {
|
||||
|
||||
verifyCalls(t, fakeDocker, []string{
|
||||
// Kill the container since pod infra container is not running.
|
||||
"inspect_container", "stop",
|
||||
"stop",
|
||||
// Create pod infra container.
|
||||
"create", "start", "inspect_container",
|
||||
// Create container.
|
||||
@@ -1096,7 +1096,7 @@ func TestSyncPodDeletesDuplicate(t *testing.T) {
|
||||
// Check the pod infra container.
|
||||
"inspect_container",
|
||||
// Kill the duplicated container.
|
||||
"inspect_container", "stop",
|
||||
"stop",
|
||||
})
|
||||
// Expect one of the duplicates to be killed.
|
||||
if len(fakeDocker.Stopped) != 1 || (fakeDocker.Stopped[0] != "1234" && fakeDocker.Stopped[0] != "4567") {
|
||||
@@ -1150,7 +1150,7 @@ func TestSyncPodBadHash(t *testing.T) {
|
||||
// Check the pod infra container.
|
||||
"inspect_container",
|
||||
// Kill and restart the bad hash container.
|
||||
"inspect_container", "stop", "create", "start", "inspect_container",
|
||||
"stop", "create", "start",
|
||||
})
|
||||
|
||||
if err := fakeDocker.AssertStopped([]string{"1234"}); err != nil {
|
||||
@@ -1208,7 +1208,7 @@ func TestSyncPodsUnhealthy(t *testing.T) {
|
||||
// Check the pod infra container.
|
||||
"inspect_container",
|
||||
// Kill the unhealthy container.
|
||||
"inspect_container", "stop",
|
||||
"stop",
|
||||
// Restart the unhealthy container.
|
||||
"create", "start", "inspect_container",
|
||||
})
|
||||
@@ -1443,7 +1443,7 @@ func TestSyncPodWithRestartPolicy(t *testing.T) {
|
||||
// Check the pod infra container.
|
||||
"inspect_container",
|
||||
// Stop the last pod infra container.
|
||||
"inspect_container", "stop",
|
||||
"stop",
|
||||
},
|
||||
[]string{},
|
||||
[]string{"9876"},
|
||||
@@ -1910,7 +1910,7 @@ func TestSyncPodEventHandlerFails(t *testing.T) {
|
||||
// Create the container.
|
||||
"create", "start",
|
||||
// Kill the container since event handler fails.
|
||||
"inspect_container", "stop",
|
||||
"stop",
|
||||
})
|
||||
|
||||
// TODO(yifan): Check the stopped container's name.
|
||||
|
Reference in New Issue
Block a user