Override terminationLivenessGracePeriod for probes
This commit is contained in:
parent
c88512d34f
commit
9fb6e712ff
@ -227,7 +227,7 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
|
|||||||
msg, handlerErr := m.runner.Run(kubeContainerID, pod, container, container.Lifecycle.PostStart)
|
msg, handlerErr := m.runner.Run(kubeContainerID, pod, container, container.Lifecycle.PostStart)
|
||||||
if handlerErr != nil {
|
if handlerErr != nil {
|
||||||
m.recordContainerEvent(pod, container, kubeContainerID.ID, v1.EventTypeWarning, events.FailedPostStartHook, msg)
|
m.recordContainerEvent(pod, container, kubeContainerID.ID, v1.EventTypeWarning, events.FailedPostStartHook, msg)
|
||||||
if err := m.killContainer(pod, kubeContainerID, container.Name, "FailedPostStartHook", nil); err != nil {
|
if err := m.killContainer(pod, kubeContainerID, container.Name, "FailedPostStartHook", reasonFailedPostStartHook, nil); err != nil {
|
||||||
klog.ErrorS(fmt.Errorf("%s: %v", ErrPostStartHook, handlerErr), "Failed to kill container", "pod", klog.KObj(pod),
|
klog.ErrorS(fmt.Errorf("%s: %v", ErrPostStartHook, handlerErr), "Failed to kill container", "pod", klog.KObj(pod),
|
||||||
"podUID", pod.UID, "containerName", container.Name, "containerID", kubeContainerID.String())
|
"podUID", pod.UID, "containerName", container.Name, "containerID", kubeContainerID.String())
|
||||||
}
|
}
|
||||||
@ -596,7 +596,7 @@ func (m *kubeGenericRuntimeManager) restoreSpecsFromContainerLabels(containerID
|
|||||||
// killContainer kills a container through the following steps:
|
// killContainer kills a container through the following steps:
|
||||||
// * Run the pre-stop lifecycle hooks (if applicable).
|
// * Run the pre-stop lifecycle hooks (if applicable).
|
||||||
// * Stop the container.
|
// * Stop the container.
|
||||||
func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubecontainer.ContainerID, containerName string, message string, gracePeriodOverride *int64) error {
|
func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubecontainer.ContainerID, containerName string, message string, reason containerKillReason, gracePeriodOverride *int64) error {
|
||||||
var containerSpec *v1.Container
|
var containerSpec *v1.Container
|
||||||
if pod != nil {
|
if pod != nil {
|
||||||
if containerSpec = kubecontainer.GetContainerSpec(pod, containerName); containerSpec == nil {
|
if containerSpec = kubecontainer.GetContainerSpec(pod, containerName); containerSpec == nil {
|
||||||
@ -619,6 +619,19 @@ func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubec
|
|||||||
gracePeriod = *pod.DeletionGracePeriodSeconds
|
gracePeriod = *pod.DeletionGracePeriodSeconds
|
||||||
case pod.Spec.TerminationGracePeriodSeconds != nil:
|
case pod.Spec.TerminationGracePeriodSeconds != nil:
|
||||||
gracePeriod = *pod.Spec.TerminationGracePeriodSeconds
|
gracePeriod = *pod.Spec.TerminationGracePeriodSeconds
|
||||||
|
|
||||||
|
if utilfeature.DefaultFeatureGate.Enabled(features.ProbeTerminationGracePeriod) {
|
||||||
|
switch reason {
|
||||||
|
case reasonStartupProbe:
|
||||||
|
if containerSpec.StartupProbe != nil && containerSpec.StartupProbe.TerminationGracePeriodSeconds != nil {
|
||||||
|
gracePeriod = *containerSpec.StartupProbe.TerminationGracePeriodSeconds
|
||||||
|
}
|
||||||
|
case reasonLivenessProbe:
|
||||||
|
if containerSpec.LivenessProbe != nil && containerSpec.LivenessProbe.TerminationGracePeriodSeconds != nil {
|
||||||
|
gracePeriod = *containerSpec.LivenessProbe.TerminationGracePeriodSeconds
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(message) == 0 {
|
if len(message) == 0 {
|
||||||
@ -672,7 +685,7 @@ func (m *kubeGenericRuntimeManager) killContainersWithSyncResult(pod *v1.Pod, ru
|
|||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, container.Name)
|
killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, container.Name)
|
||||||
if err := m.killContainer(pod, container.ID, container.Name, "", gracePeriodOverride); err != nil {
|
if err := m.killContainer(pod, container.ID, container.Name, "", reasonUnknown, gracePeriodOverride); err != nil {
|
||||||
killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
|
killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
|
||||||
klog.ErrorS(err, "Kill container failed", "pod", klog.KObj(pod), "podUID", pod.UID,
|
klog.ErrorS(err, "Kill container failed", "pod", klog.KObj(pod), "podUID", pod.UID,
|
||||||
"containerName", container.Name, "containerID", container.ID)
|
"containerName", container.Name, "containerID", container.ID)
|
||||||
|
@ -120,7 +120,7 @@ func TestKillContainer(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
err := m.killContainer(test.pod, test.containerID, test.containerName, test.reason, &test.gracePeriodOverride)
|
err := m.killContainer(test.pod, test.containerID, test.containerName, test.reason, "", &test.gracePeriodOverride)
|
||||||
if test.succeed != (err == nil) {
|
if test.succeed != (err == nil) {
|
||||||
t.Errorf("%s: expected %v, got %v (%v)", test.caseName, test.succeed, (err == nil), err)
|
t.Errorf("%s: expected %v, got %v (%v)", test.caseName, test.succeed, (err == nil), err)
|
||||||
}
|
}
|
||||||
@ -290,7 +290,7 @@ func TestLifeCycleHook(t *testing.T) {
|
|||||||
// Configured and works as expected
|
// Configured and works as expected
|
||||||
t.Run("PreStop-CMDExec", func(t *testing.T) {
|
t.Run("PreStop-CMDExec", func(t *testing.T) {
|
||||||
testPod.Spec.Containers[0].Lifecycle = cmdLifeCycle
|
testPod.Spec.Containers[0].Lifecycle = cmdLifeCycle
|
||||||
m.killContainer(testPod, cID, "foo", "testKill", &gracePeriod)
|
m.killContainer(testPod, cID, "foo", "testKill", "", &gracePeriod)
|
||||||
if fakeRunner.Cmd[0] != cmdLifeCycle.PreStop.Exec.Command[0] {
|
if fakeRunner.Cmd[0] != cmdLifeCycle.PreStop.Exec.Command[0] {
|
||||||
t.Errorf("CMD Prestop hook was not invoked")
|
t.Errorf("CMD Prestop hook was not invoked")
|
||||||
}
|
}
|
||||||
@ -300,7 +300,7 @@ func TestLifeCycleHook(t *testing.T) {
|
|||||||
t.Run("PreStop-HTTPGet", func(t *testing.T) {
|
t.Run("PreStop-HTTPGet", func(t *testing.T) {
|
||||||
defer func() { fakeHTTP.url = "" }()
|
defer func() { fakeHTTP.url = "" }()
|
||||||
testPod.Spec.Containers[0].Lifecycle = httpLifeCycle
|
testPod.Spec.Containers[0].Lifecycle = httpLifeCycle
|
||||||
m.killContainer(testPod, cID, "foo", "testKill", &gracePeriod)
|
m.killContainer(testPod, cID, "foo", "testKill", "", &gracePeriod)
|
||||||
|
|
||||||
if !strings.Contains(fakeHTTP.url, httpLifeCycle.PreStop.HTTPGet.Host) {
|
if !strings.Contains(fakeHTTP.url, httpLifeCycle.PreStop.HTTPGet.Host) {
|
||||||
t.Errorf("HTTP Prestop hook was not invoked")
|
t.Errorf("HTTP Prestop hook was not invoked")
|
||||||
@ -314,7 +314,7 @@ func TestLifeCycleHook(t *testing.T) {
|
|||||||
testPod.DeletionGracePeriodSeconds = &gracePeriodLocal
|
testPod.DeletionGracePeriodSeconds = &gracePeriodLocal
|
||||||
testPod.Spec.TerminationGracePeriodSeconds = &gracePeriodLocal
|
testPod.Spec.TerminationGracePeriodSeconds = &gracePeriodLocal
|
||||||
|
|
||||||
m.killContainer(testPod, cID, "foo", "testKill", &gracePeriodLocal)
|
m.killContainer(testPod, cID, "foo", "testKill", "", &gracePeriodLocal)
|
||||||
|
|
||||||
if strings.Contains(fakeHTTP.url, httpLifeCycle.PreStop.HTTPGet.Host) {
|
if strings.Contains(fakeHTTP.url, httpLifeCycle.PreStop.HTTPGet.Host) {
|
||||||
t.Errorf("HTTP Should not execute when gracePeriod is 0")
|
t.Errorf("HTTP Should not execute when gracePeriod is 0")
|
||||||
|
@ -137,7 +137,7 @@ func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int
|
|||||||
ID: containers[i].id,
|
ID: containers[i].id,
|
||||||
}
|
}
|
||||||
message := "Container is in unknown state, try killing it before removal"
|
message := "Container is in unknown state, try killing it before removal"
|
||||||
if err := cgc.manager.killContainer(nil, id, containers[i].name, message, nil); err != nil {
|
if err := cgc.manager.killContainer(nil, id, containers[i].name, message, reasonUnknown, nil); err != nil {
|
||||||
klog.Errorf("Failed to stop container %q: %v", containers[i].id, err)
|
klog.Errorf("Failed to stop container %q: %v", containers[i].id, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -403,6 +403,16 @@ func (m *kubeGenericRuntimeManager) GetPods(all bool) ([]*kubecontainer.Pod, err
|
|||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// containerKillReason is a typed classification of why a container is being
// killed. killContainer switches on it to decide whether a probe-level
// TerminationGracePeriodSeconds should replace the pod-level grace period.
// NOTE(review): computePodActions appears to leave the reason at its zero
// value ("") for non-probe kills rather than reasonUnknown; confirm that both
// are meant to be treated the same.
|
||||||
|
type containerKillReason string
|
||||||
|
|
||||||
|
const (
|
||||||
|
reasonStartupProbe containerKillReason = "StartupProbe" // the container failed its startup probe
|
||||||
|
reasonLivenessProbe containerKillReason = "LivenessProbe" // the container failed its liveness probe
|
||||||
|
reasonFailedPostStartHook containerKillReason = "FailedPostStartHook" // the PostStart lifecycle handler returned an error
|
||||||
|
reasonUnknown containerKillReason = "Unknown" // passed by callers that have no more specific cause
|
||||||
|
)
|
||||||
|
|
||||||
// containerToKillInfo contains necessary information to kill a container.
|
// containerToKillInfo contains necessary information to kill a container.
|
||||||
type containerToKillInfo struct {
|
type containerToKillInfo struct {
|
||||||
// The spec of the container.
|
// The spec of the container.
|
||||||
@ -411,6 +421,9 @@ type containerToKillInfo struct {
|
|||||||
name string
|
name string
|
||||||
// The message indicates why the container will be killed.
|
// The message indicates why the container will be killed.
|
||||||
message string
|
message string
|
||||||
|
// The reason is a typed classification of why the container will be killed; killContainer switches on it to select a probe-specific grace period.
|
||||||
|
// TODO: replace message with reason?
|
||||||
|
reason containerKillReason
|
||||||
}
|
}
|
||||||
|
|
||||||
// podActions keeps information what to do for a pod.
|
// podActions keeps information what to do for a pod.
|
||||||
@ -582,6 +595,7 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
|
|||||||
container: next,
|
container: next,
|
||||||
message: fmt.Sprintf("Init container is in %q state, try killing it before restart",
|
message: fmt.Sprintf("Init container is in %q state, try killing it before restart",
|
||||||
initLastStatus.State),
|
initLastStatus.State),
|
||||||
|
reason: reasonUnknown,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
changes.NextInitContainerToStart = next
|
changes.NextInitContainerToStart = next
|
||||||
@ -623,6 +637,7 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
|
|||||||
container: &pod.Spec.Containers[idx],
|
container: &pod.Spec.Containers[idx],
|
||||||
message: fmt.Sprintf("Container is in %q state, try killing it before restart",
|
message: fmt.Sprintf("Container is in %q state, try killing it before restart",
|
||||||
containerStatus.State),
|
containerStatus.State),
|
||||||
|
reason: reasonUnknown,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -630,6 +645,7 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
|
|||||||
}
|
}
|
||||||
// The container is running, but kill the container if any of the following condition is met.
|
// The container is running, but kill the container if any of the following condition is met.
|
||||||
var message string
|
var message string
|
||||||
|
var reason containerKillReason
|
||||||
restart := shouldRestartOnFailure(pod)
|
restart := shouldRestartOnFailure(pod)
|
||||||
if _, _, changed := containerChanged(&container, containerStatus); changed {
|
if _, _, changed := containerChanged(&container, containerStatus); changed {
|
||||||
message = fmt.Sprintf("Container %s definition changed", container.Name)
|
message = fmt.Sprintf("Container %s definition changed", container.Name)
|
||||||
@ -639,9 +655,11 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
|
|||||||
} else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
|
} else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
|
||||||
// If the container failed the liveness probe, we should kill it.
|
// If the container failed the liveness probe, we should kill it.
|
||||||
message = fmt.Sprintf("Container %s failed liveness probe", container.Name)
|
message = fmt.Sprintf("Container %s failed liveness probe", container.Name)
|
||||||
|
reason = reasonLivenessProbe
|
||||||
} else if startup, found := m.startupManager.Get(containerStatus.ID); found && startup == proberesults.Failure {
|
} else if startup, found := m.startupManager.Get(containerStatus.ID); found && startup == proberesults.Failure {
|
||||||
// If the container failed the startup probe, we should kill it.
|
// If the container failed the startup probe, we should kill it.
|
||||||
message = fmt.Sprintf("Container %s failed startup probe", container.Name)
|
message = fmt.Sprintf("Container %s failed startup probe", container.Name)
|
||||||
|
reason = reasonStartupProbe
|
||||||
} else {
|
} else {
|
||||||
// Keep the container.
|
// Keep the container.
|
||||||
keepCount++
|
keepCount++
|
||||||
@ -660,6 +678,7 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
|
|||||||
name: containerStatus.Name,
|
name: containerStatus.Name,
|
||||||
container: &pod.Spec.Containers[idx],
|
container: &pod.Spec.Containers[idx],
|
||||||
message: message,
|
message: message,
|
||||||
|
reason: reason,
|
||||||
}
|
}
|
||||||
klog.V(2).InfoS("Message for Container of pod", "containerName", container.Name, "containerStatusID", containerStatus.ID, "pod", klog.KObj(pod), "containerMessage", message)
|
klog.V(2).InfoS("Message for Container of pod", "containerName", container.Name, "containerStatusID", containerStatus.ID, "pod", klog.KObj(pod), "containerMessage", message)
|
||||||
}
|
}
|
||||||
@ -720,7 +739,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
|
|||||||
klog.V(3).InfoS("Killing unwanted container for pod", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
|
klog.V(3).InfoS("Killing unwanted container for pod", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
|
||||||
killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
|
killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
|
||||||
result.AddSyncResult(killContainerResult)
|
result.AddSyncResult(killContainerResult)
|
||||||
if err := m.killContainer(pod, containerID, containerInfo.name, containerInfo.message, nil); err != nil {
|
if err := m.killContainer(pod, containerID, containerInfo.name, containerInfo.message, containerInfo.reason, nil); err != nil {
|
||||||
killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
|
killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
|
||||||
klog.ErrorS(err, "killContainer for pod failed", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
|
klog.ErrorS(err, "killContainer for pod failed", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
|
||||||
return
|
return
|
||||||
|
@ -1002,9 +1002,10 @@ func getKillMapWithInitContainers(pod *v1.Pod, status *kubecontainer.PodStatus,
|
|||||||
|
|
||||||
func verifyActions(t *testing.T, expected, actual *podActions, desc string) {
|
func verifyActions(t *testing.T, expected, actual *podActions, desc string) {
|
||||||
if actual.ContainersToKill != nil {
|
if actual.ContainersToKill != nil {
|
||||||
// Clear the message field since we don't need to verify the message.
|
// Clear the message and reason fields since we don't need to verify them.
|
||||||
for k, info := range actual.ContainersToKill {
|
for k, info := range actual.ContainersToKill {
|
||||||
info.message = ""
|
info.message = ""
|
||||||
|
info.reason = ""
|
||||||
actual.ContainersToKill[k] = info
|
actual.ContainersToKill[k] = info
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user