Merge pull request #84279 from matthyx/kuberuntime-startupprobe
Add startupProbe result handling to kuberuntime
Commit a08b09d52f
@@ -582,6 +582,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
 	imageBackOff := flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff)

 	klet.livenessManager = proberesults.NewManager()
+	klet.startupManager = proberesults.NewManager()

 	klet.podCache = kubecontainer.NewCache()
 	var checkpointManager checkpointmanager.CheckpointManager
@@ -671,6 +672,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
 	runtime, err := kuberuntime.NewKubeGenericRuntimeManager(
 		kubecontainer.FilterEventRecorder(kubeDeps.Recorder),
 		klet.livenessManager,
+		klet.startupManager,
 		seccompProfileRoot,
 		containerRefManager,
 		machineInfo,
@@ -777,6 +779,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
 	klet.probeManager = prober.NewManager(
 		klet.statusManager,
 		klet.livenessManager,
+		klet.startupManager,
 		klet.runner,
 		containerRefManager,
 		kubeDeps.Recorder)
@@ -972,6 +975,7 @@ type Kubelet struct {
 	probeManager prober.Manager
 	// Manages container health check results.
 	livenessManager proberesults.Manager
+	startupManager  proberesults.Manager

 	// How long to keep idle streaming command execution/port forwarding
 	// connections open before terminating them
@@ -235,6 +235,7 @@ func newTestKubeletWithImageList(

 	kubelet.probeManager = probetest.FakeManager{}
 	kubelet.livenessManager = proberesults.NewManager()
+	kubelet.startupManager = proberesults.NewManager()

 	kubelet.containerManager = cm.NewStubContainerManager()
 	fakeNodeRef := &v1.ObjectReference{
@@ -110,6 +110,7 @@ go_test(
        "//pkg/kubelet/container/testing:go_default_library",
        "//pkg/kubelet/lifecycle:go_default_library",
        "//pkg/kubelet/metrics:go_default_library",
+       "//pkg/kubelet/prober/results:go_default_library",
        "//pkg/kubelet/runtimeclass:go_default_library",
        "//pkg/kubelet/runtimeclass/testing:go_default_library",
        "//staging/src/k8s.io/api/core/v1:go_default_library",
@@ -78,6 +78,7 @@ func newFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageS
 		cpuCFSQuota:         false,
 		cpuCFSQuotaPeriod:   metav1.Duration{Duration: time.Microsecond * 100},
 		livenessManager:     proberesults.NewManager(),
+		startupManager:      proberesults.NewManager(),
 		containerRefManager: kubecontainer.NewRefManager(),
 		machineInfo:         machineInfo,
 		osInterface:         osInterface,
@@ -100,6 +100,7 @@ type kubeGenericRuntimeManager struct {

 	// Health check results.
 	livenessManager proberesults.Manager
+	startupManager  proberesults.Manager

 	// If true, enforce container cpu limits with CFS quota support
 	cpuCFSQuota bool
@ -150,6 +151,7 @@ type LegacyLogProvider interface {
|
|||||||
func NewKubeGenericRuntimeManager(
|
func NewKubeGenericRuntimeManager(
|
||||||
recorder record.EventRecorder,
|
recorder record.EventRecorder,
|
||||||
livenessManager proberesults.Manager,
|
livenessManager proberesults.Manager,
|
||||||
|
startupManager proberesults.Manager,
|
||||||
seccompProfileRoot string,
|
seccompProfileRoot string,
|
||||||
containerRefManager *kubecontainer.RefManager,
|
containerRefManager *kubecontainer.RefManager,
|
||||||
machineInfo *cadvisorapi.MachineInfo,
|
machineInfo *cadvisorapi.MachineInfo,
|
||||||
@@ -175,6 +177,7 @@ func NewKubeGenericRuntimeManager(
 		cpuCFSQuotaPeriod:   cpuCFSQuotaPeriod,
 		seccompProfileRoot:  seccompProfileRoot,
 		livenessManager:     livenessManager,
+		startupManager:      startupManager,
 		containerRefManager: containerRefManager,
 		machineInfo:         machineInfo,
 		osInterface:         osInterface,
@@ -590,6 +593,9 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
 		} else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
 			// If the container failed the liveness probe, we should kill it.
 			message = fmt.Sprintf("Container %s failed liveness probe", container.Name)
+		} else if startup, found := m.startupManager.Get(containerStatus.ID); found && startup == proberesults.Failure {
+			// If the container failed the startup probe, we should kill it.
+			message = fmt.Sprintf("Container %s failed startup probe", container.Name)
 		} else {
 			// Keep the container.
 			keepCount++
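For context, the new branch mirrors the existing liveness handling: when the startup result manager holds a Failure for a running container, computePodActions marks that container to be killed and restarted. A minimal standalone sketch of that decision order follows; it is illustrative only, with a hypothetical resultStore standing in for proberesults.Manager, and is not the actual kubelet code.

package main

import "fmt"

// Result mirrors the three-valued probe result introduced by this change.
type Result int

const (
	Unknown Result = iota - 1 // no result recorded yet
	Success                   // probe passed
	Failure                   // probe failed
)

// resultStore is a hypothetical stand-in for proberesults.Manager.
type resultStore map[string]Result

func (s resultStore) Get(id string) (Result, bool) {
	r, ok := s[id]
	return r, ok
}

// shouldRestart sketches the computePodActions decision: a running container
// is killed and restarted if either its liveness or its startup probe has
// recorded a Failure; Unknown and Success both keep the container running.
func shouldRestart(liveness, startup resultStore, containerID, name string) (bool, string) {
	if r, found := liveness.Get(containerID); found && r == Failure {
		return true, fmt.Sprintf("Container %s failed liveness probe", name)
	}
	if r, found := startup.Get(containerID); found && r == Failure {
		return true, fmt.Sprintf("Container %s failed startup probe", name)
	}
	return false, ""
}

func main() {
	liveness := resultStore{}
	startup := resultStore{"c1": Failure}
	restart, msg := shouldRestart(liveness, startup, "c1", "app")
	fmt.Println(restart, msg) // true Container app failed startup probe
}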
@@ -41,6 +41,7 @@ import (
 	"k8s.io/kubernetes/pkg/features"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
+	proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
 )

 var (
@@ -732,6 +733,7 @@ func TestComputePodActions(t *testing.T) {
 		mutatePodFn    func(*v1.Pod)
 		mutateStatusFn func(*kubecontainer.PodStatus)
 		actions        podActions
+		resetStatusFn  func(*kubecontainer.PodStatus)
 	}{
 		"everying is good; do nothing": {
 			actions: noAction,
@@ -850,8 +852,38 @@ func TestComputePodActions(t *testing.T) {
 				ContainersToKill:  getKillMap(basePod, baseStatus, []int{1}),
 				ContainersToStart: []int{1},
 			},
-			// TODO: Add a test case for containers which failed the liveness
-			// check. Will need to fake the livessness check result.
+		},
+		"Kill and recreate the container if the liveness check has failed": {
+			mutatePodFn: func(pod *v1.Pod) {
+				pod.Spec.RestartPolicy = v1.RestartPolicyAlways
+			},
+			mutateStatusFn: func(status *kubecontainer.PodStatus) {
+				m.livenessManager.Set(status.ContainerStatuses[1].ID, proberesults.Failure, basePod)
+			},
+			actions: podActions{
+				SandboxID:         baseStatus.SandboxStatuses[0].Id,
+				ContainersToKill:  getKillMap(basePod, baseStatus, []int{1}),
+				ContainersToStart: []int{1},
+			},
+			resetStatusFn: func(status *kubecontainer.PodStatus) {
+				m.livenessManager.Remove(status.ContainerStatuses[1].ID)
+			},
+		},
+		"Kill and recreate the container if the startup check has failed": {
+			mutatePodFn: func(pod *v1.Pod) {
+				pod.Spec.RestartPolicy = v1.RestartPolicyAlways
+			},
+			mutateStatusFn: func(status *kubecontainer.PodStatus) {
+				m.startupManager.Set(status.ContainerStatuses[1].ID, proberesults.Failure, basePod)
+			},
+			actions: podActions{
+				SandboxID:         baseStatus.SandboxStatuses[0].Id,
+				ContainersToKill:  getKillMap(basePod, baseStatus, []int{1}),
+				ContainersToStart: []int{1},
+			},
+			resetStatusFn: func(status *kubecontainer.PodStatus) {
+				m.startupManager.Remove(status.ContainerStatuses[1].ID)
+			},
 		},
 		"Verify we do not create a pod sandbox if no ready sandbox for pod with RestartPolicy=Never and all containers exited": {
 			mutatePodFn: func(pod *v1.Pod) {
@@ -917,6 +949,9 @@ func TestComputePodActions(t *testing.T) {
 		}
 		actions := m.computePodActions(pod, status)
 		verifyActions(t, &test.actions, &actions, desc)
+		if test.resetStatusFn != nil {
+			test.resetStatusFn(status)
+		}
 	}
 }

@@ -112,6 +112,7 @@ func newTestManager() *manager {
 	m := NewManager(
 		status.NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}),
 		results.NewManager(),
+		results.NewManager(),
 		nil, // runner
 		refManager,
 		&record.FakeRecorder{},
@@ -102,13 +102,13 @@ type manager struct {
 func NewManager(
 	statusManager status.Manager,
 	livenessManager results.Manager,
+	startupManager results.Manager,
 	runner kubecontainer.ContainerCommandRunner,
 	refManager *kubecontainer.RefManager,
 	recorder record.EventRecorder) Manager {

 	prober := newProber(runner, refManager, recorder)
 	readinessManager := results.NewManager()
-	startupManager := results.NewManager()
 	return &manager{
 		statusManager: statusManager,
 		prober:        prober,
@@ -40,14 +40,17 @@ type Manager interface {
 }

 // Result is the type for probe results.
-type Result bool
+type Result int

 const (
-	// Success is encoded as "true" (type Result)
-	Success Result = true
+	// Unknown is encoded as -1 (type Result)
+	Unknown Result = iota - 1

-	// Failure is encoded as "false" (type Result)
-	Failure Result = false
+	// Success is encoded as 0 (type Result)
+	Success
+
+	// Failure is encoded as 1 (type Result)
+	Failure
 )

 func (r Result) String() string {
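Switching Result from bool to int makes room for a third state: Unknown starts the iota block at -1, Success takes 0, and Failure takes 1 by continuing the same expression. A tiny self-contained sketch (a re-declaration for illustration, not the results package itself) that prints the encoded values:

package main

import "fmt"

// Result re-declared for illustration; the real type lives in
// k8s.io/kubernetes/pkg/kubelet/prober/results.
type Result int

const (
	// Unknown is iota (0) minus 1 on the first const spec, i.e. -1.
	Unknown Result = iota - 1
	// Success repeats the implicit "iota - 1" expression, i.e. 0.
	Success
	// Failure is 1.
	Failure
)

func main() {
	fmt.Println(Unknown, Success, Failure) // -1 0 1
}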
@@ -101,7 +101,7 @@ func newWorker(
 	case startup:
 		w.spec = container.StartupProbe
 		w.resultsManager = m.startupManager
-		w.initialValue = results.Failure
+		w.initialValue = results.Unknown
 	}

 	basicMetricLabels := metrics.Labels{
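The worker seeds each result manager with an initial value before the first real probe runs; per the test comment further below, that is Success for liveness, Failure for readiness, and now Unknown for startup. A simplified sketch of that mapping (illustrative names, not the actual worker code):

package main

import "fmt"

type Result int

const (
	Unknown Result = iota - 1
	Success
	Failure
)

type probeType int

const (
	liveness probeType = iota
	readiness
	startup
)

// initialValueFor sketches how a probe worker seeds its results manager
// before the first probe result arrives.
func initialValueFor(p probeType) Result {
	switch p {
	case liveness:
		return Success // treat the container as alive so it is not killed prematurely
	case readiness:
		return Failure // treat the container as not ready so it gets no traffic prematurely
	case startup:
		return Unknown // not yet started, but not failed either
	default:
		return Unknown
	}
}

func main() {
	fmt.Println(initialValueFor(liveness), initialValueFor(readiness), initialValueFor(startup)) // 0 1 -1
}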
@@ -79,10 +79,12 @@ func TestDoProbe(t *testing.T) {
 			podStatus:      &pendingStatus,
 			expectContinue: true,
 			expectSet:      true,
+			expectedResult: results.Failure,
 		},
 		{ // Container terminated
 			podStatus:      &terminatedStatus,
 			expectSet:      true,
+			expectedResult: results.Failure,
 		},
 		{ // Probe successful.
 			podStatus: &runningStatus,
|
|||||||
m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(probeType != startup))
|
m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(probeType != startup))
|
||||||
|
|
||||||
expectContinue(t, w, w.doProbe(), "during initial delay")
|
expectContinue(t, w, w.doProbe(), "during initial delay")
|
||||||
// Default value depends on probe, true for liveness, otherwise false.
|
// Default value depends on probe, Success for liveness, Failure for readiness, Unknown for startup
|
||||||
expectResult(t, w, results.Result(probeType == liveness), "during initial delay")
|
switch probeType {
|
||||||
|
case liveness:
|
||||||
|
expectResult(t, w, results.Success, "during initial delay")
|
||||||
|
case readiness:
|
||||||
|
expectResult(t, w, results.Failure, "during initial delay")
|
||||||
|
case startup:
|
||||||
|
expectResult(t, w, results.Unknown, "during initial delay")
|
||||||
|
}
|
||||||
|
|
||||||
// 100 seconds later...
|
// 100 seconds later...
|
||||||
laterStatus := getTestRunningStatusWithStarted(probeType != startup)
|
laterStatus := getTestRunningStatusWithStarted(probeType != startup)
|
||||||
@ -397,17 +406,17 @@ func TestResultRunOnStartupCheckFailure(t *testing.T) {
|
|||||||
// Below FailureThreshold leaves probe state unchanged
|
// Below FailureThreshold leaves probe state unchanged
|
||||||
// which is failed for startup at first.
|
// which is failed for startup at first.
|
||||||
m.prober.exec = fakeExecProber{probe.Failure, nil}
|
m.prober.exec = fakeExecProber{probe.Failure, nil}
|
||||||
msg := "probe failure, result failure"
|
msg := "probe failure, result unknown"
|
||||||
expectContinue(t, w, w.doProbe(), msg)
|
expectContinue(t, w, w.doProbe(), msg)
|
||||||
expectResult(t, w, results.Failure, msg)
|
expectResult(t, w, results.Unknown, msg)
|
||||||
if w.resultRun != 1 {
|
if w.resultRun != 1 {
|
||||||
t.Errorf("Prober resultRun should be 1")
|
t.Errorf("Prober resultRun should be 1")
|
||||||
}
|
}
|
||||||
|
|
||||||
m.prober.exec = fakeExecProber{probe.Failure, nil}
|
m.prober.exec = fakeExecProber{probe.Failure, nil}
|
||||||
msg = "2nd probe failure, result failure"
|
msg = "2nd probe failure, result unknown"
|
||||||
expectContinue(t, w, w.doProbe(), msg)
|
expectContinue(t, w, w.doProbe(), msg)
|
||||||
expectResult(t, w, results.Failure, msg)
|
expectResult(t, w, results.Unknown, msg)
|
||||||
if w.resultRun != 2 {
|
if w.resultRun != 2 {
|
||||||
t.Errorf("Prober resultRun should be 2")
|
t.Errorf("Prober resultRun should be 2")
|
||||||
}
|
}
|
||||||
@@ -446,11 +455,11 @@ func TestStartupProbeDisabledByStarted(t *testing.T) {
 	m := newTestManager()
 	w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: 1, FailureThreshold: 2})
 	m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(false))
-	// startupProbe fails
+	// startupProbe fails < FailureThreshold, stays unknown
 	m.prober.exec = fakeExecProber{probe.Failure, nil}
-	msg := "Not started, probe failure, result failure"
+	msg := "Not started, probe failure, result unknown"
 	expectContinue(t, w, w.doProbe(), msg)
-	expectResult(t, w, results.Failure, msg)
+	expectResult(t, w, results.Unknown, msg)
 	// startupProbe succeeds
 	m.prober.exec = fakeExecProber{probe.Success, nil}
 	msg = "Started, probe success, result success"