Merge pull request #84279 from matthyx/kuberuntime-startupprobe

Add startupProbe result handling to kuberuntime
Commit a08b09d52f by Kubernetes Prow Robot, 2019-11-13 13:01:53 -08:00, committed by GitHub
11 changed files with 81 additions and 20 deletions
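
For context: startupProbe (alpha since Kubernetes 1.16) holds off liveness and readiness probing until a slow-starting container reports that it has started. The sketch below is illustrative and not part of this commit; it shows where such a probe sits on a container spec, assuming the k8s.io/api/core/v1 types of this era (Probe still embeds Handler here, later renamed ProbeHandler):

    package main

    import (
        "fmt"

        corev1 "k8s.io/api/core/v1"
    )

    func main() {
        // A container that may take a while to come up: the startup probe may fail for up to
        // FailureThreshold*PeriodSeconds before the kubelet gives up, and liveness/readiness
        // probing only begins once it has succeeded.
        c := corev1.Container{
            Name:  "slow-start",
            Image: "example.com/app:latest", // hypothetical image
            StartupProbe: &corev1.Probe{
                Handler: corev1.Handler{
                    Exec: &corev1.ExecAction{Command: []string{"cat", "/tmp/healthy"}},
                },
                PeriodSeconds:    10,
                FailureThreshold: 30,
            },
        }
        fmt.Printf("startup probe budget: %ds\n", c.StartupProbe.PeriodSeconds*c.StartupProbe.FailureThreshold)
    }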

View File

@@ -582,6 +582,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
imageBackOff := flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff)
klet.livenessManager = proberesults.NewManager()
+klet.startupManager = proberesults.NewManager()
klet.podCache = kubecontainer.NewCache()
var checkpointManager checkpointmanager.CheckpointManager
@@ -671,6 +672,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
runtime, err := kuberuntime.NewKubeGenericRuntimeManager(
kubecontainer.FilterEventRecorder(kubeDeps.Recorder),
klet.livenessManager,
+klet.startupManager,
seccompProfileRoot,
containerRefManager,
machineInfo,
@@ -777,6 +779,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
klet.probeManager = prober.NewManager(
klet.statusManager,
klet.livenessManager,
+klet.startupManager,
klet.runner,
containerRefManager,
kubeDeps.Recorder)
@@ -972,6 +975,7 @@ type Kubelet struct {
probeManager prober.Manager
// Manages container health check results.
livenessManager proberesults.Manager
+startupManager proberesults.Manager
// How long to keep idle streaming command execution/port forwarding
// connections open before terminating them
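
This wiring gives the kubelet a single proberesults.Manager for startup results and hands the same instance to both the prober and the generic runtime manager, so results written by the probe workers are visible where pod actions are computed. A minimal usage sketch of that cache follows; it is based on the calls that appear in this diff (NewManager, Set, Get, Remove) plus the manager's Updates() accessor, and it assumes it is compiled inside the k8s.io/kubernetes module, since these packages are not meant for external import:

    package main

    import (
        "fmt"

        v1 "k8s.io/api/core/v1"
        kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
        proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
    )

    func main() {
        cache := proberesults.NewManager()
        id := kubecontainer.ContainerID{Type: "docker", ID: "abc123"} // hypothetical container ID

        // Nothing recorded yet: Get reports "not found", so a reader cannot mistake
        // a probe that has not run for a failed one.
        if _, found := cache.Get(id); !found {
            fmt.Println("no startup result recorded yet")
        }

        // A probe worker records a failure; the pod argument tags the update that
        // Set publishes on the manager's Updates() channel.
        cache.Set(id, proberesults.Failure, &v1.Pod{})
        update := <-cache.Updates()
        fmt.Println("update published for container", update.ContainerID.ID)

        // This is the check computePodActions now performs for startup results.
        if result, found := cache.Get(id); found && result == proberesults.Failure {
            fmt.Println("startup probe failed; the container would be killed and restarted")
        }

        // When the container goes away, its entry is dropped.
        cache.Remove(id)
    }

In the kubelet proper, the writer is the startup probe worker and the reader is computePodActions, shown further down in this commit.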

View File

@@ -235,6 +235,7 @@ func newTestKubeletWithImageList(
kubelet.probeManager = probetest.FakeManager{}
kubelet.livenessManager = proberesults.NewManager()
+kubelet.startupManager = proberesults.NewManager()
kubelet.containerManager = cm.NewStubContainerManager()
fakeNodeRef := &v1.ObjectReference{

View File

@@ -110,6 +110,7 @@ go_test(
"//pkg/kubelet/container/testing:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
+"//pkg/kubelet/prober/results:go_default_library",
"//pkg/kubelet/runtimeclass:go_default_library",
"//pkg/kubelet/runtimeclass/testing:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",

View File

@@ -78,6 +78,7 @@ func newFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageS
cpuCFSQuota: false,
cpuCFSQuotaPeriod: metav1.Duration{Duration: time.Microsecond * 100},
livenessManager: proberesults.NewManager(),
+startupManager: proberesults.NewManager(),
containerRefManager: kubecontainer.NewRefManager(),
machineInfo: machineInfo,
osInterface: osInterface,

View File

@@ -100,6 +100,7 @@ type kubeGenericRuntimeManager struct {
// Health check results.
livenessManager proberesults.Manager
+startupManager proberesults.Manager
// If true, enforce container cpu limits with CFS quota support
cpuCFSQuota bool
@@ -150,6 +151,7 @@ type LegacyLogProvider interface {
func NewKubeGenericRuntimeManager(
recorder record.EventRecorder,
livenessManager proberesults.Manager,
+startupManager proberesults.Manager,
seccompProfileRoot string,
containerRefManager *kubecontainer.RefManager,
machineInfo *cadvisorapi.MachineInfo,
@@ -175,6 +177,7 @@ func NewKubeGenericRuntimeManager(
cpuCFSQuotaPeriod: cpuCFSQuotaPeriod,
seccompProfileRoot: seccompProfileRoot,
livenessManager: livenessManager,
+startupManager: startupManager,
containerRefManager: containerRefManager,
machineInfo: machineInfo,
osInterface: osInterface,
@@ -590,6 +593,9 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
} else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
// If the container failed the liveness probe, we should kill it.
message = fmt.Sprintf("Container %s failed liveness probe", container.Name)
+} else if startup, found := m.startupManager.Get(containerStatus.ID); found && startup == proberesults.Failure {
+// If the container failed the startup probe, we should kill it.
+message = fmt.Sprintf("Container %s failed startup probe", container.Name)
} else {
// Keep the container.
keepCount++
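
The new branch mirrors the existing liveness handling: only an explicitly recorded Failure restarts the container, a missing or Unknown startup result falls through to the keep path, and liveness is consulted before startup. A toy version of that decision order, assuming it is compiled inside the k8s.io/kubernetes tree (the function and scenario names are hypothetical):

    package main

    import (
        "fmt"

        proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
    )

    // decide is a toy stand-in for the per-container branch added above.
    func decide(liveness, startup proberesults.Result, livenessFound, startupFound bool) string {
        switch {
        case livenessFound && liveness == proberesults.Failure:
            return "kill: failed liveness probe"
        case startupFound && startup == proberesults.Failure:
            return "kill: failed startup probe"
        default:
            return "keep"
        }
    }

    func main() {
        fmt.Println(decide(proberesults.Success, proberesults.Unknown, true, true))   // keep: no startup verdict yet
        fmt.Println(decide(proberesults.Success, proberesults.Failure, true, true))   // kill: failed startup probe
        fmt.Println(decide(proberesults.Unknown, proberesults.Unknown, false, false)) // keep: nothing recorded
    }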

View File

@@ -41,6 +41,7 @@ import (
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
+proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
)
var (
@@ -732,6 +733,7 @@ func TestComputePodActions(t *testing.T) {
mutatePodFn func(*v1.Pod)
mutateStatusFn func(*kubecontainer.PodStatus)
actions podActions
+resetStatusFn func(*kubecontainer.PodStatus)
}{
"everying is good; do nothing": {
actions: noAction,
@@ -850,8 +852,38 @@ func TestComputePodActions(t *testing.T) {
ContainersToKill: getKillMap(basePod, baseStatus, []int{1}),
ContainersToStart: []int{1},
},
-// TODO: Add a test case for containers which failed the liveness
-// check. Will need to fake the livessness check result.
+},
+"Kill and recreate the container if the liveness check has failed": {
+mutatePodFn: func(pod *v1.Pod) {
+pod.Spec.RestartPolicy = v1.RestartPolicyAlways
+},
+mutateStatusFn: func(status *kubecontainer.PodStatus) {
+m.livenessManager.Set(status.ContainerStatuses[1].ID, proberesults.Failure, basePod)
+},
+actions: podActions{
+SandboxID: baseStatus.SandboxStatuses[0].Id,
+ContainersToKill: getKillMap(basePod, baseStatus, []int{1}),
+ContainersToStart: []int{1},
+},
+resetStatusFn: func(status *kubecontainer.PodStatus) {
+m.livenessManager.Remove(status.ContainerStatuses[1].ID)
+},
+},
+"Kill and recreate the container if the startup check has failed": {
+mutatePodFn: func(pod *v1.Pod) {
+pod.Spec.RestartPolicy = v1.RestartPolicyAlways
+},
+mutateStatusFn: func(status *kubecontainer.PodStatus) {
+m.startupManager.Set(status.ContainerStatuses[1].ID, proberesults.Failure, basePod)
+},
+actions: podActions{
+SandboxID: baseStatus.SandboxStatuses[0].Id,
+ContainersToKill: getKillMap(basePod, baseStatus, []int{1}),
+ContainersToStart: []int{1},
+},
+resetStatusFn: func(status *kubecontainer.PodStatus) {
+m.startupManager.Remove(status.ContainerStatuses[1].ID)
+},
},
"Verify we do not create a pod sandbox if no ready sandbox for pod with RestartPolicy=Never and all containers exited": {
mutatePodFn: func(pod *v1.Pod) {
@@ -917,6 +949,9 @@ func TestComputePodActions(t *testing.T) {
}
actions := m.computePodActions(pod, status)
verifyActions(t, &test.actions, &actions, desc)
+if test.resetStatusFn != nil {
+test.resetStatusFn(status)
+}
}
}
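
The new resetStatusFn hook exists because the fake runtime manager shares a single livenessManager and startupManager across every entry of this table-driven test: a case that injects a Failure has to remove it again, otherwise the result would leak into later cases. The same pattern in isolation, with a hypothetical shared map standing in for the shared result caches:

    package sharedstate_test

    import "testing"

    // shared stands in for a cache that all table entries reuse, the way the fake
    // runtime manager's probe-result managers are reused above.
    var shared = map[string]string{}

    func TestTableWithPerCaseCleanup(t *testing.T) {
        cases := map[string]struct {
            mutate func()
            reset  func()
            want   int
        }{
            "injects a failure": {
                mutate: func() { shared["c1"] = "Failure" },
                reset:  func() { delete(shared, "c1") },
                want:   1,
            },
            "expects a clean cache": {
                want: 0,
            },
        }
        for name, tc := range cases {
            if tc.mutate != nil {
                tc.mutate()
            }
            if got := len(shared); got != tc.want {
                t.Errorf("%s: len(shared) = %d, want %d", name, got, tc.want)
            }
            // Without this per-case reset the injected entry would survive into
            // whichever case the map iteration visits next.
            if tc.reset != nil {
                tc.reset()
            }
        }
    }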

View File

@@ -112,6 +112,7 @@ func newTestManager() *manager {
m := NewManager(
status.NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}),
results.NewManager(),
+results.NewManager(),
nil, // runner
refManager,
&record.FakeRecorder{},

View File

@@ -102,13 +102,13 @@ type manager struct {
func NewManager(
statusManager status.Manager,
livenessManager results.Manager,
+startupManager results.Manager,
runner kubecontainer.ContainerCommandRunner,
refManager *kubecontainer.RefManager,
recorder record.EventRecorder) Manager {
prober := newProber(runner, refManager, recorder)
readinessManager := results.NewManager()
-startupManager := results.NewManager()
return &manager{
statusManager: statusManager,
prober: prober,

View File

@@ -40,14 +40,17 @@ type Manager interface {
}
// Result is the type for probe results.
-type Result bool
+type Result int
const (
-// Success is encoded as "true" (type Result)
-Success Result = true
-// Failure is encoded as "false" (type Result)
-Failure Result = false
+// Unknown is encoded as -1 (type Result)
+Unknown Result = iota - 1
+// Success is encoded as 0 (type Result)
+Success
+// Failure is encoded as 1 (type Result)
+Failure
)
func (r Result) String() string {
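
Switching Result from bool to int makes room for a third state: with iota - 1 the constants evaluate to Unknown = -1, Success = 0 and Failure = 1, so "no verdict yet" is distinct from an actual failure. A standalone check of that arithmetic (local copies of the constants, not the real package):

    package main

    import "fmt"

    type Result int

    const (
        // iota is 0 on the first line of the block, so iota - 1 pins Unknown to -1;
        // the bare identifiers that follow repeat the expression, yielding 0 and 1.
        Unknown Result = iota - 1
        Success
        Failure
    )

    func main() {
        fmt.Println(Unknown, Success, Failure) // -1 0 1
        fmt.Println(Unknown == Failure)        // false: an unreported probe is not a failed one
    }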

View File

@@ -101,7 +101,7 @@ func newWorker(
case startup:
w.spec = container.StartupProbe
w.resultsManager = m.startupManager
-w.initialValue = results.Failure
+w.initialValue = results.Unknown
}
basicMetricLabels := metrics.Labels{

View File

@@ -79,10 +79,12 @@ func TestDoProbe(t *testing.T) {
podStatus: &pendingStatus,
expectContinue: true,
expectSet: true,
+expectedResult: results.Failure,
},
{ // Container terminated
podStatus: &terminatedStatus,
expectSet: true,
+expectedResult: results.Failure,
},
{ // Probe successful.
podStatus: &runningStatus,
@@ -134,8 +136,15 @@ func TestInitialDelay(t *testing.T) {
m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(probeType != startup))
expectContinue(t, w, w.doProbe(), "during initial delay")
-// Default value depends on probe, true for liveness, otherwise false.
-expectResult(t, w, results.Result(probeType == liveness), "during initial delay")
+// Default value depends on probe, Success for liveness, Failure for readiness, Unknown for startup
+switch probeType {
+case liveness:
+expectResult(t, w, results.Success, "during initial delay")
+case readiness:
+expectResult(t, w, results.Failure, "during initial delay")
+case startup:
+expectResult(t, w, results.Unknown, "during initial delay")
+}
// 100 seconds later...
laterStatus := getTestRunningStatusWithStarted(probeType != startup)
@@ -397,17 +406,17 @@ func TestResultRunOnStartupCheckFailure(t *testing.T) {
// Below FailureThreshold leaves probe state unchanged
// which is failed for startup at first.
m.prober.exec = fakeExecProber{probe.Failure, nil}
-msg := "probe failure, result failure"
+msg := "probe failure, result unknown"
expectContinue(t, w, w.doProbe(), msg)
-expectResult(t, w, results.Failure, msg)
+expectResult(t, w, results.Unknown, msg)
if w.resultRun != 1 {
t.Errorf("Prober resultRun should be 1")
}
m.prober.exec = fakeExecProber{probe.Failure, nil}
-msg = "2nd probe failure, result failure"
+msg = "2nd probe failure, result unknown"
expectContinue(t, w, w.doProbe(), msg)
-expectResult(t, w, results.Failure, msg)
+expectResult(t, w, results.Unknown, msg)
if w.resultRun != 2 {
t.Errorf("Prober resultRun should be 2")
}
@@ -446,11 +455,11 @@ func TestStartupProbeDisabledByStarted(t *testing.T) {
m := newTestManager()
w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: 1, FailureThreshold: 2})
m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(false))
-// startupProbe fails
+// startupProbe fails < FailureThreshold, stays unknown
m.prober.exec = fakeExecProber{probe.Failure, nil}
-msg := "Not started, probe failure, result failure"
+msg := "Not started, probe failure, result unknown"
expectContinue(t, w, w.doProbe(), msg)
-expectResult(t, w, results.Failure, msg)
+expectResult(t, w, results.Unknown, msg)
// startupProbe succeeds
m.prober.exec = fakeExecProber{probe.Success, nil}
msg = "Started, probe success, result success"