Merge pull request #35342 from timstclair/rejected

Automatic merge from submit-queue

[AppArmor] Hold bad AppArmor pods in pending rather than rejecting

Fixes https://github.com/kubernetes/kubernetes/issues/32837

Overview of the fix:

If the Kubelet needs to reject a Pod for a reason that the control plane doesn't understand (e.g. which AppArmor profiles are installed on the node), then it might contiinuously try to run the pod on the same rejecting node. This change adds a concept of "soft rejection", in which the Pod is admitted, but not allowed to run (and therefore held in a pending state). This prevents the pod from being retried on other nodes, but also prevents the high churn. This is consistent with how other missing local resources (e.g. volumes) is handled.

A side effect of the change is that Pods which are not initially runnable will be retried. This is desired behavior since it avoids a race condition when a new node is brought up but the AppArmor profiles have not yet been loaded on it.

``` release-note
Pods with invalid AppArmor configurations will be held in a Pending state, rather than rejected (failed). Check the pod status message to find out why it is not running.
```

@kubernetes/sig-node @timothysc @rrati @davidopp
This commit is contained in:
Kubernetes Submit Queue
2016-11-05 22:52:26 -07:00
committed by GitHub
7 changed files with 126 additions and 31 deletions

View File

@@ -1426,6 +1426,10 @@ func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod, gra
// NOTE(random-liu): The pod passed in could be *nil* when kubelet restarted. // NOTE(random-liu): The pod passed in could be *nil* when kubelet restarted.
func (dm *DockerManager) killPodWithSyncResult(pod *api.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (result kubecontainer.PodSyncResult) { func (dm *DockerManager) killPodWithSyncResult(pod *api.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (result kubecontainer.PodSyncResult) {
// Short circuit if there's nothing to kill.
if len(runningPod.Containers) == 0 {
return
}
// Send the kills in parallel since they may take a long time. // Send the kills in parallel since they may take a long time.
// There may be len(runningPod.Containers) or len(runningPod.Containers)-1 of result in the channel // There may be len(runningPod.Containers) or len(runningPod.Containers)-1 of result in the channel
containerResults := make(chan *kubecontainer.SyncResult, len(runningPod.Containers)) containerResults := make(chan *kubecontainer.SyncResult, len(runningPod.Containers))

View File

@@ -757,7 +757,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
return nil, fmt.Errorf("failed to initialize eviction manager: %v", err) return nil, fmt.Errorf("failed to initialize eviction manager: %v", err)
} }
klet.evictionManager = evictionManager klet.evictionManager = evictionManager
klet.AddPodAdmitHandler(evictionAdmitHandler) klet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
// add sysctl admission // add sysctl admission
runtimeSupport, err := sysctl.NewRuntimeAdmitHandler(klet.containerRuntime) runtimeSupport, err := sysctl.NewRuntimeAdmitHandler(klet.containerRuntime)
@@ -775,9 +775,9 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
if err != nil { if err != nil {
return nil, err return nil, err
} }
klet.AddPodAdmitHandler(runtimeSupport) klet.admitHandlers.AddPodAdmitHandler(runtimeSupport)
klet.AddPodAdmitHandler(safeWhitelist) klet.admitHandlers.AddPodAdmitHandler(safeWhitelist)
klet.AddPodAdmitHandler(unsafeWhitelist) klet.admitHandlers.AddPodAdmitHandler(unsafeWhitelist)
// enable active deadline handler // enable active deadline handler
activeDeadlineHandler, err := newActiveDeadlineHandler(klet.statusManager, kubeDeps.Recorder, klet.clock) activeDeadlineHandler, err := newActiveDeadlineHandler(klet.statusManager, kubeDeps.Recorder, klet.clock)
@@ -787,14 +787,15 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
klet.AddPodSyncLoopHandler(activeDeadlineHandler) klet.AddPodSyncLoopHandler(activeDeadlineHandler)
klet.AddPodSyncHandler(activeDeadlineHandler) klet.AddPodSyncHandler(activeDeadlineHandler)
klet.appArmorValidator = apparmor.NewValidator(kubeCfg.ContainerRuntime) klet.admitHandlers.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(klet.getNodeAnyWay))
klet.AddPodAdmitHandler(lifecycle.NewAppArmorAdmitHandler(klet.appArmorValidator))
klet.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(klet.getNodeAnyWay))
// apply functional Option's // apply functional Option's
for _, opt := range kubeDeps.Options { for _, opt := range kubeDeps.Options {
opt(klet) opt(klet)
} }
klet.appArmorValidator = apparmor.NewValidator(kubeCfg.ContainerRuntime)
klet.softAdmitHandlers.AddPodAdmitHandler(lifecycle.NewAppArmorAdmitHandler(klet.appArmorValidator))
// Finally, put the most recent version of the config on the Kubelet, so // Finally, put the most recent version of the config on the Kubelet, so
// people can see how it was configured. // people can see how it was configured.
klet.kubeletConfiguration = *kubeCfg klet.kubeletConfiguration = *kubeCfg
@@ -1052,7 +1053,13 @@ type Kubelet struct {
// TODO: think about moving this to be centralized in PodWorkers in follow-on. // TODO: think about moving this to be centralized in PodWorkers in follow-on.
// the list of handlers to call during pod admission. // the list of handlers to call during pod admission.
lifecycle.PodAdmitHandlers admitHandlers lifecycle.PodAdmitHandlers
// softAdmithandlers are applied to the pod after it is admitted by the Kubelet, but before it is
// run. A pod rejected by a softAdmitHandler will be left in a Pending state indefinitely. If a
// rejected pod should not be recreated, or the scheduler is not aware of the rejection rule, the
// admission rule should be applied by a softAdmitHandler.
softAdmitHandlers lifecycle.PodAdmitHandlers
// the list of handlers to call during pod sync loop. // the list of handlers to call during pod sync loop.
lifecycle.PodSyncLoopHandlers lifecycle.PodSyncLoopHandlers
@@ -1386,17 +1393,42 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
metrics.PodStartLatency.Observe(metrics.SinceInMicroseconds(firstSeenTime)) metrics.PodStartLatency.Observe(metrics.SinceInMicroseconds(firstSeenTime))
} }
runnable := kl.canRunPod(pod)
if !runnable.Admit {
// Pod is not runnable; update the Pod and Container statuses to why.
apiPodStatus.Reason = runnable.Reason
apiPodStatus.Message = runnable.Message
// Waiting containers are not creating.
const waitingReason = "Blocked"
for _, cs := range apiPodStatus.InitContainerStatuses {
if cs.State.Waiting != nil {
cs.State.Waiting.Reason = waitingReason
}
}
for _, cs := range apiPodStatus.ContainerStatuses {
if cs.State.Waiting != nil {
cs.State.Waiting.Reason = waitingReason
}
}
}
// Update status in the status manager // Update status in the status manager
kl.statusManager.SetPodStatus(pod, apiPodStatus) kl.statusManager.SetPodStatus(pod, apiPodStatus)
// Kill pod if it should not be running // Kill pod if it should not be running
if errOuter := canRunPod(pod); errOuter != nil || pod.DeletionTimestamp != nil || apiPodStatus.Phase == api.PodFailed { if !runnable.Admit || pod.DeletionTimestamp != nil || apiPodStatus.Phase == api.PodFailed {
if errInner := kl.killPod(pod, nil, podStatus, nil); errInner != nil { var syncErr error
errOuter = fmt.Errorf("error killing pod: %v", errInner) if err := kl.killPod(pod, nil, podStatus, nil); err != nil {
utilruntime.HandleError(errOuter) syncErr = fmt.Errorf("error killing pod: %v", err)
utilruntime.HandleError(syncErr)
} else {
if !runnable.Admit {
// There was no error killing the pod, but the pod cannot be run.
// Return an error to signal that the sync loop should back off.
syncErr = fmt.Errorf("pod cannot be run: %s", runnable.Message)
} }
// there was no error killing the pod, but the pod cannot be run, so we return that err (if any) }
return errOuter return syncErr
} }
// Create Cgroups for the pod and apply resource parameters // Create Cgroups for the pod and apply resource parameters
@@ -1619,7 +1651,7 @@ func (kl *Kubelet) canAdmitPod(pods []*api.Pod, pod *api.Pod) (bool, string, str
// TODO: move out of disk check into a pod admitter // TODO: move out of disk check into a pod admitter
// TODO: out of resource eviction should have a pod admitter call-out // TODO: out of resource eviction should have a pod admitter call-out
attrs := &lifecycle.PodAdmitAttributes{Pod: pod, OtherPods: pods} attrs := &lifecycle.PodAdmitAttributes{Pod: pod, OtherPods: pods}
for _, podAdmitHandler := range kl.PodAdmitHandlers { for _, podAdmitHandler := range kl.admitHandlers {
if result := podAdmitHandler.Admit(attrs); !result.Admit { if result := podAdmitHandler.Admit(attrs); !result.Admit {
return false, result.Reason, result.Message return false, result.Reason, result.Message
} }
@@ -1634,6 +1666,29 @@ func (kl *Kubelet) canAdmitPod(pods []*api.Pod, pod *api.Pod) (bool, string, str
return true, "", "" return true, "", ""
} }
func (kl *Kubelet) canRunPod(pod *api.Pod) lifecycle.PodAdmitResult {
attrs := &lifecycle.PodAdmitAttributes{Pod: pod}
// Get "OtherPods". Rejected pods are failed, so only include admitted pods that are alive.
attrs.OtherPods = kl.filterOutTerminatedPods(kl.podManager.GetPods())
for _, handler := range kl.softAdmitHandlers {
if result := handler.Admit(attrs); !result.Admit {
return result
}
}
// TODO: Refactor as a soft admit handler.
if err := canRunPod(pod); err != nil {
return lifecycle.PodAdmitResult{
Admit: false,
Reason: "Forbidden",
Message: err.Error(),
}
}
return lifecycle.PodAdmitResult{Admit: true}
}
// syncLoop is the main loop for processing changes. It watches for changes from // syncLoop is the main loop for processing changes. It watches for changes from
// three channels (file, apiserver, and http) and creates a union of them. For // three channels (file, apiserver, and http) and creates a union of them. For
// any new change seen, will run a sync against desired state and running state. If // any new change seen, will run a sync against desired state and running state. If

View File

@@ -220,9 +220,9 @@ func newTestKubeletWithImageList(
require.NoError(t, err, "Failed to initialize eviction manager") require.NoError(t, err, "Failed to initialize eviction manager")
kubelet.evictionManager = evictionManager kubelet.evictionManager = evictionManager
kubelet.AddPodAdmitHandler(evictionAdmitHandler) kubelet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
// Add this as cleanup predicate pod admitter // Add this as cleanup predicate pod admitter
kubelet.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(kubelet.getNodeAnyWay)) kubelet.admitHandlers.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(kubelet.getNodeAnyWay))
plug := &volumetest.FakeVolumePlugin{PluginName: "fake", Host: nil} plug := &volumetest.FakeVolumePlugin{PluginName: "fake", Host: nil}
kubelet.volumePluginMgr, err = kubelet.volumePluginMgr, err =
@@ -1823,7 +1823,7 @@ func TestHandlePodAdditionsInvokesPodAdmitHandlers(t *testing.T) {
podToAdmit := pods[1] podToAdmit := pods[1]
podsToReject := []*api.Pod{podToReject} podsToReject := []*api.Pod{podToReject}
kl.AddPodAdmitHandler(&testPodAdmitHandler{podsToReject: podsToReject}) kl.admitHandlers.AddPodAdmitHandler(&testPodAdmitHandler{podsToReject: podsToReject})
kl.HandlePodAdditions(pods) kl.HandlePodAdditions(pods)
// Check pod status stored in the status map. // Check pod status stored in the status map.

View File

@@ -149,6 +149,11 @@ type appArmorAdmitHandler struct {
} }
func (a *appArmorAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult { func (a *appArmorAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult {
// If the pod is already running or terminated, no need to recheck AppArmor.
if attrs.Pod.Status.Phase != api.PodPending {
return PodAdmitResult{Admit: true}
}
err := a.Validate(attrs.Pod) err := a.Validate(attrs.Pod)
if err == nil { if err == nil {
return PodAdmitResult{Admit: true} return PodAdmitResult{Admit: true}

View File

@@ -121,7 +121,7 @@ func TestRunOnce(t *testing.T) {
t.Fatalf("failed to initialize eviction manager: %v", err) t.Fatalf("failed to initialize eviction manager: %v", err)
} }
kb.evictionManager = evictionManager kb.evictionManager = evictionManager
kb.AddPodAdmitHandler(evictionAdmitHandler) kb.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
if err := kb.setupDataDirs(); err != nil { if err := kb.setupDataDirs(); err != nil {
t.Errorf("Failed to init data dirs: %v", err) t.Errorf("Failed to init data dirs: %v", err)
} }

View File

@@ -108,5 +108,7 @@ go_test(
"//vendor:github.com/onsi/gomega/gstruct", "//vendor:github.com/onsi/gomega/gstruct",
"//vendor:github.com/onsi/gomega/types", "//vendor:github.com/onsi/gomega/types",
"//vendor:github.com/spf13/pflag", "//vendor:github.com/spf13/pflag",
"//vendor:k8s.io/client-go/pkg/api/errors",
"//vendor:k8s.io/client-go/pkg/api/unversioned",
], ],
) )

View File

@@ -26,8 +26,11 @@ import (
"strconv" "strconv"
"strings" "strings"
"k8s.io/client-go/pkg/api/errors"
"k8s.io/client-go/pkg/api/unversioned"
"k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/security/apparmor" "k8s.io/kubernetes/pkg/security/apparmor"
"k8s.io/kubernetes/pkg/watch"
"k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/framework"
"github.com/davecgh/go-spew/spew" "github.com/davecgh/go-spew/spew"
@@ -46,12 +49,11 @@ var _ = framework.KubeDescribe("AppArmor [Feature:AppArmor]", func() {
f := framework.NewDefaultFramework("apparmor-test") f := framework.NewDefaultFramework("apparmor-test")
It("should reject an unloaded profile", func() { It("should reject an unloaded profile", func() {
status := runAppArmorTest(f, apparmor.ProfileNamePrefix+"non-existant-profile") status := runAppArmorTest(f, false, apparmor.ProfileNamePrefix+"non-existant-profile")
Expect(status.Phase).To(Equal(api.PodFailed), "PodStatus: %+v", status) expectSoftRejection(status)
Expect(status.Reason).To(Equal("AppArmor"), "PodStatus: %+v", status)
}) })
It("should enforce a profile blocking writes", func() { It("should enforce a profile blocking writes", func() {
status := runAppArmorTest(f, apparmor.ProfileNamePrefix+apparmorProfilePrefix+"deny-write") status := runAppArmorTest(f, true, apparmor.ProfileNamePrefix+apparmorProfilePrefix+"deny-write")
if len(status.ContainerStatuses) == 0 { if len(status.ContainerStatuses) == 0 {
framework.Failf("Unexpected pod status: %s", spew.Sdump(status)) framework.Failf("Unexpected pod status: %s", spew.Sdump(status))
return return
@@ -61,7 +63,7 @@ var _ = framework.KubeDescribe("AppArmor [Feature:AppArmor]", func() {
}) })
It("should enforce a permissive profile", func() { It("should enforce a permissive profile", func() {
status := runAppArmorTest(f, apparmor.ProfileNamePrefix+apparmorProfilePrefix+"audit-write") status := runAppArmorTest(f, true, apparmor.ProfileNamePrefix+apparmorProfilePrefix+"audit-write")
if len(status.ContainerStatuses) == 0 { if len(status.ContainerStatuses) == 0 {
framework.Failf("Unexpected pod status: %s", spew.Sdump(status)) framework.Failf("Unexpected pod status: %s", spew.Sdump(status))
return return
@@ -75,9 +77,8 @@ var _ = framework.KubeDescribe("AppArmor [Feature:AppArmor]", func() {
f := framework.NewDefaultFramework("apparmor-test") f := framework.NewDefaultFramework("apparmor-test")
It("should reject a pod with an AppArmor profile", func() { It("should reject a pod with an AppArmor profile", func() {
status := runAppArmorTest(f, apparmor.ProfileRuntimeDefault) status := runAppArmorTest(f, false, apparmor.ProfileRuntimeDefault)
Expect(status.Phase).To(Equal(api.PodFailed), "PodStatus: %+v", status) expectSoftRejection(status)
Expect(status.Reason).To(Equal("AppArmor"), "PodStatus: %+v", status)
}) })
}) })
} }
@@ -136,11 +137,31 @@ func loadTestProfiles() error {
return nil return nil
} }
func runAppArmorTest(f *framework.Framework, profile string) api.PodStatus { func runAppArmorTest(f *framework.Framework, shouldRun bool, profile string) api.PodStatus {
pod := createPodWithAppArmor(f, profile) pod := createPodWithAppArmor(f, profile)
if shouldRun {
// The pod needs to start before it stops, so wait for the longer start timeout. // The pod needs to start before it stops, so wait for the longer start timeout.
framework.ExpectNoError(framework.WaitTimeoutForPodNoLongerRunningInNamespace( framework.ExpectNoError(framework.WaitTimeoutForPodNoLongerRunningInNamespace(
f.ClientSet, pod.Name, f.Namespace.Name, "", framework.PodStartTimeout)) f.ClientSet, pod.Name, f.Namespace.Name, "", framework.PodStartTimeout))
} else {
// Pod should remain in the pending state. Wait for the Reason to be set to "AppArmor".
w, err := f.PodClient().Watch(api.SingleObject(api.ObjectMeta{Name: pod.Name}))
framework.ExpectNoError(err)
_, err = watch.Until(framework.PodStartTimeout, w, func(e watch.Event) (bool, error) {
switch e.Type {
case watch.Deleted:
return false, errors.NewNotFound(unversioned.GroupResource{Resource: "pods"}, pod.Name)
}
switch t := e.Object.(type) {
case *api.Pod:
if t.Status.Reason == "AppArmor" {
return true, nil
}
}
return false, nil
})
framework.ExpectNoError(err)
}
p, err := f.PodClient().Get(pod.Name) p, err := f.PodClient().Get(pod.Name)
framework.ExpectNoError(err) framework.ExpectNoError(err)
return p.Status return p.Status
@@ -166,6 +187,14 @@ func createPodWithAppArmor(f *framework.Framework, profile string) *api.Pod {
return f.PodClient().Create(pod) return f.PodClient().Create(pod)
} }
func expectSoftRejection(status api.PodStatus) {
args := []interface{}{"PodStatus: %+v", status}
Expect(status.Phase).To(Equal(api.PodPending), args...)
Expect(status.Reason).To(Equal("AppArmor"), args...)
Expect(status.Message).To(ContainSubstring("AppArmor"), args...)
Expect(status.ContainerStatuses[0].State.Waiting.Reason).To(Equal("Blocked"), args...)
}
func isAppArmorEnabled() bool { func isAppArmorEnabled() bool {
// TODO(timstclair): Pass this through the image setup rather than hardcoding. // TODO(timstclair): Pass this through the image setup rather than hardcoding.
if strings.Contains(framework.TestContext.NodeName, "-gci-dev-") { if strings.Contains(framework.TestContext.NodeName, "-gci-dev-") {