Merge pull request #35526 from justinsb/fix_35521_b

Automatic merge from submit-queue

kubelet bootstrap: start hostNetwork pods before we have PodCIDR

Network readiness was checked in the pod admission phase, but pods that
fail admission are not retried.  Move the check to the pod start phase.

Issue #35409 
Issue #35521
This commit is contained in:
Kubernetes Submit Queue
2016-11-06 12:53:14 -08:00
committed by GitHub
9 changed files with 64 additions and 15 deletions

View File

@@ -1422,6 +1422,11 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
return syncErr
}
// If the network plugin is not ready, only start the pod if it uses the host network
if rs := kl.runtimeState.networkErrors(); len(rs) != 0 && !podUsesHostNetwork(pod) {
return fmt.Errorf("network is not ready: %v", rs)
}
// Create Cgroups for the pod and apply resource parameters
// to them if cgroup-per-qos flag is enabled.
pcm := kl.containerManager.NewPodContainerManager()
@@ -1696,7 +1701,7 @@ func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHand
defer housekeepingTicker.Stop()
plegCh := kl.pleg.Watch()
for {
if rs := kl.runtimeState.errors(); len(rs) != 0 {
if rs := kl.runtimeState.runtimeErrors(); len(rs) != 0 {
glog.Infof("skipping pod synchronization - %v", rs)
time.Sleep(5 * time.Second)
continue

View File

@@ -591,7 +591,8 @@ func (kl *Kubelet) setNodeReadyCondition(node *api.Node) {
// ref: https://github.com/kubernetes/kubernetes/issues/16961
currentTime := unversioned.NewTime(kl.clock.Now())
var newNodeReadyCondition api.NodeCondition
if rs := kl.runtimeState.errors(); len(rs) == 0 {
rs := append(kl.runtimeState.runtimeErrors(), kl.runtimeState.networkErrors()...)
if len(rs) == 0 {
newNodeReadyCondition = api.NodeCondition{
Type: api.NodeReady,
Status: api.ConditionTrue,

View File

@@ -1054,6 +1054,48 @@ func TestPrivilegedContainerDisallowed(t *testing.T) {
assert.Error(t, err, "expected pod infra creation to fail")
}
// TestNetworkErrorsWithoutHostNetwork checks that, while the runtime state
// carries a network error, syncPod returns an error for a pod with
// HostNetwork=false and returns no error once the same pod is switched to
// HostNetwork=true.
func TestNetworkErrorsWithoutHostNetwork(t *testing.T) {
	tk := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
	tk.fakeCadvisor.On("VersionInfo").Return(&cadvisorapi.VersionInfo{}, nil)
	tk.fakeCadvisor.On("MachineInfo").Return(&cadvisorapi.MachineInfo{}, nil)
	tk.fakeCadvisor.On("ImagesFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
	tk.fakeCadvisor.On("RootFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
	kl := tk.kubelet

	// Record a network error in the runtime state before any sync happens.
	kl.runtimeState.setNetworkState(fmt.Errorf("simulated network error"))

	// Register the apiserver and file sources as HostNetworkSources.
	capabilities.SetForTests(capabilities.Capabilities{
		PrivilegedSources: capabilities.PrivilegedSources{
			HostNetworkSources: []string{kubetypes.ApiserverSource, kubetypes.FileSource},
		},
	})

	spec := api.PodSpec{
		SecurityContext: &api.PodSecurityContext{
			HostNetwork: false,
		},
		Containers: []api.Container{
			{Name: "foo"},
		},
	}
	pod := podWithUidNameNsSpec("12345678", "hostnetwork", "new", spec)
	kl.podManager.SetPods([]*api.Pod{pod})

	// syncOnce runs a single syncPod pass for pod with a fresh, empty status.
	syncOnce := func() error {
		return kl.syncPod(syncPodOptions{
			pod:        pod,
			podStatus:  &kubecontainer.PodStatus{},
			updateType: kubetypes.SyncPodUpdate,
		})
	}

	// First pass: the pod does not use the host network.
	err := syncOnce()
	assert.Error(t, err, "expected pod with hostNetwork=false to fail when network in error")

	// Second pass: mark the pod's config source as the file source and
	// switch it to the host network.
	pod.Annotations[kubetypes.ConfigSourceAnnotationKey] = kubetypes.FileSource
	pod.Spec.SecurityContext.HostNetwork = true
	err = syncOnce()
	assert.NoError(t, err, "expected pod with hostNetwork=true to succeed when network in error")
}
func TestFilterOutTerminatedPods(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
kubelet := testKubelet.kubelet

View File

@@ -83,6 +83,7 @@ func TestRunOnce(t *testing.T) {
kubeClient: &fake.Clientset{},
hostname: testKubeletHostname,
nodeName: testKubeletHostname,
runtimeState: newRuntimeState(time.Second),
}
kb.containerManager = cm.NewStubContainerManager()

View File

@@ -68,16 +68,13 @@ func (s *runtimeState) setInitError(err error) {
s.initError = err
}
func (s *runtimeState) errors() []string {
func (s *runtimeState) runtimeErrors() []string {
s.RLock()
defer s.RUnlock()
var ret []string
if s.initError != nil {
ret = append(ret, s.initError.Error())
}
if s.networkError != nil {
ret = append(ret, s.networkError.Error())
}
if !s.lastBaseRuntimeSync.Add(s.baseRuntimeSyncThreshold).After(time.Now()) {
ret = append(ret, "container runtime is down")
}
@@ -87,6 +84,16 @@ func (s *runtimeState) errors() []string {
return ret
}
// networkErrors reports the currently recorded network error, if any.
// It returns a one-element slice holding the error's message, or nil when
// no network error is recorded. The state is read under the read lock.
func (s *runtimeState) networkErrors() []string {
	s.RLock()
	defer s.RUnlock()

	if s.networkError == nil {
		return nil
	}
	return []string{s.networkError.Error()}
}
func newRuntimeState(
runtimeSyncThreshold time.Duration,
) *runtimeState {