Merge pull request #35526 from justinsb/fix_35521_b
Automatic merge from submit-queue

kubelet bootstrap: start hostNetwork pods before we have PodCIDR

Network readiness was checked in the pod admission phase, but pods that fail admission are not retried. Move the check to the pod start phase.

Issue #35409
Issue #35521
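The gate added to syncPod below calls podUsesHostNetwork(pod), a helper that already exists in the kubelet and is not touched by this diff. A rough sketch of what it presumably checks; the field name comes from the new test, which toggles pod.Spec.SecurityContext.HostNetwork, so treat the exact signature as an assumption:

func podUsesHostNetwork(pod *api.Pod) bool {
	// Host-network pods can start before the network plugin reports ready,
	// because they do not need a PodCIDR or CNI setup.
	return pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.HostNetwork
}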
@@ -1422,6 +1422,11 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 		return syncErr
 	}
 
+	// If the network plugin is not ready, only start the pod if it uses the host network
+	if rs := kl.runtimeState.networkErrors(); len(rs) != 0 && !podUsesHostNetwork(pod) {
+		return fmt.Errorf("network is not ready: %v", rs)
+	}
+
 	// Create Cgroups for the pod and apply resource parameters
 	// to them if cgroup-per-qos flag is enabled.
 	pcm := kl.containerManager.NewPodContainerManager()
@@ -1696,7 +1701,7 @@ func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHand
 	defer housekeepingTicker.Stop()
 	plegCh := kl.pleg.Watch()
 	for {
-		if rs := kl.runtimeState.errors(); len(rs) != 0 {
+		if rs := kl.runtimeState.runtimeErrors(); len(rs) != 0 {
 			glog.Infof("skipping pod synchronization - %v", rs)
 			time.Sleep(5 * time.Second)
 			continue
@@ -591,7 +591,8 @@ func (kl *Kubelet) setNodeReadyCondition(node *api.Node) {
 	// ref: https://github.com/kubernetes/kubernetes/issues/16961
 	currentTime := unversioned.NewTime(kl.clock.Now())
 	var newNodeReadyCondition api.NodeCondition
-	if rs := kl.runtimeState.errors(); len(rs) == 0 {
+	rs := append(kl.runtimeState.runtimeErrors(), kl.runtimeState.networkErrors()...)
+	if len(rs) == 0 {
 		newNodeReadyCondition = api.NodeCondition{
 			Type:   api.NodeReady,
 			Status: api.ConditionTrue,
@@ -1054,6 +1054,48 @@ func TestPrivilegedContainerDisallowed(t *testing.T) {
 	assert.Error(t, err, "expected pod infra creation to fail")
 }
 
+func TestNetworkErrorsWithoutHostNetwork(t *testing.T) {
+	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
+	testKubelet.fakeCadvisor.On("VersionInfo").Return(&cadvisorapi.VersionInfo{}, nil)
+	testKubelet.fakeCadvisor.On("MachineInfo").Return(&cadvisorapi.MachineInfo{}, nil)
+	testKubelet.fakeCadvisor.On("ImagesFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
+	testKubelet.fakeCadvisor.On("RootFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
+	kubelet := testKubelet.kubelet
+
+	kubelet.runtimeState.setNetworkState(fmt.Errorf("simulated network error"))
+	capabilities.SetForTests(capabilities.Capabilities{
+		PrivilegedSources: capabilities.PrivilegedSources{
+			HostNetworkSources: []string{kubetypes.ApiserverSource, kubetypes.FileSource},
+		},
+	})
+
+	pod := podWithUidNameNsSpec("12345678", "hostnetwork", "new", api.PodSpec{
+		SecurityContext: &api.PodSecurityContext{
+			HostNetwork: false,
+		},
+		Containers: []api.Container{
+			{Name: "foo"},
+		},
+	})
+
+	kubelet.podManager.SetPods([]*api.Pod{pod})
+	err := kubelet.syncPod(syncPodOptions{
+		pod:        pod,
+		podStatus:  &kubecontainer.PodStatus{},
+		updateType: kubetypes.SyncPodUpdate,
+	})
+	assert.Error(t, err, "expected pod with hostNetwork=false to fail when network in error")
+
+	pod.Annotations[kubetypes.ConfigSourceAnnotationKey] = kubetypes.FileSource
+	pod.Spec.SecurityContext.HostNetwork = true
+	err = kubelet.syncPod(syncPodOptions{
+		pod:        pod,
+		podStatus:  &kubecontainer.PodStatus{},
+		updateType: kubetypes.SyncPodUpdate,
+	})
+	assert.NoError(t, err, "expected pod with hostNetwork=true to succeed when network in error")
+}
+
 func TestFilterOutTerminatedPods(t *testing.T) {
 	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
 	kubelet := testKubelet.kubelet
@@ -83,6 +83,7 @@ func TestRunOnce(t *testing.T) {
 		kubeClient: &fake.Clientset{},
 		hostname: testKubeletHostname,
 		nodeName: testKubeletHostname,
+		runtimeState: newRuntimeState(time.Second),
 	}
 	kb.containerManager = cm.NewStubContainerManager()
@@ -68,16 +68,13 @@ func (s *runtimeState) setInitError(err error) {
 	s.initError = err
 }
 
-func (s *runtimeState) errors() []string {
+func (s *runtimeState) runtimeErrors() []string {
 	s.RLock()
 	defer s.RUnlock()
 	var ret []string
 	if s.initError != nil {
 		ret = append(ret, s.initError.Error())
 	}
-	if s.networkError != nil {
-		ret = append(ret, s.networkError.Error())
-	}
 	if !s.lastBaseRuntimeSync.Add(s.baseRuntimeSyncThreshold).After(time.Now()) {
 		ret = append(ret, "container runtime is down")
 	}
@@ -87,6 +84,16 @@ func (s *runtimeState) errors() []string {
 	return ret
 }
 
+func (s *runtimeState) networkErrors() []string {
+	s.RLock()
+	defer s.RUnlock()
+	var ret []string
+	if s.networkError != nil {
+		ret = append(ret, s.networkError.Error())
+	}
+	return ret
+}
+
 func newRuntimeState(
 	runtimeSyncThreshold time.Duration,
 ) *runtimeState {
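The new test calls kubelet.runtimeState.setNetworkState(...), a setter that is not part of this diff. Presumably it mirrors setInitError shown above, along these lines (an assumption, not taken from the commit):

func (s *runtimeState) setNetworkState(err error) {
	// Record (or clear, when err is nil) the current network plugin error
	// so networkErrors() can surface it to syncPod and node status.
	s.Lock()
	defer s.Unlock()
	s.networkError = err
}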