Revert "Revert "Fix the race between configuring cbr0 and restarting static pods""
This reverts commit fd0a95dd12.
@@ -147,6 +147,7 @@ func NewMainKubelet(
    dockerDaemonContainer string,
    systemContainer string,
    configureCBR0 bool,
    podCIDR string,
    pods int,
    dockerExecHandler dockertools.ExecHandler) (*Kubelet, error) {
    if rootDirectory == "" {
@@ -261,6 +262,7 @@ func NewMainKubelet(
        cgroupRoot: cgroupRoot,
        mounter: mounter,
        configureCBR0: configureCBR0,
        podCIDR: podCIDR,
        pods: pods,
        syncLoopMonitor: util.AtomicValue{},
    }
@@ -318,6 +320,10 @@ func NewMainKubelet(
    }
    klet.containerManager = containerManager

    // Start syncing node status immediately, this may set up things the runtime needs to run.
    go util.Until(klet.syncNetworkStatus, 30*time.Second, util.NeverStop)
    go klet.syncNodeStatus()

    // Wait for the runtime to be up with a timeout.
    if err := waitUntilRuntimeIsUp(klet.containerRuntime, maxWaitForContainerRuntime); err != nil {
        return nil, fmt.Errorf("timed out waiting for %q to come up: %v", containerRuntime, err)
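util.Until, as used above, keeps calling the given function on the given period until the stop channel closes; util.NeverStop never closes, so klet.syncNetworkStatus re-checks cbr0 every 30 seconds for the life of the process. A minimal stand-in using only the standard library (runEvery is an illustrative name, not a kubelet function) behaves the same way:

package main

import (
    "fmt"
    "time"
)

// runEvery calls f once per period until stop is closed.
// It is a simplified stand-in for util.Until(f, period, stopCh).
func runEvery(f func(), period time.Duration, stop <-chan struct{}) {
    ticker := time.NewTicker(period)
    defer ticker.Stop()
    for {
        f()
        select {
        case <-stop:
            return
        case <-ticker.C:
        }
    }
}

func main() {
    stop := make(chan struct{})
    go runEvery(func() { fmt.Println("syncing network status") }, 30*time.Second, stop)
    // ... the kubelet would go on to wait for the runtime and start the sync loop.
    time.Sleep(time.Second)
    close(stop)
}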
@@ -412,6 +418,10 @@ type Kubelet struct {
    runtimeUpThreshold time.Duration
    lastTimestampRuntimeUp time.Time

    // Network Status information
    networkConfigMutex sync.Mutex
    networkConfigured bool

    // Volume plugins.
    volumePluginMgr volume.VolumePluginMgr
@@ -489,6 +499,7 @@ type Kubelet struct {
    // Whether or not kubelet should take responsibility for keeping cbr0 in
    // the correct state.
    configureCBR0 bool
    podCIDR string

    // Number of Pods which can be run by this Kubelet
    pods int
@@ -707,7 +718,7 @@ func (kl *Kubelet) Run(updates <-chan PodUpdate) {
    }

    go util.Until(kl.updateRuntimeUp, 5*time.Second, util.NeverStop)
    go kl.syncNodeStatus()

    // Run the system oom watcher forever.
    kl.statusManager.Start()
    kl.syncLoop(updates, kl)
@@ -1705,6 +1716,11 @@ func (kl *Kubelet) syncLoopIteration(updates <-chan PodUpdate, handler SyncHandl
        glog.Infof("Skipping pod synchronization, container runtime is not up.")
        return
    }
    if !kl.doneNetworkConfigure() {
        time.Sleep(5 * time.Second)
        glog.Infof("Skipping pod synchronization, network is not configured")
        return
    }
    unsyncedPod := false
    podSyncTypes := make(map[types.UID]SyncPodType)
    select {
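The new check means an iteration refuses to touch pods until doneNetworkConfigure reports true, sleeping briefly so the outer loop effectively polls; this is what keeps static pods from being restarted before cbr0 is configured. A toy version of that gating (assuming, as the names suggest, that syncLoop drives syncLoopIteration repeatedly; the functions below are illustrative, not the kubelet's):

package main

import (
    "fmt"
    "time"
)

// syncLoop skips pod synchronization (after a short sleep) on each iteration
// until the network check passes, then resumes normal pod syncing.
func syncLoop(networkConfigured func() bool, syncPods func(), iterations int) {
    for i := 0; i < iterations; i++ {
        if !networkConfigured() {
            time.Sleep(10 * time.Millisecond) // the kubelet sleeps 5s here
            fmt.Println("skipping pod synchronization, network is not configured")
            continue
        }
        syncPods()
    }
}

func main() {
    checks := 0
    networkConfigured := func() bool {
        checks++
        return checks > 2 // pretend cbr0 becomes ready on the third check
    }
    syncLoop(networkConfigured, func() { fmt.Println("syncing pods") }, 5)
}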
@@ -1861,6 +1877,7 @@ func (kl *Kubelet) reconcileCBR0(podCIDR string) error {
        glog.V(5).Info("PodCIDR not set. Will not configure cbr0.")
        return nil
    }
    glog.V(5).Infof("PodCIDR is set to %q", podCIDR)
    _, cidr, err := net.ParseCIDR(podCIDR)
    if err != nil {
        return err
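reconcileCBR0 starts by validating the pod CIDR with net.ParseCIDR; the *net.IPNet it returns is what the rest of the function (not shown in this hunk) uses to bring cbr0 into the desired state. A standalone look at that parsing step, with a made-up CIDR value:

package main

import (
    "fmt"
    "net"
)

func main() {
    podCIDR := "10.244.1.0/24" // example value; the kubelet gets this from node.Spec.PodCIDR
    _, cidr, err := net.ParseCIDR(podCIDR)
    if err != nil {
        fmt.Println("invalid pod CIDR:", err) // reconcileCBR0 returns the error here
        return
    }
    fmt.Println("pod network for cbr0:", cidr.String()) // 10.244.1.0/24
}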
@@ -1895,6 +1912,22 @@ func (kl *Kubelet) recordNodeStatusEvent(event string) {
// Maintains Node.Spec.Unschedulable value from previous run of tryUpdateNodeStatus()
var oldNodeUnschedulable bool

func (kl *Kubelet) syncNetworkStatus() {
    kl.networkConfigMutex.Lock()
    defer kl.networkConfigMutex.Unlock()

    networkConfigured := true
    if kl.configureCBR0 {
        if len(kl.podCIDR) == 0 {
            networkConfigured = false
        } else if err := kl.reconcileCBR0(kl.podCIDR); err != nil {
            networkConfigured = false
            glog.Errorf("Error configuring cbr0: %v", err)
        }
    }
    kl.networkConfigured = networkConfigured
}

// setNodeStatus fills in the Status fields of the given Node, overwriting
// any fields that are currently set.
func (kl *Kubelet) setNodeStatus(node *api.Node) error {
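syncNetworkStatus is the only writer of networkConfigured, and doneNetworkConfigure (further down) is the reader; both take networkConfigMutex, so the sync loop always observes a consistent value instead of racing with the 30-second reconciliation. The same pattern distilled into a self-contained type (the names are illustrative, not the kubelet's):

package main

import (
    "fmt"
    "sync"
)

// networkStatus guards a single "configured" flag with a mutex, mirroring
// the kubelet's networkConfigMutex / networkConfigured pair.
type networkStatus struct {
    mu         sync.Mutex
    configured bool
}

// set is the writer side, analogous to syncNetworkStatus recomputing the flag.
func (s *networkStatus) set(ok bool) {
    s.mu.Lock()
    defer s.mu.Unlock()
    s.configured = ok
}

// done is the reader side, analogous to doneNetworkConfigure.
func (s *networkStatus) done() bool {
    s.mu.Lock()
    defer s.mu.Unlock()
    return s.configured
}

func main() {
    var status networkStatus
    fmt.Println("configured:", status.done()) // false until the sync goroutine flips it
    status.set(true)
    fmt.Println("configured:", status.done())
}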
@@ -1928,16 +1961,6 @@ func (kl *Kubelet) setNodeStatus(node *api.Node) error {
        }
    }

    networkConfigured := true
    if kl.configureCBR0 {
        if len(node.Spec.PodCIDR) == 0 {
            networkConfigured = false
        } else if err := kl.reconcileCBR0(node.Spec.PodCIDR); err != nil {
            networkConfigured = false
            glog.Errorf("Error configuring cbr0: %v", err)
        }
    }

    // TODO: Post NotReady if we cannot get MachineInfo from cAdvisor. This needs to start
    // cAdvisor locally, e.g. for test-cmd.sh, and in integration test.
    info, err := kl.GetCachedMachineInfo()
@@ -1981,6 +2004,8 @@ func (kl *Kubelet) setNodeStatus(node *api.Node) error {

    // Check whether container runtime can be reported as up.
    containerRuntimeUp := kl.containerRuntimeUp()
    // Check whether network is configured properly
    networkConfigured := kl.doneNetworkConfigure()

    currentTime := util.Now()
    var newNodeReadyCondition api.NodeCondition
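With cbr0 reconciliation moved out of setNodeStatus, node readiness is now computed from two independently maintained signals: the runtime heartbeat and the cached network flag. A simplified sketch of how the two booleans could feed the Ready condition (readyCondition stands in for api.NodeCondition, which the real code populates with more fields):

package main

import "fmt"

// readyCondition is a simplified stand-in for api.NodeCondition.
type readyCondition struct {
    status string
    reason string
}

// nodeReady mirrors the shape of the check in setNodeStatus: the node is
// reported Ready only when the runtime is up and the network is configured.
func nodeReady(containerRuntimeUp, networkConfigured bool) readyCondition {
    if containerRuntimeUp && networkConfigured {
        return readyCondition{status: "True", reason: "runtime up and network configured"}
    }
    return readyCondition{status: "False", reason: "runtime down or network not configured"}
}

func main() {
    fmt.Println(nodeReady(true, false)) // network not configured yet: not Ready
    fmt.Println(nodeReady(true, true))
}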
@@ -2049,6 +2074,12 @@ func (kl *Kubelet) containerRuntimeUp() bool {
    return kl.lastTimestampRuntimeUp.Add(kl.runtimeUpThreshold).After(time.Now())
}

func (kl *Kubelet) doneNetworkConfigure() bool {
    kl.networkConfigMutex.Lock()
    defer kl.networkConfigMutex.Unlock()
    return kl.networkConfigured
}

// tryUpdateNodeStatus tries to update node status to master. If ReconcileCBR0
// is set, this function will also confirm that cbr0 is configured correctly.
func (kl *Kubelet) tryUpdateNodeStatus() error {
@@ -2059,6 +2090,8 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
    if node == nil {
        return fmt.Errorf("no node instance returned for %q", kl.nodeName)
    }
    kl.podCIDR = node.Spec.PodCIDR

    if err := kl.setNodeStatus(node); err != nil {
        return err
    }