kubernetes/pkg/kubelet/node_manager.go
Yu-Ju Hong e7d1e47f31 kubelet: move all node status related methods to a separate file
The methods for registering a node and syncing node status to the apiserver
have grown large enough that it makes sense for them to live in a separate
place. This change adds a nodeManager to handle such interaction with the
apiserver.
2015-09-16 17:17:42 -07:00

/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kubelet
// Note: if you change code in this file, you might need to change code in
// contrib/mesos/pkg/executor/.
import (
"fmt"
"net"
"strings"
"sync"
"time"
"github.com/golang/glog"
cadvisorApi "github.com/google/cadvisor/info/v1"
"k8s.io/kubernetes/pkg/api"
apierrors "k8s.io/kubernetes/pkg/api/errors"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/client/record"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/version"
)
const (
// nodeStatusUpdateRetry specifies how many times kubelet retries when posting node status fails.
nodeStatusUpdateRetry = 5
)
// oldNodeUnschedulable tracks the last observed value of node.Spec.Unschedulable
// so that setNodeStatus can record an event when it changes.
var oldNodeUnschedulable bool
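// infoGetter abstracts the locally gathered machine, version, container
// runtime, and network information the kubelet needs to build node status.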
type infoGetter interface {
GetMachineInfo() (*cadvisorApi.MachineInfo, error)
ContainerRuntimeUp() bool
NetworkConfigured() bool
GetVersionInfo() (*cadvisorApi.VersionInfo, error)
}
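// nodeManager keeps the node registered with the apiserver and its status up
// to date, and caches the pod CIDR observed in node.Spec. Callers construct a
// manager and invoke Start once during kubelet initialization.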
type nodeManager interface {
Start()
GetPodCIDR() string
}
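// realNodeManager is the default nodeManager implementation, backed by the
// apiserver client and, optionally, a cloud provider.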
type realNodeManager struct {
// apiserver client.
client client.Interface
// Set to true to have the node register itself with the apiserver.
registerNode bool
// nodeStatusUpdateFrequency specifies how often kubelet posts node status to master.
// Note: be cautious when changing this value; it must work with
// nodeMonitorGracePeriod in nodecontroller. There are several constraints:
// 1. nodeMonitorGracePeriod must be N times more than nodeStatusUpdateFrequency,
// where N is the number of retries allowed for kubelet to post node status.
// It is pointless to make nodeMonitorGracePeriod less than
// nodeStatusUpdateFrequency, since there will only be fresh values from the
// kubelet at an interval of nodeStatusUpdateFrequency. The value must also be
// less than podEvictionTimeout.
// 2. nodeStatusUpdateFrequency needs to be large enough for kubelet to generate
// node status. Kubelet may fail to update node status reliably if the value is
// too small, as it takes time to gather all necessary node information.
nodeStatusUpdateFrequency time.Duration
// Cloud provider interface
cloud cloudprovider.Interface
nodeName string
hostname string
// Number of Pods which can be run by this Kubelet.
pods int
// The EventRecorder to use
recorder record.EventRecorder
// Ports opened by daemons on the node running this kubelet.
daemonEndpoints *api.NodeDaemonEndpoints
// Interface to get machine and version info.
infoGetter infoGetter
// Reference to this node.
nodeRef *api.ObjectReference
// podCIDR may be updated by node.Spec.
podCIDR string
// For internal bookkeeping; accessed only from within registerWithApiserver.
registrationCompleted bool
lock sync.RWMutex
}
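// newRealNodeManager creates a realNodeManager with the given dependencies.
// The returned manager is inert until Start is called.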
func newRealNodeManager(client client.Interface, cloud cloudprovider.Interface, registerNode bool,
nodeStatusUpdateFrequency time.Duration, recorder record.EventRecorder, nodeName, hostname, podCIDR string,
pods int, infoGetter infoGetter, daemonEndpoints *api.NodeDaemonEndpoints, nodeRef *api.ObjectReference) *realNodeManager {
return &realNodeManager{
client: client,
cloud: cloud,
registerNode: registerNode,
nodeStatusUpdateFrequency: nodeStatusUpdateFrequency,
recorder: recorder,
nodeName: nodeName,
hostname: hostname,
podCIDR: podCIDR,
pods: pods,
infoGetter: infoGetter,
daemonEndpoints: daemonEndpoints,
nodeRef: nodeRef,
}
}
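// Start begins the periodic node status sync loop. It is a no-op in
// standalone mode, i.e. when no apiserver client is configured.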
func (nm *realNodeManager) Start() {
if nm.client != nil {
go util.Until(nm.syncNodeStatus, nm.nodeStatusUpdateFrequency, util.NeverStop)
}
}
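// GetPodCIDR returns the pod CIDR last observed in node.Spec.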
func (nm *realNodeManager) GetPodCIDR() string {
nm.lock.RLock()
defer nm.lock.RUnlock()
return nm.podCIDR
}
// syncNodeStatus should be called periodically from a goroutine.
// It synchronizes node status to master, registering the kubelet first if
// necessary.
func (nm *realNodeManager) syncNodeStatus() {
if nm.registerNode {
// This will exit immediately if it doesn't need to do anything.
nm.registerWithApiserver()
}
if err := nm.updateNodeStatus(); err != nil {
glog.Errorf("Unable to update node status: %v", err)
}
}
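// initialNodeStatus builds the api.Node object used for registration. When a
// cloud provider is configured, the external and provider IDs are taken from
// it; otherwise the hostname is used as the external ID.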
func (nm *realNodeManager) initialNodeStatus() (*api.Node, error) {
node := &api.Node{
ObjectMeta: api.ObjectMeta{
Name: nm.nodeName,
Labels: map[string]string{"kubernetes.io/hostname": nm.hostname},
},
}
if nm.cloud != nil {
instances, ok := nm.cloud.Instances()
if !ok {
return nil, fmt.Errorf("failed to get instances from cloud provider")
}
// TODO(roberthbailey): Can we do this without having credentials to talk
// to the cloud provider?
// TODO: ExternalID is deprecated, we'll have to drop this code
externalID, err := instances.ExternalID(nm.nodeName)
if err != nil {
return nil, fmt.Errorf("failed to get external ID from cloud provider: %v", err)
}
node.Spec.ExternalID = externalID
// TODO: We can't assume that the node has credentials to talk to the
// cloudprovider from arbitrary nodes. At most, we should talk to a
// local metadata server here.
node.Spec.ProviderID, err = cloudprovider.GetInstanceProviderID(nm.cloud, nm.nodeName)
if err != nil {
return nil, err
}
} else {
node.Spec.ExternalID = nm.hostname
}
if err := nm.setNodeStatus(node); err != nil {
return nil, err
}
return node, nil
}
// registerWithApiserver registers the node with the cluster master. It is safe
// to call multiple times, but not concurrently (nm.registrationCompleted is
// not locked).
func (nm *realNodeManager) registerWithApiserver() {
if nm.registrationCompleted {
return
}
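// Retry with exponential backoff, capped at 7 seconds between attempts.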
step := 100 * time.Millisecond
for {
time.Sleep(step)
step = step * 2
if step >= 7*time.Second {
step = 7 * time.Second
}
node, err := nm.initialNodeStatus()
if err != nil {
glog.Errorf("Unable to construct api.Node object for kubelet: %v", err)
continue
}
glog.V(2).Infof("Attempting to register node %s", node.Name)
if _, err := nm.client.Nodes().Create(node); err != nil {
if !apierrors.IsAlreadyExists(err) {
glog.V(2).Infof("Unable to register %s with the apiserver: %v", node.Name, err)
continue
}
currentNode, err := nm.client.Nodes().Get(nm.nodeName)
if err != nil {
glog.Errorf("error getting node %q: %v", nm.nodeName, err)
continue
}
if currentNode == nil {
glog.Errorf("no node instance returned for %q", nm.nodeName)
continue
}
if currentNode.Spec.ExternalID == node.Spec.ExternalID {
glog.Infof("Node %s was previously registered", node.Name)
nm.registrationCompleted = true
return
}
glog.Errorf(
"Previously node %q had externalID %q; now it is %q; will delete and recreate.",
nm.nodeName, currentNode.Spec.ExternalID, node.Spec.ExternalID,
)
if err := nm.client.Nodes().Delete(node.Name); err != nil {
glog.Errorf("Unable to delete old node: %v", err)
} else {
glog.Errorf("Deleted old node object %q", nm.nodeName)
}
continue
}
glog.Infof("Successfully registered node %s", node.Name)
nm.registrationCompleted = true
return
}
}
// setNodeStatus fills in the Status fields of the given Node, overwriting
// any fields that are currently set.
func (nm *realNodeManager) setNodeStatus(node *api.Node) error {
// Set addresses for the node.
if nm.cloud != nil {
instances, ok := nm.cloud.Instances()
if !ok {
return fmt.Errorf("failed to get instances from cloud provider")
}
// TODO(roberthbailey): Can we do this without having credentials to talk
// to the cloud provider?
// TODO(justinsb): We can if CurrentNodeName() was actually CurrentNode() and returned an interface
nodeAddresses, err := instances.NodeAddresses(nm.nodeName)
if err != nil {
return fmt.Errorf("failed to get node address from cloud provider: %v", err)
}
node.Status.Addresses = nodeAddresses
} else {
addr := net.ParseIP(nm.hostname)
if addr != nil {
node.Status.Addresses = []api.NodeAddress{
{Type: api.NodeLegacyHostIP, Address: addr.String()},
{Type: api.NodeInternalIP, Address: addr.String()},
}
} else {
addrs, err := net.LookupIP(node.Name)
if err != nil {
return fmt.Errorf("can't get ip address of node %s: %v", node.Name, err)
} else if len(addrs) == 0 {
return fmt.Errorf("no ip address for node %v", node.Name)
} else {
// Check all ip addresses for this node.Name and use the first non-loopback
// IPv4 address. If no match is found, fall back to the IP of the interface
// with a gateway on it (util.ChooseHostInterface).
for _, ip := range addrs {
if ip.IsLoopback() {
continue
}
if ip.To4() != nil {
node.Status.Addresses = []api.NodeAddress{
{Type: api.NodeLegacyHostIP, Address: ip.String()},
{Type: api.NodeInternalIP, Address: ip.String()},
}
break
}
}
if len(node.Status.Addresses) == 0 {
ip, err := util.ChooseHostInterface()
if err != nil {
return err
}
node.Status.Addresses = []api.NodeAddress{
{Type: api.NodeLegacyHostIP, Address: ip.String()},
{Type: api.NodeInternalIP, Address: ip.String()},
}
}
}
}
}
// TODO: Post NotReady if we cannot get MachineInfo from cAdvisor. This needs to start
// cAdvisor locally, e.g. for test-cmd.sh, and in integration test.
info, err := nm.infoGetter.GetMachineInfo()
if err != nil {
// TODO(roberthbailey): This is required for test-cmd.sh to pass.
// See if the test should be updated instead.
node.Status.Capacity = api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(0, resource.DecimalSI),
api.ResourceMemory: resource.MustParse("0Gi"),
api.ResourcePods: *resource.NewQuantity(int64(nm.pods), resource.DecimalSI),
}
glog.Errorf("Error getting machine info: %v", err)
} else {
node.Status.NodeInfo.MachineID = info.MachineID
node.Status.NodeInfo.SystemUUID = info.SystemUUID
node.Status.Capacity = CapacityFromMachineInfo(info)
node.Status.Capacity[api.ResourcePods] = *resource.NewQuantity(
int64(nm.pods), resource.DecimalSI)
if node.Status.NodeInfo.BootID != "" &&
node.Status.NodeInfo.BootID != info.BootID {
// TODO: This requires a transaction, either both node status is updated
// and event is recorded or neither should happen, see issue #6055.
nm.recorder.Eventf(nm.nodeRef, "Rebooted",
"Node %s has been rebooted, boot id: %s", nm.nodeName, info.BootID)
}
node.Status.NodeInfo.BootID = info.BootID
}
verinfo, err := nm.infoGetter.GetVersionInfo()
if err != nil {
glog.Errorf("Error getting version info: %v", err)
} else {
node.Status.NodeInfo.KernelVersion = verinfo.KernelVersion
node.Status.NodeInfo.OsImage = verinfo.ContainerOsVersion
// TODO: Determine whether the runtime is docker or rkt.
node.Status.NodeInfo.ContainerRuntimeVersion = "docker://" + verinfo.DockerVersion
node.Status.NodeInfo.KubeletVersion = version.Get().String()
// TODO: kube-proxy might be different version from kubelet in the future
node.Status.NodeInfo.KubeProxyVersion = version.Get().String()
}
node.Status.DaemonEndpoints = *nm.daemonEndpoints
// Check whether container runtime can be reported as up.
containerRuntimeUp := nm.infoGetter.ContainerRuntimeUp()
// Check whether network is configured properly
networkConfigured := nm.infoGetter.NetworkConfigured()
currentTime := util.Now()
var newNodeReadyCondition api.NodeCondition
var oldNodeReadyConditionStatus api.ConditionStatus
if containerRuntimeUp && networkConfigured {
newNodeReadyCondition = api.NodeCondition{
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: "KubeletReady",
Message: "kubelet is posting ready status",
LastHeartbeatTime: currentTime,
}
} else {
var messages []string
if !containerRuntimeUp {
messages = append(messages, "container runtime is down")
}
if !networkConfigured {
messages = append(messages, "network not configured correctly")
}
newNodeReadyCondition = api.NodeCondition{
Type: api.NodeReady,
Status: api.ConditionFalse,
Reason: "KubeletNotReady",
Message: strings.Join(messages, ","),
LastHeartbeatTime: currentTime,
}
}
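// Update an existing NodeReady condition in place, resetting
// LastTransitionTime only when the status actually changes.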
updated := false
for i := range node.Status.Conditions {
if node.Status.Conditions[i].Type == api.NodeReady {
oldNodeReadyConditionStatus = node.Status.Conditions[i].Status
if oldNodeReadyConditionStatus == newNodeReadyCondition.Status {
newNodeReadyCondition.LastTransitionTime = node.Status.Conditions[i].LastTransitionTime
} else {
newNodeReadyCondition.LastTransitionTime = currentTime
}
node.Status.Conditions[i] = newNodeReadyCondition
updated = true
}
}
if !updated {
newNodeReadyCondition.LastTransitionTime = currentTime
node.Status.Conditions = append(node.Status.Conditions, newNodeReadyCondition)
}
if !updated || oldNodeReadyConditionStatus != newNodeReadyCondition.Status {
if newNodeReadyCondition.Status == api.ConditionTrue {
nm.recordNodeStatusEvent("NodeReady")
} else {
nm.recordNodeStatusEvent("NodeNotReady")
}
}
if oldNodeUnschedulable != node.Spec.Unschedulable {
if node.Spec.Unschedulable {
nm.recordNodeStatusEvent("NodeNotSchedulable")
} else {
nm.recordNodeStatusEvent("NodeSchedulable")
}
oldNodeUnschedulable = node.Spec.Unschedulable
}
return nil
}
// updateNodeStatus updates node status to master with retries.
func (nm *realNodeManager) updateNodeStatus() error {
for i := 0; i < nodeStatusUpdateRetry; i++ {
if err := nm.tryUpdateNodeStatus(); err != nil {
glog.Errorf("Error updating node status, will retry: %v", err)
} else {
return nil
}
}
return fmt.Errorf("update node status exceeds retry count")
}
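// recordNodeStatusEvent generates an event of the given name reflecting the
// node's new status.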
func (nm *realNodeManager) recordNodeStatusEvent(event string) {
glog.V(2).Infof("Recording %s event message for node %s", event, nm.nodeName)
// TODO: This requires a transaction, either both node status is updated
// and event is recorded or neither should happen, see issue #6055.
nm.recorder.Eventf(nm.nodeRef, event, "Node %s status is now: %s", nm.nodeName, event)
}
// tryUpdateNodeStatus attempts a single node status update: it fetches the
// node object from the apiserver, records the pod CIDR from node.Spec,
// refreshes the status fields via setNodeStatus, and posts the result back.
func (nm *realNodeManager) tryUpdateNodeStatus() error {
node, err := nm.client.Nodes().Get(nm.nodeName)
if err != nil {
return fmt.Errorf("error getting node %q: %v", nm.nodeName, err)
}
if node == nil {
return fmt.Errorf("no node instance returned for %q", nm.nodeName)
}
nm.lock.Lock()
defer nm.lock.Unlock()
nm.podCIDR = node.Spec.PodCIDR
if err := nm.setNodeStatus(node); err != nil {
return err
}
// Update the current status on the API server
_, err = nm.client.Nodes().UpdateStatus(node)
return err
}